diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox')
219 files changed, 14725 insertions, 5108 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig index 825e05fb8607..0b1cb340206f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig @@ -7,6 +7,7 @@ config MLX4_EN tristate "Mellanox Technologies 1/10/40Gbit Ethernet support" depends on PCI && NETDEVICES && ETHERNET && INET depends on PTP_1588_CLOCK_OPTIONAL + select PAGE_POOL select MLX4_CORE help This driver supports Mellanox Technologies ConnectX Ethernet diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index b330020dc0d6..07b061a97a6e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -526,28 +526,6 @@ out: return res; } -u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, u32 uid, u32 obj, u32 count) -{ - struct mlx4_zone_entry *zone; - int res = 0; - - spin_lock(&zones->lock); - - zone = __mlx4_find_zone_by_uid(zones, uid); - - if (NULL == zone) { - res = -1; - goto out; - } - - __mlx4_free_from_zone(zone, obj, count); - -out: - spin_unlock(&zones->lock); - - return res; -} - u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count) { struct mlx4_zone_entry *zone; @@ -682,9 +660,9 @@ static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device) } static int mlx4_alloc_db_from_pgdir(struct mlx4_db_pgdir *pgdir, - struct mlx4_db *db, int order) + struct mlx4_db *db, unsigned int order) { - int o; + unsigned int o; int i; for (o = order; o <= 1; ++o) { @@ -712,7 +690,7 @@ found: return 0; } -int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order) +int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, unsigned int order) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_db_pgdir *pgdir; diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 0d8a362c2673..edcc6f662618 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -236,7 +236,7 @@ static void dump_err_buf(struct mlx4_dev *dev) static void poll_catas(struct timer_list *t) { - struct mlx4_priv *priv = from_timer(priv, t, catas_err.timer); + struct mlx4_priv *priv = timer_container_of(priv, t, catas_err.timer); struct mlx4_dev *dev = &priv->dev; u32 slave_read; @@ -305,7 +305,7 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - del_timer_sync(&priv->catas_err.timer); + timer_delete_sync(&priv->catas_err.timer); if (priv->catas_err.map) { iounmap(priv->catas_err.map); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index cd754cd76bde..2aeaafcfb993 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -38,7 +38,7 @@ /* mlx4_en_read_clock - read raw cycle counter (to be used by time counter) */ -static u64 mlx4_en_read_clock(const struct cyclecounter *tc) +static u64 mlx4_en_read_clock(struct cyclecounter *tc) { struct mlx4_en_dev *mdev = container_of(tc, struct mlx4_en_dev, cycles); @@ -249,7 +249,7 @@ static const struct ptp_clock_info mlx4_en_ptp_clock_info = { static u32 freq_to_shift(u16 freq) { u32 freq_khz = freq * 1000; - u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; + u64 max_val_cycles = freq_khz * 1000ULL * MLX4_EN_WRAP_AROUND_SEC; u64 max_val_cycles_rounded = 1ULL << fls64(max_val_cycles - 1); /* calculate max possible multiplier in order to fit in 64bit */ u64 max_mul = div64_u64(ULLONG_MAX, max_val_cycles_rounded); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c index 752a72499b4f..be80da03a594 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c @@ -290,9 +290,6 @@ static int mlx4_en_dcbnl_ieee_getets(struct net_device *dev, struct mlx4_en_priv *priv = netdev_priv(dev); struct ieee_ets *my_ets = &priv->ets; - if (!my_ets) - return -EINVAL; - ets->ets_cap = IEEE_8021QAZ_MAX_TCS; ets->cbs = my_ets->cbs; memcpy(ets->tc_tx_bw, my_ets->tc_tx_bw, sizeof(ets->tc_tx_bw)); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index cd17a3f4faf8..a68cd3f0304c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1897,6 +1897,7 @@ static int mlx4_en_get_ts_info(struct net_device *dev, if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) { info->so_timestamping |= SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 281b34af0bb4..d2071aff7b8f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2670,8 +2670,7 @@ static int mlx4_udp_tunnel_sync(struct net_device *dev, unsigned int table) static const struct udp_tunnel_nic_info mlx4_udp_tunnels = { .sync_table = mlx4_udp_tunnel_sync, - .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | - UDP_TUNNEL_NIC_INFO_IPV4_ONLY, + .flags = UDP_TUNNEL_NIC_INFO_IPV4_ONLY, .tables = { { .n_entries = 1, .tunnel_types = UDP_TUNNEL_TYPE_VXLAN, }, }, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 15c57e9517e9..92a16ddb7d86 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -48,60 +48,43 @@ #if IS_ENABLED(CONFIG_IPV6) #include <net/ip6_checksum.h> #endif +#include <net/page_pool/helpers.h> #include "mlx4_en.h" -static int mlx4_alloc_page(struct mlx4_en_priv *priv, - struct mlx4_en_rx_alloc *frag, - gfp_t gfp) -{ - struct page *page; - dma_addr_t dma; - - page = alloc_page(gfp); - if (unlikely(!page)) - return -ENOMEM; - dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir); - if (unlikely(dma_mapping_error(priv->ddev, dma))) { - __free_page(page); - return -ENOMEM; - } - frag->page = page; - frag->dma = dma; - frag->page_offset = priv->rx_headroom; - return 0; -} - static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_alloc *frags, gfp_t gfp) { + dma_addr_t dma; int i; for (i = 0; i < priv->num_frags; i++, frags++) { if (!frags->page) { - if (mlx4_alloc_page(priv, frags, gfp)) { + frags->page = page_pool_alloc_pages(ring->pp, gfp); + if (!frags->page) { ring->alloc_fail++; return -ENOMEM; } + page_pool_fragment_page(frags->page, 1); + frags->page_offset = priv->rx_headroom; + ring->rx_alloc_pages++; } - rx_desc->data[i].addr = cpu_to_be64(frags->dma + - frags->page_offset); + dma = page_pool_get_dma_addr(frags->page); + rx_desc->data[i].addr = cpu_to_be64(dma + frags->page_offset); } return 0; } static void mlx4_en_free_frag(const struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_alloc *frag) { - if (frag->page) { - dma_unmap_page(priv->ddev, frag->dma, - PAGE_SIZE, priv->dma_dir); - __free_page(frag->page); - } + if (frag->page) + page_pool_put_full_page(ring->pp, frag->page, false); /* We need to clear all fields, otherwise a change of priv->log_rx_info * could lead to see garbage later in frag->page. */ @@ -141,18 +124,6 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, (index << ring->log_stride); struct mlx4_en_rx_alloc *frags = ring->rx_info + (index << priv->log_rx_info); - if (likely(ring->page_cache.index > 0)) { - /* XDP uses a single page per frame */ - if (!frags->page) { - ring->page_cache.index--; - frags->page = ring->page_cache.buf[ring->page_cache.index].page; - frags->dma = ring->page_cache.buf[ring->page_cache.index].dma; - } - frags->page_offset = XDP_PACKET_HEADROOM; - rx_desc->data[0].addr = cpu_to_be64(frags->dma + - XDP_PACKET_HEADROOM); - return 0; - } return mlx4_en_alloc_frags(priv, ring, rx_desc, frags, gfp); } @@ -178,7 +149,7 @@ static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv, frags = ring->rx_info + (index << priv->log_rx_info); for (nr = 0; nr < priv->num_frags; nr++) { en_dbg(DRV, priv, "Freeing fragment:%d\n", nr); - mlx4_en_free_frag(priv, frags + nr); + mlx4_en_free_frag(priv, ring, frags + nr); } } @@ -268,6 +239,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, u32 size, u16 stride, int node, int queue_index) { struct mlx4_en_dev *mdev = priv->mdev; + struct page_pool_params pp = {}; struct mlx4_en_rx_ring *ring; int err = -ENOMEM; int tmp; @@ -286,9 +258,26 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride + TXBB_SIZE; - if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0) + pp.flags = PP_FLAG_DMA_MAP; + pp.pool_size = size * DIV_ROUND_UP(priv->rx_skb_size, PAGE_SIZE); + pp.nid = node; + pp.napi = &priv->rx_cq[queue_index]->napi; + pp.netdev = priv->dev; + pp.dev = &mdev->dev->persist->pdev->dev; + pp.dma_dir = priv->dma_dir; + + ring->pp = page_pool_create(&pp); + if (!ring->pp) goto err_ring; + if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0) + goto err_pp; + + err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq, MEM_TYPE_PAGE_POOL, + ring->pp); + if (err) + goto err_xdp_info; + tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * sizeof(struct mlx4_en_rx_alloc)); ring->rx_info = kvzalloc_node(tmp, GFP_KERNEL, node); @@ -319,6 +308,8 @@ err_info: ring->rx_info = NULL; err_xdp_info: xdp_rxq_info_unreg(&ring->xdp_rxq); +err_pp: + page_pool_destroy(ring->pp); err_ring: kfree(ring); *pring = NULL; @@ -409,26 +400,6 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) } } -/* When the rx ring is running in page-per-packet mode, a released frame can go - * directly into a small cache, to avoid unmapping or touching the page - * allocator. In bpf prog performance scenarios, buffers are either forwarded - * or dropped, never converted to skbs, so every page can come directly from - * this cache when it is sized to be a multiple of the napi budget. - */ -bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, - struct mlx4_en_rx_alloc *frame) -{ - struct mlx4_en_page_cache *cache = &ring->page_cache; - - if (cache->index >= MLX4_EN_CACHE_SIZE) - return false; - - cache->buf[cache->index].page = frame->page; - cache->buf[cache->index].dma = frame->dma; - cache->index++; - return true; -} - void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride) @@ -445,6 +416,7 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, xdp_rxq_info_unreg(&ring->xdp_rxq); mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); kvfree(ring->rx_info); + page_pool_destroy(ring->pp); ring->rx_info = NULL; kfree(ring); *pring = NULL; @@ -453,14 +425,6 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { - int i; - - for (i = 0; i < ring->page_cache.index; i++) { - dma_unmap_page(priv->ddev, ring->page_cache.buf[i].dma, - PAGE_SIZE, priv->dma_dir); - put_page(ring->page_cache.buf[i].page); - } - ring->page_cache.index = 0; mlx4_en_free_rx_buf(priv, ring); if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; @@ -487,7 +451,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, if (unlikely(!page)) goto fail; - dma = frags->dma; + dma = page_pool_get_dma_addr(page); dma_sync_single_range_for_cpu(priv->ddev, dma, frags->page_offset, frag_size, priv->dma_dir); @@ -496,8 +460,11 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, truesize += frag_info->frag_stride; if (frag_info->frag_stride == PAGE_SIZE / 2) { + struct netmem_desc *desc = pp_page_to_nmdesc(page); + frags->page_offset ^= PAGE_SIZE / 2; release = page_count(page) != 1 || + atomic_long_read(&desc->pp_ref_count) != 1 || page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id(); } else if (!priv->rx_headroom) { @@ -511,10 +478,9 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, release = frags->page_offset + frag_info->frag_size > PAGE_SIZE; } if (release) { - dma_unmap_page(priv->ddev, dma, PAGE_SIZE, priv->dma_dir); frags->page = NULL; } else { - page_ref_inc(page); + page_pool_ref_page(page); } nr++; @@ -784,7 +750,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* Get pointer to first fragment since we haven't * skb yet and cast it to ethhdr struct */ - dma = frags[0].dma + frags[0].page_offset; + dma = page_pool_get_dma_addr(frags[0].page); + dma += frags[0].page_offset; dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh), DMA_FROM_DEVICE); @@ -823,7 +790,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud void *orig_data; u32 act; - dma = frags[0].dma + frags[0].page_offset; + dma = page_pool_get_dma_addr(frags[0].page); + dma += frags[0].page_offset; dma_sync_single_for_cpu(priv->ddev, dma, priv->frag_info[0].frag_size, DMA_FROM_DEVICE); @@ -886,6 +854,7 @@ xdp_drop_no_cnt: skb = napi_get_frags(&cq->napi); if (unlikely(!skb)) goto next; + skb_mark_for_recycle(skb); if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) { u64 timestamp = mlx4_en_get_cqe_ts(cqe); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 1ddb11cb25f9..87f35bcbeff8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -44,6 +44,7 @@ #include <linux/ipv6.h> #include <linux/indirect_call_wrapper.h> #include <net/ipv6.h> +#include <net/page_pool/helpers.h> #include "mlx4_en.h" @@ -350,16 +351,10 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, int napi_mode) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; - struct mlx4_en_rx_alloc frame = { - .page = tx_info->page, - .dma = tx_info->map0_dma, - }; - - if (!napi_mode || !mlx4_en_rx_recycle(ring->recycle_ring, &frame)) { - dma_unmap_page(priv->ddev, tx_info->map0_dma, - PAGE_SIZE, priv->dma_dir); - put_page(tx_info->page); - } + struct page_pool *pool = ring->recycle_ring->pp; + + /* Note that napi_mode = 0 means ndo_close() path, not budget = 0 */ + page_pool_put_full_page(pool, tx_info->page, !!napi_mode); return tx_info->nr_txbb; } @@ -450,6 +445,8 @@ int mlx4_en_process_tx_cq(struct net_device *dev, if (unlikely(!priv->port_up)) return 0; + if (unlikely(!napi_budget) && cq->type == TX_XDP) + return 0; netdev_txq_bql_complete_prefetchw(ring->tx_queue); @@ -1194,7 +1191,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, tx_desc = ring->buf + (index << LOG_TXBB_SIZE); data = &tx_desc->data; - dma = frame->dma; + dma = page_pool_get_dma_addr(frame->page); tx_info->page = frame->page; frame->page = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index febeadfdd5a5..03d2fc7d9b09 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -49,6 +49,8 @@ #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> +#include <rdma/ib_verbs.h> + #include "mlx4.h" #include "fw.h" #include "icm.h" @@ -1246,14 +1248,6 @@ err_out: return err ? err : count; } -enum ibta_mtu { - IB_MTU_256 = 1, - IB_MTU_512 = 2, - IB_MTU_1024 = 3, - IB_MTU_2048 = 4, - IB_MTU_4096 = 5 -}; - static inline int int_to_ibta_mtu(int mtu) { switch (mtu) { @@ -1266,7 +1260,7 @@ static inline int int_to_ibta_mtu(int mtu) } } -static inline int ibta_mtu_to_int(enum ibta_mtu mtu) +static inline int ibta_mtu_to_int(enum ib_mtu mtu) { switch (mtu) { case IB_MTU_256: return 256; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index d7d856d1758a..b213094ea30f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1478,12 +1478,6 @@ void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc); u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count, int align, u32 skip_mask, u32 *puid); -/* Free <count> objects, start from <obj> of the uid <uid> from zone_allocator - * <zones>. - */ -u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, - u32 uid, u32 obj, u32 count); - /* If <zones> was allocated with MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP, instead of * specifying the uid when freeing an object, zone allocator could figure it by * itself. Other parameters are similar to mlx4_zone_free. diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 28b70dcc652e..ad0d91a75184 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -247,20 +247,11 @@ struct mlx4_en_tx_desc { struct mlx4_en_rx_alloc { struct page *page; - dma_addr_t dma; u32 page_offset; }; #define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT) -struct mlx4_en_page_cache { - u32 index; - struct { - struct page *page; - dma_addr_t dma; - } buf[MLX4_EN_CACHE_SIZE]; -}; - enum { MLX4_EN_TX_RING_STATE_RECOVERING, }; @@ -335,14 +326,14 @@ struct mlx4_en_rx_ring { u16 stride; u16 log_stride; u16 cqn; /* index of port CQ associated with this ring */ + u8 fcs_del; u32 prod; u32 cons; u32 buf_size; - u8 fcs_del; + struct page_pool *pp; void *buf; void *rx_info; struct bpf_prog __rcu *xdp_prog; - struct mlx4_en_page_cache page_cache; unsigned long bytes; unsigned long packets; unsigned long csum_ok; @@ -707,8 +698,6 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring, struct mlx4_en_priv *priv, unsigned int length, int tx_ind, bool *doorbell_pending); void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring); -bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, - struct mlx4_en_rx_alloc *frame); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index d7444782bfdd..698a5d1f0d7e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -106,7 +106,7 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) buddy->max_order = max_order; spin_lock_init(&buddy->lock); - buddy->bits = kcalloc(buddy->max_order + 1, sizeof(long *), + buddy->bits = kcalloc(buddy->max_order + 1, sizeof(*buddy->bits), GFP_KERNEL); buddy->num_free = kcalloc(buddy->max_order + 1, sizeof(*buddy->num_free), GFP_KERNEL); diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 4e43f4a7d246..e3d0b13c1610 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -147,26 +147,6 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port, return err; } -int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx) -{ - struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; - struct mlx4_mac_table *table = &info->mac_table; - int i; - - for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { - if (!table->refs[i]) - continue; - - if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) { - *idx = i; - return 0; - } - } - - return -ENOENT; -} -EXPORT_SYMBOL_GPL(mlx4_find_cached_mac); - static bool mlx4_need_mf_bond(struct mlx4_dev *dev) { int i, num_eth_ports = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index ea6070180c96..6ec7d6e0181d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -31,6 +31,7 @@ config MLX5_CORE_EN bool "Mellanox 5th generation network adapters (ConnectX series) Ethernet support" depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE select PAGE_POOL + select PAGE_POOL_STATS select DIMLIB help Ethernet support in Mellanox Technologies ConnectX-4 NIC. @@ -80,8 +81,8 @@ config MLX5_BRIDGE default y help mlx5 ConnectX offloads support for Ethernet Bridging (BRIDGE). - Enable adding representors of mlx5 uplink and VF ports to Bridge and - offloading rules for traffic between such ports. Supports VLANs (trunk and + Enable offloading FDB rules from a bridge device containing + representors of mlx5 uplink and VF ports. Supports VLANs (trunk and access modes). config MLX5_CLS_ACT diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index be3d0876c521..a253c73db9e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -17,7 +17,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \ - fw_reset.o qos.o lib/tout.o lib/aso.o wc.o + fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o # # Netdev basic @@ -29,7 +29,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en/rqt.o en/tir.o en/rss.o en/rx_res.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ en/qos.o en/htb.o en/trap.o en/fs_tt_redirect.o en/selq.o \ - lib/crypto.o lib/sd.o + lib/crypto.o lib/sd.o en/pcie_cong_event.o # # Netdev extra @@ -60,6 +60,7 @@ mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en/tc/act/act.o en/tc/act/drop.o en/tc/a ifneq ($(CONFIG_MLX5_TC_CT),) mlx5_core-y += en/tc_ct.o en/tc/ct_fs_dmfs.o mlx5_core-$(CONFIG_MLX5_SW_STEERING) += en/tc/ct_fs_smfs.o + mlx5_core-$(CONFIG_MLX5_HW_STEERING) += en/tc/ct_fs_hmfs.o endif mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o @@ -123,6 +124,7 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/sws/dr_domain.o \ steering/sws/dr_ste_v0.o \ steering/sws/dr_ste_v1.o \ steering/sws/dr_ste_v2.o \ + steering/sws/dr_ste_v3.o \ steering/sws/dr_cmd.o \ steering/sws/dr_fw.o \ steering/sws/dr_action.o \ @@ -150,8 +152,10 @@ mlx5_core-$(CONFIG_MLX5_HW_STEERING) += steering/hws/cmd.o \ steering/hws/bwc.o \ steering/hws/debug.o \ steering/hws/vport.o \ - steering/hws/bwc_complex.o - + steering/hws/bwc_complex.o \ + steering/hws/fs_hws_pools.o \ + steering/hws/fs_hws.o \ + steering/hws/action_ste_pool.o # # SF device @@ -163,5 +167,10 @@ mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o irq_ # mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o sf/hw_table.o sf/devlink.o +# +# TPH support +# +mlx5_core-$(CONFIG_PCIE_TPH) += lib/st.o + obj-$(CONFIG_MLX5_DPLL) += mlx5_dpll.o mlx5_dpll-y := dpll.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 6bd8a18e3af3..e395ef5f356e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -94,6 +94,11 @@ static u16 in_to_opcode(void *in) return MLX5_GET(mbox_in, in, opcode); } +static u16 in_to_uid(void *in) +{ + return MLX5_GET(mbox_in, in, uid); +} + /* Returns true for opcodes that might be triggered very frequently and throttle * the command interface. Limit their command slots usage. */ @@ -823,7 +828,7 @@ static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) opcode = in_to_opcode(in); op_mod = MLX5_GET(mbox_in, in, op_mod); - uid = MLX5_GET(mbox_in, in, uid); + uid = in_to_uid(in); status = MLX5_GET(mbox_out, out, status); if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY && @@ -922,8 +927,7 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force static void cb_timeout_handler(struct work_struct *work) { - struct delayed_work *dwork = container_of(work, struct delayed_work, - work); + struct delayed_work *dwork = to_delayed_work(work); struct mlx5_cmd_work_ent *ent = container_of(dwork, struct mlx5_cmd_work_ent, cb_timeout_work); @@ -1013,6 +1017,7 @@ static void cmd_work_handler(struct work_struct *work) complete(&ent->done); } up(&cmd->vars.sem); + complete(&ent->slotted); return; } } else { @@ -1870,6 +1875,17 @@ static int is_manage_pages(void *in) return in_to_opcode(in) == MLX5_CMD_OP_MANAGE_PAGES; } +static bool mlx5_has_privileged_uid(struct mlx5_core_dev *dev) +{ + return !xa_empty(&dev->cmd.vars.privileged_uids); +} + +static bool mlx5_cmd_is_privileged_uid(struct mlx5_core_dev *dev, + u16 uid) +{ + return !!xa_load(&dev->cmd.vars.privileged_uids, uid); +} + /* Notes: * 1. Callback functions may not sleep * 2. Page queue commands do not support asynchrous completion @@ -1880,7 +1896,9 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, { struct mlx5_cmd_msg *inb, *outb; u16 opcode = in_to_opcode(in); - bool throttle_op; + bool throttle_locked = false; + bool unpriv_locked = false; + u16 uid = in_to_uid(in); int pages_queue; gfp_t gfp; u8 token; @@ -1889,12 +1907,17 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) return -ENXIO; - throttle_op = mlx5_cmd_is_throttle_opcode(opcode); - if (throttle_op) { - if (callback) { - if (down_trylock(&dev->cmd.vars.throttle_sem)) - return -EBUSY; - } else { + if (!callback) { + /* The semaphore is already held for callback commands. It was + * acquired in mlx5_cmd_exec_cb() + */ + if (uid && mlx5_has_privileged_uid(dev)) { + if (!mlx5_cmd_is_privileged_uid(dev, uid)) { + unpriv_locked = true; + down(&dev->cmd.vars.unprivileged_sem); + } + } else if (mlx5_cmd_is_throttle_opcode(opcode)) { + throttle_locked = true; down(&dev->cmd.vars.throttle_sem); } } @@ -1924,8 +1947,8 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context, pages_queue, token, force_polling); - if (callback) - return err; + if (callback && !err) + return 0; if (err > 0) /* Failed in FW, command didn't execute */ err = deliv_status_to_err(err); @@ -1940,8 +1963,11 @@ out_out: out_in: free_msg(dev, inb); out_up: - if (throttle_op) + if (throttle_locked) up(&dev->cmd.vars.throttle_sem); + if (unpriv_locked) + up(&dev->cmd.vars.unprivileged_sem); + return err; } @@ -2103,18 +2129,22 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work) struct mlx5_async_work *work = _work; struct mlx5_async_ctx *ctx; struct mlx5_core_dev *dev; - u16 opcode; + bool throttle_locked; + bool unpriv_locked; ctx = work->ctx; dev = ctx->dev; - opcode = work->opcode; + throttle_locked = work->throttle_locked; + unpriv_locked = work->unpriv_locked; status = cmd_status_err(dev, status, work->opcode, work->op_mod, work->out); work->user_callback(status, work); /* Can't access "work" from this point on. It could have been freed in * the callback. */ - if (mlx5_cmd_is_throttle_opcode(opcode)) + if (throttle_locked) up(&dev->cmd.vars.throttle_sem); + if (unpriv_locked) + up(&dev->cmd.vars.unprivileged_sem); if (atomic_dec_and_test(&ctx->num_inflight)) complete(&ctx->inflight_done); } @@ -2123,6 +2153,8 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, void *out, int out_size, mlx5_async_cbk_t callback, struct mlx5_async_work *work) { + struct mlx5_core_dev *dev = ctx->dev; + u16 uid; int ret; work->ctx = ctx; @@ -2130,11 +2162,43 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, work->opcode = in_to_opcode(in); work->op_mod = MLX5_GET(mbox_in, in, op_mod); work->out = out; + work->throttle_locked = false; + work->unpriv_locked = false; + uid = in_to_uid(in); + if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) return -EIO; - ret = cmd_exec(ctx->dev, in, in_size, out, out_size, + + if (uid && mlx5_has_privileged_uid(dev)) { + if (!mlx5_cmd_is_privileged_uid(dev, uid)) { + if (down_trylock(&dev->cmd.vars.unprivileged_sem)) { + ret = -EBUSY; + goto dec_num_inflight; + } + work->unpriv_locked = true; + } + } else if (mlx5_cmd_is_throttle_opcode(in_to_opcode(in))) { + if (down_trylock(&dev->cmd.vars.throttle_sem)) { + ret = -EBUSY; + goto dec_num_inflight; + } + work->throttle_locked = true; + } + + ret = cmd_exec(dev, in, in_size, out, out_size, mlx5_cmd_exec_cb_handler, work, false); - if (ret && atomic_dec_and_test(&ctx->num_inflight)) + if (ret) + goto sem_up; + + return 0; + +sem_up: + if (work->throttle_locked) + up(&dev->cmd.vars.throttle_sem); + if (work->unpriv_locked) + up(&dev->cmd.vars.unprivileged_sem); +dec_num_inflight: + if (atomic_dec_and_test(&ctx->num_inflight)) complete(&ctx->inflight_done); return ret; @@ -2370,10 +2434,16 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev) sema_init(&cmd->vars.sem, cmd->vars.max_reg_cmds); sema_init(&cmd->vars.pages_sem, 1); sema_init(&cmd->vars.throttle_sem, DIV_ROUND_UP(cmd->vars.max_reg_cmds, 2)); + sema_init(&cmd->vars.unprivileged_sem, + DIV_ROUND_UP(cmd->vars.max_reg_cmds, 2)); + + xa_init(&cmd->vars.privileged_uids); cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); - if (!cmd->pool) - return -ENOMEM; + if (!cmd->pool) { + err = -ENOMEM; + goto err_destroy_xa; + } err = alloc_cmd_page(dev, cmd); if (err) @@ -2407,6 +2477,8 @@ err_cmd_page: free_cmd_page(dev, cmd); err_free_pool: dma_pool_destroy(cmd->pool); +err_destroy_xa: + xa_destroy(&dev->cmd.vars.privileged_uids); return err; } @@ -2419,6 +2491,7 @@ void mlx5_cmd_disable(struct mlx5_core_dev *dev) destroy_msg_cache(dev); free_cmd_page(dev, cmd); dma_pool_destroy(cmd->pool); + xa_destroy(&dev->cmd.vars.privileged_uids); } void mlx5_cmd_set_state(struct mlx5_core_dev *dev, @@ -2426,3 +2499,18 @@ void mlx5_cmd_set_state(struct mlx5_core_dev *dev, { dev->cmd.state = cmdif_state; } + +int mlx5_cmd_add_privileged_uid(struct mlx5_core_dev *dev, u16 uid) +{ + return xa_insert(&dev->cmd.vars.privileged_uids, uid, + xa_mk_value(uid), GFP_KERNEL); +} +EXPORT_SYMBOL(mlx5_cmd_add_privileged_uid); + +void mlx5_cmd_remove_privileged_uid(struct mlx5_core_dev *dev, u16 uid) +{ + void *data = xa_erase(&dev->cmd.vars.privileged_uids, uid); + + WARN(!data, "Privileged UID %u does not exist\n", uid); +} +EXPORT_SYMBOL(mlx5_cmd_remove_privileged_uid); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 9a79674d27f1..891bbbbfbbf1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -228,8 +228,15 @@ enum { MLX5_INTERFACE_PROTOCOL_VNET, MLX5_INTERFACE_PROTOCOL_DPLL, + MLX5_INTERFACE_PROTOCOL_FWCTL, }; +static bool is_fwctl_supported(struct mlx5_core_dev *dev) +{ + /* fwctl is most useful on PFs, prevent fwctl on SFs for now */ + return MLX5_CAP_GEN(dev, uctx_cap) && !mlx5_core_is_sf(dev); +} + static const struct mlx5_adev_device { const char *suffix; bool (*is_supported)(struct mlx5_core_dev *dev); @@ -252,6 +259,8 @@ static const struct mlx5_adev_device { .is_supported = &is_mp_supported }, [MLX5_INTERFACE_PROTOCOL_DPLL] = { .suffix = "dpll", .is_supported = &is_dpll_supported }, + [MLX5_INTERFACE_PROTOCOL_FWCTL] = { .suffix = "fwctl", + .is_supported = &is_fwctl_supported }, }; int mlx5_adev_idx_alloc(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 98d4306929f3..3ffa3fbacd16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -35,6 +35,55 @@ static u16 mlx5_fw_ver_subminor(u32 version) return version & 0xffff; } +static int mlx5_devlink_serial_numbers_put(struct mlx5_core_dev *dev, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct pci_dev *pdev = dev->pdev; + unsigned int vpd_size, kw_len; + char *str, *end; + u8 *vpd_data; + int err = 0; + int start; + + vpd_data = pci_vpd_alloc(pdev, &vpd_size); + if (IS_ERR(vpd_data)) + return 0; + + start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, + PCI_VPD_RO_KEYWORD_SERIALNO, &kw_len); + if (start >= 0) { + str = kstrndup(vpd_data + start, kw_len, GFP_KERNEL); + if (!str) { + err = -ENOMEM; + goto end; + } + end = strchrnul(str, ' '); + *end = '\0'; + err = devlink_info_board_serial_number_put(req, str); + kfree(str); + if (err) + goto end; + } + + start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, "V3", &kw_len); + if (start >= 0) { + str = kstrndup(vpd_data + start, kw_len, GFP_KERNEL); + if (!str) { + err = -ENOMEM; + goto end; + } + err = devlink_info_serial_number_put(req, str); + kfree(str); + if (err) + goto end; + } + +end: + kfree(vpd_data); + return err; +} + #define DEVLINK_FW_STRING_LEN 32 static int @@ -46,6 +95,13 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, u32 running_fw, stored_fw; int err; + if (!mlx5_core_is_pf(dev)) + return 0; + + err = mlx5_devlink_serial_numbers_put(dev, req, extack); + if (err) + return err; + err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id); if (err) return err; @@ -320,11 +376,14 @@ static const struct devlink_ops mlx5_devlink_ops = { .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, .rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set, .rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set, + .rate_leaf_tc_bw_set = mlx5_esw_devlink_rate_leaf_tc_bw_set, + .rate_node_tc_bw_set = mlx5_esw_devlink_rate_node_tc_bw_set, .rate_node_tx_share_set = mlx5_esw_devlink_rate_node_tx_share_set, .rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set, .rate_node_new = mlx5_esw_devlink_rate_node_new, .rate_node_del = mlx5_esw_devlink_rate_node_del, - .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set, + .rate_leaf_parent_set = mlx5_esw_devlink_rate_leaf_parent_set, + .rate_node_parent_set = mlx5_esw_devlink_rate_node_parent_set, #endif #ifdef CONFIG_MLX5_SF_MANAGER .port_new = mlx5_devlink_sf_port_new, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index 9aed29fa4900..d6e736c1fb24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -292,7 +292,7 @@ TRACE_EVENT(mlx5_fs_add_rule, if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER) __entry->counter_id = - rule->dest_attr.counter_id; + mlx5_fc_id(rule->dest_attr.counter); ), TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n", __entry->rule, __entry->fte, __entry->index, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c index c7216e84ef8c..86253a89c24c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c @@ -13,6 +13,50 @@ struct mlx5_vnic_diag_stats { __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)]; }; +static void mlx5_reporter_vnic_diagnose_counter_icm(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg, + u16 vport_num, bool other_vport) +{ + u32 out_icm_reg[MLX5_ST_SZ_DW(vhca_icm_ctrl_reg)] = {}; + u32 in_icm_reg[MLX5_ST_SZ_DW(vhca_icm_ctrl_reg)] = {}; + u32 out_reg[MLX5_ST_SZ_DW(nic_cap_reg)] = {}; + u32 in_reg[MLX5_ST_SZ_DW(nic_cap_reg)] = {}; + u32 cur_alloc_icm; + int vhca_icm_ctrl; + u16 vhca_id; + int err; + + err = mlx5_core_access_reg(dev, in_reg, sizeof(in_reg), out_reg, + sizeof(out_reg), MLX5_REG_NIC_CAP, 0, 0); + if (err) { + mlx5_core_warn(dev, "Reading nic_cap_reg failed. err = %d\n", err); + return; + } + vhca_icm_ctrl = MLX5_GET(nic_cap_reg, out_reg, vhca_icm_ctrl); + if (!vhca_icm_ctrl) + return; + + MLX5_SET(vhca_icm_ctrl_reg, in_icm_reg, vhca_id_valid, other_vport); + if (other_vport) { + err = mlx5_vport_get_vhca_id(dev, vport_num, &vhca_id); + if (err) { + mlx5_core_warn(dev, "vport to vhca_id failed. vport_num = %d, err = %d\n", + vport_num, err); + return; + } + MLX5_SET(vhca_icm_ctrl_reg, in_icm_reg, vhca_id, vhca_id); + } + err = mlx5_core_access_reg(dev, in_icm_reg, sizeof(in_icm_reg), + out_icm_reg, sizeof(out_icm_reg), + MLX5_REG_VHCA_ICM_CTRL, 0, 0); + if (err) { + mlx5_core_warn(dev, "Reading vhca_icm_ctrl failed. err = %d\n", err); + return; + } + cur_alloc_icm = MLX5_GET(vhca_icm_ctrl_reg, out_icm_reg, cur_alloc_icm); + devlink_fmsg_u32_pair_put(fmsg, "icm_consumption", cur_alloc_icm); +} + void mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, struct devlink_fmsg *fmsg, u16 vport_num, bool other_vport) @@ -59,6 +103,8 @@ void mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail", VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail)); } + if (MLX5_CAP_GEN(dev, nic_cap_reg)) + mlx5_reporter_vnic_diagnose_counter_icm(dev, fmsg, vport_num, other_vport); devlink_fmsg_obj_nest_end(fmsg); devlink_fmsg_pair_nest_end(fmsg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c index 31142f6cc372..1e5522a19483 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dpll.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dpll.c @@ -242,7 +242,7 @@ static int mlx5_dpll_clock_quality_level_get(const struct dpll_device *dpll, return 0; } errout: - NL_SET_ERR_MSG_MOD(extack, "Invalid clock quality level obtained from firmware\n"); + NL_SET_ERR_MSG_MOD(extack, "Invalid clock quality level obtained from firmware"); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 979fc56205e1..0dd3bc0f4caa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -84,9 +84,10 @@ struct page_pool; #define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9) #define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE) #define MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE (PAGE_SHIFT - MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE) -#define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64) -#define MLX5E_SHAMPO_WQ_RESRV_SIZE (64 * 1024) -#define MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE (4096) +#define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE_SHIFT (6) +#define MLX5E_SHAMPO_WQ_RESRV_SIZE_BASE_SHIFT (12) +#define MLX5E_SHAMPO_WQ_LOG_RESRV_SIZE (16) +#define MLX5E_SHAMPO_WQ_RESRV_SIZE BIT(MLX5E_SHAMPO_WQ_LOG_RESRV_SIZE) #define MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev) \ (6 + MLX5_CAP_GEN(mdev, cache_line_128byte)) /* HW restriction */ @@ -95,8 +96,6 @@ struct page_pool; #define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) \ MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, order_base_2(MLX5E_RX_MAX_HEAD)) -#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18 - /* Keep in sync with mlx5e_mpwrq_log_wqe_sz. * These are theoretical maximums, which can be further restricted by * capabilities. These values are used for static resource allocations and @@ -232,16 +231,22 @@ struct mlx5e_rx_wqe_cyc { DECLARE_FLEX_ARRAY(struct mlx5_wqe_data_seg, data); }; -struct mlx5e_umr_wqe { +struct mlx5e_umr_wqe_hdr { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_umr_ctrl_seg uctrl; struct mlx5_mkey_seg mkc; +}; + +struct mlx5e_umr_wqe { + struct mlx5e_umr_wqe_hdr hdr; union { DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts); DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms); DECLARE_FLEX_ARRAY(struct mlx5_ksm, inline_ksms); }; }; +static_assert(offsetof(struct mlx5e_umr_wqe, inline_mtts) == sizeof(struct mlx5e_umr_wqe_hdr), + "struct members should be included in struct mlx5e_umr_wqe_hdr, not in struct mlx5e_umr_wqe"); enum mlx5e_priv_flag { MLX5E_PFLAG_RX_CQE_BASED_MODER, @@ -274,10 +279,6 @@ enum packet_merge { struct mlx5e_packet_merge_param { enum packet_merge type; u32 timeout; - struct { - u8 match_criteria_type; - u8 alignment_granularity; - } shampo; }; struct mlx5e_params { @@ -374,7 +375,7 @@ struct mlx5e_sq_dma { enum mlx5e_dma_map_type type; }; -/* Keep this enum consistent with with the corresponding strings array +/* Keep this enum consistent with the corresponding strings array * declared in en/reporter_tx.c */ enum { @@ -383,10 +384,8 @@ enum { MLX5E_SQ_STATE_RECOVERING, MLX5E_SQ_STATE_IPSEC, MLX5E_SQ_STATE_DIM, - MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, MLX5E_SQ_STATE_PENDING_XSK_TX, MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, - MLX5E_SQ_STATE_XDP_MULTIBUF, MLX5E_NUM_SQ_STATES, /* Must be kept last */ }; @@ -395,6 +394,7 @@ struct mlx5e_tx_mpwqe { struct mlx5e_tx_wqe *wqe; u32 bytes_count; u8 ds_count; + u8 ds_count_max; u8 pkt_count; u8 inline_on; }; @@ -516,6 +516,12 @@ struct mlx5e_xdpsq { struct mlx5e_channel *channel; } ____cacheline_aligned_in_smp; +struct mlx5e_xdp_buff { + struct xdp_buff xdp; + struct mlx5_cqe64 *cqe; + struct mlx5e_rq *rq; +}; + struct mlx5e_ktls_resync_resp; struct mlx5e_icosq { @@ -547,7 +553,7 @@ struct mlx5e_icosq { } ____cacheline_aligned_in_smp; struct mlx5e_frag_page { - struct page *page; + netmem_ref netmem; u16 frags; }; @@ -624,15 +630,13 @@ struct mlx5e_dma_info { }; struct mlx5e_shampo_hd { - u32 mkey; struct mlx5e_frag_page *pages; u32 hd_per_wq; u16 hd_per_wqe; - u16 pages_per_wq; unsigned long *bitmap; u16 pi; u16 ci; - __be32 key; + __be32 mkey_be; }; struct mlx5e_hw_gro_data { @@ -660,7 +664,7 @@ struct mlx5e_rq { } wqe; struct { struct mlx5_wq_ll wq; - struct mlx5e_umr_wqe umr_wqe; + struct mlx5e_umr_wqe_hdr umr_wqe; struct mlx5e_mpw_info *info; mlx5e_fp_skb_from_cqe_mpwrq skb_from_cqe_mpwrq; __be32 umr_mkey_be; @@ -711,12 +715,18 @@ struct mlx5e_rq { struct bpf_prog __rcu *xdp_prog; struct mlx5e_xdpsq *xdpsq; DECLARE_BITMAP(flags, 8); + + /* page pools */ struct page_pool *page_pool; + struct page_pool *hd_page_pool; + + struct mlx5e_xdp_buff mxbuf; /* AF_XDP zero-copy */ struct xsk_buff_pool *xsk_pool; struct work_struct recover_work; + struct work_struct rx_timeout_work; /* control */ struct mlx5_wq_ctrl wq_ctrl; @@ -911,6 +921,8 @@ struct mlx5e_priv { struct notifier_block events_nb; struct notifier_block blocking_events_nb; + struct mlx5e_pcie_cong_event *cong_event; + struct udp_tunnel_nic_info nic_info; #ifdef CONFIG_MLX5_CORE_EN_DCB struct mlx5e_dcbx dcbx; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h index b59aee75de94..2c98a5299df3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h @@ -26,7 +26,6 @@ struct mlx5e_dcbx { u8 cap; /* Buffer configuration */ - bool manual_buffer; u32 cable_len; u32 xoff; u16 port_buff_cell_sz; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 1e8b7d330701..9560fcba643f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -18,7 +18,8 @@ enum { enum { MLX5E_TC_PRIO = 0, - MLX5E_NIC_PRIO + MLX5E_PROMISC_PRIO, + MLX5E_NIC_PRIO, }; struct mlx5e_flow_table { @@ -68,9 +69,13 @@ struct mlx5e_l2_table { MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) -/* NIC prio FTS */ +/* NIC promisc FT level */ enum { MLX5E_PROMISC_FT_LEVEL, +}; + +/* NIC prio FTS */ +enum { MLX5E_VLAN_FT_LEVEL, MLX5E_L2_FT_LEVEL, MLX5E_TTC_FT_LEVEL, @@ -84,9 +89,9 @@ enum { MLX5E_ARFS_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, #endif #ifdef CONFIG_MLX5_EN_IPSEC - MLX5E_ACCEL_FS_POL_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, - MLX5E_ACCEL_FS_ESP_FT_LEVEL, + MLX5E_ACCEL_FS_ESP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL, + MLX5E_ACCEL_FS_POL_FT_LEVEL, MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL, #endif }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h index 9e276fd3c0cf..c21fe36527a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_ethtool.h @@ -11,6 +11,11 @@ int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool); void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool); void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs); void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs); +int mlx5e_ethtool_set_rxfh_fields(struct mlx5e_priv *priv, + const struct ethtool_rxfh_fields *nfc, + struct netlink_ext_ack *extack); +int mlx5e_ethtool_get_rxfh_fields(struct mlx5e_priv *priv, + struct ethtool_rxfh_fields *nfc); int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd); int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *info, u32 *rule_locs); @@ -20,6 +25,15 @@ static inline int mlx5e_ethtool_alloc(struct mlx5e_ethtool_steering **ethtool) static inline void mlx5e_ethtool_free(struct mlx5e_ethtool_steering *ethtool) { } static inline void mlx5e_ethtool_init_steering(struct mlx5e_flow_steering *fs) { } static inline void mlx5e_ethtool_cleanup_steering(struct mlx5e_flow_steering *fs) { } +static inline int +mlx5e_ethtool_set_rxfh_fields(struct mlx5e_priv *priv, + const struct ethtool_rxfh_fields *nfc, + struct netlink_ext_ack *extack) +{ return -EOPNOTSUPP; } +static inline int +mlx5e_ethtool_get_rxfh_fields(struct mlx5e_priv *priv, + struct ethtool_rxfh_fields *nfc) +{ return -EOPNOTSUPP; } static inline int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd) { return -EOPNOTSUPP; } static inline int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c index 81523825faa2..cb972b2d46e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c @@ -114,6 +114,7 @@ int mlx5e_health_recover_channels(struct mlx5e_priv *priv) int err = 0; rtnl_lock(); + netdev_lock(priv->netdev); mutex_lock(&priv->state_lock); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) @@ -123,6 +124,7 @@ int mlx5e_health_recover_channels(struct mlx5e_priv *priv) out: mutex_unlock(&priv->state_lock); + netdev_unlock(priv->netdev); rtnl_unlock(); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 64b62ed17b07..3cca06a74cf9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -10,6 +10,9 @@ #include <net/page_pool/types.h> #include <net/xdp_sock_drv.h> +#define MLX5_MPWRQ_MAX_LOG_WQE_SZ 18 +#define MLX5_REP_MPWRQ_MAX_LOG_WQE_SZ 17 + static u8 mlx5e_mpwrq_min_page_shift(struct mlx5_core_dev *mdev) { u8 min_page_shift = MLX5_CAP_GEN_2(mdev, log_min_mkey_entity_size); @@ -103,18 +106,22 @@ u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, enum mlx5e_mpwrq_umr_mode umr_mode) { u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); - u8 max_pages_per_wqe, max_log_mpwqe_size; + u8 max_pages_per_wqe, max_log_wqe_size_calc; + u8 max_log_wqe_size_cap; u16 max_wqe_size; /* Keep in sync with MLX5_MPWRQ_MAX_PAGES_PER_WQE. */ max_wqe_size = mlx5e_get_max_sq_aligned_wqebbs(mdev) * MLX5_SEND_WQE_BB; max_pages_per_wqe = ALIGN_DOWN(max_wqe_size - sizeof(struct mlx5e_umr_wqe), MLX5_UMR_FLEX_ALIGNMENT) / umr_entry_size; - max_log_mpwqe_size = ilog2(max_pages_per_wqe) + page_shift; + max_log_wqe_size_calc = ilog2(max_pages_per_wqe) + page_shift; + + WARN_ON_ONCE(max_log_wqe_size_calc < MLX5E_ORDER2_MAX_PACKET_MTU); - WARN_ON_ONCE(max_log_mpwqe_size < MLX5E_ORDER2_MAX_PACKET_MTU); + max_log_wqe_size_cap = mlx5_core_is_ecpf(mdev) ? + MLX5_REP_MPWRQ_MAX_LOG_WQE_SZ : MLX5_MPWRQ_MAX_LOG_WQE_SZ; - return min_t(u8, max_log_mpwqe_size, MLX5_MPWRQ_MAX_LOG_WQE_SZ); + return min_t(u8, max_log_wqe_size_calc, max_log_wqe_size_cap); } u8 mlx5e_mpwrq_pages_per_wqe(struct mlx5_core_dev *mdev, u8 page_shift, @@ -407,25 +414,10 @@ u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev, return params->log_rq_mtu_frames - log_pkts_per_wqe; } -u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - return order_base_2(DIV_ROUND_UP(MLX5E_RX_MAX_HEAD, MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE)); -} - -u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) -{ - return order_base_2(MLX5E_SHAMPO_WQ_RESRV_SIZE / MLX5E_SHAMPO_WQ_BASE_RESRV_SIZE); -} - -u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) +static u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5e_params *params) { - u32 resrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * - PAGE_SIZE; - - return order_base_2(DIV_ROUND_UP(resrv_size, params->sw_mtu)); + return order_base_2(DIV_ROUND_UP(MLX5E_SHAMPO_WQ_RESRV_SIZE, + params->sw_mtu)); } u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, @@ -827,12 +819,12 @@ static u32 mlx5e_shampo_get_log_cq_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - int rsrv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE; u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); - int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(params)); int wq_size = BIT(mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); int wqe_size = BIT(log_stride_sz) * num_strides; + int rsrv_size = MLX5E_SHAMPO_WQ_RESRV_SIZE; /* +1 is for the case that the pkt_per_rsrv dont consume the reservation * so we get a filler cqe for the rest of the reservation. @@ -893,6 +885,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, { void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 lro_timeout; int ndsegs = 1; int err; @@ -918,22 +911,27 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, MLX5_SET(wq, wq, log_wqe_stride_size, log_wqe_stride_size - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk)); - if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { - MLX5_SET(wq, wq, shampo_enable, true); - MLX5_SET(wq, wq, log_reservation_size, - mlx5e_shampo_get_log_rsrv_size(mdev, params)); - MLX5_SET(wq, wq, - log_max_num_of_packets_per_reservation, - mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); - MLX5_SET(wq, wq, log_headers_entry_size, - mlx5e_shampo_get_log_hd_entry_size(mdev, params)); - MLX5_SET(rqc, rqc, reservation_timeout, - mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_SHAMPO_TIMEOUT)); - MLX5_SET(rqc, rqc, shampo_match_criteria_type, - params->packet_merge.shampo.match_criteria_type); - MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, - params->packet_merge.shampo.alignment_granularity); - } + if (params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO) + break; + + MLX5_SET(wq, wq, shampo_enable, true); + MLX5_SET(wq, wq, log_reservation_size, + MLX5E_SHAMPO_WQ_LOG_RESRV_SIZE - + MLX5E_SHAMPO_WQ_RESRV_SIZE_BASE_SHIFT); + MLX5_SET(wq, wq, + log_max_num_of_packets_per_reservation, + mlx5e_shampo_get_log_pkt_per_rsrv(params)); + MLX5_SET(wq, wq, log_headers_entry_size, + MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE - + MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE_SHIFT); + lro_timeout = + mlx5e_choose_lro_timeout(mdev, + MLX5E_DEFAULT_SHAMPO_TIMEOUT); + MLX5_SET(rqc, rqc, reservation_timeout, lro_timeout); + MLX5_SET(rqc, rqc, shampo_match_criteria_type, + MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED); + MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, + MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE); break; } default: /* MLX5_WQ_TYPE_CYCLIC */ @@ -1036,17 +1034,17 @@ u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rq_param) { - int resv_size = BIT(mlx5e_shampo_get_log_rsrv_size(mdev, params)) * PAGE_SIZE; u16 num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, NULL)); - int pkt_per_resv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(mdev, params)); u8 log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL); + int pkt_per_rsrv = BIT(mlx5e_shampo_get_log_pkt_per_rsrv(params)); int wqe_size = BIT(log_stride_sz) * num_strides; + int rsrv_size = MLX5E_SHAMPO_WQ_RESRV_SIZE; u32 hd_per_wqe; /* Assumption: hd_per_wqe % 8 == 0. */ - hd_per_wqe = (wqe_size / resv_size) * pkt_per_resv; - mlx5_core_dbg(mdev, "%s hd_per_wqe = %d rsrv_size = %d wqe_size = %d pkt_per_resv = %d\n", - __func__, hd_per_wqe, resv_size, wqe_size, pkt_per_resv); + hd_per_wqe = (wqe_size / rsrv_size) * pkt_per_rsrv; + mlx5_core_dbg(mdev, "%s hd_per_wqe = %d rsrv_size = %d wqe_size = %d pkt_per_rsrv = %d\n", + __func__, hd_per_wqe, rsrv_size, wqe_size, pkt_per_rsrv); return hd_per_wqe; } @@ -1240,7 +1238,6 @@ void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, mlx5e_build_sq_param_common(mdev, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE); - param->is_xdp_mb = !mlx5e_rx_is_linear_skb(mdev, params, xsk); mlx5e_build_tx_cq_param(mdev, params, ¶m->cqp); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 3f8986f9d862..488ccdbc1e2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -33,7 +33,6 @@ struct mlx5e_sq_param { struct mlx5_wq_param wq; bool is_mpw; bool is_tls; - bool is_xdp_mb; u16 stop_room; }; @@ -96,12 +95,6 @@ bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); -u8 mlx5e_shampo_get_log_hd_entry_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); -u8 mlx5e_shampo_get_log_rsrv_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); -u8 mlx5e_shampo_get_log_pkt_per_rsrv(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rq_param); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c new file mode 100644 index 000000000000..0ed017569a19 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. + +#include "en.h" +#include "pcie_cong_event.h" + +#define MLX5E_CONG_HIGH_STATE 0x7 + +enum { + MLX5E_INBOUND_CONG = BIT(0), + MLX5E_OUTBOUND_CONG = BIT(1), +}; + +struct mlx5e_pcie_cong_thresh { + u16 inbound_high; + u16 inbound_low; + u16 outbound_high; + u16 outbound_low; +}; + +struct mlx5e_pcie_cong_stats { + u32 pci_bw_inbound_high; + u32 pci_bw_inbound_low; + u32 pci_bw_outbound_high; + u32 pci_bw_outbound_low; +}; + +struct mlx5e_pcie_cong_event { + u64 obj_id; + + struct mlx5e_priv *priv; + + /* For event notifier and workqueue. */ + struct work_struct work; + struct mlx5_nb nb; + + /* Stores last read state. */ + u8 state; + + /* For ethtool stats group. */ + struct mlx5e_pcie_cong_stats stats; +}; + +/* In units of 0.01 % */ +static const struct mlx5e_pcie_cong_thresh default_thresh_config = { + .inbound_high = 9000, + .inbound_low = 7500, + .outbound_high = 9000, + .outbound_low = 7500, +}; + +static const struct counter_desc mlx5e_pcie_cong_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats, + pci_bw_inbound_high) }, + { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats, + pci_bw_inbound_low) }, + { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats, + pci_bw_outbound_high) }, + { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats, + pci_bw_outbound_low) }, +}; + +#define NUM_PCIE_CONG_COUNTERS ARRAY_SIZE(mlx5e_pcie_cong_stats_desc) + +static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(pcie_cong) +{ + return priv->cong_event ? NUM_PCIE_CONG_COUNTERS : 0; +} + +static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pcie_cong) {} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(pcie_cong) +{ + if (!priv->cong_event) + return; + + for (int i = 0; i < NUM_PCIE_CONG_COUNTERS; i++) + ethtool_puts(data, mlx5e_pcie_cong_stats_desc[i].format); +} + +static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pcie_cong) +{ + if (!priv->cong_event) + return; + + for (int i = 0; i < NUM_PCIE_CONG_COUNTERS; i++) { + u32 ctr = MLX5E_READ_CTR32_CPU(&priv->cong_event->stats, + mlx5e_pcie_cong_stats_desc, + i); + + mlx5e_ethtool_put_stat(data, ctr); + } +} + +MLX5E_DEFINE_STATS_GRP(pcie_cong, 0); + +static int +mlx5_cmd_pcie_cong_event_set(struct mlx5_core_dev *dev, + const struct mlx5e_pcie_cong_thresh *config, + u64 *obj_id) +{ + u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + void *cong_obj; + void *hdr; + int err; + + hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr); + cong_obj = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, cong_obj); + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, + MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT); + + MLX5_SET(pcie_cong_event_obj, cong_obj, inbound_event_en, 1); + MLX5_SET(pcie_cong_event_obj, cong_obj, outbound_event_en, 1); + + MLX5_SET(pcie_cong_event_obj, cong_obj, + inbound_cong_high_threshold, config->inbound_high); + MLX5_SET(pcie_cong_event_obj, cong_obj, + inbound_cong_low_threshold, config->inbound_low); + + MLX5_SET(pcie_cong_event_obj, cong_obj, + outbound_cong_high_threshold, config->outbound_high); + MLX5_SET(pcie_cong_event_obj, cong_obj, + outbound_cong_low_threshold, config->outbound_low); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + mlx5_core_dbg(dev, "PCIe congestion event (obj_id=%llu) created. Config: in: [%u, %u], out: [%u, %u]\n", + *obj_id, + config->inbound_high, config->inbound_low, + config->outbound_high, config->outbound_low); + + return 0; +} + +static int mlx5_cmd_pcie_cong_event_destroy(struct mlx5_core_dev *dev, + u64 obj_id) +{ + u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + void *hdr; + + hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr); + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, + MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_id, obj_id); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_cmd_pcie_cong_event_query(struct mlx5_core_dev *dev, + u64 obj_id, + u32 *state) +{ + u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {}; + u32 out[MLX5_ST_SZ_DW(pcie_cong_event_cmd_out)]; + void *obj; + void *hdr; + u8 cong; + int err; + + hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr); + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, + MLX5_CMD_OP_QUERY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, + MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_id, obj_id); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + obj = MLX5_ADDR_OF(pcie_cong_event_cmd_out, out, cong_obj); + + if (state) { + cong = MLX5_GET(pcie_cong_event_obj, obj, inbound_cong_state); + if (cong == MLX5E_CONG_HIGH_STATE) + *state |= MLX5E_INBOUND_CONG; + + cong = MLX5_GET(pcie_cong_event_obj, obj, outbound_cong_state); + if (cong == MLX5E_CONG_HIGH_STATE) + *state |= MLX5E_OUTBOUND_CONG; + } + + return 0; +} + +static void mlx5e_pcie_cong_event_work(struct work_struct *work) +{ + struct mlx5e_pcie_cong_event *cong_event; + struct mlx5_core_dev *dev; + struct mlx5e_priv *priv; + u32 new_cong_state = 0; + u32 changes; + int err; + + cong_event = container_of(work, struct mlx5e_pcie_cong_event, work); + priv = cong_event->priv; + dev = priv->mdev; + + err = mlx5_cmd_pcie_cong_event_query(dev, cong_event->obj_id, + &new_cong_state); + if (err) { + mlx5_core_warn(dev, "Error %d when querying PCIe cong event object (obj_id=%llu).\n", + err, cong_event->obj_id); + return; + } + + changes = cong_event->state ^ new_cong_state; + if (!changes) + return; + + cong_event->state = new_cong_state; + + if (changes & MLX5E_INBOUND_CONG) { + if (new_cong_state & MLX5E_INBOUND_CONG) + cong_event->stats.pci_bw_inbound_high++; + else + cong_event->stats.pci_bw_inbound_low++; + } + + if (changes & MLX5E_OUTBOUND_CONG) { + if (new_cong_state & MLX5E_OUTBOUND_CONG) + cong_event->stats.pci_bw_outbound_high++; + else + cong_event->stats.pci_bw_outbound_low++; + } +} + +static int mlx5e_pcie_cong_event_handler(struct notifier_block *nb, + unsigned long event, void *eqe) +{ + struct mlx5e_pcie_cong_event *cong_event; + + cong_event = mlx5_nb_cof(nb, struct mlx5e_pcie_cong_event, nb); + queue_work(cong_event->priv->wq, &cong_event->work); + + return NOTIFY_OK; +} + +int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv) +{ + struct mlx5e_pcie_cong_event *cong_event; + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (!mlx5_pcie_cong_event_supported(mdev)) + return 0; + + cong_event = kvzalloc_node(sizeof(*cong_event), GFP_KERNEL, + mdev->priv.numa_node); + if (!cong_event) + return -ENOMEM; + + INIT_WORK(&cong_event->work, mlx5e_pcie_cong_event_work); + MLX5_NB_INIT(&cong_event->nb, mlx5e_pcie_cong_event_handler, + OBJECT_CHANGE); + + cong_event->priv = priv; + + err = mlx5_cmd_pcie_cong_event_set(mdev, &default_thresh_config, + &cong_event->obj_id); + if (err) { + mlx5_core_warn(mdev, "Error creating a PCIe congestion event object\n"); + goto err_free; + } + + err = mlx5_eq_notifier_register(mdev, &cong_event->nb); + if (err) { + mlx5_core_warn(mdev, "Error registering notifier for the PCIe congestion event\n"); + goto err_obj_destroy; + } + + priv->cong_event = cong_event; + + return 0; + +err_obj_destroy: + mlx5_cmd_pcie_cong_event_destroy(mdev, cong_event->obj_id); +err_free: + kvfree(cong_event); + + return err; +} + +void mlx5e_pcie_cong_event_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_pcie_cong_event *cong_event = priv->cong_event; + struct mlx5_core_dev *mdev = priv->mdev; + + if (!cong_event) + return; + + priv->cong_event = NULL; + + mlx5_eq_notifier_unregister(mdev, &cong_event->nb); + cancel_work_sync(&cong_event->work); + + if (mlx5_cmd_pcie_cong_event_destroy(mdev, cong_event->obj_id)) + mlx5_core_warn(mdev, "Error destroying PCIe congestion event (obj_id=%llu)\n", + cong_event->obj_id); + + kvfree(cong_event); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.h b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.h new file mode 100644 index 000000000000..b1ea46bf648a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __MLX5_PCIE_CONG_EVENT_H__ +#define __MLX5_PCIE_CONG_EVENT_H__ + +int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv); +void mlx5e_pcie_cong_event_cleanup(struct mlx5e_priv *priv); + +#endif /* __MLX5_PCIE_CONG_EVENT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 5f6a0605e4ae..6049ccf475bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -80,6 +80,7 @@ int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable, int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { struct mlx5_port_eth_proto eproto; + const struct mlx5_link_info *info; bool force_legacy = false; bool ext; int err; @@ -94,9 +95,13 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) if (err) goto out; } - *speed = mlx5_port_ptys2speed(mdev, eproto.oper, force_legacy); - if (!(*speed)) + info = mlx5_port_ptys2info(mdev, eproto.oper, force_legacy); + if (!info) { + *speed = SPEED_UNKNOWN; err = -EINVAL; + goto out; + } + *speed = info->speed; out: return err; @@ -296,11 +301,16 @@ enum mlx5e_fec_supported_link_mode { MLX5E_FEC_SUPPORTED_LINK_MODE_200G_2X, MLX5E_FEC_SUPPORTED_LINK_MODE_400G_4X, MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X, + MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X, + MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X, + MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X, + MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X, MLX5E_MAX_FEC_SUPPORTED_LINK_MODE, }; #define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X #define MLX5E_FEC_FIRST_100G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_100G_1X +#define MLX5E_FEC_FIRST_200G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X #define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \ do { \ @@ -320,8 +330,10 @@ static bool mlx5e_is_fec_supported_link_mode(struct mlx5_core_dev *dev, return link_mode < MLX5E_FEC_FIRST_50G_PER_LANE_MODE || (link_mode < MLX5E_FEC_FIRST_100G_PER_LANE_MODE && MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm)) || - (link_mode >= MLX5E_FEC_FIRST_100G_PER_LANE_MODE && - MLX5_CAP_PCAM_FEATURE(dev, fec_100G_per_lane_in_pplm)); + (link_mode < MLX5E_FEC_FIRST_200G_PER_LANE_MODE && + MLX5_CAP_PCAM_FEATURE(dev, fec_100G_per_lane_in_pplm)) || + (link_mode >= MLX5E_FEC_FIRST_200G_PER_LANE_MODE && + MLX5_CAP_PCAM_FEATURE(dev, fec_200G_per_lane_in_pplm)); } /* get/set FEC admin field for a given speed */ @@ -368,6 +380,18 @@ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X: MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 800g_8x); break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 800g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 1600g_8x); + break; default: return -EINVAL; } @@ -421,6 +445,18 @@ static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap, case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X: *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 800g_8x); break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 800g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 1600g_8x); + break; default: return -EINVAL; } @@ -494,6 +530,26 @@ out: return 0; } +static u16 mlx5e_remap_fec_conf_mode(enum mlx5e_fec_supported_link_mode link_mode, + u16 conf_fec) +{ + /* RS fec in ethtool is originally mapped to MLX5E_FEC_RS_528_514. + * For link modes up to 25G per lane, the value is kept. + * For 50G or 100G per lane, it's remapped to MLX5E_FEC_RS_544_514. + * For 200G per lane, remapped to MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD. + */ + if (conf_fec != BIT(MLX5E_FEC_RS_528_514)) + return conf_fec; + + if (link_mode >= MLX5E_FEC_FIRST_200G_PER_LANE_MODE) + return BIT(MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD); + + if (link_mode >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE) + return BIT(MLX5E_FEC_RS_544_514); + + return conf_fec; +} + int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) { bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); @@ -530,14 +586,7 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) if (!mlx5e_is_fec_supported_link_mode(dev, i)) break; - /* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514 - * to link modes up to 25G per lane and to - * MLX5E_FEC_RS_544_514 in the new link modes based on - * 50G or 100G per lane - */ - if (conf_fec == (1 << MLX5E_FEC_RS_528_514) && - i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE) - conf_fec = (1 << MLX5E_FEC_RS_544_514); + conf_fec = mlx5e_remap_fec_conf_mode(i, conf_fec); mlx5e_get_fec_cap_field(out, &fec_caps, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index d1da225f35da..fa2283dd383b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -61,6 +61,7 @@ enum { MLX5E_FEC_NOFEC, MLX5E_FEC_FIRECODE, MLX5E_FEC_RS_528_514, + MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD = 4, MLX5E_FEC_RS_544_514 = 7, MLX5E_FEC_LLRS_272_257_1 = 9, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 8e25f4ef5ccc..3efa8bf1d14e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -272,8 +272,8 @@ static int port_update_shared_buffer(struct mlx5_core_dev *mdev, /* Total shared buffer size is split in a ratio of 3:1 between * lossy and lossless pools respectively. */ - lossy_epool_size = (shared_buffer_size / 4) * 3; lossless_ipool_size = shared_buffer_size / 4; + lossy_epool_size = shared_buffer_size - lossless_ipool_size; mlx5e_port_set_sbpr(mdev, 0, MLX5_EGRESS_DIR, MLX5_LOSSY_POOL, 0, lossy_epool_size); @@ -288,14 +288,12 @@ static int port_set_buffer(struct mlx5e_priv *priv, u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; struct mlx5_core_dev *mdev = priv->mdev; int sz = MLX5_ST_SZ_BYTES(pbmc_reg); - u32 new_headroom_size = 0; - u32 current_headroom_size; + u32 current_headroom_cells = 0; + u32 new_headroom_cells = 0; void *in; int err; int i; - current_headroom_size = port_buffer->headroom_size; - in = kzalloc(sz, GFP_KERNEL); if (!in) return -ENOMEM; @@ -306,12 +304,14 @@ static int port_set_buffer(struct mlx5e_priv *priv, for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { void *buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]); + current_headroom_cells += MLX5_GET(bufferx_reg, buffer, size); + u64 size = port_buffer->buffer[i].size; u64 xoff = port_buffer->buffer[i].xoff; u64 xon = port_buffer->buffer[i].xon; - new_headroom_size += size; do_div(size, port_buff_cell_sz); + new_headroom_cells += size; do_div(xoff, port_buff_cell_sz); do_div(xon, port_buff_cell_sz); MLX5_SET(bufferx_reg, buffer, size, size); @@ -320,10 +320,8 @@ static int port_set_buffer(struct mlx5e_priv *priv, MLX5_SET(bufferx_reg, buffer, xon_threshold, xon); } - new_headroom_size /= port_buff_cell_sz; - current_headroom_size /= port_buff_cell_sz; - err = port_update_shared_buffer(priv->mdev, current_headroom_size, - new_headroom_size); + err = port_update_shared_buffer(priv->mdev, current_headroom_cells, + new_headroom_cells); if (err) goto out; @@ -331,6 +329,9 @@ static int port_set_buffer(struct mlx5e_priv *priv, if (err) goto out; + /* RO bits should be set to 0 on write */ + MLX5_SET(pbmc_reg, in, port_buffer_size, 0); + err = mlx5e_port_set_pbmc(mdev, in); out: kfree(in); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index afd654583b6b..391b4e9c9dc4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -8,6 +8,7 @@ #include "en/fs_tt_redirect.h" #include <linux/list.h> #include <linux/spinlock.h> +#include <net/netdev_lock.h> struct mlx5e_ptp_fs { struct mlx5_flow_handle *l2_rule; @@ -326,7 +327,7 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, int node; sq->pdev = c->pdev; - sq->clock = &mdev->clock; + sq->clock = mdev->clock; sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; sq->priv = c->priv; @@ -339,8 +340,6 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, sq->stats = &c->priv->ptp_stats.sq[tc]; sq->ptpsq = ptpsq; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); - if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) - set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); sq->stop_room = param->stop_room; sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev); @@ -449,8 +448,22 @@ static void mlx5e_ptpsq_unhealthy_work(struct work_struct *work) { struct mlx5e_ptpsq *ptpsq = container_of(work, struct mlx5e_ptpsq, report_unhealthy_work); + struct mlx5e_txqsq *sq = &ptpsq->txqsq; + + /* Recovering the PTP SQ means re-enabling NAPI, which requires the + * netdev instance lock. However, SQ closing has to wait for this work + * task to finish while also holding the same lock. So either get the + * lock or find that the SQ is no longer enabled and thus this work is + * not relevant anymore. + */ + while (!netdev_trylock(sq->netdev)) { + if (!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)) + return; + msleep(20); + } mlx5e_reporter_tx_ptpsq_unhealthy(ptpsq); + netdev_unlock(sq->netdev); } static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn, @@ -696,7 +709,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, rq->pdev = c->pdev; rq->netdev = priv->netdev; rq->priv = priv; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->tstamp = &priv->tstamp; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); @@ -892,7 +905,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, if (err) goto err_free; - netif_napi_add(netdev, &c->napi, mlx5e_ptp_napi_poll); + netif_napi_add_locked(netdev, &c->napi, mlx5e_ptp_napi_poll); mlx5e_ptp_build_params(c, cparams, params); @@ -910,7 +923,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, return 0; err_napi_del: - netif_napi_del(&c->napi); + netif_napi_del_locked(&c->napi); err_free: kvfree(cparams); kvfree(c); @@ -920,7 +933,7 @@ err_free: void mlx5e_ptp_close(struct mlx5e_ptp *c) { mlx5e_ptp_close_queues(c); - netif_napi_del(&c->napi); + netif_napi_del_locked(&c->napi); kvfree(c); } @@ -929,7 +942,7 @@ void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c) { int tc; - napi_enable(&c->napi); + napi_enable_locked(&c->napi); if (test_bit(MLX5E_PTP_STATE_TX, c->state)) { for (tc = 0; tc < c->num_tc; tc++) @@ -957,7 +970,7 @@ void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c) mlx5e_deactivate_txqsq(&c->ptpsq[tc].txqsq); } - napi_disable(&c->napi); + napi_disable_locked(&c->napi); } int mlx5e_ptp_get_rqn(struct mlx5e_ptp *c, u32 *rqn) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index f0744a45db92..4e461cb03b83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -374,7 +374,7 @@ void mlx5e_reactivate_qos_sq(struct mlx5e_priv *priv, u16 qid, struct netdev_que void mlx5e_reset_qdisc(struct net_device *dev, u16 qid) { struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, qid); - struct Qdisc *qdisc = dev_queue->qdisc_sleeping; + struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc_sleeping); if (!qdisc) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 5d128c5b4529..0f5d7ea8956f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -48,15 +48,10 @@ mlx5_esw_bridge_lag_rep_get(struct net_device *dev, struct mlx5_eswitch *esw) struct list_head *iter; netdev_for_each_lower_dev(dev, lower, iter) { - struct mlx5_core_dev *mdev; - struct mlx5e_priv *priv; - if (!mlx5e_eswitch_rep(lower)) continue; - priv = netdev_priv(lower); - mdev = priv->mdev; - if (mlx5_lag_is_shared_fdb(mdev) && mlx5_esw_bridge_dev_same_esw(lower, esw)) + if (mlx5_esw_bridge_dev_same_esw(lower, esw)) return lower; } @@ -125,7 +120,7 @@ static bool mlx5_esw_bridge_is_local(struct net_device *dev, struct net_device * priv = netdev_priv(rep); mdev = priv->mdev; if (netif_is_lag_master(dev)) - return mlx5_lag_is_shared_fdb(mdev) && mlx5_lag_is_master(mdev); + return mlx5_lag_is_master(mdev); return true; } @@ -455,6 +450,9 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, if (!rep) return NOTIFY_DONE; + if (netif_is_lag_master(dev) && !mlx5_lag_is_shared_fdb(esw->dev)) + return NOTIFY_DONE; + switch (event) { case SWITCHDEV_FDB_ADD_TO_BRIDGE: fdb_info = container_of(info, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 25d751eba99b..16c44d628eda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -170,16 +170,23 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx) static int mlx5e_rx_reporter_timeout_recover(void *ctx) { struct mlx5_eq_comp *eq; + struct mlx5e_priv *priv; struct mlx5e_rq *rq; int err; rq = ctx; + priv = rq->priv; + + mutex_lock(&priv->state_lock); + eq = rq->cq.mcq.eq; err = mlx5e_health_channel_eq_recover(rq->netdev, eq, rq->cq.ch_stats); if (err && rq->icosq) clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state); + mutex_unlock(&priv->state_lock); + return err; } @@ -317,10 +324,8 @@ mlx5e_rx_reporter_diagnose_common_ptp_config(struct mlx5e_priv *priv, struct mlx } static void -mlx5e_rx_reporter_diagnose_common_config(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg) +mlx5e_rx_reporter_diagnose_common_config(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { - struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); struct mlx5e_rq *generic_rq = &priv->channels.c[0]->rq; struct mlx5e_ptp *ptp_ch = priv->channels.ptp; @@ -340,20 +345,100 @@ static void mlx5e_rx_reporter_build_diagnose_output_ptp_rq(struct mlx5e_rq *rq, devlink_fmsg_obj_nest_end(fmsg); } -static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, - struct devlink_fmsg *fmsg, - struct netlink_ext_ack *extack) +static void mlx5e_rx_reporter_diagnose_rx_res_dir_tirns(struct mlx5e_rx_res *rx_res, + struct devlink_fmsg *fmsg) { - struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); - struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + unsigned int max_nch = mlx5e_rx_res_get_max_nch(rx_res); int i; - mutex_lock(&priv->state_lock); + devlink_fmsg_arr_pair_nest_start(fmsg, "Direct TIRs"); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto unlock; + for (i = 0; i < max_nch; i++) { + devlink_fmsg_obj_nest_start(fmsg); + + devlink_fmsg_u32_pair_put(fmsg, "ix", i); + devlink_fmsg_u32_pair_put(fmsg, "tirn", mlx5e_rx_res_get_tirn_direct(rx_res, i)); + devlink_fmsg_u32_pair_put(fmsg, "rqtn", mlx5e_rx_res_get_rqtn_direct(rx_res, i)); + + devlink_fmsg_obj_nest_end(fmsg); + } + + devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(struct mlx5e_rss *rss, bool inner, + struct devlink_fmsg *fmsg) +{ + bool found_valid_tir = false; + int tt; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + if (!mlx5e_rss_valid_tir(rss, tt, inner)) + continue; + + if (!found_valid_tir) { + char *tir_msg = inner ? "Inner TIRs Numbers" : "TIRs Numbers"; + + found_valid_tir = true; + devlink_fmsg_arr_pair_nest_start(fmsg, tir_msg); + } + + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_string_pair_put(fmsg, "tt", mlx5_ttc_get_name(tt)); + devlink_fmsg_u32_pair_put(fmsg, "tirn", mlx5e_rss_get_tirn(rss, tt, inner)); + devlink_fmsg_obj_nest_end(fmsg); + } + + if (found_valid_tir) + devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rx_res_rss_ix(struct mlx5e_rx_res *rx_res, u32 rss_idx, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_rss *rss = mlx5e_rx_res_rss_get(rx_res, rss_idx); + + if (!rss) + return; + + devlink_fmsg_obj_nest_start(fmsg); + + devlink_fmsg_u32_pair_put(fmsg, "Index", rss_idx); + devlink_fmsg_u32_pair_put(fmsg, "rqtn", mlx5e_rss_get_rqtn(rss)); + mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(rss, false, fmsg); + if (mlx5e_rss_get_inner_ft_support(rss)) + mlx5e_rx_reporter_diagnose_rx_res_rss_tirn(rss, true, fmsg); + + devlink_fmsg_obj_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rx_res_rss(struct mlx5e_rx_res *rx_res, + struct devlink_fmsg *fmsg) +{ + int rss_ix; + + devlink_fmsg_arr_pair_nest_start(fmsg, "RSS"); + for (rss_ix = 0; rss_ix < MLX5E_MAX_NUM_RSS; rss_ix++) + mlx5e_rx_reporter_diagnose_rx_res_rss_ix(rx_res, rss_ix, fmsg); + devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rx_res(struct mlx5e_priv *priv, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_rx_res *rx_res = priv->rx_res; + + mlx5e_health_fmsg_named_obj_nest_start(fmsg, "RX resources"); + mlx5e_rx_reporter_diagnose_rx_res_dir_tirns(rx_res, fmsg); + mlx5e_rx_reporter_diagnose_rx_res_rss(rx_res, fmsg); + mlx5e_health_fmsg_named_obj_nest_end(fmsg); +} + +static void mlx5e_rx_reporter_diagnose_rqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) +{ + struct mlx5e_ptp *ptp_ch = priv->channels.ptp; + int i; - mlx5e_rx_reporter_diagnose_common_config(reporter, fmsg); devlink_fmsg_arr_pair_nest_start(fmsg, "RQs"); for (i = 0; i < priv->channels.num; i++) { @@ -367,7 +452,24 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, } if (ptp_ch && test_bit(MLX5E_PTP_STATE_RX, ptp_ch->state)) mlx5e_rx_reporter_build_diagnose_output_ptp_rq(&ptp_ch->rq, fmsg); + devlink_fmsg_arr_pair_nest_end(fmsg); +} + +static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + mlx5e_rx_reporter_diagnose_common_config(priv, fmsg); + mlx5e_rx_reporter_diagnose_rqs(priv, fmsg); + mlx5e_rx_reporter_diagnose_rx_res(priv, fmsg); unlock: mutex_unlock(&priv->state_lock); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 09433b91be17..85d5cb39b107 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -13,10 +13,8 @@ static const char * const sq_sw_state_type_name[] = { [MLX5E_SQ_STATE_RECOVERING] = "recovering", [MLX5E_SQ_STATE_IPSEC] = "ipsec", [MLX5E_SQ_STATE_DIM] = "dim", - [MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE] = "vlan_need_l2_inline", [MLX5E_SQ_STATE_PENDING_XSK_TX] = "pending_xsk_tx", [MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC] = "pending_tls_rx_resync", - [MLX5E_SQ_STATE_XDP_MULTIBUF] = "xdp_multibuf", }; static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq) @@ -108,9 +106,7 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) mlx5e_reset_txqsq_cc_pc(sq); sq->stats->recover++; clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); - rtnl_lock(); mlx5e_activate_txqsq(sq); - rtnl_unlock(); if (sq->channel) mlx5e_trigger_napi_icosq(sq->channel); @@ -184,16 +180,12 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx) carrier_ok = netif_carrier_ok(netdev); netif_carrier_off(netdev); - rtnl_lock(); mlx5e_deactivate_priv_channels(priv); - rtnl_unlock(); mlx5e_ptp_close(chs->ptp); err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp); - rtnl_lock(); mlx5e_activate_priv_channels(priv); - rtnl_unlock(); /* return carrier back if needed */ if (carrier_ok) @@ -319,6 +311,30 @@ out: mlx5e_health_fmsg_named_obj_nest_end(fmsg); } +static void +mlx5e_tx_reporter_diagnose_tis_config(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter); + u8 num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params); + u32 tc, i, tisn; + + devlink_fmsg_arr_pair_nest_start(fmsg, "TIS Config"); + for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) { + for (tc = 0; tc < num_tc; tc++) { + tisn = mlx5e_profile_get_tisn(priv->mdev, priv, + priv->profile, i, tc); + + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_u32_pair_put(fmsg, "lag port", i); + devlink_fmsg_u32_pair_put(fmsg, "tc", tc); + devlink_fmsg_u32_pair_put(fmsg, "tisn", tisn); + devlink_fmsg_obj_nest_end(fmsg); + } + } + devlink_fmsg_arr_pair_nest_end(fmsg); +} + static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, struct devlink_fmsg *fmsg, struct netlink_ext_ack *extack) @@ -334,6 +350,7 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter, goto unlock; mlx5e_tx_reporter_diagnose_common_config(reporter, fmsg); + mlx5e_tx_reporter_diagnose_tis_config(reporter, fmsg); devlink_fmsg_arr_pair_nest_start(fmsg, "SQs"); for (i = 0; i < priv->channels.num; i++) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c index 5f742f896600..c68ba0e58fa6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -81,6 +81,11 @@ struct mlx5e_rss { refcount_t refcnt; }; +bool mlx5e_rss_get_inner_ft_support(struct mlx5e_rss *rss) +{ + return rss->inner_ft_support; +} + void mlx5e_rss_params_indir_modify_actual_size(struct mlx5e_rss *rss, u32 num_channels) { rss->indir.actual_table_size = mlx5e_rqt_size(rss->mdev, num_channels); @@ -156,6 +161,7 @@ static void mlx5e_rss_params_init(struct mlx5e_rss *rss) { enum mlx5_traffic_types tt; + rss->hash.symmetric = true; rss->hash.hfunc = ETH_RSS_HASH_TOP; netdev_rss_key_fill(rss->hash.toeplitz_hash_key, sizeof(rss->hash.toeplitz_hash_key)); @@ -449,6 +455,16 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, return mlx5e_tir_get_tirn(tir); } +u32 mlx5e_rss_get_rqtn(struct mlx5e_rss *rss) +{ + return mlx5e_rqt_get_rqtn(&rss->rqt); +} + +bool mlx5e_rss_valid_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner) +{ + return !!rss_get_tir(rss, tt, inner); +} + /* Fill the "tirn" output parameter. * Create the requested TIR if it's its first usage. */ @@ -551,7 +567,8 @@ inner_tir: return final_err; } -int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc) +void mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc, + bool *symmetric) { if (indir) memcpy(indir, rss->indir.table, @@ -564,11 +581,12 @@ int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc) if (hfunc) *hfunc = rss->hash.hfunc; - return 0; + if (symmetric) + *symmetric = rss->hash.symmetric; } int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, - const u8 *key, const u8 *hfunc, + const u8 *key, const u8 *hfunc, const bool *symmetric, u32 *rqns, u32 *vhca_ids, unsigned int num_rqns) { bool changed_indir = false; @@ -608,6 +626,11 @@ int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, rss->indir.actual_table_size * sizeof(*rss->indir.table)); } + if (symmetric) { + rss->hash.symmetric = *symmetric; + changed_hash = true; + } + if (changed_indir && rss->enabled) { err = mlx5e_rss_apply(rss, rqns, vhca_ids, num_rqns); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h index d0df98963c8d..c6c1b2847cf5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h @@ -32,8 +32,11 @@ void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss); void mlx5e_rss_refcnt_dec(struct mlx5e_rss *rss); unsigned int mlx5e_rss_refcnt_read(struct mlx5e_rss *rss); +bool mlx5e_rss_get_inner_ft_support(struct mlx5e_rss *rss); u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner); +bool mlx5e_rss_valid_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner); +u32 mlx5e_rss_get_rqtn(struct mlx5e_rss *rss); int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, const struct mlx5e_packet_merge_param *init_pkt_merge_param, @@ -44,9 +47,10 @@ void mlx5e_rss_disable(struct mlx5e_rss *rss); int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, struct mlx5e_packet_merge_param *pkt_merge_param); -int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc); +void mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc, + bool *symmetric); int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir, - const u8 *key, const u8 *hfunc, + const u8 *key, const u8 *hfunc, const bool *symmetric, u32 *rqns, u32 *vhca_ids, unsigned int num_rqns); struct mlx5e_rss_params_hash mlx5e_rss_get_hash(struct mlx5e_rss *rss); u8 mlx5e_rss_get_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index a86eade9a9e0..a2acbfee2b77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -5,8 +5,6 @@ #include "channels.h" #include "params.h" -#define MLX5E_MAX_NUM_RSS 16 - struct mlx5e_rx_res { struct mlx5_core_dev *mdev; /* primary */ enum mlx5e_rx_res_features features; @@ -73,17 +71,12 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, return 0; } -int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch) +int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 rss_idx, unsigned int init_nch) { bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; struct mlx5e_rss *rss; - int i; - for (i = 1; i < MLX5E_MAX_NUM_RSS; i++) - if (!res->rss[i]) - break; - - if (i == MLX5E_MAX_NUM_RSS) + if (WARN_ON_ONCE(res->rss[rss_idx])) return -ENOSPC; rss = mlx5e_rss_init(res->mdev, inner_ft_support, res->drop_rqn, @@ -99,8 +92,7 @@ int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int i mlx5e_rss_enable(rss, res->rss_rqns, vhca_ids, res->rss_nch); } - res->rss[i] = rss; - *rss_idx = i; + res->rss[rss_idx] = rss; return 0; } @@ -195,23 +187,22 @@ void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int n mlx5e_rss_set_indir_uniform(res->rss[0], nch); } -int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, - u32 *indir, u8 *key, u8 *hfunc) +void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + u32 *indir, u8 *key, u8 *hfunc, bool *symmetric) { - struct mlx5e_rss *rss; - - if (rss_idx >= MLX5E_MAX_NUM_RSS) - return -EINVAL; + struct mlx5e_rss *rss = NULL; - rss = res->rss[rss_idx]; - if (!rss) - return -ENOENT; + if (rss_idx < MLX5E_MAX_NUM_RSS) + rss = res->rss[rss_idx]; + if (WARN_ON_ONCE(!rss)) + return; - return mlx5e_rss_get_rxfh(rss, indir, key, hfunc); + mlx5e_rss_get_rxfh(rss, indir, key, hfunc, symmetric); } int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, - const u32 *indir, const u8 *key, const u8 *hfunc) + const u32 *indir, const u8 *key, const u8 *hfunc, + const bool *symmetric) { u32 *vhca_ids = get_vhca_ids(res, 0); struct mlx5e_rss *rss; @@ -223,8 +214,8 @@ int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, if (!rss) return -ENOENT; - return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, res->rss_rqns, vhca_ids, - res->rss_nch); + return mlx5e_rss_set_rxfh(rss, indir, key, hfunc, symmetric, + res->rss_rqns, vhca_ids, res->rss_nch); } int mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx, @@ -497,6 +488,11 @@ void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res) mlx5e_rx_res_free(res); } +unsigned int mlx5e_rx_res_get_max_nch(struct mlx5e_rx_res *res) +{ + return res->max_nch; +} + u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix) { return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir); @@ -522,7 +518,7 @@ u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res) return mlx5e_tir_get_tirn(&res->ptp.tir); } -static u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) +u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) { return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt); } @@ -575,8 +571,6 @@ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_chann for (ix = 0; ix < nch; ix++) mlx5e_rx_res_channel_activate_direct(res, chs, ix); - for (ix = nch; ix < res->max_nch; ix++) - mlx5e_rx_res_channel_deactivate_direct(res, ix); if (res->features & MLX5E_RX_RES_FEATURE_PTP) { u32 rqn; @@ -599,7 +593,7 @@ void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res) mlx5e_rx_res_rss_disable(res); - for (ix = 0; ix < res->max_nch; ix++) + for (ix = 0; ix < res->rss_nch; ix++) mlx5e_rx_res_channel_deactivate_direct(res, ix); if (res->features & MLX5E_RX_RES_FEATURE_PTP) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index 7b1a9f0f1874..1d049e2aa264 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -10,6 +10,8 @@ #include "fs.h" #include "rss.h" +#define MLX5E_MAX_NUM_RSS 16 + struct mlx5e_rx_res; struct mlx5e_channels; @@ -34,6 +36,9 @@ u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix); u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res); +u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix); +unsigned int mlx5e_rx_res_get_max_nch(struct mlx5e_rx_res *res); +bool mlx5_rx_res_rss_inner_ft_support(struct mlx5e_rx_res *res); /* Activate/deactivate API */ void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs); @@ -43,10 +48,12 @@ void mlx5e_rx_res_xsk_update(struct mlx5e_rx_res *res, struct mlx5e_channels *ch /* Configuration API */ void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch); -int mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, - u32 *indir, u8 *key, u8 *hfunc); +void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, + u32 *indir, u8 *key, u8 *hfunc, + bool *symmetric); int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx, - const u32 *indir, const u8 *key, const u8 *hfunc); + const u32 *indir, const u8 *key, const u8 *hfunc, + const bool *symmetric); int mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx, enum mlx5_traffic_types tt); @@ -55,7 +62,7 @@ int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, u32 rss_idx, int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res, struct mlx5e_packet_merge_param *pkt_merge_param); -int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch); +int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 rss_idx, unsigned int init_nch); int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx); int mlx5e_rx_res_rss_cnt(struct mlx5e_rx_res *res); int mlx5e_rx_res_rss_index(struct mlx5e_rx_res *res, struct mlx5e_rss *rss); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h index d6c12d0ea55b..2e528b2c34d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -73,11 +73,6 @@ struct mlx5e_tc_act { bool is_terminating_action; }; -struct mlx5e_tc_flow_action { - unsigned int num_entries; - struct flow_action_entry **entries; -}; - extern struct mlx5e_tc_act mlx5e_tc_act_drop; extern struct mlx5e_tc_act mlx5e_tc_act_trap; extern struct mlx5e_tc_act mlx5e_tc_act_accept; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c index feeb41693c17..b6cabe829f19 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -5,6 +5,16 @@ #include "en/tc_priv.h" #include "en/tc_ct.h" +static bool +tc_act_can_offload_ct(struct mlx5e_tc_act_parse_state *parse_state, + const struct flow_action_entry *act, + int act_index, + struct mlx5_flow_attr *attr) +{ + return !((act->ct.action & TCA_CT_ACT_COMMIT) && + flow_action_is_last_entry(parse_state->flow_action, act)); +} + static int tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -56,6 +66,7 @@ tc_act_is_missable_ct(const struct flow_action_entry *act) } struct mlx5e_tc_act mlx5e_tc_act_ct = { + .can_offload = tc_act_can_offload_ct, .parse_action = tc_act_parse_ct, .post_parse = tc_act_post_parse_ct, .is_multi_table_act = tc_act_is_multi_table_act_ct, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c index a13c5e707b83..9bdb5820c553 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c @@ -94,29 +94,30 @@ mlx5e_tc_act_vlan_add_push_action(struct mlx5e_priv *priv, struct net_device **out_dev, struct netlink_ext_ack *extack) { - struct net_device *vlan_dev = *out_dev; - struct flow_action_entry vlan_act = { - .id = FLOW_ACTION_VLAN_PUSH, - .vlan.vid = vlan_dev_vlan_id(vlan_dev), - .vlan.proto = vlan_dev_vlan_proto(vlan_dev), - .vlan.prio = 0, - }; - int err; - - err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, &attr->action, extack, NULL); - if (err) - return err; - - rcu_read_lock(); - *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev)); - rcu_read_unlock(); - if (!*out_dev) - return -ENODEV; + do { + struct net_device *vlan_dev = *out_dev; + struct flow_action_entry vlan_act = { + .id = FLOW_ACTION_VLAN_PUSH, + .vlan.vid = vlan_dev_vlan_id(vlan_dev), + .vlan.proto = vlan_dev_vlan_proto(vlan_dev), + .vlan.prio = 0, + }; + int err; + + err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, + &attr->action, extack, NULL); + if (err) + return err; - if (is_vlan_dev(*out_dev)) - err = mlx5e_tc_act_vlan_add_push_action(priv, attr, out_dev, extack); + rcu_read_lock(); + *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), + dev_get_iflink(vlan_dev)); + rcu_read_unlock(); + if (!*out_dev) + return -ENODEV; + } while (is_vlan_dev(*out_dev)); - return err; + return 0; } int diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h index 62b3f7ff5562..e5b30801314b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h @@ -48,4 +48,14 @@ mlx5_ct_fs_smfs_ops_get(void) } #endif /* IS_ENABLED(CONFIG_MLX5_SW_STEERING) */ +#if IS_ENABLED(CONFIG_MLX5_HW_STEERING) +struct mlx5_ct_fs_ops *mlx5_ct_fs_hmfs_ops_get(void); +#else +static inline struct mlx5_ct_fs_ops * +mlx5_ct_fs_hmfs_ops_get(void) +{ + return NULL; +} +#endif /* IS_ENABLED(CONFIG_MLX5_SW_STEERING) */ + #endif /* __MLX5_EN_TC_CT_FS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c new file mode 100644 index 000000000000..01d522b02947 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. */ + +#include "en_tc.h" +#include "en/tc_ct.h" +#include "en/tc_priv.h" +#include "en/tc/ct_fs.h" +#include "fs_core.h" +#include "steering/hws/fs_hws_pools.h" +#include "steering/hws/mlx5hws.h" +#include "steering/hws/table.h" + +struct mlx5_ct_fs_hmfs_matcher { + struct mlx5hws_bwc_matcher *hws_bwc_matcher; + refcount_t ref; +}; + +/* We need {ipv4, ipv6} x {tcp, udp, gre} matchers. */ +#define NUM_MATCHERS (2 * 3) + +struct mlx5_ct_fs_hmfs { + struct mlx5hws_table *ct_tbl; + struct mlx5hws_table *ct_nat_tbl; + struct mlx5_flow_table *ct_nat; + struct mlx5hws_action *fwd_action; + struct mlx5hws_action *last_action; + struct mlx5hws_context *ctx; + struct mutex lock; /* Guards matchers */ + struct mlx5_ct_fs_hmfs_matcher matchers[NUM_MATCHERS]; + struct mlx5_ct_fs_hmfs_matcher matchers_nat[NUM_MATCHERS]; +}; + +struct mlx5_ct_fs_hmfs_rule { + struct mlx5_ct_fs_rule fs_rule; + struct mlx5hws_bwc_rule *hws_bwc_rule; + struct mlx5_ct_fs_hmfs_matcher *hmfs_matcher; + struct mlx5_fc *counter; +}; + +static u32 get_matcher_idx(bool ipv4, bool tcp, bool gre) +{ + return ipv4 * 3 + tcp * 2 + gre; +} + +static int mlx5_ct_fs_hmfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct, + struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct) +{ + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; + struct mlx5hws_table *ct_tbl, *ct_nat_tbl, *post_ct_tbl; + struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs); + + ct_tbl = ct->fs_hws_table.hws_table; + ct_nat_tbl = ct_nat->fs_hws_table.hws_table; + post_ct_tbl = post_ct->fs_hws_table.hws_table; + fs_hmfs->ct_nat = ct_nat; + + if (!ct_tbl || !ct_nat_tbl || !post_ct_tbl) { + netdev_warn(fs->netdev, "ct_fs_hmfs: failed to init, missing backing hws tables"); + return -EOPNOTSUPP; + } + + netdev_dbg(fs->netdev, "using hmfs steering"); + + fs_hmfs->ct_tbl = ct_tbl; + fs_hmfs->ct_nat_tbl = ct_nat_tbl; + fs_hmfs->ctx = ct_tbl->ctx; + mutex_init(&fs_hmfs->lock); + + fs_hmfs->fwd_action = mlx5hws_action_create_dest_table(ct_tbl->ctx, post_ct_tbl, flags); + if (!fs_hmfs->fwd_action) { + netdev_warn(fs->netdev, "ct_fs_hmfs: failed to create fwd action\n"); + return -EINVAL; + } + fs_hmfs->last_action = mlx5hws_action_create_last(ct_tbl->ctx, flags); + if (!fs_hmfs->last_action) { + netdev_warn(fs->netdev, "ct_fs_hmfs: failed to create last action\n"); + mlx5hws_action_destroy(fs_hmfs->fwd_action); + return -EINVAL; + } + + return 0; +} + +static void mlx5_ct_fs_hmfs_destroy(struct mlx5_ct_fs *fs) +{ + struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs); + + mlx5hws_action_destroy(fs_hmfs->last_action); + mlx5hws_action_destroy(fs_hmfs->fwd_action); +} + +static struct mlx5hws_bwc_matcher * +mlx5_ct_fs_hmfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5hws_table *tbl, + struct mlx5_flow_spec *spec, bool ipv4, bool tcp, bool gre) +{ + u8 match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS; + struct mlx5hws_match_parameters mask = { + .match_buf = spec->match_criteria, + .match_sz = sizeof(spec->match_criteria), + }; + u32 priority = get_matcher_idx(ipv4, tcp, gre); /* Static priority based on params. */ + struct mlx5hws_bwc_matcher *hws_bwc_matcher; + + hws_bwc_matcher = mlx5hws_bwc_matcher_create(tbl, priority, match_criteria_enable, &mask); + if (!hws_bwc_matcher) + return ERR_PTR(-EINVAL); + + return hws_bwc_matcher; +} + +static struct mlx5_ct_fs_hmfs_matcher * +mlx5_ct_fs_hmfs_matcher_get(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, + bool nat, bool ipv4, bool tcp, bool gre) +{ + struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs); + u32 matcher_idx = get_matcher_idx(ipv4, tcp, gre); + struct mlx5_ct_fs_hmfs_matcher *hmfs_matcher; + struct mlx5hws_bwc_matcher *hws_bwc_matcher; + struct mlx5hws_table *tbl; + + hmfs_matcher = nat ? + (fs_hmfs->matchers_nat + matcher_idx) : + (fs_hmfs->matchers + matcher_idx); + + if (refcount_inc_not_zero(&hmfs_matcher->ref)) + return hmfs_matcher; + + mutex_lock(&fs_hmfs->lock); + + /* Retry with lock, as the matcher might be already created by another cpu. */ + if (refcount_inc_not_zero(&hmfs_matcher->ref)) + goto out_unlock; + + tbl = nat ? fs_hmfs->ct_nat_tbl : fs_hmfs->ct_tbl; + + hws_bwc_matcher = mlx5_ct_fs_hmfs_matcher_create(fs, tbl, spec, ipv4, tcp, gre); + if (IS_ERR(hws_bwc_matcher)) { + netdev_warn(fs->netdev, + "ct_fs_hmfs: failed to create bwc matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n", + nat, ipv4, tcp, gre, PTR_ERR(hws_bwc_matcher)); + + hmfs_matcher = ERR_CAST(hws_bwc_matcher); + goto out_unlock; + } + + hmfs_matcher->hws_bwc_matcher = hws_bwc_matcher; + refcount_set(&hmfs_matcher->ref, 1); + +out_unlock: + mutex_unlock(&fs_hmfs->lock); + return hmfs_matcher; +} + +static void +mlx5_ct_fs_hmfs_matcher_put(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_hmfs_matcher *hmfs_matcher) +{ + struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs); + + if (!refcount_dec_and_mutex_lock(&hmfs_matcher->ref, &fs_hmfs->lock)) + return; + + mlx5hws_bwc_matcher_destroy(hmfs_matcher->hws_bwc_matcher); + mutex_unlock(&fs_hmfs->lock); +} + +#define NUM_CT_HMFS_RULES 4 + +static void mlx5_ct_fs_hmfs_fill_rule_actions(struct mlx5_ct_fs_hmfs *fs_hmfs, + struct mlx5_flow_attr *attr, + struct mlx5hws_rule_action *rule_actions) +{ + struct mlx5_fs_hws_action *mh_action = &attr->modify_hdr->fs_hws_action; + + memset(rule_actions, 0, NUM_CT_HMFS_RULES * sizeof(*rule_actions)); + rule_actions[0].action = mlx5_fc_get_hws_action(fs_hmfs->ctx, attr->counter); + rule_actions[0].counter.offset = + attr->counter->id - attr->counter->bulk->base_id; + /* Modify header is special, it may require extra arguments outside the action itself. */ + if (mh_action->mh_data) { + rule_actions[1].modify_header.offset = mh_action->mh_data->offset; + rule_actions[1].modify_header.data = mh_action->mh_data->data; + } + rule_actions[1].action = mh_action->hws_action; + rule_actions[2].action = fs_hmfs->fwd_action; + rule_actions[3].action = fs_hmfs->last_action; +} + +static struct mlx5_ct_fs_rule * +mlx5_ct_fs_hmfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, struct flow_rule *flow_rule) +{ + struct mlx5hws_rule_action rule_actions[NUM_CT_HMFS_RULES]; + struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs); + struct mlx5hws_match_parameters match_params = { + .match_buf = spec->match_value, + .match_sz = ARRAY_SIZE(spec->match_value), + }; + struct mlx5_ct_fs_hmfs_matcher *hmfs_matcher; + struct mlx5_ct_fs_hmfs_rule *hmfs_rule; + bool nat, tcp, ipv4, gre; + int err; + + if (!mlx5e_tc_ct_is_valid_flow_rule(fs->netdev, flow_rule)) + return ERR_PTR(-EOPNOTSUPP); + + hmfs_rule = kzalloc(sizeof(*hmfs_rule), GFP_KERNEL); + if (!hmfs_rule) + return ERR_PTR(-ENOMEM); + + nat = (attr->ft == fs_hmfs->ct_nat); + ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4; + tcp = MLX5_GET(fte_match_param, spec->match_value, + outer_headers.ip_protocol) == IPPROTO_TCP; + gre = MLX5_GET(fte_match_param, spec->match_value, + outer_headers.ip_protocol) == IPPROTO_GRE; + + hmfs_matcher = mlx5_ct_fs_hmfs_matcher_get(fs, spec, nat, ipv4, tcp, gre); + if (IS_ERR(hmfs_matcher)) { + err = PTR_ERR(hmfs_matcher); + goto err_free_rule; + } + hmfs_rule->hmfs_matcher = hmfs_matcher; + + mlx5_ct_fs_hmfs_fill_rule_actions(fs_hmfs, attr, rule_actions); + hmfs_rule->counter = attr->counter; + + hmfs_rule->hws_bwc_rule = + mlx5hws_bwc_rule_create(hmfs_matcher->hws_bwc_matcher, &match_params, + spec->flow_context.flow_source, rule_actions); + if (!hmfs_rule->hws_bwc_rule) { + err = -EINVAL; + goto err_put_matcher; + } + + return &hmfs_rule->fs_rule; + +err_put_matcher: + mlx5_fc_put_hws_action(hmfs_rule->counter); + mlx5_ct_fs_hmfs_matcher_put(fs, hmfs_matcher); +err_free_rule: + kfree(hmfs_rule); + return ERR_PTR(err); +} + +static void mlx5_ct_fs_hmfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule) +{ + struct mlx5_ct_fs_hmfs_rule *hmfs_rule = container_of(fs_rule, + struct mlx5_ct_fs_hmfs_rule, + fs_rule); + mlx5hws_bwc_rule_destroy(hmfs_rule->hws_bwc_rule); + mlx5_fc_put_hws_action(hmfs_rule->counter); + mlx5_ct_fs_hmfs_matcher_put(fs, hmfs_rule->hmfs_matcher); + kfree(hmfs_rule); +} + +static int mlx5_ct_fs_hmfs_ct_rule_update(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule, + struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr) +{ + struct mlx5_ct_fs_hmfs_rule *hmfs_rule = container_of(fs_rule, + struct mlx5_ct_fs_hmfs_rule, + fs_rule); + struct mlx5hws_rule_action rule_actions[NUM_CT_HMFS_RULES]; + struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs); + int err; + + mlx5_ct_fs_hmfs_fill_rule_actions(fs_hmfs, attr, rule_actions); + + err = mlx5hws_bwc_rule_action_update(hmfs_rule->hws_bwc_rule, rule_actions); + if (err) { + mlx5_fc_put_hws_action(attr->counter); + return err; + } + + mlx5_fc_put_hws_action(hmfs_rule->counter); + hmfs_rule->counter = attr->counter; + + return 0; +} + +static struct mlx5_ct_fs_ops hmfs_ops = { + .ct_rule_add = mlx5_ct_fs_hmfs_ct_rule_add, + .ct_rule_del = mlx5_ct_fs_hmfs_ct_rule_del, + .ct_rule_update = mlx5_ct_fs_hmfs_ct_rule_update, + + .init = mlx5_ct_fs_hmfs_init, + .destroy = mlx5_ct_fs_hmfs_destroy, + + .priv_size = sizeof(struct mlx5_ct_fs_hmfs), +}; + +struct mlx5_ct_fs_ops *mlx5_ct_fs_hmfs_ops_get(void) +{ + return &hmfs_ops; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c index 45737d039252..0c97c5899904 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c @@ -13,7 +13,6 @@ #define INIT_ERR_PREFIX "ct_fs_smfs init failed" #define ct_dbg(fmt, args...)\ netdev_dbg(fs->netdev, "ct_fs_smfs debug: " fmt "\n", ##args) -#define MLX5_CT_TCP_FLAGS_MASK cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16) struct mlx5_ct_fs_smfs_matcher { struct mlx5dr_matcher *dr_matcher; @@ -220,78 +219,6 @@ mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs) mlx5_smfs_action_destroy(fs_smfs->fwd_action); } -static inline bool -mlx5_tc_ct_valid_used_dissector_keys(const u64 used_keys) -{ -#define DISS_BIT(name) BIT_ULL(FLOW_DISSECTOR_KEY_ ## name) - const u64 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | - DISS_BIT(META); - const u64 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | - DISS_BIT(PORTS) | DISS_BIT(TCP); - const u64 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | - DISS_BIT(PORTS) | DISS_BIT(TCP); - const u64 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | - DISS_BIT(PORTS); - const u64 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | - DISS_BIT(PORTS); - const u64 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS); - const u64 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS); - - return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp || - used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre); -} - -static bool -mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *flow_rule) -{ - struct flow_match_ipv4_addrs ipv4_addrs; - struct flow_match_ipv6_addrs ipv6_addrs; - struct flow_match_control control; - struct flow_match_basic basic; - struct flow_match_ports ports; - struct flow_match_tcp tcp; - - if (!mlx5_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) { - ct_dbg("rule uses unexpected dissectors (0x%016llx)", - flow_rule->match.dissector->used_keys); - return false; - } - - flow_rule_match_basic(flow_rule, &basic); - flow_rule_match_control(flow_rule, &control); - flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs); - flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs); - if (basic.key->ip_proto != IPPROTO_GRE) - flow_rule_match_ports(flow_rule, &ports); - if (basic.key->ip_proto == IPPROTO_TCP) - flow_rule_match_tcp(flow_rule, &tcp); - - if (basic.mask->n_proto != htons(0xFFFF) || - (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) || - basic.mask->ip_proto != 0xFF || - (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP && - basic.key->ip_proto != IPPROTO_GRE)) { - ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)", - ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto), - basic.key->ip_proto, basic.mask->ip_proto); - return false; - } - - if (basic.key->ip_proto != IPPROTO_GRE && - (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) { - ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)", - ports.mask->src, ports.mask->dst); - return false; - } - - if (basic.key->ip_proto == IPPROTO_TCP && tcp.mask->flags != MLX5_CT_TCP_FLAGS_MASK) { - ct_dbg("rule uses unexpected tcp match (flags 0x%02x)", tcp.mask->flags); - return false; - } - - return true; -} - static struct mlx5_ct_fs_rule * mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, struct flow_rule *flow_rule) @@ -304,7 +231,7 @@ mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, int num_actions = 0, err; bool nat, tcp, ipv4, gre; - if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule)) + if (!mlx5e_tc_ct_is_valid_flow_rule(fs->netdev, flow_rule)) return ERR_PTR(-EOPNOTSUPP); smfs_rule = kzalloc(sizeof(*smfs_rule), GFP_KERNEL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c index 8218c892b161..7819fb297280 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c @@ -593,3 +593,8 @@ mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter, *drops = packets2; *lastuse = max_t(u64, lastuse1, lastuse2); } + +int mlx5e_flow_meter_get_base_id(struct mlx5e_flow_meter_handle *meter) +{ + return meter->meters_obj->base_id; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h index 9b795cd106bb..d6afb6556875 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h @@ -72,4 +72,17 @@ void mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter, u64 *bytes, u64 *packets, u64 *drops, u64 *lastuse); +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + +int mlx5e_flow_meter_get_base_id(struct mlx5e_flow_meter_handle *meter); + +#else /* CONFIG_MLX5_CLS_ACT */ + +static inline int +mlx5e_flow_meter_get_base_id(struct mlx5e_flow_meter_handle *meter) +{ + return 0; +} +#endif /* CONFIG_MLX5_CLS_ACT */ + #endif /* __MLX5_EN_FLOW_METER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index a84ebac2f011..870d12364f99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -1195,6 +1195,7 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, struct flow_action_entry *meta_action; unsigned long cookie = flow->cookie; struct mlx5_ct_entry *entry; + bool has_nat; int err; meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule); @@ -1236,6 +1237,8 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule); if (err) goto err_set; + has_nat = memcmp(&entry->tuple, &entry->tuple_nat, + sizeof(entry->tuple)); spin_lock_bh(&ct_priv->ht_lock); @@ -1244,7 +1247,7 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, if (err) goto err_entries; - if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) { + if (has_nat) { err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht, &entry->tuple_nat_node, tuples_nat_ht_params); @@ -1349,6 +1352,32 @@ mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, return 0; } +static bool +mlx5_tc_ct_filter_legacy_non_nic_flows(struct mlx5_ct_ft *ft, + struct flow_cls_offload *flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; + struct flow_match_meta match; + struct net_device *netdev; + bool same_dev = false; + + if (!is_mdev_legacy_mode(ct_priv->dev) || + !flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) + return true; + + flow_rule_match_meta(rule, &match); + + if (!(match.key->ingress_ifindex & match.mask->ingress_ifindex)) + return true; + + netdev = dev_get_by_index(&init_net, match.key->ingress_ifindex); + same_dev = ct_priv->netdev == netdev; + dev_put(netdev); + + return same_dev; +} + static int mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data, void *cb_priv) @@ -1361,6 +1390,9 @@ mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data, switch (f->command) { case FLOW_CLS_REPLACE: + if (!mlx5_tc_ct_filter_legacy_non_nic_flows(ft, f)) + return -EOPNOTSUPP; + return mlx5_tc_ct_block_flow_offload_add(ft, f); case FLOW_CLS_DESTROY: return mlx5_tc_ct_block_flow_offload_del(ft, f); @@ -2065,10 +2097,19 @@ mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv) struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get(); int err; - if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB && - ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) { - ct_dbg("Using SMFS ct flow steering provider"); - fs_ops = mlx5_ct_fs_smfs_ops_get(); + if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) { + if (ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_HMFS) { + ct_dbg("Using HMFS ct flow steering provider"); + fs_ops = mlx5_ct_fs_hmfs_ops_get(); + } else if (ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) { + ct_dbg("Using SMFS ct flow steering provider"); + fs_ops = mlx5_ct_fs_smfs_ops_get(); + } + + if (!fs_ops) { + ct_dbg("Requested flow steering mode is not enabled."); + return -EOPNOTSUPP; + } } ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL); @@ -2421,3 +2462,74 @@ out_inc_drop: atomic_inc(&ct_priv->debugfs.stats.rx_dropped); return false; } + +static bool mlx5e_tc_ct_valid_used_dissector_keys(const u64 used_keys) +{ +#define DISS_BIT(name) BIT_ULL(FLOW_DISSECTOR_KEY_ ## name) + const u64 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | + DISS_BIT(META); + const u64 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | + DISS_BIT(PORTS) | DISS_BIT(TCP); + const u64 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | + DISS_BIT(PORTS) | DISS_BIT(TCP); + const u64 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | + DISS_BIT(PORTS); + const u64 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | + DISS_BIT(PORTS); + const u64 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS); + const u64 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS); + + return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp || + used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre); +} + +bool mlx5e_tc_ct_is_valid_flow_rule(const struct net_device *dev, struct flow_rule *flow_rule) +{ + struct flow_match_ipv4_addrs ipv4_addrs; + struct flow_match_ipv6_addrs ipv6_addrs; + struct flow_match_control control; + struct flow_match_basic basic; + struct flow_match_ports ports; + struct flow_match_tcp tcp; + + if (!mlx5e_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) { + netdev_dbg(dev, "ct_debug: rule uses unexpected dissectors (0x%016llx)", + flow_rule->match.dissector->used_keys); + return false; + } + + flow_rule_match_basic(flow_rule, &basic); + flow_rule_match_control(flow_rule, &control); + flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs); + flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs); + if (basic.key->ip_proto != IPPROTO_GRE) + flow_rule_match_ports(flow_rule, &ports); + if (basic.key->ip_proto == IPPROTO_TCP) + flow_rule_match_tcp(flow_rule, &tcp); + + if (basic.mask->n_proto != htons(0xFFFF) || + (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) || + basic.mask->ip_proto != 0xFF || + (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP && + basic.key->ip_proto != IPPROTO_GRE)) { + netdev_dbg(dev, "ct_debug: rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)", + ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto), + basic.key->ip_proto, basic.mask->ip_proto); + return false; + } + + if (basic.key->ip_proto != IPPROTO_GRE && + (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) { + netdev_dbg(dev, "ct_debug: rule uses ports match (src 0x%04x, dst 0x%04x)", + ports.mask->src, ports.mask->dst); + return false; + } + + if (basic.key->ip_proto == IPPROTO_TCP && tcp.mask->flags != MLX5_CT_TCP_FLAGS_MASK) { + netdev_dbg(dev, "ct_debug: rule uses unexpected tcp match (flags 0x%02x)", + tcp.mask->flags); + return false; + } + + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index b66c5f98067f..5e9dbdd4a5e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -128,6 +128,9 @@ bool mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, struct sk_buff *skb, u8 zone_restore_id); +#define MLX5_CT_TCP_FLAGS_MASK cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16) +bool mlx5e_tc_ct_is_valid_flow_rule(const struct net_device *dev, struct flow_rule *flow_rule); + #else /* CONFIG_MLX5_TC_CT */ static inline struct mlx5_tc_ct_priv * @@ -202,5 +205,12 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, return false; } +static inline bool +mlx5e_tc_ct_is_valid_flow_rule(const struct net_device *dev, + struct flow_rule *flow_rule) +{ + return false; +} + #endif /* !IS_ENABLED(CONFIG_MLX5_TC_CT) */ #endif /* __MLX5_EN_TC_CT_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 721f35e59757..2162d776fe35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -31,8 +31,7 @@ static void mlx5e_tc_tun_route_attr_cleanup(struct mlx5e_tc_tun_route_attr *attr { if (attr->n) neigh_release(attr->n); - if (attr->route_dev) - dev_put(attr->route_dev); + dev_put(attr->route_dev); } struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) @@ -68,16 +67,14 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, * while holding rcu read lock. Take the net_device for correctness * sake. */ - if (uplink_upper) - dev_hold(uplink_upper); + dev_hold(uplink_upper); rcu_read_unlock(); dst_is_lag_dev = (uplink_upper && netif_is_lag_master(uplink_upper) && real_dev == uplink_upper && mlx5_lag_is_sriov(priv->mdev)); - if (uplink_upper) - dev_put(uplink_upper); + dev_put(uplink_upper); /* if the egress device isn't on the same HW e-switch or * it's a LAG device, use the uplink diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 878cbdbf5ec8..a0fc76a1bc08 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -5,6 +5,7 @@ #include <net/nexthop.h> #include <net/ip_tunnels.h> #include "tc_tun_encap.h" +#include "fs_core.h" #include "en_tc.h" #include "tc_tun.h" #include "rep/tc.h" @@ -24,17 +25,24 @@ static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv, route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex); - if (!route_dev || !netif_is_ovs_master(route_dev) || - attr->parse_attr->filter_dev == e->out_dev) + if (!route_dev || !netif_is_ovs_master(route_dev)) goto out; + if (priv->mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS && + mlx5e_eswitch_uplink_rep(attr->parse_attr->filter_dev) && + (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) { + mlx5_core_warn(priv->mdev, + "Matching on external port with encap + fwd to table actions is not allowed for firmware steering\n"); + err = -EINVAL; + goto out; + } + err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex, MLX5E_TC_INT_PORT_EGRESS, &attr->action, out_index); out: - if (route_dev) - dev_put(route_dev); + dev_put(route_dev); return err; } @@ -744,8 +752,7 @@ static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw, } out: - if (route_dev) - dev_put(route_dev); + dev_put(route_dev); return err; } @@ -779,8 +786,7 @@ static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw, mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data); out: - if (route_dev) - dev_put(route_dev); + dev_put(route_dev); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index e4e487c8431b..7a18a469961d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -140,7 +140,7 @@ static int mlx5e_tc_tun_parse_vxlan_gbp_option(struct mlx5e_priv *priv, gbp_mask = (u32 *)&enc_opts.mask->data[0]; if (*gbp_mask & ~VXLAN_GBP_MASK) { - NL_SET_ERR_MSG_FMT_MOD(extack, "Wrong VxLAN GBP mask(0x%08X)\n", *gbp_mask); + NL_SET_ERR_MSG_FMT_MOD(extack, "Wrong VxLAN GBP mask(0x%08X)", *gbp_mask); return -EINVAL; } @@ -165,9 +165,6 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, struct flow_match_enc_keyid enc_keyid; void *misc_c, *misc_v; - misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) return 0; @@ -182,6 +179,30 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, err = mlx5e_tc_tun_parse_vxlan_gbp_option(priv, spec, f); if (err) return err; + + /* We can't mix custom tunnel headers with symbolic ones and we + * don't have a symbolic field name for GBP, so we use custom + * tunnel headers in this case. We need hardware support to + * match on custom tunnel headers, but we already know it's + * supported because the previous call successfully checked for + * that. + */ + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_5); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_5); + + /* Shift by 8 to account for the reserved bits in the vxlan + * header after the VNI. + */ + MLX5_SET(fte_match_set_misc5, misc_c, tunnel_header_1, + be32_to_cpu(enc_keyid.mask->keyid) << 8); + MLX5_SET(fte_match_set_misc5, misc_v, tunnel_header_1, + be32_to_cpu(enc_keyid.key->keyid) << 8); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_5; + + return 0; } /* match on VNI is required */ @@ -195,6 +216,11 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, be32_to_cpu(enc_keyid.mask->keyid)); MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c index 11f724ad90db..19499072f67f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c @@ -124,7 +124,7 @@ void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder, const size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); - MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + MLX5_SET(tirc, tirc, rx_hash_symmetric, rss_hash->symmetric); memcpy(rss_key, rss_hash->toeplitz_hash_key, len); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h index 857a84bcd53a..e8df3aaf6562 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h @@ -9,6 +9,7 @@ struct mlx5e_rss_params_hash { u8 hfunc; u8 toeplitz_hash_key[40]; + bool symmetric; }; struct mlx5e_rss_params_traffic_type { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 53ca16cb9c41..b5c19396e096 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -46,7 +46,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params rq->pdev = t->pdev; rq->netdev = priv->netdev; rq->priv = priv; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->tstamp = &priv->tstamp; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); @@ -149,7 +149,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) t->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey); t->stats = &priv->trap_stats.ch; - netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll); + netif_napi_add_locked(netdev, &t->napi, mlx5e_trap_napi_poll); err = mlx5e_open_trap_rq(priv, t); if (unlikely(err)) @@ -164,7 +164,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) err_close_trap_rq: mlx5e_close_trap_rq(&t->rq); err_napi_del: - netif_napi_del(&t->napi); + netif_napi_del_locked(&t->napi); kvfree(t); return ERR_PTR(err); } @@ -173,13 +173,13 @@ void mlx5e_close_trap(struct mlx5e_trap *trap) { mlx5e_tir_destroy(&trap->tir); mlx5e_close_trap_rq(&trap->rq); - netif_napi_del(&trap->napi); + netif_napi_del_locked(&trap->napi); kvfree(trap); } static void mlx5e_activate_trap(struct mlx5e_trap *trap) { - napi_enable(&trap->napi); + napi_enable_locked(&trap->napi); mlx5e_activate_rq(&trap->rq); mlx5e_trigger_napi_sched(&trap->napi); } @@ -189,7 +189,7 @@ void mlx5e_deactivate_trap(struct mlx5e_priv *priv) struct mlx5e_trap *trap = priv->en_trap; mlx5e_deactivate_rq(&trap->rq); - napi_disable(&trap->napi); + napi_disable_locked(&trap->napi); } static struct mlx5e_trap *mlx5e_add_trap_queue(struct mlx5e_priv *priv) @@ -285,6 +285,7 @@ int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; + netdev_lock(priv->netdev); switch (trap_ctx->action) { case DEVLINK_TRAP_ACTION_TRAP: err = mlx5e_handle_action_trap(priv, trap_ctx->id); @@ -297,6 +298,7 @@ int mlx5e_handle_trap_event(struct mlx5e_priv *priv, struct mlx5_trap_ctx *trap_ trap_ctx->action); err = -EINVAL; } + netdev_unlock(priv->netdev); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 5ec468268d1a..5dc04bbfc71b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -214,6 +214,19 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size) return pi; } +static inline u16 mlx5e_txqsq_get_next_pi_anysize(struct mlx5e_txqsq *sq, + u16 *size) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi, contig_wqebbs; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + *size = min_t(u16, contig_wqebbs, sq->max_sq_mpw_wqebbs); + + return pi; +} + void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq); static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) @@ -309,14 +322,24 @@ mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) } static inline void -mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size, - enum mlx5e_dma_map_type map_type) +mlx5e_dma_push_single(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size) { struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++); dma->addr = addr; dma->size = size; - dma->type = map_type; + dma->type = MLX5E_DMA_MAP_SINGLE; +} + +static inline void +mlx5e_dma_push_netmem(struct mlx5e_txqsq *sq, netmem_ref netmem, + dma_addr_t addr, u32 size) +{ + struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++); + + netmem_dma_unmap_addr_set(netmem, dma, addr, addr); + dma->size = size; + dma->type = MLX5E_DMA_MAP_PAGE; } static inline @@ -349,7 +372,8 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); break; case MLX5E_DMA_MAP_PAGE: - dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + netmem_dma_unmap_page_attrs(pdev, dma->addr, dma->size, + DMA_TO_DEVICE, 0); break; default: WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n"); @@ -358,9 +382,9 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq); -static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) +static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session) { - return session->ds_count == max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS; + return session->ds_count == session->ds_count_max; } static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 94b291662087..5d51600935a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -289,9 +289,9 @@ static u64 mlx5e_xsk_fill_timestamp(void *_priv) ts = get_cqe_ts(priv->cqe); if (mlx5_is_real_time_rq(priv->cq->mdev) || mlx5_is_real_time_sq(priv->cq->mdev)) - return mlx5_real_time_cyc2time(&priv->cq->mdev->clock, ts); + return mlx5_real_time_cyc2time(priv->cq->mdev->clock, ts); - return mlx5_timecounter_cyc2time(&priv->cq->mdev->clock, ts); + return mlx5_timecounter_cyc2time(priv->cq->mdev->clock, ts); } static void mlx5e_xsk_request_checksum(u16 csum_start, u16 csum_offset, void *priv) @@ -390,6 +390,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) .wqe = wqe, .bytes_count = 0, .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT, + .ds_count_max = sq->max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS, .pkt_count = 0, .inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on), }; @@ -501,7 +502,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptx mlx5e_xdp_mpwqe_add_dseg(sq, p, stats); - if (unlikely(mlx5e_xdp_mpwqe_is_full(session, sq->max_sq_mpw_wqebbs))) + if (unlikely(mlx5e_xdp_mpwqe_is_full(session))) mlx5e_xdp_mpwqe_complete(sq); stats->xmit++; @@ -546,6 +547,7 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, bool inline_ok; bool linear; u16 pi; + int i; struct mlx5e_xdpsq_stats *stats = sq->stats; @@ -612,41 +614,33 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); - if (test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) { - int i; - - memset(&cseg->trailer, 0, sizeof(cseg->trailer)); - memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer)); - - eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + memset(&cseg->trailer, 0, sizeof(cseg->trailer)); + memset(eseg, 0, sizeof(*eseg) - sizeof(eseg->trailer)); - for (i = 0; i < num_frags; i++) { - skb_frag_t *frag = &xdptxdf->sinfo->frags[i]; - dma_addr_t addr; + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); - addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] : - page_pool_get_dma_addr(skb_frag_page(frag)) + - skb_frag_off(frag); + for (i = 0; i < num_frags; i++) { + skb_frag_t *frag = &xdptxdf->sinfo->frags[i]; + dma_addr_t addr; - dseg->addr = cpu_to_be64(addr); - dseg->byte_count = cpu_to_be32(skb_frag_size(frag)); - dseg->lkey = sq->mkey_be; - dseg++; - } + addr = xdptxdf->dma_arr ? xdptxdf->dma_arr[i] : + page_pool_get_dma_addr(skb_frag_page(frag)) + + skb_frag_off(frag); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + dseg->addr = cpu_to_be64(addr); + dseg->byte_count = cpu_to_be32(skb_frag_size(frag)); + dseg->lkey = sq->mkey_be; + dseg++; + } - sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) { - .num_wqebbs = num_wqebbs, - .num_pkts = 1, - }; + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - sq->pc += num_wqebbs; - } else { - cseg->fm_ce_se = 0; + sq->db.wqe_info[pi] = (struct mlx5e_xdp_wqe_info) { + .num_wqebbs = num_wqebbs, + .num_pkts = 1, + }; - sq->pc++; - } + sq->pc += num_wqebbs; xsk_tx_metadata_request(meta, &mlx5e_xsk_tx_metadata_ops, eseg); @@ -713,10 +707,11 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); page = xdpi.page.page; - /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) - * as we know this is a page_pool page. + /* No need to check page_pool_page_is_pp() as we + * know this is a page_pool page. */ - page_pool_recycle_direct(page->pp, page); + page_pool_recycle_direct(pp_page_to_nmdesc(page)->pp, + page); } while (++n < num); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index e054db1e10f8..46ab0a9e8cdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -45,12 +45,6 @@ (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \ sizeof(struct mlx5_wqe_inline_seg)) -struct mlx5e_xdp_buff { - struct xdp_buff xdp; - struct mlx5_cqe64 *cqe; - struct mlx5e_rq *rq; -}; - /* XDP packets can be transmitted in different ways. On completion, we need to * distinguish between them to clean up things in a proper way. */ @@ -182,13 +176,13 @@ static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur) return cur; } -static inline bool mlx5e_xdp_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) +static inline bool mlx5e_xdp_mpwqe_is_full(struct mlx5e_tx_mpwqe *session) { if (session->inline_on) return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > - max_sq_mpw_wqebbs * MLX5_SEND_WQEBB_NUM_DS; + session->ds_count_max; - return mlx5e_tx_mpwqe_is_full(session, max_sq_mpw_wqebbs); + return mlx5e_tx_mpwqe_is_full(session); } struct mlx5e_xdp_wqe_info { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index 1b7132fa70de..2b05536d564a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -123,7 +123,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); wi->consumed_strides = 0; - umr_wqe->ctrl.opmod_idx_opcode = + umr_wqe->hdr.ctrl.opmod_idx_opcode = cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); /* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */ @@ -134,7 +134,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD; else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD; - umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); + umr_wqe->hdr.uctrl.xlt_offset = cpu_to_be16(offset); icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX, @@ -144,7 +144,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) icosq->pc += rq->mpwqe.umr_wqebbs; - icosq->doorbell_cseg = &umr_wqe->ctrl; + icosq->doorbell_cseg = &umr_wqe->hdr.ctrl; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 9240cfe25d10..d743e823362a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -72,7 +72,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, rq->netdev = c->netdev; rq->priv = c->priv; rq->tstamp = c->tstamp; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; rq->channel = c; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index ca92e518be76..00e77c71e201 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -36,6 +36,7 @@ #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <net/netevent.h> +#include <net/ipv6_stubs.h> #include "en.h" #include "eswitch.h" @@ -94,25 +95,14 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) u32 esn, esn_msb; u8 overlap; - switch (x->xso.type) { - case XFRM_DEV_OFFLOAD_PACKET: - switch (x->xso.dir) { - case XFRM_DEV_OFFLOAD_IN: - esn = x->replay_esn->seq; - esn_msb = x->replay_esn->seq_hi; - break; - case XFRM_DEV_OFFLOAD_OUT: - esn = x->replay_esn->oseq; - esn_msb = x->replay_esn->oseq_hi; - break; - default: - WARN_ON(true); - return false; - } - break; - case XFRM_DEV_OFFLOAD_CRYPTO: - /* Already parsed by XFRM core */ + switch (x->xso.dir) { + case XFRM_DEV_OFFLOAD_IN: esn = x->replay_esn->seq; + esn_msb = x->replay_esn->seq_hi; + break; + case XFRM_DEV_OFFLOAD_OUT: + esn = x->replay_esn->oseq; + esn_msb = x->replay_esn->oseq_hi; break; default: WARN_ON(true); @@ -121,11 +111,15 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) overlap = sa_entry->esn_state.overlap; - if (esn >= x->replay_esn->replay_window) - seq_bottom = esn - x->replay_esn->replay_window + 1; + if (!x->replay_esn->replay_window) { + seq_bottom = esn; + } else { + if (esn >= x->replay_esn->replay_window) + seq_bottom = esn - x->replay_esn->replay_window + 1; - if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO) - esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom)); + if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO) + esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom)); + } if (sa_entry->esn_state.esn_msb) sa_entry->esn_state.esn = esn; @@ -266,10 +260,15 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_accel_esp_xfrm_attrs *attrs) { struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry); + struct mlx5e_ipsec_addr *addrs = &attrs->addrs; + struct net_device *netdev = sa_entry->dev; struct xfrm_state *x = sa_entry->x; - struct net_device *netdev; + struct dst_entry *rt_dst_entry; + struct flowi4 fl4 = {}; + struct flowi6 fl6 = {}; struct neighbour *n; u8 addr[ETH_ALEN]; + struct rtable *rt; const void *pkey; u8 *dst, *src; @@ -277,25 +276,94 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, attrs->type != XFRM_DEV_OFFLOAD_PACKET) return; - netdev = x->xso.real_dev; - mlx5_query_mac_address(mdev, addr); switch (attrs->dir) { case XFRM_DEV_OFFLOAD_IN: src = attrs->dmac; dst = attrs->smac; - pkey = &attrs->saddr.a4; + + switch (addrs->family) { + case AF_INET: + fl4.flowi4_proto = x->sel.proto; + fl4.daddr = addrs->saddr.a4; + fl4.saddr = addrs->daddr.a4; + pkey = &addrs->saddr.a4; + break; + case AF_INET6: + fl6.flowi6_proto = x->sel.proto; + memcpy(fl6.daddr.s6_addr32, addrs->saddr.a6, 16); + memcpy(fl6.saddr.s6_addr32, addrs->daddr.a6, 16); + pkey = &addrs->saddr.a6; + break; + default: + return; + } break; case XFRM_DEV_OFFLOAD_OUT: src = attrs->smac; dst = attrs->dmac; - pkey = &attrs->daddr.a4; + switch (addrs->family) { + case AF_INET: + fl4.flowi4_proto = x->sel.proto; + fl4.daddr = addrs->daddr.a4; + fl4.saddr = addrs->saddr.a4; + pkey = &addrs->daddr.a4; + break; + case AF_INET6: + fl6.flowi6_proto = x->sel.proto; + memcpy(fl6.daddr.s6_addr32, addrs->daddr.a6, 16); + memcpy(fl6.saddr.s6_addr32, addrs->saddr.a6, 16); + pkey = &addrs->daddr.a6; + break; + default: + return; + } break; default: return; } ether_addr_copy(src, addr); + + /* Destination can refer to a routed network, so perform FIB lookup + * to resolve nexthop and get its MAC. Neighbour resolution is used as + * fallback. + */ + switch (addrs->family) { + case AF_INET: + rt = ip_route_output_key(dev_net(netdev), &fl4); + if (IS_ERR(rt)) + goto neigh; + + if (rt->rt_type != RTN_UNICAST) { + ip_rt_put(rt); + goto neigh; + } + rt_dst_entry = &rt->dst; + break; + case AF_INET6: + rt_dst_entry = ipv6_stub->ipv6_dst_lookup_flow( + dev_net(netdev), NULL, &fl6, NULL); + if (IS_ERR(rt_dst_entry)) + goto neigh; + break; + default: + return; + } + + n = dst_neigh_lookup(rt_dst_entry, pkey); + if (!n) { + dst_release(rt_dst_entry); + goto neigh; + } + + neigh_ha_snapshot(addr, n, netdev); + ether_addr_copy(dst, addr); + dst_release(rt_dst_entry); + neigh_release(n); + return; + +neigh: n = neigh_lookup(&arp_tbl, pkey, netdev); if (!n) { n = neigh_create(&arp_tbl, pkey, netdev); @@ -310,6 +378,16 @@ static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry, neigh_release(n); } +static void mlx5e_ipsec_state_mask(struct mlx5e_ipsec_addr *addrs) +{ + /* + * State doesn't have subnet prefixes in outer headers. + * The match is performed for exaxt source/destination addresses. + */ + memset(addrs->smask.m6, 0xFF, sizeof(__be32) * 4); + memset(addrs->dmask.m6, 0xFF, sizeof(__be32) * 4); +} + void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_accel_esp_xfrm_attrs *attrs) { @@ -381,9 +459,11 @@ skip_replay_window: attrs->spi = be32_to_cpu(x->id.spi); /* source , destination ips */ - memcpy(&attrs->saddr, x->props.saddr.a6, sizeof(attrs->saddr)); - memcpy(&attrs->daddr, x->id.daddr.a6, sizeof(attrs->daddr)); - attrs->family = x->props.family; + memcpy(&attrs->addrs.saddr, x->props.saddr.a6, + sizeof(attrs->addrs.saddr)); + memcpy(&attrs->addrs.daddr, x->id.daddr.a6, sizeof(attrs->addrs.daddr)); + attrs->addrs.family = x->props.family; + mlx5e_ipsec_state_mask(&attrs->addrs); attrs->type = x->xso.type; attrs->reqid = x->props.reqid; attrs->upspec.dport = ntohs(x->sel.dport); @@ -435,7 +515,8 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, } if (x->encap) { if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESPINUDP)) { - NL_SET_ERR_MSG_MOD(extack, "Encapsulation is not supported"); + NL_SET_ERR_MSG_MOD(extack, + "Encapsulation is not supported"); return -EINVAL; } @@ -686,17 +767,17 @@ static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry) return 0; } -static int mlx5e_xfrm_add_state(struct xfrm_state *x, +static int mlx5e_xfrm_add_state(struct net_device *dev, + struct xfrm_state *x, struct netlink_ext_ack *extack) { struct mlx5e_ipsec_sa_entry *sa_entry = NULL; - struct net_device *netdev = x->xso.real_dev; struct mlx5e_ipsec *ipsec; struct mlx5e_priv *priv; gfp_t gfp; int err; - priv = netdev_priv(netdev); + priv = netdev_priv(dev); if (!priv->ipsec) return -EOPNOTSUPP; @@ -707,6 +788,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, return -ENOMEM; sa_entry->x = x; + sa_entry->dev = dev; sa_entry->ipsec = ipsec; /* Check if this SA is originated from acquire flow temporary SA */ if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) @@ -724,6 +806,12 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, /* check esn */ if (x->props.flags & XFRM_STATE_ESN) mlx5e_ipsec_update_esn_state(sa_entry); + else + /* According to RFC4303, section "3.3.3. Sequence Number Generation", + * the first packet sent using a given SA will contain a sequence + * number of 1. + */ + sa_entry->esn_state.esn = 1; mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs); @@ -768,9 +856,12 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, MLX5_IPSEC_RESCHED); if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET && - x->props.mode == XFRM_MODE_TUNNEL) - xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id, - MLX5E_IPSEC_TUNNEL_SA); + x->props.mode == XFRM_MODE_TUNNEL) { + xa_lock_bh(&ipsec->sadb); + __xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id, + MLX5E_IPSEC_TUNNEL_SA); + xa_unlock_bh(&ipsec->sadb); + } out: x->xso.offload_handle = (unsigned long)sa_entry; @@ -794,10 +885,9 @@ err_xfrm: return err; } -static void mlx5e_xfrm_del_state(struct xfrm_state *x) +static void mlx5e_xfrm_del_state(struct net_device *dev, struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); - struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; struct mlx5e_ipsec *ipsec = sa_entry->ipsec; struct mlx5e_ipsec_sa_entry *old; @@ -806,15 +896,9 @@ static void mlx5e_xfrm_del_state(struct xfrm_state *x) old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id); WARN_ON(old != sa_entry); - - if (attrs->mode == XFRM_MODE_TUNNEL && - attrs->type == XFRM_DEV_OFFLOAD_PACKET) - /* Make sure that no ARP requests are running in parallel */ - flush_workqueue(ipsec->wq); - } -static void mlx5e_xfrm_free_state(struct xfrm_state *x) +static void mlx5e_xfrm_free_state(struct net_device *dev, struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); struct mlx5e_ipsec *ipsec = sa_entry->ipsec; @@ -847,8 +931,6 @@ static int mlx5e_ipsec_netevent_event(struct notifier_block *nb, struct mlx5e_ipsec_sa_entry *sa_entry; struct mlx5e_ipsec *ipsec; struct neighbour *n = ptr; - struct net_device *netdev; - struct xfrm_state *x; unsigned long idx; if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID)) @@ -858,21 +940,19 @@ static int mlx5e_ipsec_netevent_event(struct notifier_block *nb, xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) { attrs = &sa_entry->attrs; - if (attrs->family == AF_INET) { - if (!neigh_key_eq32(n, &attrs->saddr.a4) && - !neigh_key_eq32(n, &attrs->daddr.a4)) + if (attrs->addrs.family == AF_INET) { + if (!neigh_key_eq32(n, &attrs->addrs.saddr.a4) && + !neigh_key_eq32(n, &attrs->addrs.daddr.a4)) continue; } else { - if (!neigh_key_eq128(n, &attrs->saddr.a4) && - !neigh_key_eq128(n, &attrs->daddr.a4)) + if (!neigh_key_eq128(n, &attrs->addrs.saddr.a4) && + !neigh_key_eq128(n, &attrs->addrs.daddr.a4)) continue; } - x = sa_entry->x; - netdev = x->xso.real_dev; data = sa_entry->work->data; - neigh_ha_snapshot(data->addr, n, netdev); + neigh_ha_snapshot(data->addr, n, sa_entry->dev); queue_work(ipsec->wq, &sa_entry->work->work); } @@ -958,21 +1038,6 @@ void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv) priv->ipsec = NULL; } -static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) -{ - if (x->props.family == AF_INET) { - /* Offload with IPv4 options is not supported yet */ - if (ip_hdr(skb)->ihl > 5) - return false; - } else { - /* Offload with IPv6 extension headers is not support yet */ - if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) - return false; - } - - return true; -} - static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); @@ -1003,8 +1068,8 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x) size_t headers; lockdep_assert(lockdep_is_held(&x->lock) || - lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex) || - lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_state_lock)); + lockdep_is_held(&net->xfrm.xfrm_cfg_mutex) || + lockdep_is_held(&net->xfrm.xfrm_state_lock)); if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) return; @@ -1040,7 +1105,7 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x) * by removing always available headers. */ headers = sizeof(struct ethhdr); - if (sa_entry->attrs.family == AF_INET) + if (sa_entry->attrs.addrs.family == AF_INET) headers += sizeof(struct iphdr); else headers += sizeof(struct ipv6hdr); @@ -1049,6 +1114,43 @@ static void mlx5e_xfrm_update_stats(struct xfrm_state *x) x->curlft.bytes += success_bytes - headers * success_packets; } +static __be32 word_to_mask(int prefix) +{ + if (prefix < 0) + return 0; + + if (!prefix || prefix > 31) + return cpu_to_be32(0xFFFFFFFF); + + return cpu_to_be32(((1U << prefix) - 1) << (32 - prefix)); +} + +static void mlx5e_ipsec_policy_mask(struct mlx5e_ipsec_addr *addrs, + struct xfrm_selector *sel) +{ + int i; + + if (addrs->family == AF_INET) { + addrs->smask.m4 = word_to_mask(sel->prefixlen_s); + addrs->saddr.a4 &= addrs->smask.m4; + addrs->dmask.m4 = word_to_mask(sel->prefixlen_d); + addrs->daddr.a4 &= addrs->dmask.m4; + return; + } + + for (i = 0; i < 4; i++) { + if (sel->prefixlen_s != 32 * i) + addrs->smask.m6[i] = + word_to_mask(sel->prefixlen_s - 32 * i); + addrs->saddr.a6[i] &= addrs->smask.m6[i]; + + if (sel->prefixlen_d != 32 * i) + addrs->dmask.m6[i] = + word_to_mask(sel->prefixlen_d - 32 * i); + addrs->daddr.a6[i] &= addrs->dmask.m6[i]; + } +} + static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev, struct xfrm_policy *x, struct netlink_ext_ack *extack) @@ -1121,9 +1223,10 @@ mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry, sel = &x->selector; memset(attrs, 0, sizeof(*attrs)); - memcpy(&attrs->saddr, sel->saddr.a6, sizeof(attrs->saddr)); - memcpy(&attrs->daddr, sel->daddr.a6, sizeof(attrs->daddr)); - attrs->family = sel->family; + memcpy(&attrs->addrs.saddr, sel->saddr.a6, sizeof(attrs->addrs.saddr)); + memcpy(&attrs->addrs.daddr, sel->daddr.a6, sizeof(attrs->addrs.daddr)); + attrs->addrs.family = sel->family; + mlx5e_ipsec_policy_mask(&attrs->addrs, sel); attrs->dir = x->xdo.dir; attrs->action = x->action; attrs->type = XFRM_DEV_OFFLOAD_PACKET; @@ -1139,7 +1242,7 @@ mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry, static int mlx5e_xfrm_add_policy(struct xfrm_policy *x, struct netlink_ext_ack *extack) { - struct net_device *netdev = x->xdo.real_dev; + struct net_device *netdev = x->xdo.dev; struct mlx5e_ipsec_pol_entry *pol_entry; struct mlx5e_priv *priv; int err; @@ -1201,7 +1304,6 @@ static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = { .xdo_dev_state_add = mlx5e_xfrm_add_state, .xdo_dev_state_delete = mlx5e_xfrm_del_state, .xdo_dev_state_free = mlx5e_xfrm_free_state, - .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok, .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state, .xdo_dev_state_update_stats = mlx5e_xfrm_update_stats, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 7d943e93cf6d..ffcd0cdeb775 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -76,27 +76,36 @@ struct mlx5_replay_esn { u8 trigger : 1; }; -struct mlx5_accel_esp_xfrm_attrs { - u32 spi; - u32 mode; - struct aes_gcm_keymat aes_gcm; - +struct mlx5e_ipsec_addr { union { __be32 a4; __be32 a6[4]; } saddr; - + union { + __be32 m4; + __be32 m6[4]; + } smask; union { __be32 a4; __be32 a6[4]; } daddr; + union { + __be32 m4; + __be32 m6[4]; + } dmask; + u8 family; +}; +struct mlx5_accel_esp_xfrm_attrs { + u32 spi; + u32 mode; + struct aes_gcm_keymat aes_gcm; + struct mlx5e_ipsec_addr addrs; struct upspec upspec; u8 dir : 2; u8 type : 2; u8 drop : 1; u8 encap : 1; - u8 family; struct mlx5_replay_esn replay_esn; u32 authsize; u32 reqid; @@ -128,6 +137,7 @@ struct mlx5e_ipsec_hw_stats { u64 ipsec_rx_bytes; u64 ipsec_rx_drop_pkts; u64 ipsec_rx_drop_bytes; + u64 ipsec_rx_drop_mismatch_sa_sel; u64 ipsec_tx_pkts; u64 ipsec_tx_bytes; u64 ipsec_tx_drop_pkts; @@ -184,6 +194,7 @@ struct mlx5e_ipsec_ft { struct mutex mutex; /* Protect changes to this struct */ struct mlx5_flow_table *pol; struct mlx5_flow_table *sa; + struct mlx5_flow_table *sa_sel; struct mlx5_flow_table *status; u32 refcnt; }; @@ -195,6 +206,8 @@ struct mlx5e_ipsec_drop { struct mlx5e_ipsec_rule { struct mlx5_flow_handle *rule; + struct mlx5_flow_handle *status_pass; + struct mlx5_flow_handle *sa_sel; struct mlx5_modify_hdr *modify_hdr; struct mlx5_pkt_reformat *pkt_reformat; struct mlx5_fc *fc; @@ -206,6 +219,7 @@ struct mlx5e_ipsec_rule { struct mlx5e_ipsec_miss { struct mlx5_flow_group *group; struct mlx5_flow_handle *rule; + struct mlx5_fc *fc; }; struct mlx5e_ipsec_tx_create_attr { @@ -260,6 +274,7 @@ struct mlx5e_ipsec_limits { struct mlx5e_ipsec_sa_entry { struct mlx5e_ipsec_esn_state esn_state; struct xfrm_state *x; + struct net_device *dev; struct mlx5e_ipsec *ipsec; struct mlx5_accel_esp_xfrm_attrs attrs; void (*set_iv_op)(struct sk_buff *skb, struct xfrm_state *x, @@ -274,18 +289,8 @@ struct mlx5e_ipsec_sa_entry { }; struct mlx5_accel_pol_xfrm_attrs { - union { - __be32 a4; - __be32 a6[4]; - } saddr; - - union { - __be32 a4; - __be32 a6[4]; - } daddr; - + struct mlx5e_ipsec_addr addrs; struct upspec upspec; - u8 family; u8 action; u8 type : 2; u8 dir : 2; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index e51b03d4c717..98b6a3a623f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -16,6 +16,16 @@ #define MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_SIZE 16 #define IPSEC_TUNNEL_DEFAULT_TTL 0x40 +#define MLX5_IPSEC_FS_SA_SELECTOR_MAX_NUM_GROUPS 16 + +enum { + MLX5_IPSEC_ASO_OK, + MLX5_IPSEC_ASO_BAD_REPLY, + + /* For crypto offload, set by driver */ + MLX5_IPSEC_ASO_SW_CRYPTO_OFFLOAD = 0xAA, +}; + struct mlx5e_ipsec_fc { struct mlx5_fc *cnt; struct mlx5_fc *drop; @@ -33,6 +43,9 @@ struct mlx5e_ipsec_tx { }; struct mlx5e_ipsec_status_checks { + struct mlx5_flow_group *pass_group; + struct mlx5_flow_handle *packet_offload_pass_rule; + struct mlx5_flow_handle *crypto_offload_pass_rule; struct mlx5_flow_group *drop_all_group; struct mlx5e_ipsec_drop all; }; @@ -41,10 +54,12 @@ struct mlx5e_ipsec_rx { struct mlx5e_ipsec_ft ft; struct mlx5e_ipsec_miss pol; struct mlx5e_ipsec_miss sa; - struct mlx5e_ipsec_rule status; - struct mlx5e_ipsec_status_checks status_drops; + struct mlx5e_ipsec_miss sa_sel; + struct mlx5e_ipsec_status_checks status_checks; struct mlx5e_ipsec_fc *fc; struct mlx5_fs_chains *chains; + struct mlx5_flow_table *pol_miss_ft; + struct mlx5_flow_handle *pol_miss_rule; u8 allow_tunnel_mode : 1; }; @@ -130,11 +145,12 @@ static void ipsec_chains_put_table(struct mlx5_fs_chains *chains, u32 prio) static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns, int level, int prio, + int num_reserved_entries, int max_num_groups, u32 flags) { struct mlx5_flow_table_attr ft_attr = {}; - ft_attr.autogroup.num_reserved_entries = 1; + ft_attr.autogroup.num_reserved_entries = num_reserved_entries; ft_attr.autogroup.max_num_groups = max_num_groups; ft_attr.max_fte = NUM_IPSEC_FTE; ft_attr.level = level; @@ -147,22 +163,35 @@ static struct mlx5_flow_table *ipsec_ft_create(struct mlx5_flow_namespace *ns, static void ipsec_rx_status_drop_destroy(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx) { - mlx5_del_flow_rules(rx->status_drops.all.rule); - mlx5_fc_destroy(ipsec->mdev, rx->status_drops.all.fc); - mlx5_destroy_flow_group(rx->status_drops.drop_all_group); + mlx5_del_flow_rules(rx->status_checks.all.rule); + mlx5_fc_destroy(ipsec->mdev, rx->status_checks.all.fc); + mlx5_destroy_flow_group(rx->status_checks.drop_all_group); } static void ipsec_rx_status_pass_destroy(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx) { - mlx5_del_flow_rules(rx->status.rule); + mlx5_del_flow_rules(rx->status_checks.packet_offload_pass_rule); + mlx5_del_flow_rules(rx->status_checks.crypto_offload_pass_rule); +} - if (rx != ipsec->rx_esw) - return; +static void ipsec_rx_rule_add_match_obj(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_spec *spec) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; -#ifdef CONFIG_MLX5_ESWITCH - mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), 0, 1, 0); -#endif + if (rx == ipsec->rx_esw) { + mlx5_esw_ipsec_rx_rule_add_match_obj(sa_entry, spec); + } else { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_2); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.metadata_reg_c_2, + sa_entry->ipsec_obj_id | BIT(31)); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + } } static int rx_add_rule_drop_auth_trailer(struct mlx5e_ipsec_sa_entry *sa_entry, @@ -194,17 +223,14 @@ static int rx_add_rule_drop_auth_trailer(struct mlx5e_ipsec_sa_entry *sa_entry, flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; flow_act.flags = FLOW_ACT_NO_APPEND; dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(flow_counter); + dest.counter = flow_counter; if (rx == ipsec->rx_esw) spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.ipsec_syndrome); MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.ipsec_syndrome, 1); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_2); - MLX5_SET(fte_match_param, spec->match_value, - misc_parameters_2.metadata_reg_c_2, - sa_entry->ipsec_obj_id | BIT(31)); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + ipsec_rx_rule_add_match_obj(sa_entry, rx, spec); rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -223,7 +249,7 @@ static int rx_add_rule_drop_auth_trailer(struct mlx5e_ipsec_sa_entry *sa_entry, } sa_entry->ipsec_rule.trailer.fc = flow_counter; - dest.counter_id = mlx5_fc_id(flow_counter); + dest.counter = flow_counter; MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.ipsec_syndrome, 2); rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { @@ -275,16 +301,14 @@ static int rx_add_rule_drop_replay(struct mlx5e_ipsec_sa_entry *sa_entry, struct flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; flow_act.flags = FLOW_ACT_NO_APPEND; dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(flow_counter); + dest.counter = flow_counter; if (rx == ipsec->rx_esw) spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_4); MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_4, 1); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_c_2); - MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_c_2, - sa_entry->ipsec_obj_id | BIT(31)); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + ipsec_rx_rule_add_match_obj(sa_entry, rx, spec); rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -348,7 +372,7 @@ static int ipsec_rx_status_drop_all_create(struct mlx5e_ipsec *ipsec, flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(flow_counter); + dest.counter = flow_counter; if (rx == ipsec->rx_esw) spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); @@ -359,9 +383,9 @@ static int ipsec_rx_status_drop_all_create(struct mlx5e_ipsec *ipsec, goto err_rule; } - rx->status_drops.drop_all_group = g; - rx->status_drops.all.rule = rule; - rx->status_drops.all.fc = flow_counter; + rx->status_checks.drop_all_group = g; + rx->status_checks.all.rule = rule; + rx->status_checks.all.fc = flow_counter; kvfree(flow_group_in); kvfree(spec); @@ -377,9 +401,52 @@ err_out: return err; } -static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, - struct mlx5_flow_destination *dest) +static int ipsec_rx_status_pass_group_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table *ft = rx->ft.status; + struct mlx5_flow_group *fg; + void *match_criteria; + u32 *flow_group_in; + int err = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters_2.ipsec_syndrome); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_4); + + MLX5_SET(create_flow_group_in, flow_group_in, + start_flow_index, ft->max_fte - 3); + MLX5_SET(create_flow_group_in, flow_group_in, + end_flow_index, ft->max_fte - 2); + + fg = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(fg)) { + err = PTR_ERR(fg); + mlx5_core_warn(ipsec->mdev, + "Failed to create rx status pass flow group, err=%d\n", + err); + } + rx->status_checks.pass_group = fg; + + kvfree(flow_group_in); + return err; +} + +static struct mlx5_flow_handle * +ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest, + u8 aso_ok) { struct mlx5_flow_act flow_act = {}; struct mlx5_flow_handle *rule; @@ -388,7 +455,7 @@ static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) - return -ENOMEM; + return ERR_PTR(-ENOMEM); MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, misc_parameters_2.ipsec_syndrome); @@ -397,11 +464,11 @@ static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.ipsec_syndrome, 0); MLX5_SET(fte_match_param, spec->match_value, - misc_parameters_2.metadata_reg_c_4, 0); + misc_parameters_2.metadata_reg_c_4, aso_ok); if (rx == ipsec->rx_esw) spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; - flow_act.flags = FLOW_ACT_NO_APPEND; + flow_act.flags = FLOW_ACT_NO_APPEND | FLOW_ACT_IGNORE_FLOW_LEVEL; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); @@ -412,19 +479,19 @@ static int ipsec_rx_status_pass_create(struct mlx5e_ipsec *ipsec, goto err_rule; } - rx->status.rule = rule; kvfree(spec); - return 0; + return rule; err_rule: kvfree(spec); - return err; + return ERR_PTR(err); } static void mlx5_ipsec_rx_status_destroy(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx) { ipsec_rx_status_pass_destroy(ipsec, rx); + mlx5_destroy_flow_group(rx->status_checks.pass_group); ipsec_rx_status_drop_destroy(ipsec, rx); } @@ -432,19 +499,44 @@ static int mlx5_ipsec_rx_status_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, struct mlx5_flow_destination *dest) { + struct mlx5_flow_destination pol_dest[2]; + struct mlx5_flow_handle *rule; int err; err = ipsec_rx_status_drop_all_create(ipsec, rx); if (err) return err; - err = ipsec_rx_status_pass_create(ipsec, rx, dest); + err = ipsec_rx_status_pass_group_create(ipsec, rx); if (err) - goto err_pass_create; + goto err_pass_group_create; + + rule = ipsec_rx_status_pass_create(ipsec, rx, dest, + MLX5_IPSEC_ASO_SW_CRYPTO_OFFLOAD); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto err_crypto_offload_pass_create; + } + rx->status_checks.crypto_offload_pass_rule = rule; + + pol_dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + pol_dest[0].ft = rx->ft.pol; + pol_dest[1] = dest[1]; + rule = ipsec_rx_status_pass_create(ipsec, rx, pol_dest, + MLX5_IPSEC_ASO_OK); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto err_packet_offload_pass_create; + } + rx->status_checks.packet_offload_pass_rule = rule; return 0; -err_pass_create: +err_packet_offload_pass_create: + mlx5_del_flow_rules(rx->status_checks.crypto_offload_pass_rule); +err_crypto_offload_pass_create: + mlx5_destroy_flow_group(rx->status_checks.pass_group); +err_pass_group_create: ipsec_rx_status_drop_destroy(ipsec, rx); return err; } @@ -493,6 +585,15 @@ out: return err; } +static void ipsec_rx_update_default_dest(struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *old_dest, + struct mlx5_flow_destination *new_dest) +{ + mlx5_modify_rule_destination(rx->pol_miss_rule, new_dest, old_dest); + mlx5_modify_rule_destination(rx->status_checks.crypto_offload_pass_rule, + new_dest, old_dest); +} + static void handle_ipsec_rx_bringup(struct mlx5e_ipsec *ipsec, u32 family) { struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family, XFRM_DEV_OFFLOAD_PACKET); @@ -507,8 +608,7 @@ static void handle_ipsec_rx_bringup(struct mlx5e_ipsec *ipsec, u32 family) new_dest.ft = mlx5_ipsec_fs_roce_ft_get(ipsec->roce, family); new_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - mlx5_modify_rule_destination(rx->status.rule, &new_dest, &old_dest); - mlx5_modify_rule_destination(rx->sa.rule, &new_dest, &old_dest); + ipsec_rx_update_default_dest(rx, &old_dest, &new_dest); } static void handle_ipsec_rx_cleanup(struct mlx5e_ipsec *ipsec, u32 family) @@ -520,8 +620,7 @@ static void handle_ipsec_rx_cleanup(struct mlx5e_ipsec *ipsec, u32 family) old_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; new_dest = mlx5_ttc_get_default_dest(mlx5e_fs_get_ttc(ipsec->fs, false), family2tt(family)); - mlx5_modify_rule_destination(rx->sa.rule, &new_dest, &old_dest); - mlx5_modify_rule_destination(rx->status.rule, &new_dest, &old_dest); + ipsec_rx_update_default_dest(rx, &old_dest, &new_dest); mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family, ipsec->mdev); } @@ -577,13 +676,8 @@ static void ipsec_rx_ft_disconnect(struct mlx5e_ipsec *ipsec, u32 family) mlx5_ttc_fwd_default_dest(ttc, family2tt(family)); } -static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, - struct mlx5e_ipsec_rx *rx, u32 family) +static void ipsec_rx_policy_destroy(struct mlx5e_ipsec_rx *rx) { - /* disconnect */ - if (rx != ipsec->rx_esw) - ipsec_rx_ft_disconnect(ipsec, family); - if (rx->chains) { ipsec_chains_destroy(rx->chains); } else { @@ -592,6 +686,29 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, mlx5_destroy_flow_table(rx->ft.pol); } + if (rx->pol_miss_rule) { + mlx5_del_flow_rules(rx->pol_miss_rule); + mlx5_destroy_flow_table(rx->pol_miss_ft); + } +} + +static void ipsec_rx_sa_selector_destroy(struct mlx5_core_dev *mdev, + struct mlx5e_ipsec_rx *rx) +{ + mlx5_del_flow_rules(rx->sa_sel.rule); + mlx5_fc_destroy(mdev, rx->sa_sel.fc); + rx->sa_sel.fc = NULL; + mlx5_destroy_flow_group(rx->sa_sel.group); + mlx5_destroy_flow_table(rx->ft.sa_sel); +} + +static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, u32 family) +{ + /* disconnect */ + if (rx != ipsec->rx_esw) + ipsec_rx_ft_disconnect(ipsec, family); + mlx5_del_flow_rules(rx->sa.rule); mlx5_destroy_flow_group(rx->sa.group); mlx5_destroy_flow_table(rx->ft.sa); @@ -600,7 +717,17 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, mlx5_ipsec_rx_status_destroy(ipsec, rx); mlx5_destroy_flow_table(rx->ft.status); + ipsec_rx_sa_selector_destroy(mdev, rx); + + ipsec_rx_policy_destroy(rx); + mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family, mdev); + +#ifdef CONFIG_MLX5_ESWITCH + if (rx == ipsec->rx_esw) + mlx5_chains_put_table(esw_chains(ipsec->mdev->priv.eswitch), + 0, 1, 0); +#endif } static void ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, @@ -652,6 +779,28 @@ static int ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec, return 0; } +static void ipsec_rx_sa_miss_dest_get(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5e_ipsec_rx_create_attr *attr, + struct mlx5_flow_destination *dest, + struct mlx5_flow_destination *miss_dest) +{ + if (rx == ipsec->rx_esw) + *miss_dest = *dest; + else + *miss_dest = + mlx5_ttc_get_default_dest(attr->ttc, + family2tt(attr->family)); +} + +static void ipsec_rx_default_dest_get(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5_flow_destination *dest) +{ + dest->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest->ft = rx->pol_miss_ft; +} + static void ipsec_rx_ft_connect(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, struct mlx5e_ipsec_rx_create_attr *attr) @@ -659,15 +808,219 @@ static void ipsec_rx_ft_connect(struct mlx5e_ipsec *ipsec, struct mlx5_flow_destination dest = {}; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = rx->ft.pol; + dest.ft = rx->ft.sa; mlx5_ttc_fwd_dest(attr->ttc, family2tt(attr->family), &dest); } +static int ipsec_rx_chains_create_miss(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5e_ipsec_rx_create_attr *attr, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_table_attr ft_attr = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_table *ft; + int err; + + if (rx == ipsec->rx_esw) { + /* No need to create miss table for switchdev mode, + * just set it to the root chain table. + */ + rx->pol_miss_ft = dest->ft; + return 0; + } + + ft_attr.max_fte = 1; + ft_attr.autogroup.max_num_groups = 1; + ft_attr.level = attr->pol_level; + ft_attr.prio = attr->prio; + + ft = mlx5_create_auto_grouped_flow_table(attr->ns, &ft_attr); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + rule = mlx5_add_flow_rules(ft, NULL, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + goto err_rule; + } + + rx->pol_miss_ft = ft; + rx->pol_miss_rule = rule; + + return 0; + +err_rule: + mlx5_destroy_flow_table(ft); + return err; +} + +static int ipsec_rx_policy_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5e_ipsec_rx_create_attr *attr, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_destination default_dest; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_table *ft; + int err; + + err = ipsec_rx_chains_create_miss(ipsec, rx, attr, dest); + if (err) + return err; + + ipsec_rx_default_dest_get(ipsec, rx, &default_dest); + + if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO) { + rx->chains = ipsec_chains_create(mdev, + default_dest.ft, + attr->chains_ns, + attr->prio, + attr->sa_level, + &rx->ft.pol); + if (IS_ERR(rx->chains)) + err = PTR_ERR(rx->chains); + } else { + ft = ipsec_ft_create(attr->ns, attr->pol_level, + attr->prio, 1, 2, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_out; + } + rx->ft.pol = ft; + + err = ipsec_miss_create(mdev, rx->ft.pol, &rx->pol, + &default_dest); + if (err) + mlx5_destroy_flow_table(rx->ft.pol); + } + + if (!err) + return 0; + +err_out: + if (rx->pol_miss_rule) { + mlx5_del_flow_rules(rx->pol_miss_rule); + mlx5_destroy_flow_table(rx->pol_miss_ft); + } + return err; +} + +static int ipsec_rx_sa_selector_create(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, + struct mlx5e_ipsec_rx_create_attr *attr) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_destination dest; + struct mlx5_flow_handle *rule; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + u32 *flow_group_in; + struct mlx5_fc *fc; + int err; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + ft = ipsec_ft_create(attr->ns, attr->status_level, attr->prio, 1, + MLX5_IPSEC_FS_SA_SELECTOR_MAX_NUM_GROUPS, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Failed to create RX SA selector flow table, err=%d\n", + err); + goto err_ft; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, + ft->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ft->max_fte - 1); + fg = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(fg)) { + err = PTR_ERR(fg); + mlx5_core_err(mdev, "Failed to create RX SA selector miss group, err=%d\n", + err); + goto err_fg; + } + + fc = mlx5_fc_create(mdev, false); + if (IS_ERR(fc)) { + err = PTR_ERR(fc); + mlx5_core_err(mdev, + "Failed to create ipsec RX SA selector miss rule counter, err=%d\n", + err); + goto err_cnt; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter = fc; + flow_act.action = + MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_DROP; + + rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Failed to create RX SA selector miss drop rule, err=%d\n", + err); + goto err_rule; + } + + rx->ft.sa_sel = ft; + rx->sa_sel.group = fg; + rx->sa_sel.fc = fc; + rx->sa_sel.rule = rule; + + kvfree(flow_group_in); + + return 0; + +err_rule: + mlx5_fc_destroy(mdev, fc); +err_cnt: + mlx5_destroy_flow_group(fg); +err_fg: + mlx5_destroy_flow_table(ft); +err_ft: + kvfree(flow_group_in); + return err; +} + +/* The decryption processing is as follows: + * + * +----------+ +-------------+ + * | | | | + * | Kernel <--------------+----------+ policy miss <------------+ + * | | ^ | | ^ + * +----^-----+ | +-------------+ | + * | crypto | + * miss offload ok allow/default + * ^ ^ ^ + * | | packet | + * +----+---------+ +----+-------------+ offload ok +------+---+ + * | | | | (no UPSPEC) | | + * | SA (decrypt) +-----> status +--->------->----+ policy | + * | | | | | | + * +--------------+ ++---------+-------+ +-^----+---+ + * | | | | + * v packet +-->->---+ v + * | offload ok match | + * fails (with UPSPEC) | block + * | | +-------------+-+ | + * v v | | miss v + * drop +---> SA sel +--------->drop + * | | + * +---------------+ + */ + static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx *rx, u32 family) { + struct mlx5_flow_destination dest[2], miss_dest; struct mlx5e_ipsec_rx_create_attr attr; - struct mlx5_flow_destination dest[2]; struct mlx5_flow_table *ft; u32 flags = 0; int err; @@ -678,80 +1031,61 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, if (err) return err; - ft = ipsec_ft_create(attr.ns, attr.status_level, attr.prio, 3, 0); + ft = ipsec_ft_create(attr.ns, attr.status_level, attr.prio, 3, 4, 0); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_fs_ft_status; } rx->ft.status = ft; - dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[1].counter_id = mlx5_fc_id(rx->fc->cnt); - err = mlx5_ipsec_rx_status_create(ipsec, rx, dest); + err = ipsec_rx_sa_selector_create(ipsec, rx, &attr); if (err) - goto err_add; + goto err_fs_ft_sa_sel; /* Create FT */ if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL) rx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev); if (rx->allow_tunnel_mode) flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - ft = ipsec_ft_create(attr.ns, attr.sa_level, attr.prio, 2, flags); + ft = ipsec_ft_create(attr.ns, attr.sa_level, attr.prio, 1, 2, flags); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_fs_ft; } rx->ft.sa = ft; - err = ipsec_miss_create(mdev, rx->ft.sa, &rx->sa, dest); + ipsec_rx_sa_miss_dest_get(ipsec, rx, &attr, &dest[0], &miss_dest); + err = ipsec_miss_create(mdev, rx->ft.sa, &rx->sa, &miss_dest); if (err) goto err_fs; - if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO) { - rx->chains = ipsec_chains_create(mdev, rx->ft.sa, - attr.chains_ns, - attr.prio, - attr.pol_level, - &rx->ft.pol); - if (IS_ERR(rx->chains)) { - err = PTR_ERR(rx->chains); - goto err_pol_ft; - } - - goto connect; - } + err = ipsec_rx_policy_create(ipsec, rx, &attr, &dest[0]); + if (err) + goto err_policy; - ft = ipsec_ft_create(attr.ns, attr.pol_level, attr.prio, 2, 0); - if (IS_ERR(ft)) { - err = PTR_ERR(ft); - goto err_pol_ft; - } - rx->ft.pol = ft; - memset(dest, 0x00, 2 * sizeof(*dest)); - dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[0].ft = rx->ft.sa; - err = ipsec_miss_create(mdev, rx->ft.pol, &rx->pol, dest); + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter = rx->fc->cnt; + err = mlx5_ipsec_rx_status_create(ipsec, rx, dest); if (err) - goto err_pol_miss; + goto err_add; -connect: /* connect */ if (rx != ipsec->rx_esw) ipsec_rx_ft_connect(ipsec, rx, &attr); return 0; -err_pol_miss: - mlx5_destroy_flow_table(rx->ft.pol); -err_pol_ft: +err_add: + ipsec_rx_policy_destroy(rx); +err_policy: mlx5_del_flow_rules(rx->sa.rule); mlx5_destroy_flow_group(rx->sa.group); err_fs: mlx5_destroy_flow_table(rx->ft.sa); -err_fs_ft: if (rx->allow_tunnel_mode) mlx5_eswitch_unblock_encap(mdev); - mlx5_ipsec_rx_status_destroy(ipsec, rx); -err_add: +err_fs_ft: + ipsec_rx_sa_selector_destroy(mdev, rx); +err_fs_ft_sa_sel: mlx5_destroy_flow_table(rx->ft.status); err_fs_ft_status: mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family, mdev); @@ -873,7 +1207,7 @@ static int ipsec_counter_rule_tx(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | MLX5_FLOW_CONTEXT_ACTION_COUNT; dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(tx->fc->cnt); + dest.counter = tx->fc->cnt; fte = mlx5_add_flow_rules(tx->ft.status, spec, &flow_act, &dest, 1); if (IS_ERR(fte)) { err = PTR_ERR(fte); @@ -941,7 +1275,7 @@ static int tx_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx, int err; ipsec_tx_create_attr_set(ipsec, tx, &attr); - ft = ipsec_ft_create(tx->ns, attr.cnt_level, attr.prio, 1, 0); + ft = ipsec_ft_create(tx->ns, attr.cnt_level, attr.prio, 1, 1, 0); if (IS_ERR(ft)) return PTR_ERR(ft); tx->ft.status = ft; @@ -954,7 +1288,7 @@ static int tx_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx, tx->allow_tunnel_mode = mlx5_eswitch_block_encap(mdev); if (tx->allow_tunnel_mode) flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - ft = ipsec_ft_create(tx->ns, attr.sa_level, attr.prio, 4, flags); + ft = ipsec_ft_create(tx->ns, attr.sa_level, attr.prio, 1, 4, flags); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_sa_ft; @@ -982,7 +1316,7 @@ static int tx_create(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx *tx, goto connect_roce; } - ft = ipsec_ft_create(tx->ns, attr.pol_level, attr.prio, 2, 0); + ft = ipsec_ft_create(tx->ns, attr.pol_level, attr.prio, 1, 2, 0); if (IS_ERR(ft)) { err = PTR_ERR(ft); goto err_pol_ft; @@ -1150,9 +1484,14 @@ static void tx_ft_put_policy(struct mlx5e_ipsec *ipsec, u32 prio, int type) mutex_unlock(&tx->ft.mutex); } -static void setup_fte_addr4(struct mlx5_flow_spec *spec, __be32 *saddr, - __be32 *daddr) +static void setup_fte_addr4(struct mlx5_flow_spec *spec, + struct mlx5e_ipsec_addr *addrs) { + __be32 *saddr = &addrs->saddr.a4; + __be32 *smask = &addrs->smask.m4; + __be32 *daddr = &addrs->daddr.a4; + __be32 *dmask = &addrs->dmask.m4; + if (!*saddr && !*daddr) return; @@ -1164,21 +1503,26 @@ static void setup_fte_addr4(struct mlx5_flow_spec *spec, __be32 *saddr, if (*saddr) { memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), saddr, 4); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, - outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), smask, 4); } if (*daddr) { memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), daddr, 4); - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, - outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), dmask, 4); } } -static void setup_fte_addr6(struct mlx5_flow_spec *spec, __be32 *saddr, - __be32 *daddr) +static void setup_fte_addr6(struct mlx5_flow_spec *spec, + struct mlx5e_ipsec_addr *addrs) { + __be32 *saddr = addrs->saddr.a6; + __be32 *smask = addrs->smask.m6; + __be32 *daddr = addrs->daddr.a6; + __be32 *dmask = addrs->dmask.m6; + if (addr6_all_zero(saddr) && addr6_all_zero(daddr)) return; @@ -1190,15 +1534,15 @@ static void setup_fte_addr6(struct mlx5_flow_spec *spec, __be32 *saddr, if (!addr6_all_zero(saddr)) { memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), saddr, 16); - memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), 0xff, 16); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), dmask, 16); } if (!addr6_all_zero(daddr)) { memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), daddr, 16); - memset(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 0xff, 16); + memcpy(MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), smask, 16); } } @@ -1340,7 +1684,8 @@ static int setup_modify_header(struct mlx5e_ipsec *ipsec, int type, u32 val, u8 MLX5_ACTION_TYPE_SET); MLX5_SET(set_action_in, action[2], field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_4); - MLX5_SET(set_action_in, action[2], data, 0); + MLX5_SET(set_action_in, action[2], data, + MLX5_IPSEC_ASO_SW_CRYPTO_OFFLOAD); MLX5_SET(set_action_in, action[2], offset, 0); MLX5_SET(set_action_in, action[2], length, 32); } @@ -1387,7 +1732,7 @@ setup_pkt_tunnel_reformat(struct mlx5_core_dev *mdev, if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) { bfflen += sizeof(*esp_hdr) + 8; - switch (attrs->family) { + switch (attrs->addrs.family) { case AF_INET: bfflen += sizeof(*iphdr); break; @@ -1404,7 +1749,7 @@ setup_pkt_tunnel_reformat(struct mlx5_core_dev *mdev, return -ENOMEM; eth_hdr = (struct ethhdr *)reformatbf; - switch (attrs->family) { + switch (attrs->addrs.family) { case AF_INET: eth_hdr->h_proto = htons(ETH_P_IP); break; @@ -1427,11 +1772,11 @@ setup_pkt_tunnel_reformat(struct mlx5_core_dev *mdev, reformat_params->param_0 = attrs->authsize; hdr = reformatbf + sizeof(*eth_hdr); - switch (attrs->family) { + switch (attrs->addrs.family) { case AF_INET: iphdr = (struct iphdr *)hdr; - memcpy(&iphdr->saddr, &attrs->saddr.a4, 4); - memcpy(&iphdr->daddr, &attrs->daddr.a4, 4); + memcpy(&iphdr->saddr, &attrs->addrs.saddr.a4, 4); + memcpy(&iphdr->daddr, &attrs->addrs.daddr.a4, 4); iphdr->version = 4; iphdr->ihl = 5; iphdr->ttl = IPSEC_TUNNEL_DEFAULT_TTL; @@ -1440,8 +1785,8 @@ setup_pkt_tunnel_reformat(struct mlx5_core_dev *mdev, break; case AF_INET6: ipv6hdr = (struct ipv6hdr *)hdr; - memcpy(&ipv6hdr->saddr, &attrs->saddr.a6, 16); - memcpy(&ipv6hdr->daddr, &attrs->daddr.a6, 16); + memcpy(&ipv6hdr->saddr, &attrs->addrs.saddr.a6, 16); + memcpy(&ipv6hdr->daddr, &attrs->addrs.daddr.a6, 16); ipv6hdr->nexthdr = IPPROTO_ESP; ipv6hdr->version = 6; ipv6hdr->hop_limit = IPSEC_TUNNEL_DEFAULT_TTL; @@ -1475,7 +1820,7 @@ static int get_reformat_type(struct mlx5_accel_esp_xfrm_attrs *attrs) return MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT_OVER_UDP; return MLX5_REFORMAT_TYPE_DEL_ESP_TRANSPORT; case XFRM_DEV_OFFLOAD_OUT: - if (attrs->family == AF_INET) { + if (attrs->addrs.family == AF_INET) { if (attrs->encap) return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_UDPV4; return MLX5_REFORMAT_TYPE_ADD_ESP_TRANSPORT_OVER_IPV4; @@ -1576,6 +1921,85 @@ static int setup_pkt_reformat(struct mlx5e_ipsec *ipsec, return 0; } +static int rx_add_rule_sa_selector(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5e_ipsec_rx *rx, + struct upspec *upspec) +{ + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5_core_dev *mdev = ipsec->mdev; + struct mlx5_flow_destination dest[2]; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_2.ipsec_syndrome); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_4); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.ipsec_syndrome, 0); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.metadata_reg_c_4, 0); + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + + ipsec_rx_rule_add_match_obj(sa_entry, rx, spec); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[0].ft = rx->ft.sa_sel; + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest[1].counter = rx->fc->cnt; + + rule = mlx5_add_flow_rules(rx->ft.status, spec, &flow_act, dest, 2); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add ipsec rx pass rule, err=%d\n", + err); + goto err_add_status_pass_rule; + } + + sa_entry->ipsec_rule.status_pass = rule; + + MLX5_SET(fte_match_param, spec->match_criteria, + misc_parameters_2.ipsec_syndrome, 0); + MLX5_SET(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_4, 0); + + setup_fte_upper_proto_match(spec, upspec); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[0].ft = rx->ft.pol; + + rule = mlx5_add_flow_rules(rx->ft.sa_sel, spec, &flow_act, &dest[0], 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "Failed to add ipsec rx sa selector rule, err=%d\n", + err); + goto err_add_sa_sel_rule; + } + + sa_entry->ipsec_rule.sa_sel = rule; + + kvfree(spec); + return 0; + +err_add_sa_sel_rule: + mlx5_del_flow_rules(sa_entry->ipsec_rule.status_pass); +err_add_status_pass_rule: + kvfree(spec); + return err; +} + static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) { struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; @@ -1589,7 +2013,7 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) struct mlx5_fc *counter; int err = 0; - rx = rx_ft_get(mdev, ipsec, attrs->family, attrs->type); + rx = rx_ft_get(mdev, ipsec, attrs->addrs.family, attrs->type); if (IS_ERR(rx)) return PTR_ERR(rx); @@ -1599,16 +2023,15 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) goto err_alloc; } - if (attrs->family == AF_INET) - setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); + if (attrs->addrs.family == AF_INET) + setup_fte_addr4(spec, &attrs->addrs); else - setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); + setup_fte_addr6(spec, &attrs->addrs); setup_fte_spi(spec, attrs->spi, attrs->encap); if (!attrs->encap) setup_fte_esp(spec); setup_fte_no_frags(spec); - setup_fte_upper_proto_match(spec, &attrs->upspec); if (!attrs->drop) { if (rx != ipsec->rx_esw) @@ -1649,13 +2072,20 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[0].ft = rx->ft.status; dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[1].counter_id = mlx5_fc_id(counter); + dest[1].counter = counter; rule = mlx5_add_flow_rules(rx->ft.sa, spec, &flow_act, dest, 2); if (IS_ERR(rule)) { err = PTR_ERR(rule); mlx5_core_err(mdev, "fail to add RX ipsec rule err=%d\n", err); goto err_add_flow; } + + if (attrs->upspec.proto && attrs->type == XFRM_DEV_OFFLOAD_PACKET) { + err = rx_add_rule_sa_selector(sa_entry, rx, &attrs->upspec); + if (err) + goto err_add_sa_sel; + } + if (attrs->type == XFRM_DEV_OFFLOAD_PACKET) err = rx_add_rule_drop_replay(sa_entry, rx); if (err) @@ -1679,6 +2109,11 @@ err_drop_reason: mlx5_fc_destroy(mdev, sa_entry->ipsec_rule.replay.fc); } err_add_replay: + if (sa_entry->ipsec_rule.sa_sel) { + mlx5_del_flow_rules(sa_entry->ipsec_rule.sa_sel); + mlx5_del_flow_rules(sa_entry->ipsec_rule.status_pass); + } +err_add_sa_sel: mlx5_del_flow_rules(rule); err_add_flow: mlx5_fc_destroy(mdev, counter); @@ -1691,7 +2126,7 @@ err_pkt_reformat: err_mod_header: kvfree(spec); err_alloc: - rx_ft_put(ipsec, attrs->family, attrs->type); + rx_ft_put(ipsec, attrs->addrs.family, attrs->type); return err; } @@ -1718,23 +2153,21 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) goto err_alloc; } - if (attrs->family == AF_INET) - setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); - else - setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); - setup_fte_no_frags(spec); setup_fte_upper_proto_match(spec, &attrs->upspec); switch (attrs->type) { case XFRM_DEV_OFFLOAD_CRYPTO: + if (attrs->addrs.family == AF_INET) + setup_fte_addr4(spec, &attrs->addrs); + else + setup_fte_addr6(spec, &attrs->addrs); setup_fte_spi(spec, attrs->spi, false); setup_fte_esp(spec); setup_fte_reg_a(spec); break; case XFRM_DEV_OFFLOAD_PACKET: - if (attrs->reqid) - setup_fte_reg_c4(spec, attrs->reqid); + setup_fte_reg_c4(spec, attrs->reqid); err = setup_pkt_reformat(ipsec, attrs, &flow_act); if (err) goto err_pkt_reformat; @@ -1762,7 +2195,7 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) dest[0].ft = tx->ft.status; dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[1].counter_id = mlx5_fc_id(counter); + dest[1].counter = counter; rule = mlx5_add_flow_rules(tx->ft.sa, spec, &flow_act, dest, 2); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -1812,10 +2245,10 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) } tx = ipsec_tx(ipsec, attrs->type); - if (attrs->family == AF_INET) - setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); + if (attrs->addrs.family == AF_INET) + setup_fte_addr4(spec, &attrs->addrs); else - setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); + setup_fte_addr6(spec, &attrs->addrs); setup_fte_no_frags(spec); setup_fte_upper_proto_match(spec, &attrs->upspec); @@ -1835,7 +2268,7 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[dstn].counter_id = mlx5_fc_id(tx->fc->drop); + dest[dstn].counter = tx->fc->drop; dstn++; break; default: @@ -1885,12 +2318,12 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) struct mlx5e_ipsec_rx *rx; int err, dstn = 0; - ft = rx_ft_get_policy(mdev, pol_entry->ipsec, attrs->family, attrs->prio, - attrs->type); + ft = rx_ft_get_policy(mdev, pol_entry->ipsec, attrs->addrs.family, + attrs->prio, attrs->type); if (IS_ERR(ft)) return PTR_ERR(ft); - rx = ipsec_rx(pol_entry->ipsec, attrs->family, attrs->type); + rx = ipsec_rx(pol_entry->ipsec, attrs->addrs.family, attrs->type); spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { @@ -1898,10 +2331,10 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) goto err_alloc; } - if (attrs->family == AF_INET) - setup_fte_addr4(spec, &attrs->saddr.a4, &attrs->daddr.a4); + if (attrs->addrs.family == AF_INET) + setup_fte_addr4(spec, &attrs->addrs); else - setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); + setup_fte_addr6(spec, &attrs->addrs); setup_fte_no_frags(spec); setup_fte_upper_proto_match(spec, &attrs->upspec); @@ -1913,7 +2346,7 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) case XFRM_POLICY_BLOCK: flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[dstn].counter_id = mlx5_fc_id(rx->fc->drop); + dest[dstn].counter = rx->fc->drop; dstn++; break; default: @@ -1925,8 +2358,7 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) flow_act.flags |= FLOW_ACT_NO_APPEND; if (rx == ipsec->rx_esw && rx->chains) flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; - dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[dstn].ft = rx->ft.sa; + ipsec_rx_default_dest_get(ipsec, rx, &dest[dstn]); dstn++; rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, dstn); if (IS_ERR(rule)) { @@ -1942,7 +2374,8 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) err_action: kvfree(spec); err_alloc: - rx_ft_put_policy(pol_entry->ipsec, attrs->family, attrs->prio, attrs->type); + rx_ft_put_policy(pol_entry->ipsec, attrs->addrs.family, attrs->prio, + attrs->type); return err; } @@ -2063,6 +2496,7 @@ void mlx5e_accel_ipsec_fs_read_stats(struct mlx5e_priv *priv, void *ipsec_stats) stats->ipsec_rx_bytes = 0; stats->ipsec_rx_drop_pkts = 0; stats->ipsec_rx_drop_bytes = 0; + stats->ipsec_rx_drop_mismatch_sa_sel = 0; stats->ipsec_tx_pkts = 0; stats->ipsec_tx_bytes = 0; stats->ipsec_tx_drop_pkts = 0; @@ -2072,6 +2506,9 @@ void mlx5e_accel_ipsec_fs_read_stats(struct mlx5e_priv *priv, void *ipsec_stats) mlx5_fc_query(mdev, fc->cnt, &stats->ipsec_rx_pkts, &stats->ipsec_rx_bytes); mlx5_fc_query(mdev, fc->drop, &stats->ipsec_rx_drop_pkts, &stats->ipsec_rx_drop_bytes); + if (ipsec->rx_ipv4->sa_sel.fc) + mlx5_fc_query(mdev, ipsec->rx_ipv4->sa_sel.fc, + &stats->ipsec_rx_drop_mismatch_sa_sel, &bytes); fc = ipsec->tx->fc; mlx5_fc_query(mdev, fc->cnt, &stats->ipsec_tx_pkts, &stats->ipsec_tx_bytes); @@ -2100,6 +2537,11 @@ void mlx5e_accel_ipsec_fs_read_stats(struct mlx5e_priv *priv, void *ipsec_stats) stats->ipsec_tx_drop_pkts += packets; stats->ipsec_tx_drop_bytes += bytes; } + + if (ipsec->rx_esw->sa_sel.fc && + !mlx5_fc_query(mdev, ipsec->rx_esw->sa_sel.fc, + &packets, &bytes)) + stats->ipsec_rx_drop_mismatch_sa_sel += packets; } } @@ -2197,12 +2639,18 @@ void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry) mlx5_del_flow_rules(ipsec_rule->auth.rule); mlx5_fc_destroy(mdev, ipsec_rule->auth.fc); + if (ipsec_rule->sa_sel) { + mlx5_del_flow_rules(ipsec_rule->sa_sel); + mlx5_del_flow_rules(ipsec_rule->status_pass); + } + if (ipsec_rule->replay.rule) { mlx5_del_flow_rules(ipsec_rule->replay.rule); mlx5_fc_destroy(mdev, ipsec_rule->replay.fc); } mlx5_esw_ipsec_rx_id_mapping_remove(sa_entry); - rx_ft_put(sa_entry->ipsec, sa_entry->attrs.family, sa_entry->attrs.type); + rx_ft_put(sa_entry->ipsec, sa_entry->attrs.addrs.family, + sa_entry->attrs.type); } int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry) @@ -2238,7 +2686,8 @@ void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry) mlx5e_ipsec_unblock_tc_offload(pol_entry->ipsec->mdev); if (pol_entry->attrs.dir == XFRM_DEV_OFFLOAD_IN) { - rx_ft_put_policy(pol_entry->ipsec, pol_entry->attrs.family, + rx_ft_put_policy(pol_entry->ipsec, + pol_entry->attrs.addrs.family, pol_entry->attrs.prio, pol_entry->attrs.type); return; } @@ -2378,7 +2827,7 @@ bool mlx5e_ipsec_fs_tunnel_enabled(struct mlx5e_ipsec_sa_entry *sa_entry) struct mlx5e_ipsec_rx *rx; struct mlx5e_ipsec_tx *tx; - rx = ipsec_rx(sa_entry->ipsec, attrs->family, attrs->type); + rx = ipsec_rx(sa_entry->ipsec, attrs->addrs.family, attrs->type); tx = ipsec_tx(sa_entry->ipsec, attrs->type); if (sa_entry->attrs.dir == XFRM_DEV_OFFLOAD_OUT) return tx->allow_tunnel_mode; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 53cfa39188cb..ef7322d381af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -42,8 +42,7 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) if (MLX5_CAP_IPSEC(mdev, ipsec_full_offload) && (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_DMFS || - (mdev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS && - is_mdev_legacy_mode(mdev)))) { + is_mdev_legacy_mode(mdev))) { if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, reformat_add_esp_trasport) && MLX5_CAP_FLOWTABLE_NIC_RX(mdev, @@ -91,8 +90,9 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) EXPORT_SYMBOL_GPL(mlx5_ipsec_device_caps); static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, - struct mlx5_accel_esp_xfrm_attrs *attrs) + struct mlx5e_ipsec_sa_entry *sa_entry) { + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; void *aso_ctx; aso_ctx = MLX5_ADDR_OF(ipsec_obj, obj, ipsec_aso); @@ -120,8 +120,12 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, * active. */ MLX5_SET(ipsec_obj, obj, aso_return_reg, MLX5_IPSEC_ASO_REG_C_4_5); - if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) + if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) { MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_INC_SN); + if (!attrs->replay_esn.trigger) + MLX5_SET(ipsec_aso, aso_ctx, mode_parameter, + sa_entry->esn_state.esn); + } if (attrs->lft.hard_packet_limit != XFRM_INF) { MLX5_SET(ipsec_aso, aso_ctx, remove_flow_pkt_cnt, @@ -175,7 +179,7 @@ static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry) res = &mdev->mlx5e_res.hw_objs; if (attrs->type == XFRM_DEV_OFFLOAD_PACKET) - mlx5e_ipsec_packet_setup(obj, res->pdn, attrs); + mlx5e_ipsec_packet_setup(obj, res->pdn, sa_entry); err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (!err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 727fa7c18523..6056106edcc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -327,6 +327,10 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev, if (unlikely(!sa_entry)) { rcu_read_unlock(); atomic64_inc(&ipsec->sw_stats.ipsec_rx_drop_sadb_miss); + /* Clear secpath to prevent invalid dereference + * in downstream XFRM policy checks. + */ + secpath_reset(skb); return; } xfrm_state_hold(sa_entry->x); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c index 92bf3fa44a3b..93be388068f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c @@ -42,6 +42,7 @@ static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_drop_pkts) }, { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_drop_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_rx_drop_mismatch_sa_sel) }, { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_tx_pkts) }, { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_tx_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_ipsec_hw_stats, ipsec_tx_drop_pkts) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 3db31cc10719..08f06984407b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -744,7 +744,7 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn) dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(fsz); - mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); + mlx5e_dma_push_netmem(sq, skb_frag_netmem(frag), dma_addr, fsz); tx_fill_wi(sq, pi, MLX5E_KTLS_DUMP_WQEBBS, fsz, skb_frag_page(frag)); sq->pc += MLX5E_KTLS_DUMP_WQEBBS; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index cc9bcc420032..6ab02f3fc291 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -339,9 +339,13 @@ static int mlx5e_macsec_init_sa_fs(struct macsec_context *ctx, { struct mlx5e_priv *priv = macsec_netdev_priv(ctx->netdev); struct mlx5_macsec_fs *macsec_fs = priv->mdev->macsec_fs; + const struct macsec_tx_sc *tx_sc = &ctx->secy->tx_sc; struct mlx5_macsec_rule_attrs rule_attrs; union mlx5_macsec_rule *macsec_rule; + if (is_tx && tx_sc->encoding_sa != sa->assoc_num) + return 0; + rule_attrs.macsec_obj_id = sa->macsec_obj_id; rule_attrs.sci = sa->sci; rule_attrs.assoc_num = sa->assoc_num; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 8705cffc747f..d166c0d5189e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -362,6 +362,7 @@ static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev, static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) { + u8 buffer_ownership = MLX5_BUF_OWNERSHIP_UNKNOWN; struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; u32 old_cable_len = priv->dcbx.cable_len; @@ -389,7 +390,14 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, if (MLX5_BUFFER_SUPPORTED(mdev)) { pfc_new.pfc_en = (changed & MLX5E_PORT_BUFFER_PFC) ? pfc->pfc_en : curr_pfc_en; - if (priv->dcbx.manual_buffer) + ret = mlx5_query_port_buffer_ownership(mdev, + &buffer_ownership); + if (ret) + netdev_err(dev, + "%s, Failed to get buffer ownership: %d\n", + __func__, ret); + + if (buffer_ownership == MLX5_BUF_OWNERSHIP_SW_OWNED) ret = mlx5e_port_manual_buffer_config(priv, changed, dev->mtu, &pfc_new, NULL, NULL); @@ -982,7 +990,6 @@ static int mlx5e_dcbnl_setbuffer(struct net_device *dev, if (!changed) return 0; - priv->dcbx.manual_buffer = true; err = mlx5e_port_manual_buffer_config(priv, changed, dev->mtu, NULL, buffer_size, prio2buffer); return err; @@ -1147,6 +1154,7 @@ static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) bool reset = true; int err; + netdev_lock(priv->netdev); mutex_lock(&priv->state_lock); new_params = priv->channels.params; @@ -1162,6 +1170,7 @@ static int mlx5e_set_trust_state(struct mlx5e_priv *priv, u8 trust_state) &trust_state, reset); mutex_unlock(&priv->state_lock); + netdev_unlock(priv->netdev); return err; } @@ -1250,7 +1259,6 @@ void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) priv->dcbx.cap |= DCB_CAP_DCBX_HOST; priv->dcbx.port_buff_cell_sz = mlx5e_query_port_buffers_cell_size(priv); - priv->dcbx.manual_buffer = false; priv->dcbx.cable_len = MLX5E_DEFAULT_CABLE_LEN; mlx5e_ets_init(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index 298bb74ec5e9..d1d629697e28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -113,7 +113,7 @@ int mlx5e_dim_rx_change(struct mlx5e_rq *rq, bool enable) __set_bit(MLX5E_RQ_STATE_DIM, &rq->state); } else { __clear_bit(MLX5E_RQ_STATE_DIM, &rq->state); - + synchronize_net(); mlx5e_dim_disable(rq->dim); rq->dim = NULL; } @@ -140,7 +140,7 @@ int mlx5e_dim_tx_change(struct mlx5e_txqsq *sq, bool enable) __set_bit(MLX5E_SQ_STATE_DIM, &sq->state); } else { __clear_bit(MLX5E_SQ_STATE_DIM, &sq->state); - + synchronize_net(); mlx5e_dim_disable(sq->dim); sq->dim = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index cae39198b4db..d507366d773e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -32,6 +32,7 @@ #include <linux/dim.h> #include <linux/ethtool_netlink.h> +#include <net/netdev_queues.h> #include "en.h" #include "en/channels.h" @@ -42,6 +43,8 @@ #include "lib/clock.h" #include "en/fs_ethtool.h" +#define LANES_UNKNOWN 0 + void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, struct ethtool_drvinfo *drvinfo) { @@ -237,14 +240,33 @@ void mlx5e_build_ptys2ethtool_map(void) ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT, ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT, ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT); -} - -static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev, + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_1_200GBASE_CR1_KR1, ext, + ETHTOOL_LINK_MODE_200000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR_2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseVR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_2_400GBASE_CR2_KR2, ext, + ETHTOOL_LINK_MODE_400000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseDR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseDR2_2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseVR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_800GAUI_4_800GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_800000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseDR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT, + ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT); +} + +static void mlx5e_ethtool_get_speed_arr(bool ext, struct ptys2ethtool_config **arr, u32 *size) { - bool ext = mlx5_ptys_ext_supported(mdev); - *arr = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; *size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) : ARRAY_SIZE(ptys2legacy_ethtool_table); @@ -344,11 +366,6 @@ void mlx5e_ethtool_get_ringparam(struct mlx5e_priv *priv, param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; param->rx_pending = 1 << priv->channels.params.log_rq_mtu_frames; param->tx_pending = 1 << priv->channels.params.log_sq_size; - - kernel_param->tcp_data_split = - (priv->channels.params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) ? - ETHTOOL_TCP_DATA_SPLIT_ENABLED : - ETHTOOL_TCP_DATA_SPLIT_DISABLED; } static void mlx5e_get_ringparam(struct net_device *dev, @@ -361,6 +378,27 @@ static void mlx5e_get_ringparam(struct net_device *dev, mlx5e_ethtool_get_ringparam(priv, param, kernel_param); } +static bool mlx5e_ethtool_set_tcp_data_split(struct mlx5e_priv *priv, + u8 tcp_data_split, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = priv->netdev; + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + !(dev->features & NETIF_F_GRO_HW)) { + NL_SET_ERR_MSG_MOD(extack, + "TCP-data-split is not supported when GRO HW is disabled"); + return false; + } + + /* Might need to disable HW-GRO if it was kept on due to hds. */ + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED && + dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED) + netdev_update_features(priv->netdev); + + return true; +} + int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, struct ethtool_ringparam *param, struct netlink_ext_ack *extack) @@ -419,6 +457,11 @@ static int mlx5e_set_ringparam(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); + if (!mlx5e_ethtool_set_tcp_data_split(priv, + kernel_param->tcp_data_split, + extack)) + return -EINVAL; + return mlx5e_ethtool_set_ringparam(priv, param, extack); } @@ -891,37 +934,19 @@ int mlx5e_set_per_queue_coalesce(struct net_device *dev, u32 queue, return mlx5e_ethtool_set_per_queue_coalesce(priv, queue, coal); } -static void ptys2ethtool_supported_link(struct mlx5_core_dev *mdev, - unsigned long *supported_modes, - u32 eth_proto_cap) -{ - unsigned long proto_cap = eth_proto_cap; - struct ptys2ethtool_config *table; - u32 max_size; - int proto; - - mlx5e_ethtool_get_speed_arr(mdev, &table, &max_size); - for_each_set_bit(proto, &proto_cap, max_size) - bitmap_or(supported_modes, supported_modes, - table[proto].supported, - __ETHTOOL_LINK_MODE_MASK_NBITS); -} - -static void ptys2ethtool_adver_link(unsigned long *advertising_modes, - u32 eth_proto_cap, bool ext) +static void ptys2ethtool_process_link(u32 eth_eproto, bool ext, bool advertised, + unsigned long *modes) { - unsigned long proto_cap = eth_proto_cap; + unsigned long eproto = eth_eproto; struct ptys2ethtool_config *table; u32 max_size; int proto; - table = ext ? ptys2ext_ethtool_table : ptys2legacy_ethtool_table; - max_size = ext ? ARRAY_SIZE(ptys2ext_ethtool_table) : - ARRAY_SIZE(ptys2legacy_ethtool_table); - - for_each_set_bit(proto, &proto_cap, max_size) - bitmap_or(advertising_modes, advertising_modes, - table[proto].advertised, + mlx5e_ethtool_get_speed_arr(ext, &table, &max_size); + for_each_set_bit(proto, &eproto, max_size) + bitmap_or(modes, modes, + advertised ? + table[proto].advertised : table[proto].supported, __ETHTOOL_LINK_MODE_MASK_NBITS); } @@ -931,6 +956,7 @@ static const u32 pplm_fec_2_ethtool[] = { [MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS, [MLX5E_FEC_RS_544_514] = ETHTOOL_FEC_RS, [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_FEC_LLRS, + [MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD] = ETHTOOL_FEC_RS, }; static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size) @@ -1074,50 +1100,51 @@ static void ptys2ethtool_supported_advertised_port(struct mlx5_core_dev *mdev, } } -static void get_speed_duplex(struct net_device *netdev, - u32 eth_proto_oper, bool force_legacy, - u16 data_rate_oper, - struct ethtool_link_ksettings *link_ksettings) +static void get_link_properties(struct net_device *netdev, + u32 eth_proto_oper, bool force_legacy, + u16 data_rate_oper, + struct ethtool_link_ksettings *link_ksettings) { struct mlx5e_priv *priv = netdev_priv(netdev); - u32 speed = SPEED_UNKNOWN; + const struct mlx5_link_info *info; u8 duplex = DUPLEX_UNKNOWN; + u32 speed = SPEED_UNKNOWN; + u32 lanes = LANES_UNKNOWN; if (!netif_carrier_ok(netdev)) goto out; - speed = mlx5_port_ptys2speed(priv->mdev, eth_proto_oper, force_legacy); - if (!speed) { - if (data_rate_oper) - speed = 100 * data_rate_oper; - else - speed = SPEED_UNKNOWN; - goto out; - } - - duplex = DUPLEX_FULL; + info = mlx5_port_ptys2info(priv->mdev, eth_proto_oper, force_legacy); + if (info) { + speed = info->speed; + lanes = info->lanes; + duplex = DUPLEX_FULL; + } else if (data_rate_oper) + speed = 100 * data_rate_oper; out: - link_ksettings->base.speed = speed; link_ksettings->base.duplex = duplex; + link_ksettings->base.speed = speed; + link_ksettings->lanes = lanes; } static void get_supported(struct mlx5_core_dev *mdev, u32 eth_proto_cap, struct ethtool_link_ksettings *link_ksettings) { unsigned long *supported = link_ksettings->link_modes.supported; - ptys2ethtool_supported_link(mdev, supported, eth_proto_cap); + bool ext = mlx5_ptys_ext_supported(mdev); + + ptys2ethtool_process_link(eth_proto_cap, ext, false, supported); ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); } -static void get_advertising(u32 eth_proto_cap, u8 tx_pause, u8 rx_pause, +static void get_advertising(u32 eth_proto_admin, u8 tx_pause, u8 rx_pause, struct ethtool_link_ksettings *link_ksettings, bool ext) { unsigned long *advertising = link_ksettings->link_modes.advertising; - ptys2ethtool_adver_link(advertising, eth_proto_cap, ext); - + ptys2ethtool_process_link(eth_proto_admin, ext, true, advertising); if (rx_pause) ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); if (tx_pause ^ rx_pause) @@ -1173,7 +1200,7 @@ static void get_lp_advertising(struct mlx5_core_dev *mdev, u32 eth_proto_lp, unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; bool ext = mlx5_ptys_ext_supported(mdev); - ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext); + ptys2ethtool_process_link(eth_proto_lp, ext, true, lp_advertising); } static int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, @@ -1235,8 +1262,8 @@ static int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, get_supported(mdev, eth_proto_cap, link_ksettings); get_advertising(eth_proto_admin, tx_pause, rx_pause, link_ksettings, admin_ext); - get_speed_duplex(priv->netdev, eth_proto_oper, !admin_ext, - data_rate_oper, link_ksettings); + get_link_properties(priv->netdev, eth_proto_oper, !admin_ext, + data_rate_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; connector_type = connector_type < MLX5E_CONNECTOR_TYPE_NUMBER ? @@ -1341,28 +1368,22 @@ static bool ext_link_mode_requested(const unsigned long *adver) return bitmap_intersects(modes, adver, __ETHTOOL_LINK_MODE_MASK_NBITS); } -static bool ext_requested(u8 autoneg, const unsigned long *adver, bool ext_supported) -{ - bool ext_link_mode = ext_link_mode_requested(adver); - - return autoneg == AUTONEG_ENABLE ? ext_link_mode : ext_supported; -} - static int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, const struct ethtool_link_ksettings *link_ksettings) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_port_eth_proto eproto; + struct mlx5_link_info info = {}; const unsigned long *adver; bool an_changes = false; u8 an_disable_admin; bool ext_supported; + bool ext_requested; u8 an_disable_cap; bool an_disable; u32 link_modes; u8 an_status; u8 autoneg; - u32 speed; bool ext; int err; @@ -1370,13 +1391,15 @@ static int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, adver = link_ksettings->link_modes.advertising; autoneg = link_ksettings->base.autoneg; - speed = link_ksettings->base.speed; + info.speed = link_ksettings->base.speed; + info.lanes = link_ksettings->lanes; ext_supported = mlx5_ptys_ext_supported(mdev); - ext = ext_requested(autoneg, adver, ext_supported); - if (!ext_supported && ext) + ext_requested = ext_link_mode_requested(adver); + if (!ext_supported && ext_requested) return -EOPNOTSUPP; + ext = autoneg == AUTONEG_ENABLE ? ext_requested : ext_supported; ethtool2ptys_adver_func = ext ? mlx5e_ethtool2ptys_ext_adver_link : mlx5e_ethtool2ptys_adver_link; err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto); @@ -1386,7 +1409,7 @@ static int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, goto out; } link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) : - mlx5_port_speed2linkmodes(mdev, speed, !ext); + mlx5_port_info2linkmodes(mdev, &info, !ext); err = mlx5e_speed_validate(priv->netdev, ext, link_modes, autoneg); if (err) @@ -1457,60 +1480,144 @@ static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) static int mlx5e_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) { struct mlx5e_priv *priv = netdev_priv(netdev); - u32 rss_context = rxfh->rss_context; - int err; + bool symmetric; mutex_lock(&priv->state_lock); - err = mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rss_context, - rxfh->indir, rxfh->key, &rxfh->hfunc); + mlx5e_rx_res_rss_get_rxfh(priv->rx_res, 0, rxfh->indir, rxfh->key, + &rxfh->hfunc, &symmetric); mutex_unlock(&priv->state_lock); - return err; + + if (symmetric) + rxfh->input_xfrm = RXH_XFRM_SYM_OR_XOR; + + return 0; } -static int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, - struct netlink_ext_ack *extack) +static int mlx5e_rxfh_hfunc_check(struct mlx5e_priv *priv, + const struct ethtool_rxfh_param *rxfh) { - struct mlx5e_priv *priv = netdev_priv(dev); - u32 *rss_context = &rxfh->rss_context; - u8 hfunc = rxfh->hfunc; unsigned int count; - int err; - - mutex_lock(&priv->state_lock); count = priv->channels.params.num_channels; - if (hfunc == ETH_RSS_HASH_XOR) { + if (rxfh->hfunc == ETH_RSS_HASH_XOR) { unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8(); if (count > xor8_max_channels) { - err = -EINVAL; netdev_err(priv->netdev, "%s: Cannot set RSS hash function to XOR, current number of channels (%d) exceeds the maximum allowed for XOR8 RSS hfunc (%d)\n", __func__, count, xor8_max_channels); - goto unlock; + return -EINVAL; } } - if (*rss_context && rxfh->rss_delete) { - err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context); + return 0; +} + +static int mlx5e_set_rxfh(struct net_device *dev, + struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + bool symmetric = rxfh->input_xfrm == RXH_XFRM_SYM_OR_XOR; + struct mlx5e_priv *priv = netdev_priv(dev); + u8 hfunc = rxfh->hfunc; + int err; + + mutex_lock(&priv->state_lock); + + err = mlx5e_rxfh_hfunc_check(priv, rxfh); + if (err) goto unlock; - } - if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) { - err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count); - if (err) - goto unlock; - } + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, rxfh->rss_context, + rxfh->indir, rxfh->key, + hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc, + rxfh->input_xfrm == RXH_XFRM_NO_CHANGE ? NULL : &symmetric); + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_create_rxfh_context(struct net_device *dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + bool symmetric = rxfh->input_xfrm == RXH_XFRM_SYM_OR_XOR; + struct mlx5e_priv *priv = netdev_priv(dev); + u8 hfunc = rxfh->hfunc; + int err; + + mutex_lock(&priv->state_lock); + + err = mlx5e_rxfh_hfunc_check(priv, rxfh); + if (err) + goto unlock; + + err = mlx5e_rx_res_rss_init(priv->rx_res, rxfh->rss_context, + priv->channels.params.num_channels); + if (err) + goto unlock; + + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, rxfh->rss_context, + rxfh->indir, rxfh->key, + hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc, + rxfh->input_xfrm == RXH_XFRM_NO_CHANGE ? NULL : &symmetric); + if (err) + goto unlock; + + mlx5e_rx_res_rss_get_rxfh(priv->rx_res, rxfh->rss_context, + ethtool_rxfh_context_indir(ctx), + ethtool_rxfh_context_key(ctx), + &ctx->hfunc, &symmetric); + if (symmetric) + ctx->input_xfrm = RXH_XFRM_SYM_OR_XOR; + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5e_modify_rxfh_context(struct net_device *dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + bool symmetric = rxfh->input_xfrm == RXH_XFRM_SYM_OR_XOR; + struct mlx5e_priv *priv = netdev_priv(dev); + u8 hfunc = rxfh->hfunc; + int err; + + mutex_lock(&priv->state_lock); + + err = mlx5e_rxfh_hfunc_check(priv, rxfh); + if (err) + goto unlock; - err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, *rss_context, + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, rxfh->rss_context, rxfh->indir, rxfh->key, - hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc); + hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc, + rxfh->input_xfrm == RXH_XFRM_NO_CHANGE ? NULL : &symmetric); unlock: mutex_unlock(&priv->state_lock); return err; } +static int mlx5e_remove_rxfh_context(struct net_device *dev, + struct ethtool_rxfh_context *ctx, + u32 rss_context, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_rx_res_rss_destroy(priv->rx_res, rss_context); + mutex_unlock(&priv->state_lock); + return err; +} + #define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100 #define MLX5E_PFC_PREVEN_TOUT_MAX_MSEC 8000 #define MLX5E_PFC_PREVEN_MINOR_PRECENT 85 @@ -1675,6 +1782,7 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, return 0; info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | SOF_TIMESTAMPING_RAW_HARDWARE; @@ -2018,7 +2126,7 @@ static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev, if (size_read < 0) { NL_SET_ERR_MSG_FMT_MOD( extack, - "Query module eeprom by page failed, read %u bytes, err %d\n", + "Query module eeprom by page failed, read %u bytes, err %d", i, size_read); return i; } @@ -2045,14 +2153,9 @@ int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, if (err) return err; - dev_hold(dev); - rtnl_unlock(); - err = mlx5_firmware_flash(mdev, fw, NULL); release_firmware(fw); - rtnl_lock(); - dev_put(dev); return err; } @@ -2370,6 +2473,23 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) return priv->channels.params.pflags; } +static int mlx5e_get_rxfh_fields(struct net_device *dev, + struct ethtool_rxfh_fields *info) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_get_rxfh_fields(priv, info); +} + +static int mlx5e_set_rxfh_fields(struct net_device *dev, + const struct ethtool_rxfh_fields *cmd, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_rxfh_fields(priv, cmd, extack); +} + static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) { @@ -2607,12 +2727,16 @@ static void mlx5e_get_ts_stats(struct net_device *netdev, } const struct ethtool_ops mlx5e_ethtool_ops = { - .cap_rss_ctx_supported = true, + .cap_link_lanes_supported = true, + .rxfh_per_ctx_fields = true, .rxfh_per_ctx_key = true, + .rxfh_max_num_contexts = MLX5E_MAX_NUM_RSS, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USE_ADAPTIVE | ETHTOOL_COALESCE_USE_CQE, + .supported_input_xfrm = RXH_XFRM_SYM_OR_XOR, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, .get_link_ext_state = mlx5e_get_link_ext_state, @@ -2633,6 +2757,11 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size, .get_rxfh = mlx5e_get_rxfh, .set_rxfh = mlx5e_set_rxfh, + .get_rxfh_fields = mlx5e_get_rxfh_fields, + .set_rxfh_fields = mlx5e_set_rxfh_fields, + .create_rxfh_context = mlx5e_create_rxfh_context, + .modify_rxfh_context = mlx5e_modify_rxfh_context, + .remove_rxfh_context = mlx5e_remove_rxfh_context, .get_rxnfc = mlx5e_get_rxnfc, .set_rxnfc = mlx5e_set_rxnfc, .get_tunable = mlx5e_get_tunable, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 05058710d2c7..265c4ca85f7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -484,7 +484,9 @@ static int mlx5e_vlan_rx_add_svid(struct mlx5e_flow_steering *fs, } /* Need to fix some features.. */ + netdev_lock(netdev); netdev_update_features(netdev); + netdev_unlock(netdev); return err; } @@ -521,7 +523,9 @@ int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs, } else if (be16_to_cpu(proto) == ETH_P_8021AD) { clear_bit(vid, fs->vlan->active_svlans); mlx5e_fs_del_vlan_rule(fs, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, vid); + netdev_lock(netdev); netdev_update_features(netdev); + netdev_unlock(netdev); } return 0; @@ -776,7 +780,7 @@ static int mlx5e_create_promisc_table(struct mlx5e_flow_steering *fs) ft_attr.max_fte = MLX5E_PROMISC_TABLE_SIZE; ft_attr.autogroup.max_num_groups = 1; ft_attr.level = MLX5E_PROMISC_FT_LEVEL; - ft_attr.prio = MLX5E_NIC_PRIO; + ft_attr.prio = MLX5E_PROMISC_PRIO; ft->t = mlx5_create_auto_grouped_flow_table(fs->ns, &ft_attr); if (IS_ERR(ft->t)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 773624bb2c5d..79916f1abd14 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -884,25 +884,27 @@ static int flow_type_to_traffic_type(u32 flow_type) case ESP_V6_FLOW: return MLX5_TT_IPV6_IPSEC_ESP; case IPV4_FLOW: + case IP_USER_FLOW: return MLX5_TT_IPV4; case IPV6_FLOW: + case IPV6_USER_FLOW: return MLX5_TT_IPV6; default: return -EINVAL; } } -static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, - struct ethtool_rxnfc *nfc) +int mlx5e_ethtool_set_rxfh_fields(struct mlx5e_priv *priv, + const struct ethtool_rxfh_fields *nfc, + struct netlink_ext_ack *extack) { u8 rx_hash_field = 0; u32 flow_type = 0; - u32 rss_idx = 0; + u32 rss_idx; int err; int tt; - if (nfc->flow_type & FLOW_RSS) - rss_idx = nfc->rss_context; + rss_idx = nfc->rss_context; flow_type = flow_type_mask(nfc->flow_type); tt = flow_type_to_traffic_type(flow_type); @@ -939,16 +941,15 @@ static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, return err; } -static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, - struct ethtool_rxnfc *nfc) +int mlx5e_ethtool_get_rxfh_fields(struct mlx5e_priv *priv, + struct ethtool_rxfh_fields *nfc) { int hash_field = 0; u32 flow_type = 0; - u32 rss_idx = 0; + u32 rss_idx; int tt; - if (nfc->flow_type & FLOW_RSS) - rss_idx = nfc->rss_context; + rss_idx = nfc->rss_context; flow_type = flow_type_mask(nfc->flow_type); tt = flow_type_to_traffic_type(flow_type); @@ -984,9 +985,6 @@ int mlx5e_ethtool_set_rxnfc(struct mlx5e_priv *priv, struct ethtool_rxnfc *cmd) case ETHTOOL_SRXCLSRLDEL: err = mlx5e_ethtool_flow_remove(priv, cmd->fs.location); break; - case ETHTOOL_SRXFH: - err = mlx5e_set_rss_hash_opt(priv, cmd); - break; default: err = -EOPNOTSUPP; break; @@ -1011,9 +1009,6 @@ int mlx5e_ethtool_get_rxnfc(struct mlx5e_priv *priv, case ETHTOOL_GRXCLSRLALL: err = mlx5e_ethtool_get_all_flows(priv, info, rule_locs); break; - case ETHTOOL_GRXFH: - err = mlx5e_get_rss_hash_opt(priv, info); - break; default: err = -EOPNOTSUPP; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d0b80b520397..21bb88c5d3dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -39,7 +39,9 @@ #include <linux/debugfs.h> #include <linux/if_bridge.h> #include <linux/filter.h> +#include <net/netdev_lock.h> #include <net/netdev_queues.h> +#include <net/netdev_rx_queue.h> #include <net/page_pool/types.h> #include <net/pkt_sched.h> #include <net/xdp_sock_drv.h> @@ -74,10 +76,12 @@ #include "en/trap.h" #include "lib/devcom.h" #include "lib/sd.h" +#include "en/pcie_cong_event.h" static bool mlx5e_hw_gro_supported(struct mlx5_core_dev *mdev) { - if (!MLX5_CAP_GEN(mdev, shampo)) + if (!MLX5_CAP_GEN(mdev, shampo) || + !MLX5_CAP_SHAMPO(mdev, shampo_header_split_data_merge)) return false; /* Our HW-GRO implementation relies on "KSM Mkey" for @@ -311,8 +315,8 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *wqe) { - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + struct mlx5_wqe_ctrl_seg *cseg = &wqe->hdr.ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->hdr.uctrl; u16 octowords; u8 ds_cnt; @@ -330,47 +334,6 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } -static int mlx5e_rq_shampo_hd_alloc(struct mlx5e_rq *rq, int node) -{ - rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo), - GFP_KERNEL, node); - if (!rq->mpwqe.shampo) - return -ENOMEM; - return 0; -} - -static void mlx5e_rq_shampo_hd_free(struct mlx5e_rq *rq) -{ - kvfree(rq->mpwqe.shampo); -} - -static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node) -{ - struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - - shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL, - node); - shampo->pages = kvzalloc_node(array_size(shampo->hd_per_wq, - sizeof(*shampo->pages)), - GFP_KERNEL, node); - if (!shampo->bitmap || !shampo->pages) - goto err_nomem; - - return 0; - -err_nomem: - kvfree(shampo->bitmap); - kvfree(shampo->pages); - - return -ENOMEM; -} - -static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) -{ - kvfree(rq->mpwqe.shampo->bitmap); - kvfree(rq->mpwqe.shampo->pages); -} - static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) { int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); @@ -393,7 +356,9 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node) bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); } - mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe); + mlx5e_build_umr_wqe(rq, rq->icosq, + container_of(&rq->mpwqe.umr_wqe, + struct mlx5e_umr_wqe, hdr)); return 0; } @@ -581,19 +546,26 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq } static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev, - struct mlx5e_rq *rq) + u16 hd_per_wq, __be32 *umr_mkey) { u32 max_ksm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size)); + u32 mkey; + int err; - if (max_ksm_size < rq->mpwqe.shampo->hd_per_wq) { + if (max_ksm_size < hd_per_wq) { mlx5_core_err(mdev, "max ksm list size 0x%x is smaller than shampo header buffer list size 0x%x\n", - max_ksm_size, rq->mpwqe.shampo->hd_per_wq); + max_ksm_size, hd_per_wq); return -EINVAL; } - return mlx5e_create_umr_ksm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq, - MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE, - &rq->mpwqe.shampo->mkey); + err = mlx5e_create_umr_ksm_mkey(mdev, hd_per_wq, + MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE, + &mkey); + if (err) + return err; + + *umr_mkey = cpu_to_be32(mkey); + return 0; } static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) @@ -704,6 +676,27 @@ static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_rq_cqe_err(rq); } +static void mlx5e_rq_timeout_work(struct work_struct *timeout_work) +{ + struct mlx5e_rq *rq = container_of(timeout_work, + struct mlx5e_rq, + rx_timeout_work); + + /* Acquire netdev instance lock to synchronize with channel close and + * reopen flows. Either successfully obtain the lock, or detect that + * channels are closing for another reason, making this work no longer + * necessary. + */ + while (!netdev_trylock(rq->netdev)) { + if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state)) + return; + msleep(20); + } + + mlx5e_reporter_rx_timeout(rq); + netdev_unlock(rq->netdev); +} + static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq) { rq->wqe_overflow.page = alloc_page(GFP_KERNEL); @@ -737,7 +730,7 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param rq->netdev = c->netdev; rq->priv = c->priv; rq->tstamp = c->tstamp; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; rq->channel = c; @@ -755,6 +748,42 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param xdp_frag_size); } +static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, u16 hd_per_wq, + int node) +{ + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + + shampo->hd_per_wq = hd_per_wq; + + shampo->bitmap = bitmap_zalloc_node(hd_per_wq, GFP_KERNEL, node); + shampo->pages = kvzalloc_node(array_size(hd_per_wq, + sizeof(*shampo->pages)), + GFP_KERNEL, node); + if (!shampo->bitmap || !shampo->pages) + goto err_nomem; + + return 0; + +err_nomem: + kvfree(shampo->pages); + bitmap_free(shampo->bitmap); + + return -ENOMEM; +} + +static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) +{ + kvfree(rq->mpwqe.shampo->pages); + bitmap_free(rq->mpwqe.shampo->bitmap); +} + +static bool mlx5_rq_needs_separate_hd_pool(struct mlx5e_rq *rq) +{ + struct netdev_rx_queue *rxq = __netif_get_rx_queue(rq->netdev, rq->ix); + + return !!rxq->mp_params.mp_ops; +} + static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rqp, @@ -762,42 +791,80 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, u32 *pool_size, int node) { + void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq); + u32 hd_pool_size; + u16 hd_per_wq; + int wq_size; int err; if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) return 0; - err = mlx5e_rq_shampo_hd_alloc(rq, node); - if (err) - goto out; - rq->mpwqe.shampo->hd_per_wq = - mlx5e_shampo_hd_per_wq(mdev, params, rqp); - err = mlx5e_create_rq_hd_umr_mkey(mdev, rq); + + rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo), + GFP_KERNEL, node); + if (!rq->mpwqe.shampo) + return -ENOMEM; + + /* split headers data structures */ + hd_per_wq = mlx5e_shampo_hd_per_wq(mdev, params, rqp); + err = mlx5e_rq_shampo_hd_info_alloc(rq, hd_per_wq, node); if (err) - goto err_shampo_hd; - err = mlx5e_rq_shampo_hd_info_alloc(rq, node); + goto err_shampo_hd_info_alloc; + + err = mlx5e_create_rq_hd_umr_mkey(mdev, hd_per_wq, + &rq->mpwqe.shampo->mkey_be); if (err) - goto err_shampo_info; + goto err_umr_mkey; + + rq->mpwqe.shampo->hd_per_wqe = + mlx5e_shampo_hd_per_wqe(mdev, params, rqp); + wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); + hd_pool_size = (rq->mpwqe.shampo->hd_per_wqe * wq_size) / + MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; + + if (mlx5_rq_needs_separate_hd_pool(rq)) { + /* Separate page pool for shampo headers */ + struct page_pool_params pp_params = { }; + + pp_params.order = 0; + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp_params.pool_size = hd_pool_size; + pp_params.nid = node; + pp_params.dev = rq->pdev; + pp_params.napi = rq->cq.napi; + pp_params.netdev = rq->netdev; + pp_params.dma_dir = rq->buff.map_dir; + pp_params.max_len = PAGE_SIZE; + + rq->hd_page_pool = page_pool_create(&pp_params); + if (IS_ERR(rq->hd_page_pool)) { + err = PTR_ERR(rq->hd_page_pool); + rq->hd_page_pool = NULL; + goto err_hds_page_pool; + } + } else { + /* Common page pool, reserve space for headers. */ + *pool_size += hd_pool_size; + rq->hd_page_pool = NULL; + } + + /* gro only data structures */ rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node); if (!rq->hw_gro_data) { err = -ENOMEM; goto err_hw_gro_data; } - rq->mpwqe.shampo->key = - cpu_to_be32(rq->mpwqe.shampo->mkey); - rq->mpwqe.shampo->hd_per_wqe = - mlx5e_shampo_hd_per_wqe(mdev, params, rqp); - rq->mpwqe.shampo->pages_per_wq = - rq->mpwqe.shampo->hd_per_wq / MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; - *pool_size += rq->mpwqe.shampo->pages_per_wq; + return 0; err_hw_gro_data: + page_pool_destroy(rq->hd_page_pool); +err_hds_page_pool: + mlx5_core_destroy_mkey(mdev, be32_to_cpu(rq->mpwqe.shampo->mkey_be)); +err_umr_mkey: mlx5e_rq_shampo_hd_info_free(rq); -err_shampo_info: - mlx5_core_destroy_mkey(mdev, rq->mpwqe.shampo->mkey); -err_shampo_hd: - mlx5e_rq_shampo_hd_free(rq); -out: +err_shampo_hd_info_alloc: + kvfree(rq->mpwqe.shampo); return err; } @@ -807,9 +874,12 @@ static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq) return; kvfree(rq->hw_gro_data); + if (rq->hd_page_pool != rq->page_pool) + page_pool_destroy(rq->hd_page_pool); mlx5e_rq_shampo_hd_info_free(rq); - mlx5_core_destroy_mkey(rq->mdev, rq->mpwqe.shampo->mkey); - mlx5e_rq_shampo_hd_free(rq); + mlx5_core_destroy_mkey(rq->mdev, + be32_to_cpu(rq->mpwqe.shampo->mkey_be)); + kvfree(rq->mpwqe.shampo); } static int mlx5e_alloc_rq(struct mlx5e_params *params, @@ -827,6 +897,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, rqp->wq.db_numa_node = node; INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work); + INIT_WORK(&rq->rx_timeout_work, mlx5e_rq_timeout_work); if (params->xdp_prog) bpf_prog_inc(params->xdp_prog); @@ -912,6 +983,8 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, if (xsk) { err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_XSK_BUFF_POOL, NULL); + if (err) + goto err_free_by_rq_type; xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq); } else { /* Create a page_pool and register it with rxq */ @@ -926,6 +999,11 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, pp_params.netdev = rq->netdev; pp_params.dma_dir = rq->buff.map_dir; pp_params.max_len = PAGE_SIZE; + pp_params.queue_idx = rq->ix; + + /* Shampo header data split allow for unreadable netmem */ + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) + pp_params.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM; /* page_pool can be used even when there is no rq->xdp_prog, * given page_pool does not handle DMA mapping there is no @@ -938,12 +1016,15 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, rq->page_pool = NULL; goto err_free_by_rq_type; } - if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) + if (!rq->hd_page_pool) + rq->hd_page_pool = rq->page_pool; + if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) { err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_POOL, rq->page_pool); + if (err) + goto err_destroy_page_pool; + } } - if (err) - goto err_destroy_page_pool; for (i = 0; i < wq_sz; i++) { if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { @@ -1071,7 +1152,8 @@ int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param, u16 q_cou if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { MLX5_SET(wq, wq, log_headers_buffer_entry_num, order_base_2(rq->mpwqe.shampo->hd_per_wq)); - MLX5_SET(wq, wq, headers_mkey, rq->mpwqe.shampo->mkey); + MLX5_SET(wq, wq, headers_mkey, + be32_to_cpu(rq->mpwqe.shampo->mkey_be)); } mlx5_fill_page_frag_array(&rq->wq_ctrl.buf, @@ -1201,7 +1283,8 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n", rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes); - mlx5e_reporter_rx_timeout(rq); + queue_work(rq->priv->wq, &rq->rx_timeout_work); + return -ETIMEDOUT; } @@ -1372,6 +1455,7 @@ void mlx5e_close_rq(struct mlx5e_rq *rq) if (rq->dim) cancel_work_sync(&rq->dim->work); cancel_work_sync(&rq->recover_work); + cancel_work_sync(&rq->rx_timeout_work); mlx5e_destroy_rq(rq); mlx5e_free_rx_descs(rq); mlx5e_free_rq(rq); @@ -1614,7 +1698,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int err; sq->pdev = c->pdev; - sq->clock = &mdev->clock; + sq->clock = mdev->clock; sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; sq->mdev = c->mdev; @@ -1627,8 +1711,6 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); - if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) - set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); if (mlx5_ipsec_device_caps(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (param->is_mpw) @@ -1901,7 +1983,20 @@ void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq, recover_work); + /* Recovering queues means re-enabling NAPI, which requires the netdev + * instance lock. However, SQ closing flows have to wait for work tasks + * to finish while also holding the netdev instance lock. So either get + * the lock or find that the SQ is no longer enabled and thus this work + * is not relevant anymore. + */ + while (!netdev_trylock(sq->netdev)) { + if (!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)) + return; + msleep(20); + } + mlx5e_reporter_tx_err_cqe(sq); + netdev_unlock(sq->netdev); } static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) @@ -2023,41 +2118,12 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.min_inline_mode = sq->min_inline_mode; set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - if (param->is_xdp_mb) - set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state); - err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); if (err) goto err_free_xdpsq; mlx5e_set_xmit_fp(sq, param->is_mpw); - if (!param->is_mpw && !test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) { - unsigned int ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1; - unsigned int inline_hdr_sz = 0; - int i; - - if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - inline_hdr_sz = MLX5E_XDP_MIN_INLINE; - ds_cnt++; - } - - /* Pre initialize fixed WQE fields */ - for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - - sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) { - .num_wqebbs = 1, - .num_pkts = 1, - }; - - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); - } - } - return 0; err_free_xdpsq: @@ -2087,7 +2153,7 @@ static struct mlx5e_xdpsq *mlx5e_open_xdpredirect_sq(struct mlx5e_channel *c, struct mlx5e_xdpsq *xdpsq; int err; - xdpsq = kvzalloc_node(sizeof(*xdpsq), GFP_KERNEL, c->cpu); + xdpsq = kvzalloc_node(sizeof(*xdpsq), GFP_KERNEL, cpu_to_node(c->cpu)); if (!xdpsq) return ERR_PTR(-ENOMEM); @@ -2680,11 +2746,11 @@ void mlx5e_trigger_napi_sched(struct napi_struct *napi) static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, - struct mlx5e_channel_param *cparam, struct xsk_buff_pool *xsk_pool, struct mlx5e_channel **cp) { struct net_device *netdev = priv->netdev; + struct mlx5e_channel_param *cparam; struct mlx5_core_dev *mdev; struct mlx5e_xsk_param xsk; struct mlx5e_channel *c; @@ -2706,8 +2772,15 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, return err; c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); - if (!c) - return -ENOMEM; + cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL); + if (!c || !cparam) { + err = -ENOMEM; + goto err_free; + } + + err = mlx5e_build_channel_param(mdev, params, cparam); + if (err) + goto err_free; c->priv = priv; c->mdev = mdev; @@ -2725,8 +2798,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->aff_mask = irq_get_effective_affinity_mask(irq); c->lag_port = mlx5e_enumerate_lag_port(mdev, ix); - netif_napi_add_config(netdev, &c->napi, mlx5e_napi_poll, ix); - netif_napi_set_irq(&c->napi, irq); + netif_napi_add_config_locked(netdev, &c->napi, mlx5e_napi_poll, ix); + netif_napi_set_irq_locked(&c->napi, irq); err = mlx5e_open_queues(c, params, cparam); if (unlikely(err)) @@ -2741,14 +2814,17 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, *cp = c; + kvfree(cparam); return 0; err_close_queues: mlx5e_close_queues(c); err_napi_del: - netif_napi_del(&c->napi); + netif_napi_del_locked(&c->napi); +err_free: + kvfree(cparam); kvfree(c); return err; @@ -2758,7 +2834,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) { int tc; - napi_enable(&c->napi); + napi_enable_locked(&c->napi); for (tc = 0; tc < c->num_tc; tc++) mlx5e_activate_txqsq(&c->sq[tc]); @@ -2790,7 +2866,7 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c) mlx5e_deactivate_txqsq(&c->sq[tc]); mlx5e_qos_deactivate_queues(c); - napi_disable(&c->napi); + napi_disable_locked(&c->napi); } static void mlx5e_close_channel(struct mlx5e_channel *c) @@ -2799,7 +2875,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) mlx5e_close_xsk(c); mlx5e_close_queues(c); mlx5e_qos_close_queues(c); - netif_napi_del(&c->napi); + netif_napi_del_locked(&c->napi); kvfree(c); } @@ -2807,20 +2883,14 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) int mlx5e_open_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) { - struct mlx5e_channel_param *cparam; int err = -ENOMEM; int i; chs->num = chs->params.num_channels; chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL); - cparam = kvzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL); - if (!chs->c || !cparam) - goto err_free; - - err = mlx5e_build_channel_param(priv->mdev, &chs->params, cparam); - if (err) - goto err_free; + if (!chs->c) + goto err_out; for (i = 0; i < chs->num; i++) { struct xsk_buff_pool *xsk_pool = NULL; @@ -2828,7 +2898,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, if (chs->params.xdp_prog) xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i); - err = mlx5e_open_channel(priv, i, &chs->params, cparam, xsk_pool, &chs->c[i]); + err = mlx5e_open_channel(priv, i, &chs->params, xsk_pool, &chs->c[i]); if (err) goto err_close_channels; } @@ -2846,7 +2916,6 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, } mlx5e_health_channels_update(priv); - kvfree(cparam); return 0; err_close_ptp: @@ -2857,9 +2926,8 @@ err_close_channels: for (i--; i >= 0; i--) mlx5e_close_channel(chs->c[i]); -err_free: kfree(chs->c); - kvfree(cparam); +err_out: chs->num = 0; return err; } @@ -3814,8 +3882,11 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, /* MQPRIO is another toplevel qdisc that can't be attached * simultaneously with the offloaded HTB. */ - if (WARN_ON(mlx5e_selq_is_htb_enabled(&priv->selq))) - return -EINVAL; + if (mlx5e_selq_is_htb_enabled(&priv->selq)) { + NL_SET_ERR_MSG_MOD(mqprio->extack, + "MQPRIO cannot be configured when HTB offload is enabled."); + return -EOPNOTSUPP; + } switch (mqprio->mode) { case TC_MQPRIO_MODE_DCB: @@ -3944,6 +4015,7 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) } stats->rx_missed_errors = priv->stats.qcnt.rx_out_of_buffer; + stats->rx_dropped = PPORT_2863_GET(pstats, if_in_discards); stats->rx_length_errors = PPORT_802_3_GET(pstats, a_in_range_length_errors) + @@ -4050,10 +4122,6 @@ static int set_feature_hw_gro(struct net_device *netdev, bool enable) if (enable) { new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO; - new_params.packet_merge.shampo.match_criteria_type = - MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED; - new_params.packet_merge.shampo.alignment_granularity = - MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE; } else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) { new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE; } else { @@ -4297,7 +4365,7 @@ void mlx5e_set_xdp_feature(struct net_device *netdev) if (!netdev->netdev_ops->ndo_bpf || params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) { - xdp_clear_features_flag(netdev); + xdp_set_features_flag_locked(netdev, 0); return; } @@ -4306,7 +4374,7 @@ void mlx5e_set_xdp_feature(struct net_device *netdev) NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_NDO_XMIT_SG; - xdp_set_features_flag(netdev, val); + xdp_set_features_flag_locked(netdev, val); } int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) @@ -4370,12 +4438,17 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev if (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER) netdev_warn(netdev, "Disabling HW_VLAN CTAG FILTERING, not supported in switchdev mode\n"); + features &= ~NETIF_F_HW_MACSEC; + if (netdev->features & NETIF_F_HW_MACSEC) + netdev_warn(netdev, "Disabling HW MACsec offload, not supported in switchdev mode\n"); + return features; } static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_features_t features) { + struct netdev_config *cfg = netdev->cfg_pending; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_vlan_table *vlan; struct mlx5e_params *params; @@ -4442,11 +4515,18 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, } } + /* The header-data split ring param requires HW GRO to stay enabled. */ + if (cfg && cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + !(features & NETIF_F_GRO_HW)) { + netdev_warn(netdev, "Keeping HW-GRO enabled, TCP header-data split depends on it\n"); + features |= NETIF_F_GRO_HW; + } + if (mlx5e_is_uplink_rep(priv)) { features = mlx5e_fix_uplink_rep_features(netdev, features); - netdev->netns_local = true; + netdev->netns_immutable = true; } else { - netdev->netns_local = false; + netdev->netns_immutable = false; } mutex_unlock(&priv->state_lock); @@ -4985,21 +5065,19 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) struct net_device *netdev = priv->netdev; int i; - /* Take rtnl_lock to ensure no change in netdev->real_num_tx_queues - * through this flow. However, channel closing flows have to wait for - * this work to finish while holding rtnl lock too. So either get the - * lock or find that channels are being closed for other reason and - * this work is not relevant anymore. + /* Recovering the TX queues implies re-enabling NAPI, which requires + * the netdev instance lock. + * However, channel closing flows have to wait for this work to finish + * while holding the same lock. So either get the lock or find that + * channels are being closed for other reason and this work is not + * relevant anymore. */ - while (!rtnl_trylock()) { + while (!netdev_trylock(netdev)) { if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &priv->state)) return; msleep(20); } - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto unlock; - for (i = 0; i < netdev->real_num_tx_queues; i++) { struct netdev_queue *dev_queue = netdev_get_tx_queue(netdev, i); @@ -5013,8 +5091,7 @@ static void mlx5e_tx_timeout_work(struct work_struct *work) break; } -unlock: - rtnl_unlock(); + netdev_unlock(netdev); } static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue) @@ -5129,11 +5206,9 @@ static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; u8 mode, setting; - int err; - err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting); - if (err) - return err; + if (mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting)) + return -EOPNOTSUPP; mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, @@ -5311,8 +5386,7 @@ void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv) priv->nic_info.set_port = mlx5e_vxlan_set_port; priv->nic_info.unset_port = mlx5e_vxlan_unset_port; - priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | - UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; + priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN; /* Don't count the space hard-coded to the IANA port */ priv->nic_info.tables[0].n_entries = @@ -5340,7 +5414,6 @@ static void mlx5e_get_queue_stats_rx(struct net_device *dev, int i, struct mlx5e_rq_stats *xskrq_stats; struct mlx5e_rq_stats *rq_stats; - ASSERT_RTNL(); if (mlx5e_is_uplink_rep(priv) || !priv->stats_nch) return; @@ -5360,7 +5433,6 @@ static void mlx5e_get_queue_stats_tx(struct net_device *dev, int i, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_sq_stats *sq_stats; - ASSERT_RTNL(); if (!priv->stats_nch) return; @@ -5381,7 +5453,6 @@ static void mlx5e_get_base_stats(struct net_device *dev, struct mlx5e_ptp *ptp_channel; int i, tc; - ASSERT_RTNL(); if (!mlx5e_is_uplink_rep(priv)) { rx->packets = 0; rx->bytes = 0; @@ -5465,6 +5536,103 @@ static const struct netdev_stat_ops mlx5e_stat_ops = { .get_base_stats = mlx5e_get_base_stats, }; +struct mlx5_qmgmt_data { + struct mlx5e_channel *c; + struct mlx5e_channel_param cparam; +}; + +static int mlx5e_queue_mem_alloc(struct net_device *dev, void *newq, + int queue_index) +{ + struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq; + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_channels *chs = &priv->channels; + struct mlx5e_params params = chs->params; + struct mlx5_core_dev *mdev; + int err; + + mutex_lock(&priv->state_lock); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = -ENODEV; + goto unlock; + } + + if (queue_index >= chs->num) { + err = -ERANGE; + goto unlock; + } + + if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || + chs->params.ptp_rx || + chs->params.xdp_prog || + priv->htb) { + netdev_err(priv->netdev, + "Cloning channels with Port/rx PTP, XDP or HTB is not supported\n"); + err = -EOPNOTSUPP; + goto unlock; + } + + mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, queue_index); + err = mlx5e_build_channel_param(mdev, ¶ms, &new->cparam); + if (err) + goto unlock; + + err = mlx5e_open_channel(priv, queue_index, ¶ms, NULL, &new->c); +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static void mlx5e_queue_mem_free(struct net_device *dev, void *mem) +{ + struct mlx5_qmgmt_data *data = (struct mlx5_qmgmt_data *)mem; + + /* not supposed to happen since mlx5e_queue_start never fails + * but this is how this should be implemented just in case + */ + if (data->c) + mlx5e_close_channel(data->c); +} + +static int mlx5e_queue_stop(struct net_device *dev, void *oldq, int queue_index) +{ + /* In mlx5 a txq cannot be simply stopped in isolation, only restarted. + * mlx5e_queue_start does not fail, we stop the old queue there. + * TODO: Improve this. + */ + return 0; +} + +static int mlx5e_queue_start(struct net_device *dev, void *newq, + int queue_index) +{ + struct mlx5_qmgmt_data *new = (struct mlx5_qmgmt_data *)newq; + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_channel *old; + + mutex_lock(&priv->state_lock); + + /* stop and close the old */ + old = priv->channels.c[queue_index]; + mlx5e_deactivate_priv_channels(priv); + /* close old before activating new, to avoid napi conflict */ + mlx5e_close_channel(old); + + /* start the new */ + priv->channels.c[queue_index] = new->c; + mlx5e_activate_priv_channels(priv); + mutex_unlock(&priv->state_lock); + return 0; +} + +static const struct netdev_queue_mgmt_ops mlx5e_queue_mgmt_ops = { + .ndo_queue_mem_size = sizeof(struct mlx5_qmgmt_data), + .ndo_queue_mem_alloc = mlx5e_queue_mem_alloc, + .ndo_queue_mem_free = mlx5e_queue_mem_free, + .ndo_queue_start = mlx5e_queue_start, + .ndo_queue_stop = mlx5e_queue_stop, +}; + static void mlx5e_build_nic_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -5475,8 +5643,11 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) SET_NETDEV_DEV(netdev, mdev->device); netdev->netdev_ops = &mlx5e_netdev_ops; + netdev->queue_mgmt_ops = &mlx5e_queue_mgmt_ops; netdev->xdp_metadata_ops = &mlx5e_xdp_metadata_ops; netdev->xsk_tx_metadata_ops = &mlx5e_xsk_tx_metadata_ops; + netdev->request_ops_lock = true; + netdev_lockdep_set_classes(netdev); mlx5e_dcbnl_build_netdev(netdev); @@ -5515,17 +5686,17 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) MLX5E_MPWRQ_UMR_MODE_ALIGNED)) netdev->vlan_features |= NETIF_F_LRO; + if (mlx5e_hw_gro_supported(mdev) && + mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, + MLX5E_MPWRQ_UMR_MODE_ALIGNED)) + netdev->vlan_features |= NETIF_F_GRO_HW; + netdev->hw_features = netdev->vlan_features; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; - if (mlx5e_hw_gro_supported(mdev) && - mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, - MLX5E_MPWRQ_UMR_MODE_ALIGNED)) - netdev->hw_features |= NETIF_F_GRO_HW; - if (mlx5e_tunnel_any_tx_proto_supported(mdev)) { netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; @@ -5604,6 +5775,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->netmem_tx = true; + netif_set_tso_max_size(netdev, GSO_MAX_SIZE); mlx5e_set_xdp_feature(netdev); mlx5e_set_netdev_dev_addr(netdev); @@ -5850,6 +6023,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) if (mlx5e_monitor_counter_supported(priv)) mlx5e_monitor_counter_init(priv); + mlx5e_pcie_cong_event_init(priv); mlx5e_hv_vhca_stats_create(priv); if (netdev->reg_state != NETREG_REGISTERED) return; @@ -5858,9 +6032,11 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_nic_set_rx_mode(priv); rtnl_lock(); + netdev_lock(netdev); if (netif_running(netdev)) mlx5e_open(netdev); udp_tunnel_nic_reset_ntf(priv->netdev); + netdev_unlock(netdev); netif_device_attach(netdev); rtnl_unlock(); } @@ -5873,23 +6049,26 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5e_dcbnl_delete_app(priv); rtnl_lock(); + netdev_lock(priv->netdev); if (netif_running(priv->netdev)) mlx5e_close(priv->netdev); netif_device_detach(priv->netdev); + if (priv->en_trap) { + mlx5e_deactivate_trap(priv); + mlx5e_close_trap(priv->en_trap); + priv->en_trap = NULL; + } + netdev_unlock(priv->netdev); rtnl_unlock(); mlx5e_nic_set_rx_mode(priv); + mlx5e_pcie_cong_event_cleanup(priv); mlx5e_hv_vhca_stats_destroy(priv); if (mlx5e_monitor_counter_supported(priv)) mlx5e_monitor_counter_cleanup(priv); mlx5e_disable_blocking_events(priv); - if (priv->en_trap) { - mlx5e_deactivate_trap(priv); - mlx5e_close_trap(priv->en_trap); - priv->en_trap = NULL; - } mlx5e_disable_async_events(priv); mlx5_lag_remove_netdev(mdev, priv->netdev); mlx5_vxlan_reset_to_default(mdev->vxlan); @@ -6144,7 +6323,9 @@ static void mlx5e_update_features(struct net_device *netdev) return; /* features will be updated on netdev registration */ rtnl_lock(); + netdev_lock(netdev); netdev_update_features(netdev); + netdev_unlock(netdev); rtnl_unlock(); } @@ -6155,7 +6336,7 @@ static void mlx5e_reset_channels(struct net_device *netdev) int mlx5e_attach_netdev(struct mlx5e_priv *priv) { - const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED; + const bool need_lock = priv->netdev->reg_state == NETREG_REGISTERED; const struct mlx5e_profile *profile = priv->profile; int max_nch; int err; @@ -6197,15 +6378,19 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) * 2. Set our default XPS cpumask. * 3. Build the RQT. * - * rtnl_lock is required by netif_set_real_num_*_queues in case the + * Locking is required by netif_set_real_num_*_queues in case the * netdev has been registered by this point (if this function was called * in the reload or resume flow). */ - if (take_rtnl) + if (need_lock) { rtnl_lock(); + netdev_lock(priv->netdev); + } err = mlx5e_num_channels_changed(priv); - if (take_rtnl) + if (need_lock) { + netdev_unlock(priv->netdev); rtnl_unlock(); + } if (err) goto out; @@ -6540,8 +6725,23 @@ static void _mlx5e_remove(struct auxiliary_device *adev) mlx5_core_uplink_netdev_set(mdev, NULL); mlx5e_dcbnl_delete_app(priv); - unregister_netdev(priv->netdev); - _mlx5e_suspend(adev, false); + /* When unload driver, the netdev is in registered state + * if it's from legacy mode. If from switchdev mode, it + * is already unregistered before changing to NIC profile. + */ + if (priv->netdev->reg_state == NETREG_REGISTERED) { + unregister_netdev(priv->netdev); + _mlx5e_suspend(adev, false); + } else { + struct mlx5_core_dev *pos; + int i; + + if (test_bit(MLX5E_STATE_DESTROYING, &priv->state)) + mlx5_sd_for_each_dev(i, mdev, pos) + mlx5e_destroy_mdev_resources(pos); + else + _mlx5e_suspend(adev, true); + } /* Avoid cleanup if profile rollback failed. */ if (priv->profile) priv->profile->cleanup(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 554f9cb5b53f..63a7a788fb0d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -33,6 +33,7 @@ #include <linux/dim.h> #include <linux/debugfs.h> #include <linux/mlx5/fs.h> +#include <net/netdev_lock.h> #include <net/switchdev.h> #include <net/pkt_cls.h> #include <net/act_api.h> @@ -65,6 +66,7 @@ #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \ max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) #define MLX5E_REP_PARAMS_DEF_NUM_CHANNELS 1 +#define MLX5E_REP_PARAMS_DEF_LOG_RQ_SIZE 0x8 static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; @@ -802,6 +804,7 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_stop = mlx5e_rep_close, .ndo_start_xmit = mlx5e_xmit, .ndo_setup_tc = mlx5e_rep_setup_tc, + .ndo_set_mac_address = eth_mac_addr, .ndo_get_stats64 = mlx5e_rep_get_stats, .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, @@ -855,6 +858,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev) /* RQ */ mlx5e_build_rq_params(mdev, params); + if (!mlx5e_is_uplink_rep(priv) && mlx5_core_is_ecpf(mdev)) + params->log_rq_mtu_frames = MLX5E_REP_PARAMS_DEF_LOG_RQ_SIZE; /* If netdev is already registered (e.g. move from nic profile to uplink, * RTNL lock must be held before triggering netdev notifiers. @@ -882,10 +887,14 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev, { SET_NETDEV_DEV(netdev, mdev->device); netdev->netdev_ops = &mlx5e_netdev_ops_rep; + netdev->request_ops_lock = true; + netdev_lockdep_set_classes(netdev); eth_hw_addr_random(netdev); netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; netdev->watchdog_timeo = 15 * HZ; + if (mlx5_core_is_ecpf(mdev)) + netdev->tx_queue_len = 1 << MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE; #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) netdev->hw_features |= NETIF_F_HW_TC; @@ -900,7 +909,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev, netdev->features |= netdev->hw_features; - netdev->netns_local = true; + netdev->netns_immutable = true; } static int mlx5e_init_rep(struct mlx5_core_dev *mdev, @@ -1339,9 +1348,11 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv) netdev->wanted_features |= NETIF_F_HW_TC; rtnl_lock(); + netdev_lock(netdev); if (netif_running(netdev)) mlx5e_open(netdev); udp_tunnel_nic_reset_ntf(priv->netdev); + netdev_unlock(netdev); netif_device_attach(netdev); rtnl_unlock(); } @@ -1351,9 +1362,11 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) struct mlx5_core_dev *mdev = priv->mdev; rtnl_lock(); + netdev_lock(priv->netdev); if (netif_running(priv->netdev)) mlx5e_close(priv->netdev); netif_device_detach(priv->netdev); + netdev_unlock(priv->netdev); rtnl_unlock(); mlx5e_rep_bridge_cleanup(priv); @@ -1509,6 +1522,21 @@ mlx5e_vport_uplink_rep_unload(struct mlx5e_rep_priv *rpriv) priv = netdev_priv(netdev); + /* This bit is set when using devlink to change eswitch mode from + * switchdev to legacy. As need to keep uplink netdev ifindex, we + * detach uplink representor profile and attach NIC profile only. + * The netdev will be unregistered later when unload NIC auxiliary + * driver for this case. + * We explicitly block devlink eswitch mode change if any IPSec rules + * offloaded, but can't block other cases, such as driver unload + * and devlink reload. We have to unregister netdev before profile + * change for those cases. This is to avoid resource leak because + * the offloaded rules don't have the chance to be unoffloaded before + * cleanup which is triggered by detach uplink representor profile. + */ + if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_SWITCH_LEGACY)) + unregister_netdev(netdev); + mlx5e_netdev_attach_nic_profile(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 1963bc5adb18..b8c609d91d11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -273,33 +273,32 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, #define MLX5E_PAGECNT_BIAS_MAX (PAGE_SIZE / 64) -static int mlx5e_page_alloc_fragmented(struct mlx5e_rq *rq, +static int mlx5e_page_alloc_fragmented(struct page_pool *pp, struct mlx5e_frag_page *frag_page) { - struct page *page; + netmem_ref netmem = page_pool_dev_alloc_netmems(pp); - page = page_pool_dev_alloc_pages(rq->page_pool); - if (unlikely(!page)) + if (unlikely(!netmem)) return -ENOMEM; - page_pool_fragment_page(page, MLX5E_PAGECNT_BIAS_MAX); + page_pool_fragment_netmem(netmem, MLX5E_PAGECNT_BIAS_MAX); *frag_page = (struct mlx5e_frag_page) { - .page = page, + .netmem = netmem, .frags = 0, }; return 0; } -static void mlx5e_page_release_fragmented(struct mlx5e_rq *rq, +static void mlx5e_page_release_fragmented(struct page_pool *pp, struct mlx5e_frag_page *frag_page) { u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags; - struct page *page = frag_page->page; + netmem_ref netmem = frag_page->netmem; - if (page_pool_unref_page(page, drain_count) == 0) - page_pool_put_unrefed_page(rq->page_pool, page, -1, true); + if (page_pool_unref_netmem(netmem, drain_count) == 0) + page_pool_put_unrefed_netmem(pp, netmem, -1, true); } static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, @@ -313,7 +312,8 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, * offset) should just use the new one without replenishing again * by themselves. */ - err = mlx5e_page_alloc_fragmented(rq, frag->frag_page); + err = mlx5e_page_alloc_fragmented(rq->page_pool, + frag->frag_page); return err; } @@ -332,7 +332,7 @@ static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *frag) { if (mlx5e_frag_can_release(frag)) - mlx5e_page_release_fragmented(rq, frag->frag_page); + mlx5e_page_release_fragmented(rq->page_pool, frag->frag_page); } static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix) @@ -358,7 +358,7 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe, frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); headroom = i == 0 ? rq->buff.headroom : 0; - addr = page_pool_get_dma_addr(frag->frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag->frag_page->netmem); wqe->data[i].addr = cpu_to_be64(addr + frag->offset + headroom); } @@ -499,9 +499,10 @@ mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinf struct xdp_buff *xdp, struct mlx5e_frag_page *frag_page, u32 frag_offset, u32 len) { + netmem_ref netmem = frag_page->netmem; skb_frag_t *frag; - dma_addr_t addr = page_pool_get_dma_addr(frag_page->page); + dma_addr_t addr = page_pool_get_dma_addr_netmem(netmem); dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir); if (!xdp_buff_has_frags(xdp)) { @@ -514,9 +515,9 @@ mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinf } frag = &sinfo->frags[sinfo->nr_frags++]; - skb_frag_fill_page_desc(frag, frag_page->page, frag_offset, len); + skb_frag_fill_netmem_desc(frag, netmem, frag_offset, len); - if (page_is_pfmemalloc(frag_page->page)) + if (netmem_is_pfmemalloc(netmem)) xdp_buff_set_frag_pfmemalloc(xdp); sinfo->xdp_frags_size += len; } @@ -527,27 +528,29 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb, u32 frag_offset, u32 len, unsigned int truesize) { - dma_addr_t addr = page_pool_get_dma_addr(frag_page->page); + dma_addr_t addr = page_pool_get_dma_addr_netmem(frag_page->netmem); u8 next_frag = skb_shinfo(skb)->nr_frags; + netmem_ref netmem = frag_page->netmem; dma_sync_single_for_cpu(rq->pdev, addr + frag_offset, len, rq->buff.map_dir); - if (skb_can_coalesce(skb, next_frag, frag_page->page, frag_offset)) { + if (skb_can_coalesce_netmem(skb, next_frag, netmem, frag_offset)) { skb_coalesce_rx_frag(skb, next_frag - 1, len, truesize); - } else { - frag_page->frags++; - skb_add_rx_frag(skb, next_frag, frag_page->page, - frag_offset, len, truesize); + return; } + + frag_page->frags++; + skb_add_rx_frag_netmem(skb, next_frag, netmem, + frag_offset, len, truesize); } static inline void mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb, - struct page *page, dma_addr_t addr, + netmem_ref netmem, dma_addr_t addr, int offset_from, int dma_offset, u32 headlen) { - const void *from = page_address(page) + offset_from; + const void *from = netmem_address(netmem) + offset_from; /* Aligning len to sizeof(long) optimizes memcpy performance */ unsigned int len = ALIGN(headlen, sizeof(long)); @@ -584,7 +587,8 @@ mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) struct mlx5e_frag_page *frag_page; frag_page = &wi->alloc_units.frag_pages[i]; - mlx5e_page_release_fragmented(rq, frag_page); + mlx5e_page_release_fragmented(rq->page_pool, + frag_page); } } } @@ -631,16 +635,16 @@ static void build_ksm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe, __be32 key, u16 offset, u16 ksm_len) { memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_ksms)); - umr_wqe->ctrl.opmod_idx_opcode = + umr_wqe->hdr.ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); - umr_wqe->ctrl.umr_mkey = key; - umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) + umr_wqe->hdr.ctrl.umr_mkey = key; + umr_wqe->hdr.ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | MLX5E_KSM_UMR_DS_CNT(ksm_len)); - umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; - umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); - umr_wqe->uctrl.xlt_octowords = cpu_to_be16(ksm_len); - umr_wqe->uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + umr_wqe->hdr.uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; + umr_wqe->hdr.uctrl.xlt_offset = cpu_to_be16(offset); + umr_wqe->hdr.uctrl.xlt_octowords = cpu_to_be16(ksm_len); + umr_wqe->hdr.uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } static struct mlx5e_frag_page *mlx5e_shampo_hd_to_frag_page(struct mlx5e_rq *rq, int header_index) @@ -672,19 +676,18 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, wqe_bbs = MLX5E_KSM_UMR_WQEBBS(ksm_entries); pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs); umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); - build_ksm_umr(sq, umr_wqe, shampo->key, index, ksm_entries); + build_ksm_umr(sq, umr_wqe, shampo->mkey_be, index, ksm_entries); WARN_ON_ONCE(ksm_entries & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)); while (i < ksm_entries) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); u64 addr; - err = mlx5e_page_alloc_fragmented(rq, frag_page); + err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, frag_page); if (unlikely(err)) goto err_unmap; - - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); for (int j = 0; j < MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; j++) { header_offset = mlx5e_shampo_hd_offset(index++); @@ -704,7 +707,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, shampo->pi = (shampo->pi + ksm_entries) & (shampo->hd_per_wq - 1); sq->pc += wqe_bbs; - sq->doorbell_cseg = &umr_wqe->ctrl; + sq->doorbell_cseg = &umr_wqe->hdr.ctrl; return 0; @@ -715,7 +718,8 @@ err_unmap: if (!header_offset) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); - mlx5e_page_release_fragmented(rq, frag_page); + mlx5e_page_release_fragmented(rq->hd_page_pool, + frag_page); } } @@ -791,10 +795,11 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) for (i = 0; i < rq->mpwqe.pages_per_wqe; i++, frag_page++) { dma_addr_t addr; - err = mlx5e_page_alloc_fragmented(rq, frag_page); + err = mlx5e_page_alloc_fragmented(rq->page_pool, frag_page); if (unlikely(err)) goto err_unmap; - addr = page_pool_get_dma_addr(frag_page->page); + + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { .ptag = cpu_to_be64(addr | MLX5_EN_WR), }; @@ -814,12 +819,12 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); wi->consumed_strides = 0; - umr_wqe->ctrl.opmod_idx_opcode = + umr_wqe->hdr.ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); offset = (ix * rq->mpwqe.mtts_per_wqe) * sizeof(struct mlx5_mtt) / MLX5_OCTWORD; - umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); + umr_wqe->hdr.uctrl.xlt_offset = cpu_to_be16(offset); sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX, @@ -829,14 +834,14 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) sq->pc += rq->mpwqe.umr_wqebbs; - sq->doorbell_cseg = &umr_wqe->ctrl; + sq->doorbell_cseg = &umr_wqe->hdr.ctrl; return 0; err_unmap: while (--i >= 0) { frag_page--; - mlx5e_page_release_fragmented(rq, frag_page); + mlx5e_page_release_fragmented(rq->page_pool, frag_page); } bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); @@ -855,7 +860,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); - mlx5e_page_release_fragmented(rq, frag_page); + mlx5e_page_release_fragmented(rq->hd_page_pool, frag_page); } clear_bit(header_index, shampo->bitmap); } @@ -1100,6 +1105,8 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) if (rq->page_pool) page_pool_nid_changed(rq->page_pool, numa_mem_id()); + if (rq->hd_page_pool) + page_pool_nid_changed(rq->hd_page_pool, numa_mem_id()); head = rq->mpwqe.actual_wq_head; i = missing; @@ -1154,8 +1161,9 @@ static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp) } } -static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, - u32 cqe_bcnt) +static unsigned int mlx5e_lro_update_hdr(struct sk_buff *skb, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt) { struct ethhdr *eth = (struct ethhdr *)(skb->data); struct tcphdr *tcp; @@ -1205,6 +1213,8 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, tcp->check = tcp_v6_check(payload_len, &ipv6->saddr, &ipv6->daddr, check); } + + return (unsigned int)((unsigned char *)tcp + tcp->doff * 4 - skb->data); } static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) @@ -1212,7 +1222,7 @@ static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); u16 head_offset = mlx5e_shampo_hd_offset(header_index) + rq->buff.headroom; - return page_address(frag_page->page) + head_offset; + return netmem_address(frag_page->netmem) + head_offset; } static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) @@ -1561,8 +1571,10 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, mlx5e_macsec_offload_handle_rx_skb(netdev, skb, cqe); if (lro_num_seg > 1) { - mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); - skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); + unsigned int hdrlen = mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); + + skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt - hdrlen, lro_num_seg); + skb_shinfo(skb)->gso_segs = lro_num_seg; /* Subtract one since we already counted this as one * "regular" packet in mlx5e_complete_rx_cqe() */ @@ -1673,28 +1685,28 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, dma_addr_t addr; u32 frag_size; - va = page_address(frag_page->page) + wi->offset; + va = netmem_address(frag_page->netmem) + wi->offset; data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, frag_size, rq->buff.map_dir); net_prefetch(data); prog = rcu_dereference(rq->xdp_prog); if (prog) { - struct mlx5e_xdp_buff mxbuf; + struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; net_prefetchw(va); /* xdp_frame data area */ mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, - cqe_bcnt, &mxbuf); - if (mlx5e_xdp_handle(rq, prog, &mxbuf)) + cqe_bcnt, mxbuf); + if (mlx5e_xdp_handle(rq, prog, mxbuf)) return NULL; /* page/packet was consumed by XDP */ - rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start; - metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta; - cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data; + rx_headroom = mxbuf->xdp.data - mxbuf->xdp.data_hard_start; + metasize = mxbuf->xdp.data - mxbuf->xdp.data_meta; + cqe_bcnt = mxbuf->xdp.data_end - mxbuf->xdp.data; } frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); @@ -1713,11 +1725,11 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; + struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; struct mlx5e_wqe_frag_info *head_wi = wi; u16 rx_headroom = rq->buff.headroom; struct mlx5e_frag_page *frag_page; struct skb_shared_info *sinfo; - struct mlx5e_xdp_buff mxbuf; u32 frag_consumed_bytes; struct bpf_prog *prog; struct sk_buff *skb; @@ -1727,18 +1739,18 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi frag_page = wi->frag_page; - va = page_address(frag_page->page) + wi->offset; + va = netmem_address(frag_page->netmem) + wi->offset; frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); dma_sync_single_range_for_cpu(rq->pdev, addr, wi->offset, rq->buff.frame0_sz, rq->buff.map_dir); net_prefetchw(va); /* xdp_frame data area */ net_prefetch(va + rx_headroom); mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, - frag_consumed_bytes, &mxbuf); - sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp); + frag_consumed_bytes, mxbuf); + sinfo = xdp_get_shared_info_from_buff(&mxbuf->xdp); truesize = 0; cqe_bcnt -= frag_consumed_bytes; @@ -1750,8 +1762,9 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt); - mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, - wi->offset, frag_consumed_bytes); + mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf->xdp, + frag_page, wi->offset, + frag_consumed_bytes); truesize += frag_info->frag_stride; cqe_bcnt -= frag_consumed_bytes; @@ -1760,7 +1773,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi } prog = rcu_dereference(rq->xdp_prog); - if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) { + if (prog && mlx5e_xdp_handle(rq, prog, mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { struct mlx5e_wqe_frag_info *pwi; @@ -1770,21 +1783,23 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi return NULL; /* page/packet was consumed by XDP */ } - skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start, rq->buff.frame0_sz, - mxbuf.xdp.data - mxbuf.xdp.data_hard_start, - mxbuf.xdp.data_end - mxbuf.xdp.data, - mxbuf.xdp.data - mxbuf.xdp.data_meta); + skb = mlx5e_build_linear_skb( + rq, mxbuf->xdp.data_hard_start, rq->buff.frame0_sz, + mxbuf->xdp.data - mxbuf->xdp.data_hard_start, + mxbuf->xdp.data_end - mxbuf->xdp.data, + mxbuf->xdp.data - mxbuf->xdp.data_meta); if (unlikely(!skb)) return NULL; skb_mark_for_recycle(skb); head_wi->frag_page->frags++; - if (xdp_buff_has_frags(&mxbuf.xdp)) { + if (xdp_buff_has_frags(&mxbuf->xdp)) { /* sinfo->nr_frags is reset by build_skb, calculate again. */ xdp_update_skb_shared_info(skb, wi - head_wi - 1, sinfo->xdp_frags_size, truesize, - xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); + xdp_buff_is_frag_pfmemalloc( + &mxbuf->xdp)); for (struct mlx5e_wqe_frag_info *pwi = head_wi + 1; pwi < wi; pwi++) pwi->frag_page->frags++; @@ -1984,10 +1999,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w struct mlx5e_frag_page *frag_page = &wi->alloc_units.frag_pages[page_idx]; u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); struct mlx5e_frag_page *head_page = frag_page; + struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; u32 frag_offset = head_offset; u32 byte_cnt = cqe_bcnt; struct skb_shared_info *sinfo; - struct mlx5e_xdp_buff mxbuf; unsigned int truesize = 0; struct bpf_prog *prog; struct sk_buff *skb; @@ -2000,12 +2015,14 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w if (prog) { /* area for bpf_xdp_[store|load]_bytes */ - net_prefetchw(page_address(frag_page->page) + frag_offset); - if (unlikely(mlx5e_page_alloc_fragmented(rq, &wi->linear_page))) { + net_prefetchw(netmem_address(frag_page->netmem) + frag_offset); + if (unlikely(mlx5e_page_alloc_fragmented(rq->page_pool, + &wi->linear_page))) { rq->stats->buff_alloc_err++; return NULL; } - va = page_address(wi->linear_page.page); + + va = netmem_address(wi->linear_page.netmem); net_prefetchw(va); /* xdp_frame data area */ linear_hr = XDP_PACKET_HEADROOM; linear_data_len = 0; @@ -2033,9 +2050,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w } } - mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz, linear_data_len, &mxbuf); + mlx5e_fill_mxbuf(rq, cqe, va, linear_hr, linear_frame_sz, + linear_data_len, mxbuf); - sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp); + sinfo = xdp_get_shared_info_from_buff(&mxbuf->xdp); while (byte_cnt) { /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ @@ -2046,7 +2064,8 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w else truesize += ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz)); - mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf.xdp, frag_page, frag_offset, + mlx5e_add_skb_shared_info_frag(rq, sinfo, &mxbuf->xdp, + frag_page, frag_offset, pg_consumed_bytes); byte_cnt -= pg_consumed_bytes; frag_offset = 0; @@ -2054,7 +2073,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w } if (prog) { - if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { + if (mlx5e_xdp_handle(rq, prog, mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { struct mlx5e_frag_page *pfp; @@ -2063,30 +2082,33 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w wi->linear_page.frags++; } - mlx5e_page_release_fragmented(rq, &wi->linear_page); + mlx5e_page_release_fragmented(rq->page_pool, + &wi->linear_page); return NULL; /* page/packet was consumed by XDP */ } - skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start, - linear_frame_sz, - mxbuf.xdp.data - mxbuf.xdp.data_hard_start, 0, - mxbuf.xdp.data - mxbuf.xdp.data_meta); + skb = mlx5e_build_linear_skb( + rq, mxbuf->xdp.data_hard_start, linear_frame_sz, + mxbuf->xdp.data - mxbuf->xdp.data_hard_start, 0, + mxbuf->xdp.data - mxbuf->xdp.data_meta); if (unlikely(!skb)) { - mlx5e_page_release_fragmented(rq, &wi->linear_page); + mlx5e_page_release_fragmented(rq->page_pool, + &wi->linear_page); return NULL; } skb_mark_for_recycle(skb); wi->linear_page.frags++; - mlx5e_page_release_fragmented(rq, &wi->linear_page); + mlx5e_page_release_fragmented(rq->page_pool, &wi->linear_page); - if (xdp_buff_has_frags(&mxbuf.xdp)) { + if (xdp_buff_has_frags(&mxbuf->xdp)) { struct mlx5e_frag_page *pagep; /* sinfo->nr_frags is reset by build_skb, calculate again. */ xdp_update_skb_shared_info(skb, frag_page - head_page, sinfo->xdp_frags_size, truesize, - xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); + xdp_buff_is_frag_pfmemalloc( + &mxbuf->xdp)); pagep = head_page; do @@ -2097,12 +2119,13 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w } else { dma_addr_t addr; - if (xdp_buff_has_frags(&mxbuf.xdp)) { + if (xdp_buff_has_frags(&mxbuf->xdp)) { struct mlx5e_frag_page *pagep; xdp_update_skb_shared_info(skb, sinfo->nr_frags, sinfo->xdp_frags_size, truesize, - xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); + xdp_buff_is_frag_pfmemalloc( + &mxbuf->xdp)); pagep = frag_page - sinfo->nr_frags; do @@ -2110,8 +2133,8 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w while (++pagep < frag_page); } /* copy header */ - addr = page_pool_get_dma_addr(head_page->page); - mlx5e_copy_skb_header(rq, skb, head_page->page, addr, + addr = page_pool_get_dma_addr_netmem(head_page->netmem); + mlx5e_copy_skb_header(rq, skb, head_page->netmem, addr, head_offset, head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; @@ -2141,31 +2164,31 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, return NULL; } - va = page_address(frag_page->page) + head_offset; + va = netmem_address(frag_page->netmem) + head_offset; data = va + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); - addr = page_pool_get_dma_addr(frag_page->page); + addr = page_pool_get_dma_addr_netmem(frag_page->netmem); dma_sync_single_range_for_cpu(rq->pdev, addr, head_offset, frag_size, rq->buff.map_dir); net_prefetch(data); prog = rcu_dereference(rq->xdp_prog); if (prog) { - struct mlx5e_xdp_buff mxbuf; + struct mlx5e_xdp_buff *mxbuf = &rq->mxbuf; net_prefetchw(va); /* xdp_frame data area */ mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, rq->buff.frame0_sz, - cqe_bcnt, &mxbuf); - if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { + cqe_bcnt, mxbuf); + if (mlx5e_xdp_handle(rq, prog, mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) frag_page->frags++; return NULL; /* page/packet was consumed by XDP */ } - rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start; - metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta; - cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data; + rx_headroom = mxbuf->xdp.data - mxbuf->xdp.data_hard_start; + metasize = mxbuf->xdp.data - mxbuf->xdp.data_meta; + cqe_bcnt = mxbuf->xdp.data_end - mxbuf->xdp.data; } frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); @@ -2184,16 +2207,19 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5_cqe64 *cqe, u16 header_index) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); - dma_addr_t page_dma_addr = page_pool_get_dma_addr(frag_page->page); u16 head_offset = mlx5e_shampo_hd_offset(header_index); - dma_addr_t dma_addr = page_dma_addr + head_offset; u16 head_size = cqe->shampo.header_size; u16 rx_headroom = rq->buff.headroom; struct sk_buff *skb = NULL; + dma_addr_t page_dma_addr; + dma_addr_t dma_addr; void *hdr, *data; u32 frag_size; - hdr = page_address(frag_page->page) + head_offset; + page_dma_addr = page_pool_get_dma_addr_netmem(frag_page->netmem); + dma_addr = page_dma_addr + head_offset; + + hdr = netmem_address(frag_page->netmem) + head_offset; data = hdr + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size); @@ -2218,7 +2244,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, } net_prefetchw(skb->data); - mlx5e_copy_skb_header(rq, skb, frag_page->page, dma_addr, + mlx5e_copy_skb_header(rq, skb, frag_page->netmem, dma_addr, head_offset + rx_headroom, rx_headroom, head_size); /* skb linear part was allocated with headlen and aligned to long */ @@ -2312,11 +2338,23 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq } if (!*skb) { - if (likely(head_size)) + if (likely(head_size)) { *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index); - else - *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe, cqe_bcnt, - data_offset, page_idx); + } else { + struct mlx5e_frag_page *frag_page; + + frag_page = &wi->alloc_units.frag_pages[page_idx]; + /* Drop packets with header in unreadable data area to + * prevent the kernel from touching it. + */ + if (unlikely(netmem_is_net_iov(frag_page->netmem))) + goto free_hd_entry; + *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe, + cqe_bcnt, + data_offset, + page_idx); + } + if (unlikely(!*skb)) goto free_hd_entry; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 1d60465cc2ca..2f7a543feca6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -166,6 +166,9 @@ mlx5e_test_loopback_validate(struct sk_buff *skb, struct udphdr *udph; struct iphdr *iph; + if (skb_linearize(skb)) + goto out; + /* We are only going to peek, no need to clone the SKB */ if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb)) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 611ec4b6f370..87536f158d07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -37,9 +37,7 @@ #include "en/ptp.h" #include "en/port.h" -#ifdef CONFIG_PAGE_POOL_STATS #include <net/page_pool/helpers.h> -#endif void mlx5e_ethtool_put_stat(u64 **data, u64 val) { @@ -196,7 +194,6 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) }, #endif { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) }, -#ifdef CONFIG_PAGE_POOL_STATS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_fast) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_alloc_slow_high_order) }, @@ -208,7 +205,6 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_ring_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_pp_recycle_released_ref) }, -#endif #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_bytes) }, @@ -377,7 +373,6 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s, s->rx_arfs_err += rq_stats->arfs_err; #endif s->rx_recover += rq_stats->recover; -#ifdef CONFIG_PAGE_POOL_STATS s->rx_pp_alloc_fast += rq_stats->pp_alloc_fast; s->rx_pp_alloc_slow += rq_stats->pp_alloc_slow; s->rx_pp_alloc_empty += rq_stats->pp_alloc_empty; @@ -389,7 +384,6 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s, s->rx_pp_recycle_ring += rq_stats->pp_recycle_ring; s->rx_pp_recycle_ring_full += rq_stats->pp_recycle_ring_full; s->rx_pp_recycle_released_ref += rq_stats->pp_recycle_released_ref; -#endif #ifdef CONFIG_MLX5_EN_TLS s->rx_tls_decrypted_packets += rq_stats->tls_decrypted_packets; s->rx_tls_decrypted_bytes += rq_stats->tls_decrypted_bytes; @@ -496,7 +490,6 @@ static void mlx5e_stats_grp_sw_update_stats_qos(struct mlx5e_priv *priv, } } -#ifdef CONFIG_PAGE_POOL_STATS static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c) { struct mlx5e_rq_stats *rq_stats = c->rq.stats; @@ -519,11 +512,6 @@ static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c) rq_stats->pp_recycle_ring_full = stats.recycle_stats.ring_full; rq_stats->pp_recycle_released_ref = stats.recycle_stats.released_refcnt; } -#else -static void mlx5e_stats_update_stats_rq_page_pool(struct mlx5e_channel *c) -{ -} -#endif static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw) { @@ -1227,6 +1215,13 @@ out: mutex_unlock(&priv->state_lock); } +#define PPORT_PHY_LAYER_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.phys_layer_cntrs.c) +static const struct counter_desc pport_phy_layer_cntrs_stats_desc[] = { + { "link_down_events_phy", PPORT_PHY_LAYER_OFF(link_down_events) } +}; + #define PPORT_PHY_STATISTICAL_OFF(c) \ MLX5_BYTE_OFF(ppcnt_reg, \ counter_set.phys_layer_statistical_cntrs.c##_high) @@ -1243,25 +1238,45 @@ pport_phy_statistical_err_lanes_stats_desc[] = { { "rx_err_lane_3_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane3) }, }; +#define PPORT_PHY_RECOVERY_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, counter_set.phys_layer_recovery_cntrs.c) +static const struct counter_desc +pport_phy_recovery_cntrs_stats_desc[] = { + { "total_success_recovery_phy", + PPORT_PHY_RECOVERY_OFF(total_successful_recovery_events) } +}; + +#define NUM_PPORT_PHY_LAYER_COUNTERS \ + ARRAY_SIZE(pport_phy_layer_cntrs_stats_desc) #define NUM_PPORT_PHY_STATISTICAL_COUNTERS \ ARRAY_SIZE(pport_phy_statistical_stats_desc) #define NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS \ ARRAY_SIZE(pport_phy_statistical_err_lanes_stats_desc) +#define NUM_PPORT_PHY_RECOVERY_COUNTERS \ + ARRAY_SIZE(pport_phy_recovery_cntrs_stats_desc) + +#define NUM_PPORT_PHY_STATISTICAL_LOOPBACK_COUNTERS(dev) \ + (MLX5_CAP_PCAM_FEATURE(dev, ppcnt_statistical_group) ? \ + NUM_PPORT_PHY_STATISTICAL_COUNTERS : 0) +#define NUM_PPORT_PHY_STATISTICAL_PER_LANE_LOOPBACK_COUNTERS(dev) \ + (MLX5_CAP_PCAM_FEATURE(dev, per_lane_error_counters) ? \ + NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS : 0) +#define NUM_PPORT_PHY_RECOVERY_LOOPBACK_COUNTERS(dev) \ + (MLX5_CAP_PCAM_FEATURE(dev, ppcnt_recovery_counters) ? \ + NUM_PPORT_PHY_RECOVERY_COUNTERS : 0) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(phy) { struct mlx5_core_dev *mdev = priv->mdev; int num_stats; - /* "1" for link_down_events special counter */ - num_stats = 1; + num_stats = NUM_PPORT_PHY_LAYER_COUNTERS; - num_stats += MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) ? - NUM_PPORT_PHY_STATISTICAL_COUNTERS : 0; + num_stats += NUM_PPORT_PHY_STATISTICAL_LOOPBACK_COUNTERS(mdev); - num_stats += MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters) ? - NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS : 0; + num_stats += NUM_PPORT_PHY_STATISTICAL_PER_LANE_LOOPBACK_COUNTERS(mdev); + num_stats += NUM_PPORT_PHY_RECOVERY_LOOPBACK_COUNTERS(mdev); return num_stats; } @@ -1270,18 +1285,22 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(phy) struct mlx5_core_dev *mdev = priv->mdev; int i; - ethtool_puts(data, "link_down_events_phy"); - - if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) - return; + for (i = 0; i < NUM_PPORT_PHY_LAYER_COUNTERS; i++) + ethtool_puts(data, pport_phy_layer_cntrs_stats_desc[i].format); - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_LOOPBACK_COUNTERS(mdev); i++) ethtool_puts(data, pport_phy_statistical_stats_desc[i].format); - if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) - ethtool_puts(data, - pport_phy_statistical_err_lanes_stats_desc[i].format); + for (i = 0; + i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_LOOPBACK_COUNTERS(mdev); + i++) + ethtool_puts(data, + pport_phy_statistical_err_lanes_stats_desc[i] + .format); + + for (i = 0; i < NUM_PPORT_PHY_RECOVERY_LOOPBACK_COUNTERS(mdev); i++) + ethtool_puts(data, + pport_phy_recovery_cntrs_stats_desc[i].format); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(phy) @@ -1289,30 +1308,35 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(phy) struct mlx5_core_dev *mdev = priv->mdev; int i; - /* link_down_events_phy has special handling since it is not stored in __be64 format */ - mlx5e_ethtool_put_stat( - data, MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, - counter_set.phys_layer_cntrs.link_down_events)); - - if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) - return; + for (i = 0; i < NUM_PPORT_PHY_LAYER_COUNTERS; i++) + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR32_BE(&priv->stats.pport + .phy_counters, + pport_phy_layer_cntrs_stats_desc, i)); - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_LOOPBACK_COUNTERS(mdev); i++) mlx5e_ethtool_put_stat( data, MLX5E_READ_CTR64_BE( &priv->stats.pport.phy_statistical_counters, pport_phy_statistical_stats_desc, i)); - if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) - for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) - mlx5e_ethtool_put_stat( - data, - MLX5E_READ_CTR64_BE( - &priv->stats.pport - .phy_statistical_counters, - pport_phy_statistical_err_lanes_stats_desc, - i)); + for (i = 0; + i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_LOOPBACK_COUNTERS(mdev); + i++) + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR64_BE( + &priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_err_lanes_stats_desc, i)); + + for (i = 0; i < NUM_PPORT_PHY_RECOVERY_LOOPBACK_COUNTERS(mdev); i++) + mlx5e_ethtool_put_stat( + data, + MLX5E_READ_CTR32_BE( + &priv->stats.pport.phy_recovery_counters, + pport_phy_recovery_cntrs_stats_desc, i)); } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy) @@ -1328,12 +1352,21 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(phy) MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); - if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) - return; + if (MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) { + out = pstats->phy_statistical_counters; + MLX5_SET(ppcnt_reg, in, grp, + MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, + 0); + } - out = pstats->phy_statistical_counters; - MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + if (MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_recovery_counters)) { + out = pstats->phy_recovery_counters; + MLX5_SET(ppcnt_reg, in, grp, + MLX5_PHYSICAL_LAYER_RECOVERY_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, + 0); + } } void mlx5e_get_link_ext_stats(struct net_device *dev, @@ -2086,7 +2119,6 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) }, #endif { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) }, -#ifdef CONFIG_PAGE_POOL_STATS { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_fast) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_alloc_slow_high_order) }, @@ -2098,7 +2130,6 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_ring_full) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, pp_recycle_released_ref) }, -#endif #ifdef CONFIG_MLX5_EN_TLS { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_bytes) }, @@ -2393,8 +2424,7 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp) } if (priv->rx_ptp_opened) { for (i = 0; i < NUM_PTP_RQ_STATS; i++) - ethtool_sprintf(data, ptp_rq_stats_desc[i].format, - MLX5E_PTP_CHANNEL_IX); + ethtool_puts(data, ptp_rq_stats_desc[i].format); } } @@ -2582,6 +2612,7 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = { #ifdef CONFIG_MLX5_MACSEC &MLX5E_STATS_GRP(macsec_hw), #endif + &MLX5E_STATS_GRP(pcie_cong), }; unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 5961c569cfe0..72dbcc1928ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -54,7 +54,7 @@ #define MLX5E_DECLARE_PTP_TX_STAT(type, fld) "ptp_tx%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld) -#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq0_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld) @@ -215,7 +215,6 @@ struct mlx5e_sw_stats { u64 ch_aff_change; u64 ch_force_irq; u64 ch_eq_rearm; -#ifdef CONFIG_PAGE_POOL_STATS u64 rx_pp_alloc_fast; u64 rx_pp_alloc_slow; u64 rx_pp_alloc_slow_high_order; @@ -227,7 +226,6 @@ struct mlx5e_sw_stats { u64 rx_pp_recycle_ring; u64 rx_pp_recycle_ring_full; u64 rx_pp_recycle_released_ref; -#endif #ifdef CONFIG_MLX5_EN_TLS u64 tx_tls_encrypted_packets; u64 tx_tls_encrypted_bytes; @@ -309,6 +307,9 @@ struct mlx5e_vport_stats { #define PPORT_PHY_STATISTICAL_GET(pstats, c) \ MLX5_GET64(ppcnt_reg, (pstats)->phy_statistical_counters, \ counter_set.phys_layer_statistical_cntrs.c##_high) +#define PPORT_PHY_RECOVERY_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, (pstats)->phy_recovery_counters, \ + counter_set.phys_layer_recovery_cntrs.c) #define PPORT_PER_PRIO_GET(pstats, prio, c) \ MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \ counter_set.eth_per_prio_grp_data_layout.c##_high) @@ -324,6 +325,7 @@ struct mlx5e_pport_stats { __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 phy_recovery_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 per_tc_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 per_tc_congest_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; @@ -381,7 +383,6 @@ struct mlx5e_rq_stats { u64 arfs_err; #endif u64 recover; -#ifdef CONFIG_PAGE_POOL_STATS u64 pp_alloc_fast; u64 pp_alloc_slow; u64 pp_alloc_slow_high_order; @@ -393,7 +394,6 @@ struct mlx5e_rq_stats { u64 pp_recycle_ring; u64 pp_recycle_ring_full; u64 pp_recycle_released_ref; -#endif #ifdef CONFIG_MLX5_EN_TLS u64 tls_decrypted_packets; u64 tls_decrypted_bytes; @@ -535,5 +535,6 @@ extern MLX5E_DECLARE_STATS_GRP(ipsec_hw); extern MLX5E_DECLARE_STATS_GRP(ipsec_sw); extern MLX5E_DECLARE_STATS_GRP(ptp); extern MLX5E_DECLARE_STATS_GRP(macsec_hw); +extern MLX5E_DECLARE_STATS_GRP(pcie_cong); #endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 6b3b1afe8312..32c07a8b03d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1282,7 +1282,7 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[dest_ix].counter_id = mlx5_fc_id(attr->counter); + dest[dest_ix].counter = attr->counter; dest_ix++; } @@ -1750,9 +1750,6 @@ extra_split_attr_dests_needed(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr !list_is_first(&attr->list, &flow->attrs)) return 0; - if (flow_flag_test(flow, SLOW)) - return 0; - esw_attr = attr->esw_attr; if (!esw_attr->split_count || esw_attr->split_count == esw_attr->out_count - 1) @@ -1766,7 +1763,7 @@ extra_split_attr_dests_needed(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr for (i = esw_attr->split_count; i < esw_attr->out_count; i++) { /* external dest with encap is considered as internal by firmware */ if (esw_attr->dests[i].vport == MLX5_VPORT_UPLINK && - !(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) + !(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)) ext_dest = true; else int_dest = true; @@ -2031,9 +2028,8 @@ err_out: return err; } -static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) +static bool mlx5_flow_has_geneve_opt(struct mlx5_flow_spec *spec) { - struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec; void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); @@ -2072,7 +2068,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, } complete_all(&flow->del_hw_done); - if (mlx5_flow_has_geneve_opt(flow)) + if (mlx5_flow_has_geneve_opt(&attr->parse_attr->spec)) mlx5_geneve_tlv_option_del(priv->mdev->geneve); if (flow->decap_route) @@ -2577,12 +2573,13 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level); if (err) { - kvfree(tmp_spec); NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes"); netdev_warn(priv->netdev, "Failed to parse tunnel attributes"); - return err; + } else { + err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); } - err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec); + if (mlx5_flow_has_geneve_opt(tmp_spec)) + mlx5_geneve_tlv_option_del(priv->mdev->geneve); kvfree(tmp_spec); if (err) return err; @@ -5449,7 +5446,7 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) goto err_action_counter; } - err = dev_get_port_parent_id(priv->netdev, &ppid, false); + err = netif_get_port_parent_id(priv->netdev, &ppid, false); if (!err) { memcpy(&key, &ppid.id, sizeof(key)); mlx5_esw_offloads_devcom_init(esw, key); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index f8c7912abe0e..319061d31602 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -196,7 +196,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(headlen); - mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE); + mlx5e_dma_push_single(sq, dma_addr, headlen); num_dma++; dseg++; } @@ -214,7 +214,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(fsz); - mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); + mlx5e_dma_push_netmem(sq, skb_frag_netmem(frag), dma_addr, fsz); num_dma++; dseg++; } @@ -256,8 +256,7 @@ mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb, mode = sq->min_inline_mode; - if (skb_vlan_tag_present(skb) && - test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state)) + if (skb_vlan_tag_present(skb)) mode = max_t(u8, MLX5_INLINE_MODE_L2, mode); return mode; @@ -337,10 +336,11 @@ static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_at }; } -static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb) +static void mlx5e_tx_skb_update_ts_flags(struct sk_buff *skb) { if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + skb_tx_timestamp(skb); } static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq) @@ -392,7 +392,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt); - mlx5e_tx_skb_update_hwts_flags(skb); + mlx5e_tx_skb_update_ts_flags(skb); sq->pc += wi->num_wqebbs; @@ -482,12 +482,6 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, } eseg->inline_hdr.sz |= cpu_to_be16(ihs); dseg += wqe_attr->ds_cnt_inl; - } else if (skb_vlan_tag_present(skb)) { - eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); - if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD)) - eseg->insert.type |= cpu_to_be16(MLX5_ETH_WQE_SVLAN); - eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); - stats->added_vlan_packets++; } dseg += wqe_attr->ds_cnt_ids; @@ -525,9 +519,9 @@ static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq, { struct mlx5e_tx_mpwqe *session = &sq->mpwqe; struct mlx5e_tx_wqe *wqe; - u16 pi; + u16 pi, num_wqebbs; - pi = mlx5e_txqsq_get_next_pi(sq, sq->max_sq_mpw_wqebbs); + pi = mlx5e_txqsq_get_next_pi_anysize(sq, &num_wqebbs); wqe = MLX5E_TX_FETCH_WQE(sq, pi); net_prefetchw(wqe->data); @@ -535,6 +529,7 @@ static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq, .wqe = wqe, .bytes_count = 0, .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT, + .ds_count_max = num_wqebbs * MLX5_SEND_WQEBB_NUM_DS, .pkt_count = 0, .inline_on = 0, }; @@ -621,12 +616,12 @@ mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb, sq->stats->xmit_more += xmit_more; - mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE); + mlx5e_dma_push_single(sq, txd.dma_addr, txd.len); mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb); mlx5e_tx_mpwqe_add_dseg(sq, &txd); - mlx5e_tx_skb_update_hwts_flags(skb); + mlx5e_tx_skb_update_ts_flags(skb); - if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) { + if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) { /* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */ cseg = mlx5e_tx_mpwqe_session_complete(sq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 2b229b6226c6..1ab77159409d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -585,6 +585,9 @@ static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4]) async_event_mask |= (1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE); + if (mlx5_pcie_cong_event_supported(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_OBJECT_CHANGE); + mask[0] = async_event_mask; if (MLX5_CAP_GEN(dev, event_cap)) @@ -871,21 +874,27 @@ static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx) static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) { + struct mlx5_irq_pool *pool = mlx5_irq_table_get_comp_irq_pool(dev); struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); - struct irq_affinity_desc af_desc = {}; + struct irq_affinity_desc *af_desc; struct mlx5_irq *irq; - /* In case SF irq pool does not exist, fallback to the PF irqs*/ + /* In case SF irq pool does not exist, fallback to the PF irqs */ if (!mlx5_irq_pool_is_sf_pool(pool)) return comp_irq_request_pci(dev, vecidx); - af_desc.is_managed = false; - cpumask_copy(&af_desc.mask, cpu_online_mask); - cpumask_andnot(&af_desc.mask, &af_desc.mask, &table->used_cpus); - irq = mlx5_irq_affinity_request(dev, pool, &af_desc); - if (IS_ERR(irq)) + af_desc = kvzalloc(sizeof(*af_desc), GFP_KERNEL); + if (!af_desc) + return -ENOMEM; + + af_desc->is_managed = false; + cpumask_copy(&af_desc->mask, cpu_online_mask); + cpumask_andnot(&af_desc->mask, &af_desc->mask, &table->used_cpus); + irq = mlx5_irq_affinity_request(dev, pool, af_desc); + if (IS_ERR(irq)) { + kvfree(af_desc); return PTR_ERR(irq); + } cpumask_or(&table->used_cpus, &table->used_cpus, mlx5_irq_get_affinity_mask(irq)); mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", @@ -893,6 +902,8 @@ static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); + kvfree(af_desc); + return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c index 6b4c9ffad95b..7dd1dc3f77c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c @@ -135,7 +135,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, if (drop_counter) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - drop_ctr_dst.counter_id = mlx5_fc_id(drop_counter); + drop_ctr_dst.counter = drop_counter; dst = &drop_ctr_dst; dest_num++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c index d599e50af346..3ce455c2535c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c @@ -27,7 +27,7 @@ esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, esw_debug(dev, "Create vport[%d] %s ACL table\n", vport_num, ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress"); - root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport->index); + root_ns = mlx5_get_flow_vport_namespace(dev, ns, vport->index); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch root namespace for vport (%d)\n", vport_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c index 093ed86a0acd..1c37098e09ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c @@ -260,7 +260,7 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, if (counter) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - drop_ctr_dst.counter_id = mlx5_fc_id(counter); + drop_ctr_dst.counter = counter; dst = &drop_ctr_dst; dest_num++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index c5ea1d1d2b03..76e35c827da0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -570,7 +570,8 @@ mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge) static struct mlx5_flow_handle * mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char *addr, - struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_fc *counter, struct mlx5_esw_bridge *bridge, struct mlx5_eswitch *esw) { @@ -628,7 +629,7 @@ mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char dests[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dests[0].ft = bridge->egress_ft; dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dests[1].counter_id = counter_id; + dests[1].counter = counter; handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, dests, ARRAY_SIZE(dests)); @@ -639,17 +640,19 @@ mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char static struct mlx5_flow_handle * mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, - struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_fc *counter, struct mlx5_esw_bridge *bridge) { - return mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id, + return mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter, bridge, bridge->br_offloads->esw); } static struct mlx5_flow_handle * mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, u16 esw_owner_vhca_id, const unsigned char *addr, - struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_fc *counter, struct mlx5_esw_bridge *bridge) { struct mlx5_devcom_comp_dev *devcom = bridge->br_offloads->esw->devcom, *pos; @@ -671,7 +674,7 @@ mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, u16 esw_owner_vhca_id, goto out; } - handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id, + handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter, bridge, peer_esw); out: @@ -1385,10 +1388,9 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, u16 esw_ow handle = peer ? mlx5_esw_bridge_ingress_flow_peer_create(vport_num, esw_owner_vhca_id, - addr, vlan, mlx5_fc_id(counter), - bridge) : + addr, vlan, counter, bridge) : mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan, - mlx5_fc_id(counter), bridge); + counter, bridge); if (IS_ERR(handle)) { err = PTR_ERR(handle); esw_warn(esw->dev, "Failed to create ingress flow(vport=%u,err=%d,peer=%d)\n", @@ -1861,7 +1863,7 @@ int mlx5_esw_bridge_port_mdb_add(struct net_device *dev, u16 vport_num, u16 esw_ "Failed to lookup bridge port to add MDB (MAC=%pM,vport=%u)\n", addr, vport_num); NL_SET_ERR_MSG_FMT_MOD(extack, - "Failed to lookup bridge port to add MDB (MAC=%pM,vport=%u)\n", + "Failed to lookup bridge port to add MDB (MAC=%pM,vport=%u)", addr, vport_num); return -EINVAL; } @@ -1874,7 +1876,7 @@ int mlx5_esw_bridge_port_mdb_add(struct net_device *dev, u16 vport_num, u16 esw_ "Failed to lookup bridge port vlan metadata to create MDB (MAC=%pM,vid=%u,vport=%u)\n", addr, vid, vport_num); NL_SET_ERR_MSG_FMT_MOD(extack, - "Failed to lookup vlan metadata for MDB (MAC=%pM,vid=%u,vport=%u)\n", + "Failed to lookup vlan metadata for MDB (MAC=%pM,vid=%u,vport=%u)", addr, vid, vport_num); return -EINVAL; } @@ -1882,7 +1884,7 @@ int mlx5_esw_bridge_port_mdb_add(struct net_device *dev, u16 vport_num, u16 esw_ err = mlx5_esw_bridge_port_mdb_attach(dev, port, addr, vid); if (err) { - NL_SET_ERR_MSG_FMT_MOD(extack, "Failed to add MDB (MAC=%pM,vid=%u,vport=%u)\n", + NL_SET_ERR_MSG_FMT_MOD(extack, "Failed to add MDB (MAC=%pM,vid=%u,vport=%u)", addr, vid, vport_num); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 982fe3714683..c33accadae0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -32,7 +32,7 @@ static void mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(struct mlx5_eswitch * u16 pfnum; mlx5_esw_get_port_parent_id(dev, &ppid); - pfnum = mlx5_get_dev_index(dev); + pfnum = PCI_FUNC(dev->pdev->devfn); external = mlx5_core_is_ecpf_esw_manager(dev); if (external) controller_num = dev->priv.eswitch->offloads.host_number + 1; @@ -47,10 +47,12 @@ static void mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(struct mlx5_eswitch * devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, vport_num - 1, external); } else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) { + u16 base_vport = mlx5_core_ec_vf_vport_base(dev); + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); dl_port->attrs.switch_id.id_len = ppid.id_len; devlink_port_attrs_pci_vf_set(dl_port, 0, pfnum, - vport_num - 1, false); + vport_num - base_vport, false); } } @@ -110,7 +112,7 @@ static void mlx5_esw_offloads_sf_devlink_port_attrs_set(struct mlx5_eswitch *esw struct netdev_phys_item_id ppid = {}; u16 pfnum; - pfnum = mlx5_get_dev_index(dev); + pfnum = PCI_FUNC(dev->pdev->devfn); mlx5_esw_get_port_parent_id(dev, &ppid); memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len); dl_port->attrs.switch_id.id_len = ppid.id_len; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c index 5a0047bdcb51..3cfe743610d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.c @@ -10,9 +10,9 @@ #endif enum { - MLX5_ESW_IPSEC_RX_POL_FT_LEVEL, MLX5_ESW_IPSEC_RX_ESP_FT_LEVEL, MLX5_ESW_IPSEC_RX_ESP_FT_CHK_LEVEL, + MLX5_ESW_IPSEC_RX_POL_FT_LEVEL, }; enum { @@ -85,6 +85,19 @@ err_header_alloc: return err; } +void mlx5_esw_ipsec_rx_rule_add_match_obj(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_flow_spec *spec) +{ + MLX5_SET(fte_match_param, spec->match_criteria, + misc_parameters_2.metadata_reg_c_1, + ESW_IPSEC_RX_MAPPED_ID_MATCH_MASK); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.metadata_reg_c_1, + sa_entry->rx_mapped_id << ESW_ZONE_ID_BITS); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; +} + void mlx5_esw_ipsec_rx_id_mapping_remove(struct mlx5e_ipsec_sa_entry *sa_entry) { struct mlx5e_ipsec *ipsec = sa_entry->ipsec; @@ -150,11 +163,11 @@ void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) unsigned long i; int err; - xa_for_each(&esw->offloads.vport_reps, i, rep) { - rpriv = rep->rep_data[REP_ETH].priv; - if (!rpriv || !rpriv->netdev) + mlx5_esw_for_each_rep(esw, i, rep) { + if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) continue; + rpriv = rep->rep_data[REP_ETH].priv; rhashtable_walk_enter(&rpriv->tc_ht, &iter); rhashtable_walk_start(&iter); while ((flow = rhashtable_walk_next(&iter)) != NULL) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h index ac9c65b89166..514c15258b1d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/ipsec_fs.h @@ -20,6 +20,8 @@ int mlx5_esw_ipsec_rx_ipsec_obj_id_search(struct mlx5e_priv *priv, u32 id, void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx_create_attr *attr); void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev); +void mlx5_esw_ipsec_rx_rule_add_match_obj(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_flow_spec *spec); #else static inline void mlx5_esw_ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx_create_attr *attr) {} @@ -48,5 +50,8 @@ static inline void mlx5_esw_ipsec_tx_create_attr_set(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_tx_create_attr *attr) {} static inline void mlx5_esw_ipsec_restore_dest_uplink(struct mlx5_core_dev *mdev) {} +static inline void +mlx5_esw_ipsec_rx_rule_add_match_obj(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_flow_spec *spec) {} #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESW_IPSEC_FS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index 45183de424f3..76382626ad41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -96,7 +96,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) if (!flow_group_in) return -ENOMEM; - ft_attr.max_fte = POOL_NEXT_SIZE; + ft_attr.max_fte = MLX5_FS_MAX_POOL_SIZE; ft_attr.prio = LEGACY_FDB_PRIO; fdb = mlx5_create_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 8b7c843446e1..8b4977650183 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -64,11 +64,19 @@ static void esw_qos_domain_release(struct mlx5_eswitch *esw) enum sched_node_type { SCHED_NODE_TYPE_VPORTS_TSAR, SCHED_NODE_TYPE_VPORT, + SCHED_NODE_TYPE_TC_ARBITER_TSAR, + SCHED_NODE_TYPE_RATE_LIMITER, + SCHED_NODE_TYPE_VPORT_TC, + SCHED_NODE_TYPE_VPORTS_TC_TSAR, }; static const char * const sched_node_type_str[] = { [SCHED_NODE_TYPE_VPORTS_TSAR] = "vports TSAR", [SCHED_NODE_TYPE_VPORT] = "vport", + [SCHED_NODE_TYPE_TC_ARBITER_TSAR] = "TC Arbiter TSAR", + [SCHED_NODE_TYPE_RATE_LIMITER] = "Rate Limiter", + [SCHED_NODE_TYPE_VPORT_TC] = "vport TC", + [SCHED_NODE_TYPE_VPORTS_TC_TSAR] = "vports TC TSAR", }; struct mlx5_esw_sched_node { @@ -90,19 +98,75 @@ struct mlx5_esw_sched_node { struct list_head children; /* Valid only if this node is associated with a vport. */ struct mlx5_vport *vport; + /* Level in the hierarchy. The root node level is 1. */ + u8 level; + /* Valid only when this node represents a traffic class. */ + u8 tc; + /* Valid only for a TC arbiter node or vport TC arbiter. */ + u32 tc_bw[DEVLINK_RATE_TCS_MAX]; }; +static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node) +{ + if (!node->parent) { + /* Root children are assigned a depth level of 2. */ + node->level = 2; + list_add_tail(&node->entry, &node->esw->qos.domain->nodes); + } else { + node->level = node->parent->level + 1; + list_add_tail(&node->entry, &node->parent->children); + } +} + +static int esw_qos_num_tcs(struct mlx5_core_dev *dev) +{ + int num_tcs = mlx5_max_tc(dev) + 1; + + return num_tcs < DEVLINK_RATE_TCS_MAX ? num_tcs : DEVLINK_RATE_TCS_MAX; +} + static void esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent) { list_del_init(&node->entry); node->parent = parent; - list_add_tail(&node->entry, &parent->children); - node->esw = parent->esw; + if (parent) + node->esw = parent->esw; + esw_qos_node_attach_to_parent(node); +} + +static void esw_qos_nodes_set_parent(struct list_head *nodes, + struct mlx5_esw_sched_node *parent) +{ + struct mlx5_esw_sched_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, nodes, entry) { + esw_qos_node_set_parent(node, parent); + if (!list_empty(&node->children) && + parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { + struct mlx5_esw_sched_node *child; + + list_for_each_entry(child, &node->children, entry) { + struct mlx5_vport *vport = child->vport; + + if (vport) + vport->qos.sched_node->parent = parent; + } + } + } } void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport) { + if (vport->qos.sched_nodes) { + int num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev); + int i; + + for (i = 0; i < num_tcs; i++) + kfree(vport->qos.sched_nodes[i]); + kfree(vport->qos.sched_nodes); + } + kfree(vport->qos.sched_node); memset(&vport->qos, 0, sizeof(vport->qos)); } @@ -126,16 +190,37 @@ mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport) static void esw_qos_sched_elem_warn(struct mlx5_esw_sched_node *node, int err, const char *op) { - if (node->vport) { + switch (node->type) { + case SCHED_NODE_TYPE_VPORTS_TC_TSAR: + esw_warn(node->esw->dev, + "E-Switch %s %s scheduling element failed (tc=%d,err=%d)\n", + op, sched_node_type_str[node->type], node->tc, err); + break; + case SCHED_NODE_TYPE_VPORT_TC: + esw_warn(node->esw->dev, + "E-Switch %s %s scheduling element failed (vport=%d,tc=%d,err=%d)\n", + op, + sched_node_type_str[node->type], + node->vport->vport, node->tc, err); + break; + case SCHED_NODE_TYPE_VPORT: esw_warn(node->esw->dev, "E-Switch %s %s scheduling element failed (vport=%d,err=%d)\n", op, sched_node_type_str[node->type], node->vport->vport, err); - return; + break; + case SCHED_NODE_TYPE_RATE_LIMITER: + case SCHED_NODE_TYPE_TC_ARBITER_TSAR: + case SCHED_NODE_TYPE_VPORTS_TSAR: + esw_warn(node->esw->dev, + "E-Switch %s %s scheduling element failed (err=%d)\n", + op, sched_node_type_str[node->type], err); + break; + default: + esw_warn(node->esw->dev, + "E-Switch %s scheduling element failed (err=%d)\n", + op, err); + break; } - - esw_warn(node->esw->dev, - "E-Switch %s %s scheduling element failed (err=%d)\n", - op, sched_node_type_str[node->type], err); } static int esw_qos_node_create_sched_element(struct mlx5_esw_sched_node *node, void *ctx, @@ -218,6 +303,24 @@ static int esw_qos_sched_elem_config(struct mlx5_esw_sched_node *node, u32 max_r return 0; } +static int esw_qos_create_rate_limit_element(struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + + if (!mlx5_qos_element_type_supported( + node->esw->dev, + SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT, + SCHEDULING_HIERARCHY_E_SWITCH)) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, node->max_rate); + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT); + + return esw_qos_node_create_sched_element(node, sched_ctx, extack); +} + static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, struct mlx5_esw_sched_node *parent) { @@ -251,11 +354,13 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, return 0; } -static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max) +static u32 esw_qos_calc_bw_share(u32 value, u32 divider, u32 fw_max) { if (!divider) return 0; - return min_t(u32, max_t(u32, DIV_ROUND_UP(min_rate, divider), MLX5_MIN_BW_SHARE), fw_max); + return min_t(u32, fw_max, + max_t(u32, + DIV_ROUND_UP(value, divider), MLX5_MIN_BW_SHARE)); } static void esw_qos_update_sched_node_bw_share(struct mlx5_esw_sched_node *node, @@ -282,7 +387,13 @@ static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, if (node->esw != esw || node->ix == esw->qos.root_tsar_ix) continue; - esw_qos_update_sched_node_bw_share(node, divider, extack); + /* Vports TC TSARs don't have a minimum rate configured, + * so there's no need to update the bw_share on them. + */ + if (node->type != SCHED_NODE_TYPE_VPORTS_TC_TSAR) { + esw_qos_update_sched_node_bw_share(node, divider, + extack); + } if (list_empty(&node->children)) continue; @@ -291,6 +402,20 @@ static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, } } +static u32 esw_qos_calculate_tc_bw_divider(u32 *tc_bw) +{ + u32 total = 0; + int i; + + for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) + total += tc_bw[i]; + + /* If total is zero, tc-bw config is disabled and we shouldn't reach + * here. + */ + return WARN_ON(!total) ? 1 : total; +} + static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node, u32 min_rate, struct netlink_ext_ack *extack) { @@ -305,8 +430,9 @@ static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node, return 0; } -static int esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id, - u32 *tsar_ix) +static int +esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id, + u32 max_rate, u32 bw_share, u32 *tsar_ix) { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; void *attr; @@ -323,6 +449,8 @@ static int esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_ SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, parent_element_id); + MLX5_SET(scheduling_context, tsar_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, tsar_ctx, bw_share, bw_share); attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); @@ -332,33 +460,69 @@ static int esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_ tsar_ix); } -static int esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node, - struct netlink_ext_ack *extack) +static int +esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node, + struct netlink_ext_ack *extack) { + struct mlx5_esw_sched_node *parent = vport_node->parent; u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = vport_node->esw->dev; void *attr; - if (!mlx5_qos_element_type_supported(dev, - SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT, - SCHEDULING_HIERARCHY_E_SWITCH)) + if (!mlx5_qos_element_type_supported( + dev, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT, + SCHEDULING_HIERARCHY_E_SWITCH)) return -EOPNOTSUPP; MLX5_SET(scheduling_context, sched_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport); - MLX5_SET(scheduling_context, sched_ctx, parent_element_id, vport_node->parent->ix); - MLX5_SET(scheduling_context, sched_ctx, max_average_bw, vport_node->max_rate); + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, + parent ? parent->ix : vport_node->esw->qos.root_tsar_ix); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, + vport_node->max_rate); return esw_qos_node_create_sched_element(vport_node, sched_ctx, extack); } +static int +esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node, + u32 rate_limit_elem_ix, + struct netlink_ext_ack *extack) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = vport_tc_node->esw->dev; + void *attr; + + if (!mlx5_qos_element_type_supported( + dev, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC, + SCHEDULING_HIERARCHY_E_SWITCH)) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC); + attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); + MLX5_SET(vport_tc_element, attr, vport_number, + vport_tc_node->vport->vport); + MLX5_SET(vport_tc_element, attr, traffic_class, vport_tc_node->tc); + MLX5_SET(scheduling_context, sched_ctx, max_bw_obj_id, + rate_limit_elem_ix); + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, + vport_tc_node->parent->ix); + MLX5_SET(scheduling_context, sched_ctx, bw_share, + vport_tc_node->bw_share); + + return esw_qos_node_create_sched_element(vport_tc_node, sched_ctx, + extack); +} + static struct mlx5_esw_sched_node * __esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type type, struct mlx5_esw_sched_node *parent) { - struct list_head *parent_children; struct mlx5_esw_sched_node *node; node = kzalloc(sizeof(*node), GFP_KERNEL); @@ -370,8 +534,15 @@ __esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type node->type = type; node->parent = parent; INIT_LIST_HEAD(&node->children); - parent_children = parent ? &parent->children : &esw->qos.domain->nodes; - list_add_tail(&node->entry, parent_children); + esw_qos_node_attach_to_parent(node); + if (!parent) { + /* The caller is responsible for inserting the node into the + * parent list if necessary. This function can also be used with + * a NULL parent, which doesn't necessarily indicate that it + * refers to the root scheduling element. + */ + list_del_init(&node->entry); + } return node; } @@ -388,6 +559,147 @@ static void esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlin __esw_qos_free_node(node); } +static int esw_qos_create_vports_tc_node(struct mlx5_esw_sched_node *parent, + u8 tc, struct netlink_ext_ack *extack) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = parent->esw->dev; + struct mlx5_esw_sched_node *vports_tc_node; + void *attr; + int err; + + if (!mlx5_qos_element_type_supported( + dev, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR, + SCHEDULING_HIERARCHY_E_SWITCH) || + !mlx5_qos_tsar_type_supported(dev, + TSAR_ELEMENT_TSAR_TYPE_DWRR, + SCHEDULING_HIERARCHY_E_SWITCH)) + return -EOPNOTSUPP; + + vports_tc_node = __esw_qos_alloc_node(parent->esw, 0, + SCHED_NODE_TYPE_VPORTS_TC_TSAR, + parent); + if (!vports_tc_node) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed"); + esw_warn(dev, "Failed to alloc vports TC node (tc=%d)\n", tc); + return -ENOMEM; + } + + attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); + MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); + MLX5_SET(tsar_element, attr, traffic_class, tc); + MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, parent->ix); + MLX5_SET(scheduling_context, tsar_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + + err = esw_qos_node_create_sched_element(vports_tc_node, tsar_ctx, + extack); + if (err) + goto err_create_sched_element; + + vports_tc_node->tc = tc; + + return 0; + +err_create_sched_element: + __esw_qos_free_node(vports_tc_node); + return err; +} + +static void +esw_qos_tc_arbiter_get_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node, + u32 *tc_bw) +{ + memcpy(tc_bw, tc_arbiter_node->tc_bw, sizeof(tc_arbiter_node->tc_bw)); +} + +static void +esw_qos_set_tc_arbiter_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node, + u32 *tc_bw, struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = tc_arbiter_node->esw; + struct mlx5_esw_sched_node *vports_tc_node; + u32 divider, fw_max_bw_share; + + fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + divider = esw_qos_calculate_tc_bw_divider(tc_bw); + list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) { + u8 tc = vports_tc_node->tc; + u32 bw_share; + + tc_arbiter_node->tc_bw[tc] = tc_bw[tc]; + bw_share = tc_bw[tc] * fw_max_bw_share; + bw_share = esw_qos_calc_bw_share(bw_share, divider, + fw_max_bw_share); + esw_qos_sched_elem_config(vports_tc_node, 0, bw_share, extack); + } +} + +static void +esw_qos_destroy_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vports_tc_node, *tmp; + + list_for_each_entry_safe(vports_tc_node, tmp, + &tc_arbiter_node->children, entry) + esw_qos_destroy_node(vports_tc_node, extack); +} + +static int +esw_qos_create_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = tc_arbiter_node->esw; + int err, i, num_tcs = esw_qos_num_tcs(esw->dev); + + for (i = 0; i < num_tcs; i++) { + err = esw_qos_create_vports_tc_node(tc_arbiter_node, i, extack); + if (err) + goto err_tc_node_create; + } + + return 0; + +err_tc_node_create: + esw_qos_destroy_vports_tc_nodes(tc_arbiter_node, NULL); + return err; +} + +static int esw_qos_create_tc_arbiter_sched_elem( + struct mlx5_esw_sched_node *tc_arbiter_node, + struct netlink_ext_ack *extack) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + u32 tsar_parent_ix; + void *attr; + + if (!mlx5_qos_tsar_type_supported(tc_arbiter_node->esw->dev, + TSAR_ELEMENT_TSAR_TYPE_TC_ARB, + SCHEDULING_HIERARCHY_E_SWITCH)) { + NL_SET_ERR_MSG_MOD(extack, + "E-Switch TC Arbiter scheduling element is not supported"); + return -EOPNOTSUPP; + } + + attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); + MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_TC_ARB); + tsar_parent_ix = tc_arbiter_node->parent ? tc_arbiter_node->parent->ix : + tc_arbiter_node->esw->qos.root_tsar_ix; + MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, + tsar_parent_ix); + MLX5_SET(scheduling_context, tsar_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + MLX5_SET(scheduling_context, tsar_ctx, max_average_bw, + tc_arbiter_node->max_rate); + MLX5_SET(scheduling_context, tsar_ctx, bw_share, + tc_arbiter_node->bw_share); + + return esw_qos_node_create_sched_element(tc_arbiter_node, tsar_ctx, + extack); +} + static struct mlx5_esw_sched_node * __esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sched_node *parent, struct netlink_ext_ack *extack) @@ -396,7 +708,8 @@ __esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sch u32 tsar_ix; int err; - err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, &tsar_ix); + err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, 0, + 0, &tsar_ix); if (err) { NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for node failed"); return ERR_PTR(err); @@ -409,6 +722,7 @@ __esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sch goto err_alloc_node; } + list_add_tail(&node->entry, &esw->qos.domain->nodes); esw_qos_normalize_min_rate(esw, NULL, extack); trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix); @@ -450,6 +764,9 @@ static void __esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netl { struct mlx5_eswitch *esw = node->esw; + if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) + esw_qos_destroy_vports_tc_nodes(node, extack); + trace_mlx5_esw_node_qos_destroy(esw->dev, node, node->ix); esw_qos_destroy_node(node, extack); esw_qos_normalize_min_rate(esw, NULL, extack); @@ -463,51 +780,22 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) return -EOPNOTSUPP; - err = esw_qos_create_node_sched_elem(esw->dev, 0, &esw->qos.root_tsar_ix); + err = esw_qos_create_node_sched_elem(esw->dev, 0, 0, 0, + &esw->qos.root_tsar_ix); if (err) { esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); return err; } - if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { - esw->qos.node0 = __esw_qos_create_vports_sched_node(esw, NULL, extack); - } else { - /* The eswitch doesn't support scheduling nodes. - * Create a software-only node0 using the root TSAR to attach vport QoS to. - */ - if (!__esw_qos_alloc_node(esw, - esw->qos.root_tsar_ix, - SCHED_NODE_TYPE_VPORTS_TSAR, - NULL)) - esw->qos.node0 = ERR_PTR(-ENOMEM); - } - if (IS_ERR(esw->qos.node0)) { - err = PTR_ERR(esw->qos.node0); - esw_warn(dev, "E-Switch create rate node 0 failed (%d)\n", err); - goto err_node0; - } refcount_set(&esw->qos.refcnt, 1); return 0; - -err_node0: - if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, - esw->qos.root_tsar_ix)) - esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n"); - - return err; } static void esw_qos_destroy(struct mlx5_eswitch *esw) { int err; - if (esw->qos.node0->ix != esw->qos.root_tsar_ix) - __esw_qos_destroy_node(esw->qos.node0, NULL); - else - __esw_qos_free_node(esw->qos.node0); - esw->qos.node0 = NULL; - err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, esw->qos.root_tsar_ix); @@ -537,33 +825,270 @@ static void esw_qos_put(struct mlx5_eswitch *esw) esw_qos_destroy(esw); } -static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack) +static void +esw_qos_tc_arbiter_scheduling_teardown(struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) +{ + /* Clean up all Vports TC nodes within the TC arbiter node. */ + esw_qos_destroy_vports_tc_nodes(node, extack); + /* Destroy the scheduling element for the TC arbiter node itself. */ + esw_qos_node_destroy_sched_element(node, extack); +} + +static int esw_qos_tc_arbiter_scheduling_setup(struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) +{ + u32 curr_ix = node->ix; + int err; + + err = esw_qos_create_tc_arbiter_sched_elem(node, extack); + if (err) + return err; + /* Initialize the vports TC nodes within created TC arbiter TSAR. */ + err = esw_qos_create_vports_tc_nodes(node, extack); + if (err) + goto err_vports_tc_nodes; + + node->type = SCHED_NODE_TYPE_TC_ARBITER_TSAR; + + return 0; + +err_vports_tc_nodes: + /* If initialization fails, clean up the scheduling element + * for the TC arbiter node. + */ + esw_qos_node_destroy_sched_element(node, NULL); + node->ix = curr_ix; + return err; +} + +static int +esw_qos_create_vport_tc_sched_node(struct mlx5_vport *vport, + u32 rate_limit_elem_ix, + struct mlx5_esw_sched_node *vports_tc_node, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + struct mlx5_esw_sched_node *vport_tc_node; + u8 tc = vports_tc_node->tc; + int err; + + vport_tc_node = __esw_qos_alloc_node(vport_node->esw, 0, + SCHED_NODE_TYPE_VPORT_TC, + vports_tc_node); + if (!vport_tc_node) + return -ENOMEM; + + vport_tc_node->min_rate = vport_node->min_rate; + vport_tc_node->tc = tc; + vport_tc_node->vport = vport; + err = esw_qos_vport_tc_create_sched_element(vport_tc_node, + rate_limit_elem_ix, + extack); + if (err) + goto err_out; + + vport->qos.sched_nodes[tc] = vport_tc_node; + + return 0; +err_out: + __esw_qos_free_node(vport_tc_node); + return err; +} + +static void +esw_qos_destroy_vport_tc_sched_elements(struct mlx5_vport *vport, + struct netlink_ext_ack *extack) +{ + int i, num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev); + + for (i = 0; i < num_tcs; i++) { + if (vport->qos.sched_nodes[i]) { + __esw_qos_destroy_node(vport->qos.sched_nodes[i], + extack); + } + } + + kfree(vport->qos.sched_nodes); + vport->qos.sched_nodes = NULL; +} + +static int +esw_qos_create_vport_tc_sched_elements(struct mlx5_vport *vport, + enum sched_node_type type, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + struct mlx5_esw_sched_node *tc_arbiter_node, *vports_tc_node; + int err, num_tcs = esw_qos_num_tcs(vport_node->esw->dev); + u32 rate_limit_elem_ix; + + vport->qos.sched_nodes = kcalloc(num_tcs, + sizeof(struct mlx5_esw_sched_node *), + GFP_KERNEL); + if (!vport->qos.sched_nodes) { + NL_SET_ERR_MSG_MOD(extack, + "Allocating the vport TC scheduling elements failed."); + return -ENOMEM; + } + + rate_limit_elem_ix = type == SCHED_NODE_TYPE_RATE_LIMITER ? + vport_node->ix : 0; + tc_arbiter_node = type == SCHED_NODE_TYPE_RATE_LIMITER ? + vport_node->parent : vport_node; + list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) { + err = esw_qos_create_vport_tc_sched_node(vport, + rate_limit_elem_ix, + vports_tc_node, + extack); + if (err) + goto err_create_vport_tc; + } + + return 0; + +err_create_vport_tc: + esw_qos_destroy_vport_tc_sched_elements(vport, NULL); + + return err; +} + +static int +esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_type type, + struct netlink_ext_ack *extack) { struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; struct mlx5_esw_sched_node *parent = vport_node->parent; + int err; + + if (type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { + int new_level, max_level; + + /* Increase the parent's level by 2 to account for both the + * TC arbiter and the vports TC scheduling element. + */ + new_level = (parent ? parent->level : 2) + 2; + max_level = 1 << MLX5_CAP_QOS(vport_node->esw->dev, + log_esw_max_sched_depth); + if (new_level > max_level) { + NL_SET_ERR_MSG_MOD(extack, + "TC arbitration on leafs is not supported beyond max scheduling depth"); + return -EOPNOTSUPP; + } + } - esw_qos_node_destroy_sched_element(vport_node, extack); + esw_assert_qos_lock_held(vport->dev->priv.eswitch); + + if (type == SCHED_NODE_TYPE_RATE_LIMITER) + err = esw_qos_create_rate_limit_element(vport_node, extack); + else + err = esw_qos_tc_arbiter_scheduling_setup(vport_node, extack); + if (err) + return err; + + /* Rate limiters impact multiple nodes not directly connected to them + * and are not direct members of the QoS hierarchy. + * Unlink it from the parent to reflect that. + */ + if (type == SCHED_NODE_TYPE_RATE_LIMITER) { + list_del_init(&vport_node->entry); + vport_node->level = 0; + } + + err = esw_qos_create_vport_tc_sched_elements(vport, type, extack); + if (err) + goto err_sched_nodes; + + return 0; + +err_sched_nodes: + if (type == SCHED_NODE_TYPE_RATE_LIMITER) { + esw_qos_node_destroy_sched_element(vport_node, NULL); + esw_qos_node_attach_to_parent(vport_node); + } else { + esw_qos_tc_arbiter_scheduling_teardown(vport_node, NULL); + } + return err; +} + +static void esw_qos_vport_tc_disable(struct mlx5_vport *vport, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + enum sched_node_type curr_type = vport_node->type; + + esw_qos_destroy_vport_tc_sched_elements(vport, extack); + + if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER) + esw_qos_node_destroy_sched_element(vport_node, extack); + else + esw_qos_tc_arbiter_scheduling_teardown(vport_node, extack); +} + +static int esw_qos_set_vport_tcs_min_rate(struct mlx5_vport *vport, + u32 min_rate, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + int err, i, num_tcs = esw_qos_num_tcs(vport_node->esw->dev); + + for (i = 0; i < num_tcs; i++) { + err = esw_qos_set_node_min_rate(vport->qos.sched_nodes[i], + min_rate, extack); + if (err) + goto err_out; + } + vport_node->min_rate = min_rate; + + return 0; +err_out: + for (--i; i >= 0; i--) { + esw_qos_set_node_min_rate(vport->qos.sched_nodes[i], + vport_node->min_rate, extack); + } + return err; +} + +static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + enum sched_node_type curr_type = vport_node->type; + + if (curr_type == SCHED_NODE_TYPE_VPORT) + esw_qos_node_destroy_sched_element(vport_node, extack); + else + esw_qos_vport_tc_disable(vport, extack); vport_node->bw_share = 0; + memset(vport_node->tc_bw, 0, sizeof(vport_node->tc_bw)); list_del_init(&vport_node->entry); - esw_qos_normalize_min_rate(parent->esw, parent, extack); + esw_qos_normalize_min_rate(vport_node->esw, vport_node->parent, extack); trace_mlx5_esw_vport_qos_destroy(vport_node->esw->dev, vport); } -static int esw_qos_vport_enable(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent, +static int esw_qos_vport_enable(struct mlx5_vport *vport, + enum sched_node_type type, + struct mlx5_esw_sched_node *parent, struct netlink_ext_ack *extack) { + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; int err; esw_assert_qos_lock_held(vport->dev->priv.eswitch); - esw_qos_node_set_parent(vport->qos.sched_node, parent); - err = esw_qos_vport_create_sched_element(vport->qos.sched_node, extack); + esw_qos_node_set_parent(vport_node, parent); + if (type == SCHED_NODE_TYPE_VPORT) + err = esw_qos_vport_create_sched_element(vport_node, extack); + else + err = esw_qos_vport_tc_enable(vport, type, extack); if (err) return err; - esw_qos_normalize_min_rate(parent->esw, parent, extack); + vport_node->type = type; + esw_qos_normalize_min_rate(vport_node->esw, parent, extack); + trace_mlx5_esw_vport_qos_create(vport->dev, vport, vport_node->max_rate, + vport_node->bw_share); return 0; } @@ -574,6 +1099,7 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t { struct mlx5_eswitch *esw = vport->dev->priv.eswitch; struct mlx5_esw_sched_node *sched_node; + struct mlx5_eswitch *parent_esw; int err; esw_assert_qos_lock_held(esw); @@ -581,22 +1107,42 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t if (err) return err; - parent = parent ?: esw->qos.node0; - sched_node = __esw_qos_alloc_node(parent->esw, 0, type, parent); - if (!sched_node) + parent_esw = parent ? parent->esw : esw; + sched_node = __esw_qos_alloc_node(parent_esw, 0, type, parent); + if (!sched_node) { + esw_qos_put(esw); return -ENOMEM; + } + if (!parent) + list_add_tail(&sched_node->entry, &esw->qos.domain->nodes); sched_node->max_rate = max_rate; sched_node->min_rate = min_rate; sched_node->vport = vport; vport->qos.sched_node = sched_node; - err = esw_qos_vport_enable(vport, parent, extack); - if (err) + err = esw_qos_vport_enable(vport, type, parent, extack); + if (err) { + __esw_qos_free_node(sched_node); esw_qos_put(esw); + vport->qos.sched_node = NULL; + } return err; } +static void mlx5_esw_qos_vport_disable_locked(struct mlx5_vport *vport) +{ + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + + esw_assert_qos_lock_held(esw); + if (!vport->qos.sched_node) + return; + + esw_qos_vport_disable(vport, NULL); + mlx5_esw_qos_vport_qos_free(vport); + esw_qos_put(esw); +} + void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) { struct mlx5_eswitch *esw = vport->dev->priv.eswitch; @@ -608,11 +1154,9 @@ void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) goto unlock; parent = vport->qos.sched_node->parent; - WARN(parent != esw->qos.node0, "Disabling QoS on port before detaching it from node"); + WARN(parent, "Disabling QoS on port before detaching it from node"); - esw_qos_vport_disable(vport, NULL); - mlx5_esw_qos_vport_qos_free(vport); - esw_qos_put(esw); + mlx5_esw_qos_vport_disable_locked(vport); unlock: esw_qos_unlock(esw); } @@ -642,6 +1186,8 @@ static int mlx5_esw_qos_set_vport_min_rate(struct mlx5_vport *vport, u32 min_rat if (!vport_node) return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, 0, min_rate, extack); + else if (vport_node->type == SCHED_NODE_TYPE_RATE_LIMITER) + return esw_qos_set_vport_tcs_min_rate(vport, min_rate, extack); else return esw_qos_set_node_min_rate(vport_node, min_rate, extack); } @@ -674,28 +1220,278 @@ bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *m return enabled; } +static int esw_qos_vport_tc_check_type(enum sched_node_type curr_type, + enum sched_node_type new_type, + struct netlink_ext_ack *extack) +{ + if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && + new_type == SCHED_NODE_TYPE_RATE_LIMITER) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot switch from vport-level TC arbitration to node-level TC arbitration"); + return -EOPNOTSUPP; + } + + if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER && + new_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot switch from node-level TC arbitration to vport-level TC arbitration"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int esw_qos_vport_update(struct mlx5_vport *vport, + enum sched_node_type type, + struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + struct mlx5_esw_sched_node *curr_parent = vport_node->parent; + enum sched_node_type curr_type = vport_node->type; + u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0}; + int err; + + esw_assert_qos_lock_held(vport->dev->priv.eswitch); + if (curr_type == type && curr_parent == parent) + return 0; + + err = esw_qos_vport_tc_check_type(curr_type, type, extack); + if (err) + return err; + + if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) + esw_qos_tc_arbiter_get_bw_shares(vport_node, curr_tc_bw); + + esw_qos_vport_disable(vport, extack); + + err = esw_qos_vport_enable(vport, type, parent, extack); + if (err) { + esw_qos_vport_enable(vport, curr_type, curr_parent, NULL); + extack = NULL; + } + + if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) { + esw_qos_set_tc_arbiter_bw_shares(vport_node, curr_tc_bw, + extack); + } + + return err; +} + static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = vport->dev->priv.eswitch; struct mlx5_esw_sched_node *curr_parent; - int err; + enum sched_node_type type; esw_assert_qos_lock_held(esw); curr_parent = vport->qos.sched_node->parent; - parent = parent ?: esw->qos.node0; if (curr_parent == parent) return 0; - esw_qos_vport_disable(vport, extack); + /* Set vport QoS type based on parent node type if different from + * default QoS; otherwise, use the vport's current QoS type. + */ + if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) + type = SCHED_NODE_TYPE_RATE_LIMITER; + else if (curr_parent && + curr_parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) + type = SCHED_NODE_TYPE_VPORT; + else + type = vport->qos.sched_node->type; + + return esw_qos_vport_update(vport, type, parent, extack); +} + +static void +esw_qos_switch_vport_tcs_to_vport(struct mlx5_esw_sched_node *tc_arbiter_node, + struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vports_tc_node, *vport_tc_node, *tmp; + + vports_tc_node = list_first_entry(&tc_arbiter_node->children, + struct mlx5_esw_sched_node, + entry); - err = esw_qos_vport_enable(vport, parent, extack); + list_for_each_entry_safe(vport_tc_node, tmp, &vports_tc_node->children, + entry) + esw_qos_vport_update_parent(vport_tc_node->vport, node, extack); +} + +static int esw_qos_switch_tc_arbiter_node_to_vports( + struct mlx5_esw_sched_node *tc_arbiter_node, + struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) +{ + u32 parent_tsar_ix = node->parent ? + node->parent->ix : node->esw->qos.root_tsar_ix; + int err; + + err = esw_qos_create_node_sched_elem(node->esw->dev, parent_tsar_ix, + node->max_rate, node->bw_share, + &node->ix); if (err) { - if (esw_qos_vport_enable(vport, curr_parent, NULL)) - esw_warn(parent->esw->dev, "vport restore QoS failed (vport=%d)\n", - vport->vport); + NL_SET_ERR_MSG_MOD(extack, + "Failed to create scheduling element for vports node when disabling vports TC QoS"); + return err; + } + + node->type = SCHED_NODE_TYPE_VPORTS_TSAR; + + /* Disable TC QoS for vports in the arbiter node. */ + esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, extack); + + return 0; +} + +static int esw_qos_switch_vports_node_to_tc_arbiter( + struct mlx5_esw_sched_node *node, + struct mlx5_esw_sched_node *tc_arbiter_node, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vport_node, *tmp; + struct mlx5_vport *vport; + int err; + + /* Enable TC QoS for each vport in the node. */ + list_for_each_entry_safe(vport_node, tmp, &node->children, entry) { + vport = vport_node->vport; + err = esw_qos_vport_update_parent(vport, tc_arbiter_node, + extack); + if (err) + goto err_out; } + /* Destroy the current vports node TSAR. */ + err = mlx5_destroy_scheduling_element_cmd(node->esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + node->ix); + if (err) + goto err_out; + + return 0; +err_out: + /* Restore vports back into the node if an error occurs. */ + esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, NULL); + + return err; +} + +static struct mlx5_esw_sched_node * +esw_qos_move_node(struct mlx5_esw_sched_node *curr_node) +{ + struct mlx5_esw_sched_node *new_node; + + new_node = __esw_qos_alloc_node(curr_node->esw, curr_node->ix, + curr_node->type, NULL); + if (!new_node) + return ERR_PTR(-ENOMEM); + + esw_qos_nodes_set_parent(&curr_node->children, new_node); + return new_node; +} + +static int esw_qos_node_disable_tc_arbitration(struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *curr_node; + int err; + + if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR) + return 0; + + /* Allocate a new rate node to hold the current state, which will allow + * for restoring the vports back to this node after disabling TC + * arbitration. + */ + curr_node = esw_qos_move_node(node); + if (IS_ERR(curr_node)) { + NL_SET_ERR_MSG_MOD(extack, "Failed setting up vports node"); + return PTR_ERR(curr_node); + } + + /* Disable TC QoS for all vports, and assign them back to the node. */ + err = esw_qos_switch_tc_arbiter_node_to_vports(curr_node, node, extack); + if (err) + goto err_out; + + /* Clean up the TC arbiter node after disabling TC QoS for vports. */ + esw_qos_tc_arbiter_scheduling_teardown(curr_node, extack); + goto out; +err_out: + esw_qos_nodes_set_parent(&curr_node->children, node); +out: + __esw_qos_free_node(curr_node); + return err; +} + +static int esw_qos_node_enable_tc_arbitration(struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *curr_node, *child; + int err, new_level, max_level; + + if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) + return 0; + + /* Increase the hierarchy level by one to account for the additional + * vports TC scheduling node, and verify that the new level does not + * exceed the maximum allowed depth. + */ + new_level = node->level + 1; + max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth); + if (new_level > max_level) { + NL_SET_ERR_MSG_MOD(extack, + "TC arbitration on nodes is not supported beyond max scheduling depth"); + return -EOPNOTSUPP; + } + + /* Ensure the node does not contain non-leaf children before assigning + * TC bandwidth. + */ + if (!list_empty(&node->children)) { + list_for_each_entry(child, &node->children, entry) { + if (!child->vport) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot configure TC bandwidth on a node with non-leaf children"); + return -EOPNOTSUPP; + } + } + } + + /* Allocate a new node that will store the information of the current + * node. This will be used later to restore the node if necessary. + */ + curr_node = esw_qos_move_node(node); + if (IS_ERR(curr_node)) { + NL_SET_ERR_MSG_MOD(extack, "Failed setting up node TC QoS"); + return PTR_ERR(curr_node); + } + + /* Initialize the TC arbiter node for QoS management. + * This step prepares the node for handling Traffic Class arbitration. + */ + err = esw_qos_tc_arbiter_scheduling_setup(node, extack); + if (err) + goto err_setup; + + /* Enable TC QoS for each vport within the current node. */ + err = esw_qos_switch_vports_node_to_tc_arbiter(curr_node, node, extack); + if (err) + goto err_switch_vports; + goto out; + +err_switch_vports: + esw_qos_tc_arbiter_scheduling_teardown(node, NULL); + node->ix = curr_node->ix; + node->type = curr_node->type; +err_setup: + esw_qos_nodes_set_parent(&curr_node->children, node); +out: + __esw_qos_free_node(curr_node); return err; } @@ -824,6 +1620,57 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char * return 0; } +static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw, + u32 *tc_bw) +{ + int i, num_tcs = esw_qos_num_tcs(esw->dev); + + for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++) { + if (tc_bw[i]) + return false; + } + + return true; +} + +static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport, + u32 *tc_bw) +{ + struct mlx5_esw_sched_node *node = vport->qos.sched_node; + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + + esw = (node && node->parent) ? node->parent->esw : esw; + + return esw_qos_validate_unsupported_tc_bw(esw, tc_bw); +} + +static bool esw_qos_tc_bw_disabled(u32 *tc_bw) +{ + int i; + + for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) { + if (tc_bw[i]) + return false; + } + + return true; +} + +static void esw_vport_qos_prune_empty(struct mlx5_vport *vport) +{ + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; + + esw_assert_qos_lock_held(vport->dev->priv.eswitch); + if (!vport_node) + return; + + if (vport_node->parent || vport_node->max_rate || + vport_node->min_rate || !esw_qos_tc_bw_disabled(vport_node->tc_bw)) + return; + + mlx5_esw_qos_vport_disable_locked(vport); +} + int mlx5_esw_qos_init(struct mlx5_eswitch *esw) { if (esw->qos.domain) @@ -857,6 +1704,10 @@ int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void esw_qos_lock(esw); err = mlx5_esw_qos_set_vport_min_rate(vport, tx_share, extack); + if (err) + goto out; + esw_vport_qos_prune_empty(vport); +out: esw_qos_unlock(esw); return err; } @@ -878,6 +1729,95 @@ int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void * esw_qos_lock(esw); err = mlx5_esw_qos_set_vport_max_rate(vport, tx_max, extack); + if (err) + goto out; + esw_vport_qos_prune_empty(vport); +out: + esw_qos_unlock(esw); + return err; +} + +int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf, + void *priv, + u32 *tc_bw, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *vport_node; + struct mlx5_vport *vport = priv; + struct mlx5_eswitch *esw; + bool disable; + int err = 0; + + esw = vport->dev->priv.eswitch; + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + disable = esw_qos_tc_bw_disabled(tc_bw); + esw_qos_lock(esw); + + if (!esw_qos_vport_validate_unsupported_tc_bw(vport, tc_bw)) { + NL_SET_ERR_MSG_MOD(extack, + "E-Switch traffic classes number is not supported"); + err = -EOPNOTSUPP; + goto unlock; + } + + vport_node = vport->qos.sched_node; + if (disable && !vport_node) + goto unlock; + + if (disable) { + if (vport_node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) + err = esw_qos_vport_update(vport, SCHED_NODE_TYPE_VPORT, + vport_node->parent, extack); + esw_vport_qos_prune_empty(vport); + goto unlock; + } + + if (!vport_node) { + err = mlx5_esw_qos_vport_enable(vport, + SCHED_NODE_TYPE_TC_ARBITER_TSAR, + NULL, 0, 0, extack); + vport_node = vport->qos.sched_node; + } else { + err = esw_qos_vport_update(vport, + SCHED_NODE_TYPE_TC_ARBITER_TSAR, + vport_node->parent, extack); + } + if (!err) + esw_qos_set_tc_arbiter_bw_shares(vport_node, tc_bw, extack); +unlock: + esw_qos_unlock(esw); + return err; +} + +int mlx5_esw_devlink_rate_node_tc_bw_set(struct devlink_rate *rate_node, + void *priv, + u32 *tc_bw, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *node = priv; + struct mlx5_eswitch *esw = node->esw; + bool disable; + int err; + + if (!esw_qos_validate_unsupported_tc_bw(esw, tc_bw)) { + NL_SET_ERR_MSG_MOD(extack, + "E-Switch traffic classes number is not supported"); + return -EOPNOTSUPP; + } + + disable = esw_qos_tc_bw_disabled(tc_bw); + esw_qos_lock(esw); + if (disable) { + err = esw_qos_node_disable_tc_arbitration(node, extack); + goto unlock; + } + + err = esw_qos_node_enable_tc_arbitration(node, extack); + if (!err) + esw_qos_set_tc_arbiter_bw_shares(node, tc_bw, extack); +unlock: esw_qos_unlock(esw); return err; } @@ -972,25 +1912,199 @@ int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_s } esw_qos_lock(esw); - if (!vport->qos.sched_node && parent) - err = mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, parent, 0, 0, extack); - else if (vport->qos.sched_node) + if (!vport->qos.sched_node && parent) { + enum sched_node_type type; + + type = parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR ? + SCHED_NODE_TYPE_RATE_LIMITER : SCHED_NODE_TYPE_VPORT; + err = mlx5_esw_qos_vport_enable(vport, type, parent, 0, 0, + extack); + } else if (vport->qos.sched_node) { err = esw_qos_vport_update_parent(vport, parent, extack); + } esw_qos_unlock(esw); return err; } -int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, - struct devlink_rate *parent, - void *priv, void *parent_priv, - struct netlink_ext_ack *extack) +int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack) { - struct mlx5_esw_sched_node *node; + struct mlx5_esw_sched_node *node = parent ? parent_priv : NULL; struct mlx5_vport *vport = priv; + int err; + + err = mlx5_esw_qos_vport_update_parent(vport, node, extack); + if (!err) { + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + + esw_qos_lock(esw); + esw_vport_qos_prune_empty(vport); + esw_qos_unlock(esw); + } + + return err; +} + +static bool esw_qos_is_node_empty(struct mlx5_esw_sched_node *node) +{ + if (list_empty(&node->children)) + return true; + + if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR) + return false; + + node = list_first_entry(&node->children, struct mlx5_esw_sched_node, + entry); + + return esw_qos_is_node_empty(node); +} + +static int +mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node, + struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) +{ + u8 new_level, max_level; + + if (parent && parent->esw != node->esw) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot assign node to another E-Switch"); + return -EOPNOTSUPP; + } + + if (!esw_qos_is_node_empty(node)) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot reassign a node that contains rate objects"); + return -EOPNOTSUPP; + } + + if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot attach a node to a parent with TC bandwidth configured"); + return -EOPNOTSUPP; + } + + new_level = parent ? parent->level + 1 : 2; + if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { + /* Increase by one to account for the vports TC scheduling + * element. + */ + new_level += 1; + } + + max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth); + if (new_level > max_level) { + NL_SET_ERR_MSG_MOD(extack, + "Node hierarchy depth exceeds the maximum supported level"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int +esw_qos_tc_arbiter_node_update_parent(struct mlx5_esw_sched_node *node, + struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *curr_parent = node->parent; + u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0}; + struct mlx5_eswitch *esw = node->esw; + int err; + + esw_qos_tc_arbiter_get_bw_shares(node, curr_tc_bw); + esw_qos_tc_arbiter_scheduling_teardown(node, extack); + esw_qos_node_set_parent(node, parent); + err = esw_qos_tc_arbiter_scheduling_setup(node, extack); + if (err) { + esw_qos_node_set_parent(node, curr_parent); + if (esw_qos_tc_arbiter_scheduling_setup(node, extack)) { + esw_warn(esw->dev, "Node restore QoS failed\n"); + return err; + } + } + esw_qos_set_tc_arbiter_bw_shares(node, curr_tc_bw, extack); + + return err; +} + +static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node, + struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *curr_parent = node->parent; + struct mlx5_eswitch *esw = node->esw; + u32 parent_ix; + int err; + + parent_ix = parent ? parent->ix : node->esw->qos.root_tsar_ix; + mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + node->ix); + err = esw_qos_create_node_sched_elem(esw->dev, parent_ix, + node->max_rate, 0, &node->ix); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to create a node under the new hierarchy."); + if (esw_qos_create_node_sched_elem(esw->dev, curr_parent->ix, + node->max_rate, + node->bw_share, + &node->ix)) + esw_warn(esw->dev, "Node restore QoS failed\n"); + + return err; + } + esw_qos_node_set_parent(node, parent); + node->bw_share = 0; + + return 0; +} + +static int mlx5_esw_qos_node_update_parent(struct mlx5_esw_sched_node *node, + struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *curr_parent; + struct mlx5_eswitch *esw = node->esw; + int err; + + err = mlx5_esw_qos_node_validate_set_parent(node, parent, extack); + if (err) + return err; + + esw_qos_lock(esw); + curr_parent = node->parent; + if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) { + err = esw_qos_tc_arbiter_node_update_parent(node, parent, + extack); + } else { + err = esw_qos_vports_node_update_parent(node, parent, extack); + } + + if (err) + goto out; + + esw_qos_normalize_min_rate(esw, curr_parent, extack); + esw_qos_normalize_min_rate(esw, parent, extack); + +out: + esw_qos_unlock(esw); + + return err; +} + +int mlx5_esw_devlink_rate_node_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_sched_node *node = priv, *parent_node; if (!parent) - return mlx5_esw_qos_vport_update_parent(vport, NULL, extack); + return mlx5_esw_qos_node_update_parent(node, NULL, extack); - node = parent_priv; - return mlx5_esw_qos_vport_update_parent(vport, node, extack); + parent_node = parent_priv; + return mlx5_esw_qos_node_update_parent(node, parent_node, extack); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h index 6eb8f6a648c8..0a50982b0e27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h @@ -21,6 +21,14 @@ int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void u64 tx_share, struct netlink_ext_ack *extack); int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, u64 tx_max, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_node, + void *priv, + u32 *tc_bw, + struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_tc_bw_set(struct devlink_rate *rate_node, + void *priv, + u32 *tc_bw, + struct netlink_ext_ack *extack); int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, u64 tx_share, struct netlink_ext_ack *extack); int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, @@ -29,10 +37,14 @@ int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, struct netlink_ext_ack *extack); int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, struct netlink_ext_ack *extack); -int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, - struct devlink_rate *parent, - void *priv, void *parent_priv, - struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack); #endif #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 7fb8a3381f84..4917d185d0c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1295,12 +1295,15 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; - if (mlx5_core_ec_sriov_enabled(esw->dev)) { - ret = mlx5_eswitch_load_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs, - enabled_events); - if (ret) - goto ec_vf_err; - } + } + + /* Enable ECVF vports */ + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, + esw->esw_funcs.num_ec_vfs, + enabled_events); + if (ret) + goto ec_vf_err; } /* Enable VF vports */ @@ -1331,9 +1334,11 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, + esw->esw_funcs.num_ec_vfs); + if (mlx5_ecpf_vport_exists(esw->dev)) { - if (mlx5_core_ec_sriov_enabled(esw->dev)) - mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs); mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index a83d41121db6..45506ad56847 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -212,10 +212,20 @@ struct mlx5_vport { struct mlx5_vport_info info; - /* Protected with the E-Switch qos domain lock. */ + /* Protected with the E-Switch qos domain lock. The Vport QoS can + * either be disabled (sched_node is NULL) or in one of three states: + * 1. Regular QoS (sched_node is a vport node). + * 2. TC QoS enabled on the vport (sched_node is a TC arbiter). + * 3. TC QoS enabled on the vport's parent node + * (sched_node is a rate limit node). + * When TC is enabled in either mode, the vport owns vport TC scheduling + * nodes. + */ struct { - /* Vport scheduling element node. */ + /* Vport scheduling node. */ struct mlx5_esw_sched_node *sched_node; + /* Array of vport traffic class scheduling nodes. */ + struct mlx5_esw_sched_node **sched_nodes; } qos; u16 vport; @@ -363,11 +373,6 @@ struct mlx5_eswitch { refcount_t refcnt; u32 root_tsar_ix; struct mlx5_qos_domain *domain; - /* Contains all vports with QoS enabled but no explicit node. - * Cannot be NULL if QoS is enabled, but may be a fake node - * referencing the root TSAR if the esw doesn't support nodes. - */ - struct mlx5_esw_sched_node *node0; } qos; struct mlx5_esw_bridge_offloads *br_offloads; @@ -714,6 +719,9 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); MLX5_CAP_GEN_2((esw->dev), ec_vf_vport_base) +\ (last) - 1) +#define mlx5_esw_for_each_rep(esw, i, rep) \ + xa_for_each(&((esw)->offloads.vport_reps), i, rep) + struct mlx5_eswitch *__must_check mlx5_devlink_eswitch_get(struct devlink *devlink); @@ -814,7 +822,7 @@ void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num); int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num); /** - * mlx5_esw_event_info - Indicates eswitch mode changed/changing. + * struct mlx5_esw_event_info - Indicates eswitch mode changed/changing. * * @new_mode: New mode of eswitch. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d6ff2dc4c19e..bee906661282 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -53,9 +53,6 @@ #include "lag/lag.h" #include "en/tc/post_meter.h" -#define mlx5_esw_for_each_rep(esw, i, rep) \ - xa_for_each(&((esw)->offloads.vport_reps), i, rep) - /* There are two match-all miss flows, one for unicast dst mac and * one for multicast. */ @@ -651,6 +648,7 @@ esw_setup_meter(struct mlx5_flow_attr *attr, struct mlx5_flow_act *flow_act) meter = attr->meter_attr.meter; flow_act->exe_aso.type = attr->exe_aso_type; flow_act->exe_aso.object_id = meter->obj_id; + flow_act->exe_aso.base_id = mlx5e_flow_meter_get_base_id(meter); flow_act->exe_aso.flow_meter.meter_idx = meter->idx; flow_act->exe_aso.flow_meter.init_color = MLX5_FLOW_METER_COLOR_GREEN; /* use metadata reg 5 for packet color */ @@ -724,7 +722,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[i].counter_id = mlx5_fc_id(attr->counter); + dest[i].counter = attr->counter; i++; } @@ -1184,19 +1182,19 @@ static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw, static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_core_dev *peer_dev) { + struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch; struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_handle **flows; - /* total vports is the same for both e-switches */ - int nvports = esw->total_vports; struct mlx5_flow_handle *flow; + struct mlx5_vport *peer_vport; struct mlx5_flow_spec *spec; - struct mlx5_vport *vport; int err, pfindex; unsigned long i; void *misc; - if (!MLX5_VPORT_MANAGER(esw->dev) && !mlx5_core_is_ecpf_esw_manager(esw->dev)) + if (!MLX5_VPORT_MANAGER(peer_dev) && + !mlx5_core_is_ecpf_esw_manager(peer_dev)) return 0; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); @@ -1205,7 +1203,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, peer_miss_rules_setup(esw, peer_dev, spec, &dest); - flows = kvcalloc(nvports, sizeof(*flows), GFP_KERNEL); + flows = kvcalloc(peer_esw->total_vports, sizeof(*flows), GFP_KERNEL); if (!flows) { err = -ENOMEM; goto alloc_flows_err; @@ -1215,10 +1213,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, - spec, MLX5_VPORT_PF); + if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); + esw_set_peer_miss_rule_source_port(esw, peer_esw, spec, + MLX5_VPORT_PF); flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); @@ -1226,11 +1224,11 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, err = PTR_ERR(flow); goto add_pf_flow_err; } - flows[vport->index] = flow; + flows[peer_vport->index] = flow; } - if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); + if (mlx5_ecpf_vport_exists(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF); MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF); flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); @@ -1238,13 +1236,14 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, err = PTR_ERR(flow); goto add_ecpf_flow_err; } - flows[vport->index] = flow; + flows[peer_vport->index] = flow; } - mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { + mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_vfs(peer_dev)) { esw_set_peer_miss_rule_source_port(esw, - peer_dev->priv.eswitch, - spec, vport->vport); + peer_esw, + spec, peer_vport->vport); flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); @@ -1252,22 +1251,22 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, err = PTR_ERR(flow); goto add_vf_flow_err; } - flows[vport->index] = flow; + flows[peer_vport->index] = flow; } - if (mlx5_core_ec_sriov_enabled(esw->dev)) { - mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { - if (i >= mlx5_core_max_ec_vfs(peer_dev)) - break; - esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, - spec, vport->vport); + if (mlx5_core_ec_sriov_enabled(peer_dev)) { + mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_ec_vfs(peer_dev)) { + esw_set_peer_miss_rule_source_port(esw, peer_esw, + spec, + peer_vport->vport); flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { err = PTR_ERR(flow); goto add_ec_vf_flow_err; } - flows[vport->index] = flow; + flows[peer_vport->index] = flow; } } @@ -1284,25 +1283,27 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, return 0; add_ec_vf_flow_err: - mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { - if (!flows[vport->index]) + mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_ec_vfs(peer_dev)) { + if (!flows[peer_vport->index]) continue; - mlx5_del_flow_rules(flows[vport->index]); + mlx5_del_flow_rules(flows[peer_vport->index]); } add_vf_flow_err: - mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { - if (!flows[vport->index]) + mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_vfs(peer_dev)) { + if (!flows[peer_vport->index]) continue; - mlx5_del_flow_rules(flows[vport->index]); + mlx5_del_flow_rules(flows[peer_vport->index]); } - if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - mlx5_del_flow_rules(flows[vport->index]); + if (mlx5_ecpf_vport_exists(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF); + mlx5_del_flow_rules(flows[peer_vport->index]); } add_ecpf_flow_err: - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - mlx5_del_flow_rules(flows[vport->index]); + if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); + mlx5_del_flow_rules(flows[peer_vport->index]); } add_pf_flow_err: esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err); @@ -1315,37 +1316,34 @@ alloc_flows_err: static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_core_dev *peer_dev) { + struct mlx5_eswitch *peer_esw = peer_dev->priv.eswitch; u16 peer_index = mlx5_get_dev_index(peer_dev); struct mlx5_flow_handle **flows; - struct mlx5_vport *vport; + struct mlx5_vport *peer_vport; unsigned long i; flows = esw->fdb_table.offloads.peer_miss_rules[peer_index]; if (!flows) return; - if (mlx5_core_ec_sriov_enabled(esw->dev)) { - mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { - /* The flow for a particular vport could be NULL if the other ECPF - * has fewer or no VFs enabled - */ - if (!flows[vport->index]) - continue; - mlx5_del_flow_rules(flows[vport->index]); - } + if (mlx5_core_ec_sriov_enabled(peer_dev)) { + mlx5_esw_for_each_ec_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_ec_vfs(peer_dev)) + mlx5_del_flow_rules(flows[peer_vport->index]); } - mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) - mlx5_del_flow_rules(flows[vport->index]); + mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_vfs(peer_dev)) + mlx5_del_flow_rules(flows[peer_vport->index]); - if (mlx5_ecpf_vport_exists(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF); - mlx5_del_flow_rules(flows[vport->index]); + if (mlx5_ecpf_vport_exists(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_ECPF); + mlx5_del_flow_rules(flows[peer_vport->index]); } - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); - mlx5_del_flow_rules(flows[vport->index]); + if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { + peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); + mlx5_del_flow_rules(flows[peer_vport->index]); } kvfree(flows); @@ -2335,9 +2333,10 @@ out_free: static void esw_mode_change(struct mlx5_eswitch *esw, u16 mode) { mlx5_devcom_comp_lock(esw->dev->priv.hca_devcom_comp); - - if (esw->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) { + if (esw->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV || + mlx5_core_mp_enabled(esw->dev)) { esw->mode = mode; + mlx5_rescan_drivers_locked(esw->dev); mlx5_devcom_comp_unlock(esw->dev->priv.hca_devcom_comp); return; } @@ -2830,9 +2829,9 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master, if (IS_ERR(vport)) return PTR_ERR(vport); - egress_ns = mlx5_get_flow_vport_acl_namespace(master, - MLX5_FLOW_NAMESPACE_ESW_EGRESS, - vport->index); + egress_ns = mlx5_get_flow_vport_namespace(master, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + vport->index); if (!egress_ns) return -EINVAL; @@ -3534,7 +3533,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) int err; mutex_init(&esw->offloads.termtbl_mutex); - mlx5_rdma_enable_roce(esw->dev); + err = mlx5_rdma_enable_roce(esw->dev); + if (err) + goto err_roce; err = mlx5_esw_host_number_init(esw); if (err) @@ -3595,6 +3596,7 @@ err_vport_metadata: esw_offloads_metadata_uninit(esw); err_metadata: mlx5_rdma_disable_roce(esw->dev); +err_roce: mutex_destroy(&esw->offloads.termtbl_mutex); return err; } @@ -3779,6 +3781,8 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, esw->eswitch_operation_in_progress = true; up_write(&esw->mode_lock); + if (mode == DEVLINK_ESWITCH_MODE_LEGACY) + esw->dev->priv.flags |= MLX5_PRIV_FLAGS_SWITCH_LEGACY; mlx5_eswitch_disable_locked(esw); if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) { if (mlx5_devlink_trap_get_num_active(esw->dev)) { @@ -4157,37 +4161,12 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); -static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, u16 *vhca_id) -{ - int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); - void *query_ctx; - void *hca_caps; - int err; - - *vhca_id = 0; - - query_ctx = kzalloc(query_out_sz, GFP_KERNEL); - if (!query_ctx) - return -ENOMEM; - - err = mlx5_vport_get_other_func_general_cap(esw->dev, vport_num, query_ctx); - if (err) - goto out_free; - - hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); - *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id); - -out_free: - kfree(query_ctx); - return err; -} - int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num) { u16 *old_entry, *vhca_map_entry, vhca_id; int err; - err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id); if (err) { esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n", vport_num, err); @@ -4213,7 +4192,7 @@ void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num) u16 *vhca_map_entry, vhca_id; int err; - err = mlx5_esw_query_vport_vhca_id(esw, vport_num, &vhca_id); + err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id); if (err) esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n", vport_num, err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index d91ea53eb394..01c5f5990f9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -6,6 +6,7 @@ #include "mlx5_core.h" #include "lib/eq.h" #include "lib/events.h" +#include "hwmon.h" struct mlx5_event_nb { struct mlx5_nb nb; @@ -153,21 +154,50 @@ static int any_notifier(struct notifier_block *nb, return NOTIFY_OK; } +#if IS_ENABLED(CONFIG_HWMON) +static void print_sensor_names_in_bit_set(struct mlx5_core_dev *dev, struct mlx5_hwmon *hwmon, + u64 bit_set, int bit_set_offset) +{ + unsigned long *bit_set_ptr = (unsigned long *)&bit_set; + int num_bits = sizeof(bit_set) * BITS_PER_BYTE; + int i; + + for_each_set_bit(i, bit_set_ptr, num_bits) { + const char *sensor_name = hwmon_get_sensor_name(hwmon, i + bit_set_offset); + + mlx5_core_warn(dev, "Sensor name[%d]: %s\n", i + bit_set_offset, sensor_name); + } +} +#endif /* CONFIG_HWMON */ + /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */ static int temp_warn(struct notifier_block *nb, unsigned long type, void *data) { struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb); struct mlx5_events *events = event_nb->ctx; + struct mlx5_core_dev *dev = events->dev; struct mlx5_eqe *eqe = data; u64 value_lsb; u64 value_msb; value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb); + /* bit 1-63 are not supported for NICs, + * hence read only bit 0 (asic) from lsb. + */ + value_lsb &= 0x1; value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb); - mlx5_core_warn(events->dev, - "High temperature on sensors with bit set %llx %llx", - value_msb, value_lsb); + if (net_ratelimit()) { + mlx5_core_warn(dev, "High temperature on sensors with bit set %#llx %#llx.\n", + value_msb, value_lsb); +#if IS_ENABLED(CONFIG_HWMON) + if (dev->hwmon) { + print_sensor_names_in_bit_set(dev, dev->hwmon, value_lsb, 0); + print_sensor_names_in_bit_set(dev, dev->hwmon, value_msb, + sizeof(value_lsb) * BITS_PER_BYTE); + } +#endif + } return NOTIFY_OK; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 676005854dad..1af76da8b132 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -217,7 +217,8 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, int err; if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && - underlay_qpn == 0) + underlay_qpn == 0 && + (ft->type != FS_FT_RDMA_RX && ft->type != FS_FT_RDMA_TX)) return 0; if (ft->type == FS_FT_FDB && @@ -526,7 +527,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, struct mlx5_flow_rule *dst; void *in_flow_context, *vlan; void *in_match_value; - int reformat_id = 0; + u32 reformat_id = 0; unsigned int inlen; int dst_cnt_size; u32 *in, action; @@ -579,23 +580,21 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, action, action); if (!extended_dest && fte->act_dests.action.pkt_reformat) { - struct mlx5_pkt_reformat *pkt_reformat = fte->act_dests.action.pkt_reformat; - - if (pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_SW) { - reformat_id = mlx5_fs_dr_action_get_pkt_reformat_id(pkt_reformat); - if (reformat_id < 0) { - mlx5_core_err(dev, - "Unsupported SW-owned pkt_reformat type (%d) in FW-owned table\n", - pkt_reformat->reformat_type); - err = reformat_id; - goto err_out; - } - } else { - reformat_id = fte->act_dests.action.pkt_reformat->id; + struct mlx5_pkt_reformat *pkt_reformat = + fte->act_dests.action.pkt_reformat; + + err = mlx5_fs_get_packet_reformat_id(pkt_reformat, + &reformat_id); + if (err) { + mlx5_core_err(dev, + "Unsupported pkt_reformat type (%d)\n", + pkt_reformat->reformat_type); + goto err_out; } } - MLX5_SET(flow_context, in_flow_context, packet_reformat_id, (u32)reformat_id); + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + reformat_id); if (fte->act_dests.action.modify_hdr) { if (fte->act_dests.action.modify_hdr->owner == MLX5_FLOW_RESOURCE_OWNER_SW) { @@ -718,7 +717,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, continue; MLX5_SET(flow_counter_list, in_dests, flow_counter_id, - dst->dest_attr.counter_id); + mlx5_fc_id(dst->dest_attr.counter)); in_dests += dst_cnt_size; list_size++; } @@ -1141,6 +1140,8 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ case FS_FT_RDMA_RX: case FS_FT_RDMA_TX: case FS_FT_PORT_SEL: + case FS_FT_RDMA_TRANSPORT_RX: + case FS_FT_RDMA_TRANSPORT_TX: return mlx5_fs_cmd_get_fw_cmds(); default: return mlx5_fs_cmd_get_stub_cmds(); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 2eabfcc247c6..d87392360dbd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -113,13 +113,16 @@ #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) -/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy, +/* Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy, * {IPsec RoCE MPV,Alias table},IPsec RoCE policy */ -#define KERNEL_NIC_PRIO_NUM_LEVELS 11 +#define KERNEL_NIC_PRIO_NUM_LEVELS 10 #define KERNEL_NIC_NUM_PRIOS 1 -/* One more level for tc */ -#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) +/* One more level for tc, and one more for promisc */ +#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 2) + +#define KERNEL_NIC_PROMISC_NUM_PRIOS 1 +#define KERNEL_NIC_PROMISC_NUM_LEVELS 1 #define KERNEL_NIC_TC_NUM_PRIOS 1 #define KERNEL_NIC_TC_NUM_LEVELS 3 @@ -187,6 +190,8 @@ static struct init_tree_node { ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(KERNEL_NIC_TC_NUM_PRIOS, KERNEL_NIC_TC_NUM_LEVELS), + ADD_MULTIPLE_PRIO(KERNEL_NIC_PROMISC_NUM_PRIOS, + KERNEL_NIC_PROMISC_NUM_LEVELS), ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, KERNEL_NIC_PRIO_NUM_LEVELS))), ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, @@ -658,6 +663,7 @@ static void del_sw_hw_rule(struct fs_node *node) BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); fte->act_dests.action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; + mlx5_fc_local_destroy(rule->dest_attr.counter); goto out; } @@ -820,11 +826,17 @@ static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte) return index; fte->index = index + fg->start_index; +retry_insert: ret = rhashtable_insert_fast(&fg->ftes_hash, &fte->hash, rhash_fte); - if (ret) + if (ret) { + if (ret == -EBUSY) { + cond_resched(); + goto retry_insert; + } goto err_ida_remove; + } tree_add_node(&fte->node, &fg->node); list_add_tail(&fte->node.list, &fg->node.children); @@ -1449,7 +1461,7 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, struct mlx5_flow_table *ft; int autogroups_max_fte; - ft = mlx5_create_flow_table(ns, ft_attr); + ft = mlx5_create_vport_flow_table(ns, ft_attr, ft_attr->vport); if (IS_ERR(ft)) return ft; @@ -1823,14 +1835,35 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft, return err; } +int mlx5_fs_get_packet_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *id) +{ + switch (pkt_reformat->owner) { + case MLX5_FLOW_RESOURCE_OWNER_FW: + *id = pkt_reformat->id; + return 0; + case MLX5_FLOW_RESOURCE_OWNER_SW: + return mlx5_fs_dr_action_get_pkt_reformat_id(pkt_reformat, id); + case MLX5_FLOW_RESOURCE_OWNER_HWS: + return mlx5_fs_hws_action_get_pkt_reformat_id(pkt_reformat, id); + default: + return -EINVAL; + } +} + static bool mlx5_pkt_reformat_cmp(struct mlx5_pkt_reformat *p1, struct mlx5_pkt_reformat *p2) { - return p1->owner == p2->owner && - (p1->owner == MLX5_FLOW_RESOURCE_OWNER_FW ? - p1->id == p2->id : - mlx5_fs_dr_action_get_pkt_reformat_id(p1) == - mlx5_fs_dr_action_get_pkt_reformat_id(p2)); + int err1, err2; + u32 id1, id2; + + if (p1->owner != p2->owner) + return false; + + err1 = mlx5_fs_get_packet_reformat_id(p1, &id1); + err2 = mlx5_fs_get_packet_reformat_id(p2, &id2); + + return !err1 && !err2 && id1 == id2; } static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, @@ -2200,6 +2233,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, struct mlx5_flow_handle *rule; struct match_list *iter; bool take_write = false; + bool try_again = false; struct fs_fte *fte; u64 version = 0; int err; @@ -2264,6 +2298,7 @@ skip_search: nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); if (!g->node.active) { + try_again = true; up_write_ref_node(&g->node, false); continue; } @@ -2285,7 +2320,8 @@ skip_search: tree_put_node(&fte->node, false); return rule; } - rule = ERR_PTR(-ENOENT); + err = try_again ? -EAGAIN : -ENOENT; + rule = ERR_PTR(err); out: kmem_cache_free(steering->ftes_cache, fte); return rule; @@ -2709,6 +2745,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, break; case MLX5_FLOW_NAMESPACE_RDMA_TX: root_ns = steering->rdma_tx_root_ns; + prio = RDMA_TX_BYPASS_PRIO; break; case MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS: root_ns = steering->rdma_rx_root_ns; @@ -2756,9 +2793,9 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, } EXPORT_SYMBOL(mlx5_get_flow_namespace); -struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev, - enum mlx5_flow_namespace_type type, - int vport) +struct mlx5_flow_namespace * +mlx5_get_flow_vport_namespace(struct mlx5_core_dev *dev, + enum mlx5_flow_namespace_type type, int vport_idx) { struct mlx5_flow_steering *steering = dev->priv.steering; @@ -2767,25 +2804,43 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d switch (type) { case MLX5_FLOW_NAMESPACE_ESW_EGRESS: - if (vport >= steering->esw_egress_acl_vports) + if (vport_idx >= steering->esw_egress_acl_vports) return NULL; if (steering->esw_egress_root_ns && - steering->esw_egress_root_ns[vport]) - return &steering->esw_egress_root_ns[vport]->ns; + steering->esw_egress_root_ns[vport_idx]) + return &steering->esw_egress_root_ns[vport_idx]->ns; else return NULL; case MLX5_FLOW_NAMESPACE_ESW_INGRESS: - if (vport >= steering->esw_ingress_acl_vports) + if (vport_idx >= steering->esw_ingress_acl_vports) return NULL; if (steering->esw_ingress_root_ns && - steering->esw_ingress_root_ns[vport]) - return &steering->esw_ingress_root_ns[vport]->ns; + steering->esw_ingress_root_ns[vport_idx]) + return &steering->esw_ingress_root_ns[vport_idx]->ns; + else + return NULL; + case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX: + if (vport_idx >= steering->rdma_transport_rx_vports) + return NULL; + if (steering->rdma_transport_rx_root_ns && + steering->rdma_transport_rx_root_ns[vport_idx]) + return &steering->rdma_transport_rx_root_ns[vport_idx]->ns; + else + return NULL; + case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX: + if (vport_idx >= steering->rdma_transport_tx_vports) + return NULL; + + if (steering->rdma_transport_tx_root_ns && + steering->rdma_transport_tx_root_ns[vport_idx]) + return &steering->rdma_transport_tx_root_ns[vport_idx]->ns; else return NULL; default: return NULL; } } +EXPORT_SYMBOL(mlx5_get_flow_vport_namespace); static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns, unsigned int prio, @@ -3191,6 +3246,155 @@ out_err: return err; } +static int +init_rdma_transport_rx_root_ns_one(struct mlx5_flow_steering *steering, + int vport_idx) +{ + struct mlx5_flow_root_namespace *root_ns; + struct fs_prio *prio; + int ret; + int i; + + steering->rdma_transport_rx_root_ns[vport_idx] = + create_root_ns(steering, FS_FT_RDMA_TRANSPORT_RX); + if (!steering->rdma_transport_rx_root_ns[vport_idx]) + return -ENOMEM; + + root_ns = steering->rdma_transport_rx_root_ns[vport_idx]; + + for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++) { + prio = fs_create_prio(&root_ns->ns, i, 1); + if (IS_ERR(prio)) { + ret = PTR_ERR(prio); + goto err; + } + } + set_prio_attrs(root_ns); + return 0; + +err: + cleanup_root_ns(root_ns); + return ret; +} + +static int +init_rdma_transport_tx_root_ns_one(struct mlx5_flow_steering *steering, + int vport_idx) +{ + struct mlx5_flow_root_namespace *root_ns; + struct fs_prio *prio; + int ret; + int i; + + steering->rdma_transport_tx_root_ns[vport_idx] = + create_root_ns(steering, FS_FT_RDMA_TRANSPORT_TX); + if (!steering->rdma_transport_tx_root_ns[vport_idx]) + return -ENOMEM; + + root_ns = steering->rdma_transport_tx_root_ns[vport_idx]; + + for (i = 0; i < MLX5_RDMA_TRANSPORT_BYPASS_PRIO; i++) { + prio = fs_create_prio(&root_ns->ns, i, 1); + if (IS_ERR(prio)) { + ret = PTR_ERR(prio); + goto err; + } + } + set_prio_attrs(root_ns); + return 0; + +err: + cleanup_root_ns(root_ns); + return ret; +} + +static int init_rdma_transport_rx_root_ns(struct mlx5_flow_steering *steering) +{ + struct mlx5_core_dev *dev = steering->dev; + int total_vports; + int err; + int i; + + /* In case eswitch not supported and working in legacy mode */ + total_vports = mlx5_eswitch_get_total_vports(dev) ?: 1; + + steering->rdma_transport_rx_root_ns = + kcalloc(total_vports, + sizeof(*steering->rdma_transport_rx_root_ns), + GFP_KERNEL); + if (!steering->rdma_transport_rx_root_ns) + return -ENOMEM; + + for (i = 0; i < total_vports; i++) { + err = init_rdma_transport_rx_root_ns_one(steering, i); + if (err) + goto cleanup_root_ns; + } + steering->rdma_transport_rx_vports = total_vports; + return 0; + +cleanup_root_ns: + while (i--) + cleanup_root_ns(steering->rdma_transport_rx_root_ns[i]); + kfree(steering->rdma_transport_rx_root_ns); + steering->rdma_transport_rx_root_ns = NULL; + return err; +} + +static int init_rdma_transport_tx_root_ns(struct mlx5_flow_steering *steering) +{ + struct mlx5_core_dev *dev = steering->dev; + int total_vports; + int err; + int i; + + /* In case eswitch not supported and working in legacy mode */ + total_vports = mlx5_eswitch_get_total_vports(dev) ?: 1; + + steering->rdma_transport_tx_root_ns = + kcalloc(total_vports, + sizeof(*steering->rdma_transport_tx_root_ns), + GFP_KERNEL); + if (!steering->rdma_transport_tx_root_ns) + return -ENOMEM; + + for (i = 0; i < total_vports; i++) { + err = init_rdma_transport_tx_root_ns_one(steering, i); + if (err) + goto cleanup_root_ns; + } + steering->rdma_transport_tx_vports = total_vports; + return 0; + +cleanup_root_ns: + while (i--) + cleanup_root_ns(steering->rdma_transport_tx_root_ns[i]); + kfree(steering->rdma_transport_tx_root_ns); + steering->rdma_transport_tx_root_ns = NULL; + return err; +} + +static void cleanup_rdma_transport_roots_ns(struct mlx5_flow_steering *steering) +{ + int i; + + if (steering->rdma_transport_rx_root_ns) { + for (i = 0; i < steering->rdma_transport_rx_vports; i++) + cleanup_root_ns(steering->rdma_transport_rx_root_ns[i]); + + kfree(steering->rdma_transport_rx_root_ns); + steering->rdma_transport_rx_root_ns = NULL; + } + + if (steering->rdma_transport_tx_root_ns) { + for (i = 0; i < steering->rdma_transport_tx_vports; i++) + cleanup_root_ns(steering->rdma_transport_tx_root_ns[i]); + + kfree(steering->rdma_transport_tx_root_ns); + steering->rdma_transport_tx_root_ns = NULL; + } +} + /* FT and tc chains are stored in the same array so we can re-use the * mlx5_get_fdb_sub_ns() and tc api for FT chains. * When creating a new ns for each chain store it in the first available slot. @@ -3528,35 +3732,42 @@ static int mlx5_fs_mode_validate(struct devlink *devlink, u32 id, { struct mlx5_core_dev *dev = devlink_priv(devlink); char *value = val.vstr; - int err = 0; + u8 eswitch_mode; - if (!strcmp(value, "dmfs")) { + if (!strcmp(value, "dmfs")) return 0; - } else if (!strcmp(value, "smfs")) { - u8 eswitch_mode; - bool smfs_cap; - eswitch_mode = mlx5_eswitch_mode(dev); - smfs_cap = mlx5_fs_dr_is_supported(dev); + if (!strcmp(value, "smfs")) { + bool smfs_cap = mlx5_fs_dr_is_supported(dev); if (!smfs_cap) { - err = -EOPNOTSUPP; NL_SET_ERR_MSG_MOD(extack, "Software managed steering is not supported by current device"); + return -EOPNOTSUPP; } + } else if (!strcmp(value, "hmfs")) { + bool hmfs_cap = mlx5_fs_hws_is_supported(dev); - else if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { + if (!hmfs_cap) { NL_SET_ERR_MSG_MOD(extack, - "Software managed steering is not supported when eswitch offloads enabled."); - err = -EOPNOTSUPP; + "Hardware steering is not supported by current device"); + return -EOPNOTSUPP; } } else { NL_SET_ERR_MSG_MOD(extack, - "Bad parameter: supported values are [\"dmfs\", \"smfs\"]"); - err = -EINVAL; + "Bad parameter: supported values are [\"dmfs\", \"smfs\", \"hmfs\"]"); + return -EINVAL; } - return err; + eswitch_mode = mlx5_eswitch_mode(dev); + if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Moving to %s is not supported when eswitch offloads enabled.", + value); + return -EOPNOTSUPP; + } + + return 0; } static int mlx5_fs_mode_set(struct devlink *devlink, u32 id, @@ -3568,6 +3779,8 @@ static int mlx5_fs_mode_set(struct devlink *devlink, u32 id, if (!strcmp(ctx->val.vstr, "smfs")) mode = MLX5_FLOW_STEERING_MODE_SMFS; + else if (!strcmp(ctx->val.vstr, "hmfs")) + mode = MLX5_FLOW_STEERING_MODE_HMFS; else mode = MLX5_FLOW_STEERING_MODE_DMFS; dev->priv.steering->mode = mode; @@ -3580,10 +3793,17 @@ static int mlx5_fs_mode_get(struct devlink *devlink, u32 id, { struct mlx5_core_dev *dev = devlink_priv(devlink); - if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) + switch (dev->priv.steering->mode) { + case MLX5_FLOW_STEERING_MODE_SMFS: strscpy(ctx->val.vstr, "smfs", sizeof(ctx->val.vstr)); - else + break; + case MLX5_FLOW_STEERING_MODE_HMFS: + strscpy(ctx->val.vstr, "hmfs", sizeof(ctx->val.vstr)); + break; + default: strscpy(ctx->val.vstr, "dmfs", sizeof(ctx->val.vstr)); + } + return 0; } @@ -3607,6 +3827,7 @@ void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev) cleanup_root_ns(steering->rdma_rx_root_ns); cleanup_root_ns(steering->rdma_tx_root_ns); cleanup_root_ns(steering->egress_root_ns); + cleanup_rdma_transport_roots_ns(steering); devl_params_unregister(priv_to_devlink(dev), mlx5_fs_params, ARRAY_SIZE(mlx5_fs_params)); @@ -3658,8 +3879,7 @@ int mlx5_fs_core_init(struct mlx5_core_dev *dev) goto err; } - if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) && - MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)) { + if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support)) { err = init_rdma_rx_root_ns(steering); if (err) goto err; @@ -3677,6 +3897,18 @@ int mlx5_fs_core_init(struct mlx5_core_dev *dev) goto err; } + if (MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(dev, ft_support)) { + err = init_rdma_transport_rx_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(dev, ft_support)) { + err = init_rdma_transport_tx_root_ns(steering); + if (err) + goto err; + } + return 0; err: @@ -3720,6 +3952,8 @@ int mlx5_fs_core_alloc(struct mlx5_core_dev *dev) if (mlx5_fs_dr_is_supported(dev)) steering->mode = MLX5_FLOW_STEERING_MODE_SMFS; + else if (mlx5_fs_hws_is_supported(dev)) + steering->mode = MLX5_FLOW_STEERING_MODE_HMFS; else steering->mode = MLX5_FLOW_STEERING_MODE_DMFS; @@ -3827,8 +4061,10 @@ mlx5_get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type struct mlx5_flow_namespace *ns; if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS || - ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS) - ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0); + ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS || + ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX || + ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) + ns = mlx5_get_flow_vport_namespace(dev, ns_type, 0); else ns = mlx5_get_flow_namespace(dev, ns_type); if (!ns) @@ -4003,6 +4239,8 @@ int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, if (mode == MLX5_FLOW_STEERING_MODE_SMFS) cmds = mlx5_fs_cmd_get_dr_cmds(); + else if (mode == MLX5_FLOW_STEERING_MODE_HMFS) + cmds = mlx5_fs_cmd_get_hws_cmds(); else cmds = mlx5_fs_cmd_get_fw_cmds(); if (!cmds) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index bad2df0715ec..500826229b0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -38,6 +38,7 @@ #include <linux/rhashtable.h> #include <linux/llist.h> #include <steering/sws/fs_dr.h> +#include <steering/hws/fs_hws.h> #define FDB_TC_MAX_CHAIN 3 #define FDB_FT_CHAIN (FDB_TC_MAX_CHAIN + 1) @@ -57,6 +58,7 @@ struct mlx5_flow_definer { enum mlx5_flow_resource_owner { MLX5_FLOW_RESOURCE_OWNER_FW, MLX5_FLOW_RESOURCE_OWNER_SW, + MLX5_FLOW_RESOURCE_OWNER_HWS, }; struct mlx5_modify_hdr { @@ -64,6 +66,7 @@ struct mlx5_modify_hdr { enum mlx5_flow_resource_owner owner; union { struct mlx5_fs_dr_action fs_dr_action; + struct mlx5_fs_hws_action fs_hws_action; u32 id; }; }; @@ -74,6 +77,7 @@ struct mlx5_pkt_reformat { enum mlx5_flow_resource_owner owner; union { struct mlx5_fs_dr_action fs_dr_action; + struct mlx5_fs_hws_action fs_hws_action; u32 id; }; }; @@ -112,7 +116,9 @@ enum fs_flow_table_type { FS_FT_PORT_SEL = 0X9, FS_FT_FDB_RX = 0xa, FS_FT_FDB_TX = 0xb, - FS_FT_MAX_TYPE = FS_FT_FDB_TX, + FS_FT_RDMA_TRANSPORT_RX = 0xd, + FS_FT_RDMA_TRANSPORT_TX = 0xe, + FS_FT_MAX_TYPE = FS_FT_RDMA_TRANSPORT_TX, }; enum fs_flow_table_op_mod { @@ -126,7 +132,8 @@ enum fs_fte_status { enum mlx5_flow_steering_mode { MLX5_FLOW_STEERING_MODE_DMFS, - MLX5_FLOW_STEERING_MODE_SMFS + MLX5_FLOW_STEERING_MODE_SMFS, + MLX5_FLOW_STEERING_MODE_HMFS, }; enum mlx5_flow_steering_capabilty { @@ -154,6 +161,10 @@ struct mlx5_flow_steering { struct mlx5_flow_root_namespace *port_sel_root_ns; int esw_egress_acl_vports; int esw_ingress_acl_vports; + struct mlx5_flow_root_namespace **rdma_transport_rx_root_ns; + struct mlx5_flow_root_namespace **rdma_transport_tx_root_ns; + int rdma_transport_rx_vports; + int rdma_transport_tx_vports; }; struct fs_node { @@ -190,7 +201,10 @@ struct mlx5_flow_handle { /* Type of children is mlx5_flow_group */ struct mlx5_flow_table { struct fs_node node; - struct mlx5_fs_dr_table fs_dr_table; + union { + struct mlx5_fs_dr_table fs_dr_table; + struct mlx5_fs_hws_table fs_hws_table; + }; u32 id; u16 vport; unsigned int max_fte; @@ -247,7 +261,10 @@ struct fs_fte_dup { /* Type of children is mlx5_flow_rule */ struct fs_fte { struct fs_node node; - struct mlx5_fs_dr_rule fs_dr_rule; + union { + struct mlx5_fs_dr_rule fs_dr_rule; + struct mlx5_fs_hws_rule fs_hws_rule; + }; u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; struct fs_fte_action act_dests; struct fs_fte_dup *dup; @@ -280,7 +297,10 @@ struct mlx5_flow_group_mask { /* Type of children is fs_fte */ struct mlx5_flow_group { struct fs_node node; - struct mlx5_fs_dr_matcher fs_dr_matcher; + union { + struct mlx5_fs_dr_matcher fs_dr_matcher; + struct mlx5_fs_hws_matcher fs_hws_matcher; + }; struct mlx5_flow_group_mask mask; u32 start_index; u32 max_ftes; @@ -293,7 +313,10 @@ struct mlx5_flow_group { struct mlx5_flow_root_namespace { struct mlx5_flow_namespace ns; enum mlx5_flow_steering_mode mode; - struct mlx5_fs_dr_domain fs_dr_domain; + union { + struct mlx5_fs_dr_domain fs_dr_domain; + struct mlx5_fs_hws_context fs_hws_context; + }; enum fs_flow_table_type table_type; struct mlx5_core_dev *dev; struct mlx5_flow_table *root_ft; @@ -303,6 +326,36 @@ struct mlx5_flow_root_namespace { const struct mlx5_flow_cmds *cmds; }; +enum mlx5_fc_type { + MLX5_FC_TYPE_ACQUIRED = 0, + MLX5_FC_TYPE_LOCAL, +}; + +struct mlx5_fc_cache { + u64 packets; + u64 bytes; + u64 lastuse; +}; + +struct mlx5_fc { + u32 id; + bool aging; + enum mlx5_fc_type type; + struct mlx5_fc_bulk *bulk; + struct mlx5_fc_cache cache; + /* last{packets,bytes} are used for calculating deltas since last reading. */ + u64 lastpackets; + u64 lastbytes; +}; + +struct mlx5_fc_bulk { + struct mlx5_fs_bulk fs_bulk; + u32 base_id; + struct mlx5_fs_hws_data hws_data; + struct mlx5_fc fcs[]; +}; + +u32 mlx5_fc_get_base_id(struct mlx5_fc *counter); int mlx5_init_fc_stats(struct mlx5_core_dev *dev); void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev); void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, @@ -334,6 +387,9 @@ u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace struct mlx5_flow_root_namespace *find_root(struct fs_node *node); +int mlx5_fs_get_packet_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *id); + #define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); } #define fs_list_for_each_entry(pos, root) \ @@ -382,7 +438,9 @@ struct mlx5_flow_root_namespace *find_root(struct fs_node *node); (type == FS_FT_PORT_SEL) ? MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) : \ (type == FS_FT_FDB_RX) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \ (type == FS_FT_FDB_TX) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \ - (BUILD_BUG_ON_ZERO(FS_FT_FDB_TX != FS_FT_MAX_TYPE))\ + (type == FS_FT_RDMA_TRANSPORT_RX) ? MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(mdev, cap) : \ + (type == FS_FT_RDMA_TRANSPORT_TX) ? MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(mdev, cap) : \ + (BUILD_BUG_ON_ZERO(FS_FT_RDMA_TRANSPORT_TX != FS_FT_MAX_TYPE))\ ) #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 62d0c689796b..492775d3d193 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -34,6 +34,7 @@ #include <linux/mlx5/fs.h> #include "mlx5_core.h" #include "fs_core.h" +#include "fs_pool.h" #include "fs_cmd.h" #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) @@ -43,33 +44,6 @@ #define MLX5_FC_POOL_MAX_THRESHOLD BIT(18) #define MLX5_FC_POOL_USED_BUFF_RATIO 10 -struct mlx5_fc_cache { - u64 packets; - u64 bytes; - u64 lastuse; -}; - -struct mlx5_fc { - u32 id; - bool aging; - struct mlx5_fc_bulk *bulk; - struct mlx5_fc_cache cache; - /* last{packets,bytes} are used for calculating deltas since last reading. */ - u64 lastpackets; - u64 lastbytes; -}; - -struct mlx5_fc_pool { - struct mlx5_core_dev *dev; - struct mutex pool_lock; /* protects pool lists */ - struct list_head fully_used; - struct list_head partially_used; - struct list_head unused; - int available_fcs; - int used_fcs; - int threshold; -}; - struct mlx5_fc_stats { struct xarray counters; @@ -80,13 +54,13 @@ struct mlx5_fc_stats { int bulk_query_len; bool bulk_query_alloc_failed; unsigned long next_bulk_query_alloc; - struct mlx5_fc_pool fc_pool; + struct mlx5_fs_pool fc_pool; }; -static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev); -static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool); -static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool); -static void mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc); +static void mlx5_fc_pool_init(struct mlx5_fs_pool *fc_pool, struct mlx5_core_dev *dev); +static void mlx5_fc_pool_cleanup(struct mlx5_fs_pool *fc_pool); +static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fs_pool *fc_pool); +static void mlx5_fc_pool_release_counter(struct mlx5_fs_pool *fc_pool, struct mlx5_fc *fc); static int get_init_bulk_query_len(struct mlx5_core_dev *dev) { @@ -186,6 +160,9 @@ static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter) { struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; + if (WARN_ON(counter->type == MLX5_FC_TYPE_LOCAL)) + return; + if (counter->bulk) mlx5_fc_pool_release_counter(&fc_stats->fc_pool, counter); else @@ -435,15 +412,7 @@ void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, fc_stats->sampling_interval); } -/* Flow counter bluks */ - -struct mlx5_fc_bulk { - struct list_head pool_list; - u32 base_id; - int bulk_len; - unsigned long *bitmask; - struct mlx5_fc fcs[] __counted_by(bulk_len); -}; +/* Flow counter bulks */ static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, u32 id) @@ -452,16 +421,16 @@ static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, counter->id = id; } -static int mlx5_fc_bulk_get_free_fcs_amount(struct mlx5_fc_bulk *bulk) +u32 mlx5_fc_get_base_id(struct mlx5_fc *counter) { - return bitmap_weight(bulk->bitmask, bulk->bulk_len); + return counter->bulk->base_id; } -static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev) +static struct mlx5_fs_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev, + void *pool_ctx) { enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask; - struct mlx5_fc_bulk *bulk; - int err = -ENOMEM; + struct mlx5_fc_bulk *fc_bulk; int bulk_len; u32 base_id; int i; @@ -469,207 +438,141 @@ static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev) alloc_bitmask = MLX5_CAP_GEN(dev, flow_counter_bulk_alloc); bulk_len = alloc_bitmask > 0 ? MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1; - bulk = kvzalloc(struct_size(bulk, fcs, bulk_len), GFP_KERNEL); - if (!bulk) - goto err_alloc_bulk; - - bulk->bitmask = kvcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long), - GFP_KERNEL); - if (!bulk->bitmask) - goto err_alloc_bitmask; + fc_bulk = kvzalloc(struct_size(fc_bulk, fcs, bulk_len), GFP_KERNEL); + if (!fc_bulk) + return NULL; - err = mlx5_cmd_fc_bulk_alloc(dev, alloc_bitmask, &base_id); - if (err) - goto err_mlx5_cmd_bulk_alloc; + if (mlx5_fs_bulk_init(dev, &fc_bulk->fs_bulk, bulk_len)) + goto fc_bulk_free; - bulk->base_id = base_id; - bulk->bulk_len = bulk_len; - for (i = 0; i < bulk_len; i++) { - mlx5_fc_init(&bulk->fcs[i], bulk, base_id + i); - set_bit(i, bulk->bitmask); - } + if (mlx5_cmd_fc_bulk_alloc(dev, alloc_bitmask, &base_id)) + goto fs_bulk_cleanup; + fc_bulk->base_id = base_id; + for (i = 0; i < bulk_len; i++) + mlx5_fc_init(&fc_bulk->fcs[i], fc_bulk, base_id + i); - return bulk; + refcount_set(&fc_bulk->hws_data.hws_action_refcount, 0); + mutex_init(&fc_bulk->hws_data.lock); + return &fc_bulk->fs_bulk; -err_mlx5_cmd_bulk_alloc: - kvfree(bulk->bitmask); -err_alloc_bitmask: - kvfree(bulk); -err_alloc_bulk: - return ERR_PTR(err); +fs_bulk_cleanup: + mlx5_fs_bulk_cleanup(&fc_bulk->fs_bulk); +fc_bulk_free: + kvfree(fc_bulk); + return NULL; } static int -mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk) +mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fs_bulk *fs_bulk) { - if (mlx5_fc_bulk_get_free_fcs_amount(bulk) < bulk->bulk_len) { + struct mlx5_fc_bulk *fc_bulk = container_of(fs_bulk, + struct mlx5_fc_bulk, + fs_bulk); + + if (mlx5_fs_bulk_get_free_amount(fs_bulk) < fs_bulk->bulk_len) { mlx5_core_err(dev, "Freeing bulk before all counters were released\n"); return -EBUSY; } - mlx5_cmd_fc_free(dev, bulk->base_id); - kvfree(bulk->bitmask); - kvfree(bulk); + mlx5_cmd_fc_free(dev, fc_bulk->base_id); + mlx5_fs_bulk_cleanup(fs_bulk); + kvfree(fc_bulk); return 0; } -static struct mlx5_fc *mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk) +static void mlx5_fc_pool_update_threshold(struct mlx5_fs_pool *fc_pool) { - int free_fc_index = find_first_bit(bulk->bitmask, bulk->bulk_len); - - if (free_fc_index >= bulk->bulk_len) - return ERR_PTR(-ENOSPC); - - clear_bit(free_fc_index, bulk->bitmask); - return &bulk->fcs[free_fc_index]; -} - -static int mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc) -{ - int fc_index = fc->id - bulk->base_id; - - if (test_bit(fc_index, bulk->bitmask)) - return -EINVAL; - - set_bit(fc_index, bulk->bitmask); - return 0; + fc_pool->threshold = min_t(int, MLX5_FC_POOL_MAX_THRESHOLD, + fc_pool->used_units / MLX5_FC_POOL_USED_BUFF_RATIO); } /* Flow counters pool API */ -static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev) -{ - fc_pool->dev = dev; - mutex_init(&fc_pool->pool_lock); - INIT_LIST_HEAD(&fc_pool->fully_used); - INIT_LIST_HEAD(&fc_pool->partially_used); - INIT_LIST_HEAD(&fc_pool->unused); - fc_pool->available_fcs = 0; - fc_pool->used_fcs = 0; - fc_pool->threshold = 0; -} +static const struct mlx5_fs_pool_ops mlx5_fc_pool_ops = { + .bulk_destroy = mlx5_fc_bulk_destroy, + .bulk_create = mlx5_fc_bulk_create, + .update_threshold = mlx5_fc_pool_update_threshold, +}; -static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool) +static void +mlx5_fc_pool_init(struct mlx5_fs_pool *fc_pool, struct mlx5_core_dev *dev) { - struct mlx5_core_dev *dev = fc_pool->dev; - struct mlx5_fc_bulk *bulk; - struct mlx5_fc_bulk *tmp; - - list_for_each_entry_safe(bulk, tmp, &fc_pool->fully_used, pool_list) - mlx5_fc_bulk_destroy(dev, bulk); - list_for_each_entry_safe(bulk, tmp, &fc_pool->partially_used, pool_list) - mlx5_fc_bulk_destroy(dev, bulk); - list_for_each_entry_safe(bulk, tmp, &fc_pool->unused, pool_list) - mlx5_fc_bulk_destroy(dev, bulk); + mlx5_fs_pool_init(fc_pool, dev, &mlx5_fc_pool_ops, NULL); } -static void mlx5_fc_pool_update_threshold(struct mlx5_fc_pool *fc_pool) +static void mlx5_fc_pool_cleanup(struct mlx5_fs_pool *fc_pool) { - fc_pool->threshold = min_t(int, MLX5_FC_POOL_MAX_THRESHOLD, - fc_pool->used_fcs / MLX5_FC_POOL_USED_BUFF_RATIO); + mlx5_fs_pool_cleanup(fc_pool); } -static struct mlx5_fc_bulk * -mlx5_fc_pool_alloc_new_bulk(struct mlx5_fc_pool *fc_pool) +static struct mlx5_fc * +mlx5_fc_pool_acquire_counter(struct mlx5_fs_pool *fc_pool) { - struct mlx5_core_dev *dev = fc_pool->dev; - struct mlx5_fc_bulk *new_bulk; + struct mlx5_fs_pool_index pool_index = {}; + struct mlx5_fc_bulk *fc_bulk; + int err; - new_bulk = mlx5_fc_bulk_create(dev); - if (!IS_ERR(new_bulk)) - fc_pool->available_fcs += new_bulk->bulk_len; - mlx5_fc_pool_update_threshold(fc_pool); - return new_bulk; + err = mlx5_fs_pool_acquire_index(fc_pool, &pool_index); + if (err) + return ERR_PTR(err); + fc_bulk = container_of(pool_index.fs_bulk, struct mlx5_fc_bulk, fs_bulk); + return &fc_bulk->fcs[pool_index.index]; } static void -mlx5_fc_pool_free_bulk(struct mlx5_fc_pool *fc_pool, struct mlx5_fc_bulk *bulk) +mlx5_fc_pool_release_counter(struct mlx5_fs_pool *fc_pool, struct mlx5_fc *fc) { + struct mlx5_fs_bulk *fs_bulk = &fc->bulk->fs_bulk; + struct mlx5_fs_pool_index pool_index = {}; struct mlx5_core_dev *dev = fc_pool->dev; - fc_pool->available_fcs -= bulk->bulk_len; - mlx5_fc_bulk_destroy(dev, bulk); - mlx5_fc_pool_update_threshold(fc_pool); + pool_index.fs_bulk = fs_bulk; + pool_index.index = fc->id - fc->bulk->base_id; + if (mlx5_fs_pool_release_index(fc_pool, &pool_index)) + mlx5_core_warn(dev, "Attempted to release a counter which is not acquired\n"); } -static struct mlx5_fc * -mlx5_fc_pool_acquire_from_list(struct list_head *src_list, - struct list_head *next_list, - bool move_non_full_bulk) +/** + * mlx5_fc_local_create - Allocate mlx5_fc struct for a counter which + * was already acquired using its counter id and bulk data. + * + * @counter_id: counter acquired counter id + * @offset: counter offset from bulk base + * @bulk_size: counter's bulk size as was allocated + * + * Return: Pointer to mlx5_fc on success, ERR_PTR otherwise. + */ +struct mlx5_fc * +mlx5_fc_local_create(u32 counter_id, u32 offset, u32 bulk_size) { - struct mlx5_fc_bulk *bulk; - struct mlx5_fc *fc; - - if (list_empty(src_list)) - return ERR_PTR(-ENODATA); - - bulk = list_first_entry(src_list, struct mlx5_fc_bulk, pool_list); - fc = mlx5_fc_bulk_acquire_fc(bulk); - if (move_non_full_bulk || mlx5_fc_bulk_get_free_fcs_amount(bulk) == 0) - list_move(&bulk->pool_list, next_list); - return fc; -} + struct mlx5_fc_bulk *fc_bulk; + struct mlx5_fc *counter; -static struct mlx5_fc * -mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool) -{ - struct mlx5_fc_bulk *new_bulk; - struct mlx5_fc *fc; - - mutex_lock(&fc_pool->pool_lock); - - fc = mlx5_fc_pool_acquire_from_list(&fc_pool->partially_used, - &fc_pool->fully_used, false); - if (IS_ERR(fc)) - fc = mlx5_fc_pool_acquire_from_list(&fc_pool->unused, - &fc_pool->partially_used, - true); - if (IS_ERR(fc)) { - new_bulk = mlx5_fc_pool_alloc_new_bulk(fc_pool); - if (IS_ERR(new_bulk)) { - fc = ERR_CAST(new_bulk); - goto out; - } - fc = mlx5_fc_bulk_acquire_fc(new_bulk); - list_add(&new_bulk->pool_list, &fc_pool->partially_used); + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) + return ERR_PTR(-ENOMEM); + fc_bulk = kzalloc(sizeof(*fc_bulk), GFP_KERNEL); + if (!fc_bulk) { + kfree(counter); + return ERR_PTR(-ENOMEM); } - fc_pool->available_fcs--; - fc_pool->used_fcs++; -out: - mutex_unlock(&fc_pool->pool_lock); - return fc; + counter->type = MLX5_FC_TYPE_LOCAL; + counter->id = counter_id; + fc_bulk->base_id = counter_id - offset; + fc_bulk->fs_bulk.bulk_len = bulk_size; + counter->bulk = fc_bulk; + return counter; } +EXPORT_SYMBOL(mlx5_fc_local_create); -static void -mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc) +void mlx5_fc_local_destroy(struct mlx5_fc *counter) { - struct mlx5_core_dev *dev = fc_pool->dev; - struct mlx5_fc_bulk *bulk = fc->bulk; - int bulk_free_fcs_amount; - - mutex_lock(&fc_pool->pool_lock); - - if (mlx5_fc_bulk_release_fc(bulk, fc)) { - mlx5_core_warn(dev, "Attempted to release a counter which is not acquired\n"); - goto unlock; - } - - fc_pool->available_fcs++; - fc_pool->used_fcs--; - - bulk_free_fcs_amount = mlx5_fc_bulk_get_free_fcs_amount(bulk); - if (bulk_free_fcs_amount == 1) - list_move_tail(&bulk->pool_list, &fc_pool->partially_used); - if (bulk_free_fcs_amount == bulk->bulk_len) { - list_del(&bulk->pool_list); - if (fc_pool->available_fcs > fc_pool->threshold) - mlx5_fc_pool_free_bulk(fc_pool, bulk); - else - list_add(&bulk->pool_list, &fc_pool->unused); - } + if (!counter || counter->type != MLX5_FC_TYPE_LOCAL) + return; -unlock: - mutex_unlock(&fc_pool->pool_lock); + kfree(counter->bulk); + kfree(counter); } +EXPORT_SYMBOL(mlx5_fc_local_destroy); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c index c14590acc772..f6abfd00d7e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c @@ -50,10 +50,12 @@ mlx5_ft_pool_get_avail_sz(struct mlx5_core_dev *dev, enum fs_flow_table_type tab int i, found_i = -1; for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) { - if (dev->priv.ft_pool->ft_left[i] && FT_POOLS[i] >= desired_size && + if (dev->priv.ft_pool->ft_left[i] && + (FT_POOLS[i] >= desired_size || + desired_size == MLX5_FS_MAX_POOL_SIZE) && FT_POOLS[i] <= max_ft_size) { found_i = i; - if (desired_size != POOL_NEXT_SIZE) + if (desired_size != MLX5_FS_MAX_POOL_SIZE) break; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h index 25f4274b372b..173e312db720 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h @@ -7,8 +7,6 @@ #include <linux/mlx5/driver.h> #include "fs_core.h" -#define POOL_NEXT_SIZE 0 - int mlx5_ft_pool_init(struct mlx5_core_dev *dev); void mlx5_ft_pool_destroy(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.c new file mode 100644 index 000000000000..f6c226664602 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#include <mlx5_core.h> +#include "fs_pool.h" + +int mlx5_fs_bulk_init(struct mlx5_core_dev *dev, struct mlx5_fs_bulk *fs_bulk, + int bulk_len) +{ + int i; + + fs_bulk->bitmask = kvcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long), + GFP_KERNEL); + if (!fs_bulk->bitmask) + return -ENOMEM; + + fs_bulk->bulk_len = bulk_len; + for (i = 0; i < bulk_len; i++) + set_bit(i, fs_bulk->bitmask); + + return 0; +} + +void mlx5_fs_bulk_cleanup(struct mlx5_fs_bulk *fs_bulk) +{ + kvfree(fs_bulk->bitmask); +} + +int mlx5_fs_bulk_get_free_amount(struct mlx5_fs_bulk *bulk) +{ + return bitmap_weight(bulk->bitmask, bulk->bulk_len); +} + +static int mlx5_fs_bulk_acquire_index(struct mlx5_fs_bulk *fs_bulk, + struct mlx5_fs_pool_index *pool_index) +{ + int free_index = find_first_bit(fs_bulk->bitmask, fs_bulk->bulk_len); + + WARN_ON_ONCE(!pool_index || !fs_bulk); + if (free_index >= fs_bulk->bulk_len) + return -ENOSPC; + + clear_bit(free_index, fs_bulk->bitmask); + pool_index->fs_bulk = fs_bulk; + pool_index->index = free_index; + return 0; +} + +static int mlx5_fs_bulk_release_index(struct mlx5_fs_bulk *fs_bulk, int index) +{ + if (test_bit(index, fs_bulk->bitmask)) + return -EINVAL; + + set_bit(index, fs_bulk->bitmask); + return 0; +} + +void mlx5_fs_pool_init(struct mlx5_fs_pool *pool, struct mlx5_core_dev *dev, + const struct mlx5_fs_pool_ops *ops, void *pool_ctx) +{ + WARN_ON_ONCE(!ops || !ops->bulk_destroy || !ops->bulk_create || + !ops->update_threshold); + pool->dev = dev; + pool->pool_ctx = pool_ctx; + mutex_init(&pool->pool_lock); + INIT_LIST_HEAD(&pool->fully_used); + INIT_LIST_HEAD(&pool->partially_used); + INIT_LIST_HEAD(&pool->unused); + pool->available_units = 0; + pool->used_units = 0; + pool->threshold = 0; + pool->ops = ops; +} + +void mlx5_fs_pool_cleanup(struct mlx5_fs_pool *pool) +{ + struct mlx5_core_dev *dev = pool->dev; + struct mlx5_fs_bulk *bulk; + struct mlx5_fs_bulk *tmp; + + list_for_each_entry_safe(bulk, tmp, &pool->fully_used, pool_list) + pool->ops->bulk_destroy(dev, bulk); + list_for_each_entry_safe(bulk, tmp, &pool->partially_used, pool_list) + pool->ops->bulk_destroy(dev, bulk); + list_for_each_entry_safe(bulk, tmp, &pool->unused, pool_list) + pool->ops->bulk_destroy(dev, bulk); +} + +static struct mlx5_fs_bulk * +mlx5_fs_pool_alloc_new_bulk(struct mlx5_fs_pool *fs_pool) +{ + struct mlx5_core_dev *dev = fs_pool->dev; + struct mlx5_fs_bulk *new_bulk; + + new_bulk = fs_pool->ops->bulk_create(dev, fs_pool->pool_ctx); + if (new_bulk) + fs_pool->available_units += new_bulk->bulk_len; + fs_pool->ops->update_threshold(fs_pool); + return new_bulk; +} + +static void +mlx5_fs_pool_free_bulk(struct mlx5_fs_pool *fs_pool, struct mlx5_fs_bulk *bulk) +{ + struct mlx5_core_dev *dev = fs_pool->dev; + + fs_pool->available_units -= bulk->bulk_len; + fs_pool->ops->bulk_destroy(dev, bulk); + fs_pool->ops->update_threshold(fs_pool); +} + +static int +mlx5_fs_pool_acquire_from_list(struct list_head *src_list, + struct list_head *next_list, + bool move_non_full_bulk, + struct mlx5_fs_pool_index *pool_index) +{ + struct mlx5_fs_bulk *fs_bulk; + int err; + + if (list_empty(src_list)) + return -ENODATA; + + fs_bulk = list_first_entry(src_list, struct mlx5_fs_bulk, pool_list); + err = mlx5_fs_bulk_acquire_index(fs_bulk, pool_index); + if (move_non_full_bulk || mlx5_fs_bulk_get_free_amount(fs_bulk) == 0) + list_move(&fs_bulk->pool_list, next_list); + return err; +} + +int mlx5_fs_pool_acquire_index(struct mlx5_fs_pool *fs_pool, + struct mlx5_fs_pool_index *pool_index) +{ + struct mlx5_fs_bulk *new_bulk; + int err; + + mutex_lock(&fs_pool->pool_lock); + + err = mlx5_fs_pool_acquire_from_list(&fs_pool->partially_used, + &fs_pool->fully_used, false, + pool_index); + if (err) + err = mlx5_fs_pool_acquire_from_list(&fs_pool->unused, + &fs_pool->partially_used, + true, pool_index); + if (err) { + new_bulk = mlx5_fs_pool_alloc_new_bulk(fs_pool); + if (!new_bulk) { + err = -ENOENT; + goto out; + } + err = mlx5_fs_bulk_acquire_index(new_bulk, pool_index); + WARN_ON_ONCE(err); + list_add(&new_bulk->pool_list, &fs_pool->partially_used); + } + fs_pool->available_units--; + fs_pool->used_units++; + +out: + mutex_unlock(&fs_pool->pool_lock); + return err; +} + +int mlx5_fs_pool_release_index(struct mlx5_fs_pool *fs_pool, + struct mlx5_fs_pool_index *pool_index) +{ + struct mlx5_fs_bulk *bulk = pool_index->fs_bulk; + int bulk_free_amount; + int err; + + mutex_lock(&fs_pool->pool_lock); + + /* TBD would rather return void if there was no warn here in original code */ + err = mlx5_fs_bulk_release_index(bulk, pool_index->index); + if (err) + goto unlock; + + fs_pool->available_units++; + fs_pool->used_units--; + + bulk_free_amount = mlx5_fs_bulk_get_free_amount(bulk); + if (bulk_free_amount == 1) + list_move_tail(&bulk->pool_list, &fs_pool->partially_used); + if (bulk_free_amount == bulk->bulk_len) { + list_del(&bulk->pool_list); + if (fs_pool->available_units > fs_pool->threshold) + mlx5_fs_pool_free_bulk(fs_pool, bulk); + else + list_add(&bulk->pool_list, &fs_pool->unused); + } + +unlock: + mutex_unlock(&fs_pool->pool_lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.h new file mode 100644 index 000000000000..f04ec3107498 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#ifndef __MLX5_FS_POOL_H__ +#define __MLX5_FS_POOL_H__ + +#include <linux/mlx5/driver.h> + +struct mlx5_fs_bulk { + struct list_head pool_list; + int bulk_len; + unsigned long *bitmask; +}; + +struct mlx5_fs_pool_index { + struct mlx5_fs_bulk *fs_bulk; + int index; +}; + +struct mlx5_fs_pool; + +struct mlx5_fs_pool_ops { + int (*bulk_destroy)(struct mlx5_core_dev *dev, struct mlx5_fs_bulk *bulk); + struct mlx5_fs_bulk * (*bulk_create)(struct mlx5_core_dev *dev, + void *pool_ctx); + void (*update_threshold)(struct mlx5_fs_pool *pool); +}; + +struct mlx5_fs_pool { + struct mlx5_core_dev *dev; + void *pool_ctx; + const struct mlx5_fs_pool_ops *ops; + struct mutex pool_lock; /* protects pool lists */ + struct list_head fully_used; + struct list_head partially_used; + struct list_head unused; + int available_units; + int used_units; + int threshold; +}; + +int mlx5_fs_bulk_init(struct mlx5_core_dev *dev, struct mlx5_fs_bulk *fs_bulk, + int bulk_len); +void mlx5_fs_bulk_cleanup(struct mlx5_fs_bulk *fs_bulk); +int mlx5_fs_bulk_get_free_amount(struct mlx5_fs_bulk *bulk); + +void mlx5_fs_pool_init(struct mlx5_fs_pool *pool, struct mlx5_core_dev *dev, + const struct mlx5_fs_pool_ops *ops, void *pool_ctx); +void mlx5_fs_pool_cleanup(struct mlx5_fs_pool *pool); +int mlx5_fs_pool_acquire_index(struct mlx5_fs_pool *fs_pool, + struct mlx5_fs_pool_index *pool_index); +int mlx5_fs_pool_release_index(struct mlx5_fs_pool *fs_pool, + struct mlx5_fs_pool_index *pool_index); + +#endif /* __MLX5_FS_POOL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 76ad46bf477d..57476487e31f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -281,6 +281,19 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, shampo)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_SHAMPO, HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, adv_rdma)) { + err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ADV_RDMA, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 566710d34a7b..69933addd921 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -246,7 +246,7 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; - del_timer_sync(&fw_reset->timer); + timer_delete_sync(&fw_reset->timer); } static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) @@ -278,7 +278,8 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work) #define MLX5_RESET_POLL_INTERVAL (HZ / 10) static void poll_sync_reset(struct timer_list *t) { - struct mlx5_fw_reset *fw_reset = from_timer(fw_reset, t, timer); + struct mlx5_fw_reset *fw_reset = timer_container_of(fw_reset, t, + timer); struct mlx5_core_dev *dev = fw_reset->dev; u32 fatal_error; @@ -345,15 +346,12 @@ static void mlx5_fw_live_patch_event(struct work_struct *work) } #if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE) -static int mlx5_check_hotplug_interrupt(struct mlx5_core_dev *dev) +static int mlx5_check_hotplug_interrupt(struct mlx5_core_dev *dev, + struct pci_dev *bridge) { - struct pci_dev *bridge = dev->pdev->bus->self; u16 reg16; int err; - if (!bridge) - return -EOPNOTSUPP; - err = pcie_capability_read_word(bridge, PCI_EXP_SLTCTL, ®16); if (err) return err; @@ -416,9 +414,15 @@ static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id) static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev, u8 reset_method) { + struct pci_dev *bridge = dev->pdev->bus->self; u16 dev_id; int err; + if (!bridge) { + mlx5_core_warn(dev, "PCI bus bridge is not accessible\n"); + return false; + } + if (!MLX5_CAP_GEN(dev, fast_teardown)) { mlx5_core_warn(dev, "fast teardown is not supported by firmware\n"); return false; @@ -426,7 +430,7 @@ static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev, #if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE) if (reset_method != MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET) { - err = mlx5_check_hotplug_interrupt(dev); + err = mlx5_check_hotplug_interrupt(dev, bridge); if (err) return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index a6329ca2d9bf..cf7a1edd0530 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -96,6 +96,11 @@ static int mlx5_health_get_rfr(u8 rfr_severity) return rfr_severity >> MLX5_RFR_BIT_OFFSET; } +static int mlx5_health_get_crr(u8 rfr_severity) +{ + return (rfr_severity >> MLX5_CRR_BIT_OFFSET) & 0x01; +} + static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; @@ -375,6 +380,8 @@ static const char *hsynd_str(u8 synd) return "High temperature"; case MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR: return "ICM fetch PCI data poisoned error"; + case MLX5_INITIAL_SEG_HEALTH_SYNDROME_TRUST_LOCKDOWN_ERR: + return "Trust lockdown error"; default: return "unrecognized error"; } @@ -442,12 +449,15 @@ static void print_health_info(struct mlx5_core_dev *dev) mlx5_log(dev, severity, "time %u\n", ioread32be(&h->time)); mlx5_log(dev, severity, "hw_id 0x%08x\n", ioread32be(&h->hw_id)); mlx5_log(dev, severity, "rfr %d\n", mlx5_health_get_rfr(rfr_severity)); + mlx5_log(dev, severity, "crr %d\n", mlx5_health_get_crr(rfr_severity)); mlx5_log(dev, severity, "severity %d (%s)\n", severity, mlx5_loglevel_str(severity)); mlx5_log(dev, severity, "irisc_index %d\n", ioread8(&h->irisc_index)); mlx5_log(dev, severity, "synd 0x%x: %s\n", ioread8(&h->synd), hsynd_str(ioread8(&h->synd))); mlx5_log(dev, severity, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd)); mlx5_log(dev, severity, "raw fw_ver 0x%08x\n", ioread32be(&h->fw_ver)); + if (mlx5_health_get_crr(rfr_severity)) + mlx5_core_warn(dev, "Cold reset is required\n"); } static int @@ -769,7 +779,8 @@ static void mlx5_health_log_ts_update(struct work_struct *work) static void poll_health(struct timer_list *t) { - struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); + struct mlx5_core_dev *dev = timer_container_of(dev, t, + priv.health.timer); struct mlx5_core_health *health = &dev->priv.health; struct health_buffer __iomem *h = health->health; u32 fatal_error; @@ -799,14 +810,17 @@ static void poll_health(struct timer_list *t) health->prev = count; if (health->miss_counter == MAX_MISSES) { mlx5_core_err(dev, "device's health compromised - reached miss count\n"); + health->synd = ioread8(&h->synd); print_health_info(dev); queue_work(health->wq, &health->report_work); } prev_synd = health->synd; health->synd = ioread8(&h->synd); - if (health->synd && health->synd != prev_synd) + if (health->synd && health->synd != prev_synd) { + print_health_info(dev); queue_work(health->wq, &health->report_work); + } out: mod_timer(&health->timer, get_next_poll_jiffies(dev)); @@ -834,7 +848,7 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) if (disable_health) set_bit(MLX5_DROP_HEALTH_WORK, &health->flags); - del_timer_sync(&health->timer); + timer_delete_sync(&health->timer); } void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c index 353f81dccd1c..4ba2636d7fb6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.c @@ -416,3 +416,8 @@ void mlx5_hwmon_dev_unregister(struct mlx5_core_dev *mdev) mlx5_hwmon_free(hwmon); mdev->hwmon = NULL; } + +const char *hwmon_get_sensor_name(struct mlx5_hwmon *hwmon, int channel) +{ + return hwmon->temp_channel_desc[channel].sensor_name; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h index 999654a9b9da..f38271c22c10 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/hwmon.h @@ -10,6 +10,7 @@ int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev); void mlx5_hwmon_dev_unregister(struct mlx5_core_dev *mdev); +const char *hwmon_get_sensor_name(struct mlx5_hwmon *hwmon, int channel); #else static inline int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 9772327d5124..4b3430ac3905 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -238,6 +238,23 @@ static u32 mlx5i_flow_type_mask(u32 flow_type) return flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS); } +static int mlx5i_set_rxfh_fields(struct net_device *dev, + const struct ethtool_rxfh_fields *cmd, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_set_rxfh_fields(priv, cmd, extack); +} + +static int mlx5i_get_rxfh_fields(struct net_device *dev, + struct ethtool_rxfh_fields *info) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + + return mlx5e_ethtool_get_rxfh_fields(priv, info); +} + static int mlx5i_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) { struct mlx5e_priv *priv = mlx5i_epriv(dev); @@ -283,6 +300,8 @@ const struct ethtool_ops mlx5i_ethtool_ops = { .get_coalesce = mlx5i_get_coalesce, .set_coalesce = mlx5i_set_coalesce, .get_ts_info = mlx5i_get_ts_info, + .get_rxfh_fields = mlx5i_get_rxfh_fields, + .set_rxfh_fields = mlx5i_set_rxfh_fields, .get_rxnfc = mlx5i_get_rxnfc, .set_rxnfc = mlx5i_set_rxnfc, .get_link_ksettings = mlx5i_get_link_ksettings, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 0979d672d47f..79ae3a51a4b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -32,6 +32,7 @@ #include <rdma/ib_verbs.h> #include <linux/mlx5/fs.h> +#include <net/netdev_lock.h> #include "en.h" #include "en/params.h" #include "ipoib.h" @@ -102,6 +103,8 @@ int mlx5i_init(struct mlx5_core_dev *mdev, struct net_device *netdev) netdev->netdev_ops = &mlx5i_netdev_ops; netdev->ethtool_ops = &mlx5i_ethtool_ops; + netdev->request_ops_lock = true; + netdev_lockdep_set_classes(netdev); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index 1477db7f5307..82d3c2568244 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -47,29 +47,40 @@ static int cpu_get_least_loaded(struct mlx5_irq_pool *pool, static struct mlx5_irq * irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc) { - struct irq_affinity_desc auto_desc = {}; + struct irq_affinity_desc *auto_desc; struct mlx5_irq *irq; u32 irq_index; int err; + auto_desc = kvzalloc(sizeof(*auto_desc), GFP_KERNEL); + if (!auto_desc) + return ERR_PTR(-ENOMEM); + err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL); - if (err) + if (err) { + kvfree(auto_desc); return ERR_PTR(err); + } + if (pool->irqs_per_cpu) { if (cpumask_weight(&af_desc->mask) > 1) /* if req_mask contain more then one CPU, set the least loadad CPU * of req_mask */ cpumask_set_cpu(cpu_get_least_loaded(pool, &af_desc->mask), - &auto_desc.mask); + &auto_desc->mask); else cpu_get(pool, cpumask_first(&af_desc->mask)); } + irq = mlx5_irq_alloc(pool, irq_index, - cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc, + cpumask_empty(&auto_desc->mask) ? af_desc : auto_desc, NULL); if (IS_ERR(irq)) xa_erase(&pool->irqs, irq_index); + + kvfree(auto_desc); + return irq; } @@ -175,7 +186,7 @@ unlock: void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq) { - struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); + struct mlx5_irq_pool *pool = mlx5_irq_get_pool(irq); int cpu; cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c index f4b777d4e108..62b6faa4276a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c @@ -105,20 +105,20 @@ static int mapping_show(struct seq_file *file, void *priv) struct mlx5_lag *ldev; bool hash = false; bool lag_active; + int i, idx = 0; int num_ports; - int i; ldev = mlx5_lag_dev(dev); mutex_lock(&ldev->lock); lag_active = __mlx5_lag_is_active(ldev); if (lag_active) { if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) { - mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports, + mlx5_infer_tx_enabled(&ldev->tracker, ldev, ports, &num_ports); hash = true; } else { - for (i = 0; i < ldev->ports; i++) - ports[i] = ldev->v2p_map[i]; + mlx5_ldev_for_each(i, 0, ldev) + ports[idx++] = ldev->v2p_map[i]; num_ports = ldev->ports; } } @@ -144,11 +144,8 @@ static int members_show(struct seq_file *file, void *priv) ldev = mlx5_lag_dev(dev); mutex_lock(&ldev->lock); - for (i = 0; i < ldev->ports; i++) { - if (!ldev->pf[i].dev) - continue; + mlx5_ldev_for_each(i, 0, ldev) seq_printf(file, "%s\n", dev_name(ldev->pf[i].dev->device)); - } mutex_unlock(&ldev->lock); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 7f68468c2e75..d058cbb4a00c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -43,10 +43,6 @@ #include "mp.h" #include "mpesw.h" -enum { - MLX5_LAG_EGRESS_PORT_1 = 1, - MLX5_LAG_EGRESS_PORT_2, -}; /* General purpose, use for short periods of time. * Beware of lock dependencies (preferably, no locks should be acquired @@ -72,7 +68,7 @@ static u8 lag_active_port_bits(struct mlx5_lag *ldev) int num_enabled; int idx; - mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports, + mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports, &num_enabled); for (idx = 0; idx < num_enabled; idx++) active_port |= BIT_MASK(enabled_ports[idx]); @@ -80,23 +76,30 @@ static u8 lag_active_port_bits(struct mlx5_lag *ldev) return active_port; } -static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode, - unsigned long flags) +static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev, + int mode, unsigned long flags) { bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, &flags); int port_sel_mode = get_port_sel_mode(mode, flags); u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; + u8 *ports = ldev->v2p_map; + int idx0, idx1; void *lag_ctx; lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode); + idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0); + idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1); + + if (idx0 < 0 || idx1 < 0) + return -EINVAL; switch (port_sel_mode) { case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]); break; case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass)) @@ -113,17 +116,23 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode, return mlx5_cmd_exec_in(dev, create_lag, in); } -static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports, +static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev, u8 *ports) { u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); + int idx0, idx1; + + idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0); + idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1); + if (idx0 < 0 || idx1 < 0) + return -EINVAL; MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); MLX5_SET(modify_lag_in, in, field_select, 0x1); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]); return mlx5_cmd_exec_in(dev, modify_lag, in); } @@ -148,33 +157,31 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); -static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports, +static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev, u8 *ports, int *num_disabled) { int i; *num_disabled = 0; - for (i = 0; i < num_ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) if (!tracker->netdev_state[i].tx_enabled || !tracker->netdev_state[i].link_up) ports[(*num_disabled)++] = i; - } } -void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, +void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev, u8 *ports, int *num_enabled) { int i; *num_enabled = 0; - for (i = 0; i < num_ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) if (tracker->netdev_state[i].tx_enabled && tracker->netdev_state[i].link_up) ports[(*num_enabled)++] = i; - } if (*num_enabled == 0) - mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled); + mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled); } static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev, @@ -192,7 +199,7 @@ static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev, int j; if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { - mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports, + mlx5_infer_tx_enabled(tracker, ldev, enabled_ports, &num_enabled); for (i = 0; i < num_enabled; i++) { err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1); @@ -203,7 +210,7 @@ static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev, buf[written - 2] = 0; mlx5_core_info(dev, "lag map active ports: %s\n", buf); } else { - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { for (j = 0; j < ldev->buckets; j++) { idx = i * ldev->buckets + j; err = scnprintf(buf + written, 10, @@ -286,13 +293,55 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, { int i; - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) if (ldev->pf[i].netdev == ndev) return i; return -ENOENT; } +int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq) +{ + int i, num = 0; + + if (!ldev) + return -ENOENT; + + mlx5_ldev_for_each(i, 0, ldev) { + if (num == seq) + return i; + num++; + } + return -ENOENT; +} + +int mlx5_lag_num_devs(struct mlx5_lag *ldev) +{ + int i, num = 0; + + if (!ldev) + return 0; + + mlx5_ldev_for_each(i, 0, ldev) { + (void)i; + num++; + } + return num; +} + +int mlx5_lag_num_netdevs(struct mlx5_lag *ldev) +{ + int i, num = 0; + + if (!ldev) + return 0; + + mlx5_ldev_for_each(i, 0, ldev) + if (ldev->pf[i].netdev) + num++; + return num; +} + static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) { return ldev->mode == MLX5_LAG_MODE_ROCE; @@ -310,7 +359,7 @@ static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) * with mapping that points to active ports. */ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, - u8 num_ports, + struct mlx5_lag *ldev, u8 buckets, u8 *ports) { @@ -323,7 +372,7 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, int i; int j; - for (i = 0; i < num_ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { if (tracker->netdev_state[i].tx_enabled && tracker->netdev_state[i].link_up) enabled[enabled_ports_num++] = i; @@ -334,15 +383,16 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, /* Use native mapping by default where each port's buckets * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc */ - for (i = 0; i < num_ports; i++) + mlx5_ldev_for_each(i, 0, ldev) { for (j = 0; j < buckets; j++) { idx = i * buckets + j; - ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i; + ports[idx] = i + 1; } + } /* If all ports are disabled/enabled keep native mapping */ - if (enabled_ports_num == num_ports || - disabled_ports_num == num_ports) + if (enabled_ports_num == ldev->ports || + disabled_ports_num == ldev->ports) return; /* Go over the disabled ports and for each assign a random active port */ @@ -358,7 +408,7 @@ static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev) { int i; - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) if (ldev->pf[i].has_drop) return true; return false; @@ -368,7 +418,7 @@ static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev) { int i; - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { if (!ldev->pf[i].has_drop) continue; @@ -396,7 +446,7 @@ static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, if (!ldev->tracker.has_inactive) return; - mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled); + mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled); for (i = 0; i < num_disabled; i++) { disabled_index = disabled_ports[i]; @@ -428,10 +478,15 @@ static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports) static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) { - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + struct mlx5_core_dev *dev0; u8 active_ports; int ret; + if (idx < 0) + return -EINVAL; + + dev0 = ldev->pf[idx].dev; if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) { ret = mlx5_lag_port_sel_modify(ldev, ports); if (ret || @@ -442,7 +497,7 @@ static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) return mlx5_cmd_modify_active_port(dev0, active_ports); } - return mlx5_cmd_modify_lag(dev0, ldev->ports, ports); + return mlx5_cmd_modify_lag(dev0, ldev, ports); } static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev) @@ -450,7 +505,7 @@ static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev struct net_device *ndev = NULL; struct mlx5_lag *ldev; unsigned long flags; - int i; + int i, last_idx; spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); @@ -458,14 +513,17 @@ static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev if (!ldev) goto unlock; - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) if (ldev->tracker.netdev_state[i].tx_enabled) ndev = ldev->pf[i].netdev; - if (!ndev) - ndev = ldev->pf[ldev->ports - 1].netdev; + if (!ndev) { + last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1); + if (last_idx < 0) + goto unlock; + ndev = ldev->pf[last_idx].netdev; + } - if (ndev) - dev_hold(ndev); + dev_hold(ndev); unlock: spin_unlock_irqrestore(&lag_lock, flags); @@ -476,16 +534,21 @@ unlock: void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker) { + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {}; - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev0; int idx; int err; int i; int j; - mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports); + if (first_idx < 0) + return; - for (i = 0; i < ldev->ports; i++) { + dev0 = ldev->pf[first_idx].dev; + mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports); + + mlx5_ldev_for_each(i, 0, ldev) { for (j = 0; j < ldev->buckets; j++) { idx = i * ldev->buckets + j; if (ports[idx] == ldev->v2p_map[idx]) @@ -520,10 +583,21 @@ void mlx5_modify_lag(struct mlx5_lag *ldev, } } -static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev, - unsigned long *flags) +static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev, + enum mlx5_lag_mode mode, + unsigned long *flags) { - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + struct mlx5_core_dev *dev0; + + if (first_idx < 0) + return -EINVAL; + + if (mode == MLX5_LAG_MODE_MPESW || + mode == MLX5_LAG_MODE_MULTIPATH) + return 0; + + dev0 = ldev->pf[first_idx].dev; if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) { if (ldev->ports > 2) @@ -539,30 +613,10 @@ static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev, return 0; } -static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev, - struct lag_tracker *tracker, - enum mlx5_lag_mode mode, - unsigned long *flags) -{ - struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1]; - - if (mode == MLX5_LAG_MODE_MPESW) - return; - - if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) && - tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) { - if (ldev->ports > 2) - ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS; - set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags); - } -} - static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, struct lag_tracker *tracker, bool shared_fdb, unsigned long *flags) { - bool roce_lag = mode == MLX5_LAG_MODE_ROCE; - *flags = 0; if (shared_fdb) { set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags); @@ -572,11 +626,7 @@ static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, if (mode == MLX5_LAG_MODE_MPESW) set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags); - if (roce_lag) - return mlx5_lag_set_port_sel_mode_roce(ldev, flags); - - mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags); - return 0; + return mlx5_lag_set_port_sel_mode(ldev, mode, flags); } char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) @@ -593,12 +643,18 @@ char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_eswitch *master_esw = dev0->priv.eswitch; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + struct mlx5_eswitch *master_esw; + struct mlx5_core_dev *dev0; + int i, j; int err; - int i; - for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) { + if (first_idx < 0) + return -EINVAL; + + dev0 = ldev->pf[first_idx].dev; + master_esw = dev0->priv.eswitch; + mlx5_ldev_for_each(i, first_idx + 1, ldev) { struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch; err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw, @@ -608,9 +664,9 @@ static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) } return 0; err: - for (; i > MLX5_LAG_P1; i--) + mlx5_ldev_for_each_reverse(j, i, first_idx + 1, ldev) mlx5_eswitch_offloads_single_fdb_del_one(master_esw, - ldev->pf[i].dev->priv.eswitch); + ldev->pf[j].dev->priv.eswitch); return err; } @@ -620,16 +676,21 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, unsigned long flags) { bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; + struct mlx5_core_dev *dev0; int err; + if (first_idx < 0) + return -EINVAL; + + dev0 = ldev->pf[first_idx].dev; if (tracker) mlx5_lag_print_mapping(dev0, ldev, tracker, flags); mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n", shared_fdb, mlx5_get_str_port_sel_mode(mode, flags)); - err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags); + err = mlx5_cmd_create_lag(dev0, ldev, mode, flags); if (err) { mlx5_core_err(dev0, "Failed to create LAG (%d)\n", @@ -661,17 +722,22 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, bool shared_fdb) { + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); bool roce_lag = mode == MLX5_LAG_MODE_ROCE; - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev0; unsigned long flags = 0; int err; + if (first_idx < 0) + return -EINVAL; + + dev0 = ldev->pf[first_idx].dev; err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags); if (err) return err; if (mode != MLX5_LAG_MODE_MPESW) { - mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map); + mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map); if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { err = mlx5_lag_port_sel_create(ldev, tracker->hash_type, ldev->v2p_map); @@ -709,20 +775,26 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, int mlx5_deactivate_lag(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_eswitch *master_esw = dev0->priv.eswitch; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; bool roce_lag = __mlx5_lag_is_roce(ldev); unsigned long flags = ldev->mode_flags; + struct mlx5_eswitch *master_esw; + struct mlx5_core_dev *dev0; int err; int i; + if (first_idx < 0) + return -EINVAL; + + dev0 = ldev->pf[first_idx].dev; + master_esw = dev0->priv.eswitch; ldev->mode = MLX5_LAG_MODE_NONE; ldev->mode_flags = 0; mlx5_lag_mp_reset(ldev); if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) { - for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) + mlx5_ldev_for_each(i, first_idx + 1, ldev) mlx5_eswitch_offloads_single_fdb_del_one(master_esw, ldev->pf[i].dev->priv.eswitch); clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); @@ -754,6 +826,7 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev) bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) { + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); #ifdef CONFIG_MLX5_ESWITCH struct mlx5_core_dev *dev; u8 mode; @@ -761,30 +834,29 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) bool roce_support; int i; - for (i = 0; i < ldev->ports; i++) - if (!ldev->pf[i].dev) - return false; + if (first_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports) + return false; #ifdef CONFIG_MLX5_ESWITCH - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { dev = ldev->pf[i].dev; if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev)) return false; } - dev = ldev->pf[MLX5_LAG_P1].dev; + dev = ldev->pf[first_idx].dev; mode = mlx5_eswitch_mode(dev); - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) return false; #else - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) if (mlx5_sriov_is_enabled(ldev->pf[i].dev)) return false; #endif - roce_support = mlx5_get_roce_state(ldev->pf[MLX5_LAG_P1].dev); - for (i = 1; i < ldev->ports; i++) + roce_support = mlx5_get_roce_state(ldev->pf[first_idx].dev); + mlx5_ldev_for_each(i, first_idx + 1, ldev) if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support) return false; @@ -795,10 +867,7 @@ void mlx5_lag_add_devices(struct mlx5_lag *ldev) { int i; - for (i = 0; i < ldev->ports; i++) { - if (!ldev->pf[i].dev) - continue; - + mlx5_ldev_for_each(i, 0, ldev) { if (ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) continue; @@ -812,10 +881,7 @@ void mlx5_lag_remove_devices(struct mlx5_lag *ldev) { int i; - for (i = 0; i < ldev->ports; i++) { - if (!ldev->pf[i].dev) - continue; - + mlx5_ldev_for_each(i, 0, ldev) { if (ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) continue; @@ -828,11 +894,16 @@ void mlx5_lag_remove_devices(struct mlx5_lag *ldev) void mlx5_disable_lag(struct mlx5_lag *ldev) { bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + struct mlx5_core_dev *dev0; bool roce_lag; int err; int i; + if (idx < 0) + return; + + dev0 = ldev->pf[idx].dev; roce_lag = __mlx5_lag_is_roce(ldev); if (shared_fdb) { @@ -842,7 +913,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); } - for (i = 1; i < ldev->ports; i++) + mlx5_ldev_for_each(i, idx + 1, ldev) mlx5_nic_vport_disable_roce(ldev->pf[i].dev); } @@ -854,17 +925,21 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) mlx5_lag_add_devices(ldev); if (shared_fdb) - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); } -static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) +bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev) { + int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_core_dev *dev; int i; - for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) { + if (idx < 0) + return false; + + mlx5_ldev_for_each(i, idx + 1, ldev) { dev = ldev->pf[i].dev; if (is_mdev_switchdev_mode(dev) && mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && @@ -876,7 +951,7 @@ static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) return false; } - dev = ldev->pf[MLX5_LAG_P1].dev; + dev = ldev->pf[idx].dev; if (is_mdev_switchdev_mode(dev) && mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) && @@ -892,11 +967,11 @@ static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev) bool roce_lag = true; int i; - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev); #ifdef CONFIG_MLX5_ESWITCH - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev); #endif @@ -917,13 +992,18 @@ static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond) static void mlx5_do_bond(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct lag_tracker tracker = { }; + struct mlx5_core_dev *dev0; struct net_device *ndev; bool do_bond, roce_lag; int err; int i; + if (idx < 0) + return; + + dev0 = ldev->pf[idx].dev; if (!mlx5_lag_is_ready(ldev)) { do_bond = false; } else { @@ -937,7 +1017,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) } if (do_bond && !__mlx5_lag_is_active(ldev)) { - bool shared_fdb = mlx5_shared_fdb_supported(ldev); + bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev); roce_lag = mlx5_lag_is_roce_lag(ldev); @@ -951,12 +1031,16 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) if (err) { if (shared_fdb || roce_lag) mlx5_lag_add_devices(ldev); + if (shared_fdb) { + mlx5_ldev_for_each(i, 0, ldev) + mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); + } return; } else if (roce_lag) { dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); - for (i = 1; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, idx + 1, ldev) { if (mlx5_get_roce_state(ldev->pf[i].dev)) mlx5_nic_vport_enable_roce(ldev->pf[i].dev); } @@ -966,7 +1050,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); if (err) break; @@ -977,7 +1061,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) mlx5_rescan_drivers_locked(dev0); mlx5_deactivate_lag(ldev); mlx5_lag_add_devices(ldev); - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); mlx5_core_err(dev0, "Failed to enable lag\n"); return; @@ -1010,12 +1094,9 @@ struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev) int i; mutex_lock(&ldev->lock); - for (i = 0; i < ldev->ports; i++) { - if (ldev->pf[i].dev) { - devcom = ldev->pf[i].dev->priv.hca_devcom_comp; - break; - } - } + i = mlx5_get_next_ldev_func(ldev, 0); + if (i < MLX5_MAX_PORTS) + devcom = ldev->pf[i].dev->priv.hca_devcom_comp; mutex_unlock(&ldev->lock); return devcom; } @@ -1068,7 +1149,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, u8 bond_status = 0; int num_slaves = 0; int changed = 0; - int idx; + int i, idx = -1; if (!netif_is_lag_master(upper)) return 0; @@ -1083,8 +1164,13 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, */ rcu_read_lock(); for_each_netdev_in_bond_rcu(upper, ndev_tmp) { - idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); - if (idx >= 0) { + mlx5_ldev_for_each(i, 0, ldev) { + if (ldev->pf[i].netdev == ndev_tmp) { + idx++; + break; + } + } + if (i < MLX5_MAX_PORTS) { slave = bond_slave_get_rcu(ndev_tmp); if (slave) has_inactive |= bond_is_slave_inactive(slave); @@ -1234,15 +1320,12 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, } static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, - struct mlx5_core_dev *dev, - struct net_device *netdev) + struct mlx5_core_dev *dev, + struct net_device *netdev) { unsigned int fn = mlx5_get_dev_index(dev); unsigned long flags; - if (fn >= ldev->ports) - return; - spin_lock_irqsave(&lag_lock, flags); ldev->pf[fn].netdev = netdev; ldev->tracker.netdev_state[fn].link_up = 0; @@ -1257,7 +1340,7 @@ static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, int i; spin_lock_irqsave(&lag_lock, flags); - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { if (ldev->pf[i].netdev == netdev) { ldev->pf[i].netdev = NULL; break; @@ -1267,13 +1350,10 @@ static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, } static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, - struct mlx5_core_dev *dev) + struct mlx5_core_dev *dev) { unsigned int fn = mlx5_get_dev_index(dev); - if (fn >= ldev->ports) - return; - ldev->pf[fn].dev = dev; dev->priv.lag = ldev; } @@ -1281,16 +1361,13 @@ static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev) { - int i; + int fn; - for (i = 0; i < ldev->ports; i++) - if (ldev->pf[i].dev == dev) - break; - - if (i == ldev->ports) + fn = mlx5_get_dev_index(dev); + if (ldev->pf[fn].dev != dev) return; - ldev->pf[i].dev = NULL; + ldev->pf[fn].dev = NULL; dev->priv.lag = NULL; } @@ -1398,7 +1475,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev) { struct mlx5_lag *ldev; - int i; + int num = 0; ldev = mlx5_lag_dev(dev); if (!ldev) @@ -1406,17 +1483,33 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, mutex_lock(&ldev->lock); mlx5_ldev_add_netdev(ldev, dev, netdev); - - for (i = 0; i < ldev->ports; i++) - if (!ldev->pf[i].netdev) - break; - - if (i >= ldev->ports) + num = mlx5_lag_num_netdevs(ldev); + if (num >= ldev->ports) set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); mutex_unlock(&ldev->lock); mlx5_queue_bond_work(ldev, 0); } +int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx) +{ + int i; + + for (i = start_idx; i >= end_idx; i--) + if (ldev->pf[i].dev) + return i; + return -1; +} + +int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx) +{ + int i; + + for (i = start_idx; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + return i; + return MLX5_MAX_PORTS; +} + bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; @@ -1467,12 +1560,13 @@ bool mlx5_lag_is_master(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; unsigned long flags; - bool res; + bool res = false; + int idx; spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); - res = ldev && __mlx5_lag_is_active(ldev) && - dev == ldev->pf[MLX5_LAG_P1].dev; + idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + res = ldev && __mlx5_lag_is_active(ldev) && idx >= 0 && dev == ldev->pf[idx].dev; spin_unlock_irqrestore(&lag_lock, flags); return res; @@ -1555,7 +1649,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, if (!(ldev && __mlx5_lag_is_roce(ldev))) goto unlock; - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { if (ldev->pf[i].netdev == slave) { port = i; break; @@ -1594,13 +1688,13 @@ struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int if (!ldev) goto unlock; - if (*i == ldev->ports) + if (*i == MLX5_MAX_PORTS) goto unlock; - for (idx = *i; idx < ldev->ports; idx++) + mlx5_ldev_for_each(idx, *i, ldev) if (ldev->pf[idx].dev != dev) break; - if (idx == ldev->ports) { + if (idx == MLX5_MAX_PORTS) { *i = idx; goto unlock; } @@ -1621,10 +1715,10 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, { int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); struct mlx5_core_dev **mdev; + int ret = 0, i, j, idx = 0; struct mlx5_lag *ldev; unsigned long flags; int num_ports; - int ret, i, j; void *out; out = kvzalloc(outlen, GFP_KERNEL); @@ -1643,8 +1737,8 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, ldev = mlx5_lag_dev(dev); if (ldev && __mlx5_lag_is_active(ldev)) { num_ports = ldev->ports; - for (i = 0; i < ldev->ports; i++) - mdev[i] = ldev->pf[i].dev; + mlx5_ldev_for_each(i, 0, ldev) + mdev[idx++] = ldev->pf[i].dev; } else { num_ports = 1; mdev[MLX5_LAG_P1] = dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index 50fcb1eee574..c2f256bb2bc2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -92,6 +92,7 @@ mlx5_lag_is_ready(struct mlx5_lag *ldev) return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); } +bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev); bool mlx5_lag_check_prereq(struct mlx5_lag *ldev); void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker); @@ -103,7 +104,7 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, struct net_device *ndev); char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags); -void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, +void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev, u8 *ports, int *num_enabled); void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev); @@ -119,9 +120,24 @@ static inline bool mlx5_lag_is_supported(struct mlx5_core_dev *dev) if (!MLX5_CAP_GEN(dev, vport_group_manager) || !MLX5_CAP_GEN(dev, lag_master) || MLX5_CAP_GEN(dev, num_lag_ports) < 2 || + mlx5_get_dev_index(dev) >= MLX5_MAX_PORTS || MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS) return false; return true; } +#define mlx5_ldev_for_each(i, start_index, ldev) \ + for (int tmp = start_index; tmp = mlx5_get_next_ldev_func(ldev, tmp), \ + i = tmp, tmp < MLX5_MAX_PORTS; tmp++) + +#define mlx5_ldev_for_each_reverse(i, start_index, end_index, ldev) \ + for (int tmp = start_index, tmp1 = end_index; \ + tmp = mlx5_get_pre_ldev_func(ldev, tmp, tmp1), \ + i = tmp, tmp >= tmp1; tmp--) + +int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx); +int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx); +int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq); +int mlx5_lag_num_devs(struct mlx5_lag *ldev); +int mlx5_lag_num_netdevs(struct mlx5_lag *ldev); #endif /* __MLX5_LAG_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index b1aa494c76ba..aee17fcf3b36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -17,7 +17,10 @@ static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev) #define MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS 2 static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) { - if (!mlx5_lag_is_ready(ldev)) + int idx0 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + int idx1 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P2); + + if (idx0 < 0 || idx1 < 0 || !mlx5_lag_is_ready(ldev)) return false; if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev)) @@ -26,8 +29,8 @@ static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) if (ldev->ports > MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS) return false; - return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev, - ldev->pf[MLX5_LAG_P2].dev); + return mlx5_esw_multipath_prereq(ldev->pf[idx0].dev, + ldev->pf[idx1].dev); } bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) @@ -50,43 +53,45 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, enum mlx5_lag_port_affinity port) { + int idx0 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + int idx1 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P2); struct lag_tracker tracker = {}; - if (!__mlx5_lag_is_multipath(ldev)) + if (idx0 < 0 || idx1 < 0 || !__mlx5_lag_is_multipath(ldev)) return; switch (port) { case MLX5_LAG_NORMAL_AFFINITY: - tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true; - tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true; - tracker.netdev_state[MLX5_LAG_P1].link_up = true; - tracker.netdev_state[MLX5_LAG_P2].link_up = true; + tracker.netdev_state[idx0].tx_enabled = true; + tracker.netdev_state[idx1].tx_enabled = true; + tracker.netdev_state[idx0].link_up = true; + tracker.netdev_state[idx1].link_up = true; break; case MLX5_LAG_P1_AFFINITY: - tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true; - tracker.netdev_state[MLX5_LAG_P1].link_up = true; - tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false; - tracker.netdev_state[MLX5_LAG_P2].link_up = false; + tracker.netdev_state[idx0].tx_enabled = true; + tracker.netdev_state[idx0].link_up = true; + tracker.netdev_state[idx1].tx_enabled = false; + tracker.netdev_state[idx1].link_up = false; break; case MLX5_LAG_P2_AFFINITY: - tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false; - tracker.netdev_state[MLX5_LAG_P1].link_up = false; - tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true; - tracker.netdev_state[MLX5_LAG_P2].link_up = true; + tracker.netdev_state[idx0].tx_enabled = false; + tracker.netdev_state[idx0].link_up = false; + tracker.netdev_state[idx1].tx_enabled = true; + tracker.netdev_state[idx1].link_up = true; break; default: - mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev, + mlx5_core_warn(ldev->pf[idx0].dev, "Invalid affinity port %d", port); return; } - if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled) - mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events, + if (tracker.netdev_state[idx0].tx_enabled) + mlx5_notifier_call_chain(ldev->pf[idx0].dev->priv.events, MLX5_DEV_EVENT_PORT_AFFINITY, (void *)0); - if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled) - mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events, + if (tracker.netdev_state[idx1].tx_enabled) + mlx5_notifier_call_chain(ldev->pf[idx1].dev->priv.events, MLX5_DEV_EVENT_PORT_AFFINITY, (void *)0); @@ -150,9 +155,14 @@ mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev, static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, struct fib_entry_notifier_info *fen_info) { + int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct net_device *nh_dev0, *nh_dev1; struct fib_info *fi = fen_info->fi; struct lag_mp *mp = &ldev->lag_mp; + int i, dev_idx = 0; + + if (idx < 0) + return; /* Handle delete event */ if (event == FIB_EVENT_ENTRY_DEL) { @@ -179,17 +189,19 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, } if (nh_dev0 == nh_dev1) { - mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev, + mlx5_core_warn(ldev->pf[idx].dev, "Multipath offload doesn't support routes with multiple nexthops of the same device"); return; } if (!nh_dev1) { if (__mlx5_lag_is_active(ldev)) { - int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev0); - - i++; - mlx5_lag_set_port_affinity(ldev, i); + mlx5_ldev_for_each(i, 0, ldev) { + dev_idx++; + if (ldev->pf[i].netdev == nh_dev0) + break; + } + mlx5_lag_set_port_affinity(ldev, dev_idx); mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len); } @@ -214,6 +226,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, struct fib_info *fi) { struct lag_mp *mp = &ldev->lag_mp; + int i, dev_idx = 0; /* Check the nh event is related to the route */ if (!mp->fib.mfi || mp->fib.mfi != fi) @@ -221,11 +234,15 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, /* nh added/removed */ if (event == FIB_EVENT_NH_DEL) { - int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev); + mlx5_ldev_for_each(i, 0, ldev) { + if (ldev->pf[i].netdev == fib_nh->fib_nh_dev) + break; + dev_idx++; + } - if (i >= 0) { - i = (i + 1) % 2 + 1; /* peer port */ - mlx5_lag_set_port_affinity(ldev, i); + if (dev_idx >= 0) { + dev_idx = (dev_idx + 1) % 2 + 1; /* peer port */ + mlx5_lag_set_port_affinity(ldev, dev_idx); } } else if (event == FIB_EVENT_NH_ADD && fib_info_num_path(fi) == 2) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 571ea26edd0c..aad52d3a90e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -15,7 +15,7 @@ static void mlx5_mpesw_metadata_cleanup(struct mlx5_lag *ldev) u32 pf_metadata; int i; - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { dev = ldev->pf[i].dev; esw = dev->priv.eswitch; pf_metadata = ldev->lag_mpesw.pf_metadata[i]; @@ -36,7 +36,7 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev) u32 pf_metadata; int i, err; - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { dev = ldev->pf[i].dev; esw = dev->priv.eswitch; pf_metadata = mlx5_esw_match_metadata_alloc(esw); @@ -52,7 +52,7 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev) goto err_metadata; } - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { dev = ldev->pf[i].dev; mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_MULTIPORT_ESW, (void *)0); @@ -65,23 +65,22 @@ err_metadata: return err; } -#define MLX5_LAG_MPESW_OFFLOADS_SUPPORTED_PORTS 4 static int enable_mpesw(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + struct mlx5_core_dev *dev0; int err; int i; - if (ldev->mode != MLX5_LAG_MODE_NONE) + if (idx < 0 || ldev->mode != MLX5_LAG_MODE_NONE) return -EINVAL; - if (ldev->ports > MLX5_LAG_MPESW_OFFLOADS_SUPPORTED_PORTS) - return -EOPNOTSUPP; - + dev0 = ldev->pf[idx].dev; if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS || !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) || !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) || - !mlx5_lag_check_prereq(ldev)) + !mlx5_lag_check_prereq(ldev) || + !mlx5_lag_shared_fdb_supported(ldev)) return -EOPNOTSUPP; err = mlx5_mpesw_metadata_set(ldev); @@ -98,7 +97,7 @@ static int enable_mpesw(struct mlx5_lag *ldev) dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); if (err) goto err_rescan_drivers; @@ -112,7 +111,7 @@ err_rescan_drivers: mlx5_deactivate_lag(ldev); err_add_devices: mlx5_lag_add_devices(ldev); - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); mlx5_mpesw_metadata_cleanup(ldev); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index ab2717012b79..d832a12ffec0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -39,15 +39,18 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, struct mlx5_lag_definer *lag_definer, u8 *ports) { - struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_namespace *ns; - int err, i; - int idx; - int j; + struct mlx5_core_dev *dev; + int err, i, j, k, idx; + if (first_idx < 0) + return -EINVAL; + + dev = ldev->pf[first_idx].dev; ft_attr.max_fte = ldev->ports * ldev->buckets; ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER; @@ -74,7 +77,7 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; flow_act.flags |= FLOW_ACT_NO_APPEND; - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { for (j = 0; j < ldev->buckets; j++) { u8 affinity; @@ -88,13 +91,13 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, &dest, 1); if (IS_ERR(lag_definer->rules[idx])) { err = PTR_ERR(lag_definer->rules[idx]); - do { + mlx5_ldev_for_each_reverse(k, i, 0, ldev) { while (j--) { - idx = i * ldev->buckets + j; + idx = k * ldev->buckets + j; mlx5_del_flow_rules(lag_definer->rules[idx]); } j = ldev->buckets; - } while (i--); + } goto destroy_fg; } } @@ -295,11 +298,16 @@ static struct mlx5_lag_definer * mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash, enum mlx5_traffic_types tt, bool tunnel, u8 *ports) { - struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_lag_definer *lag_definer; + struct mlx5_core_dev *dev; u32 *match_definer_mask; int format_id, err; + if (first_idx < 0) + return ERR_PTR(-EINVAL); + + dev = ldev->pf[first_idx].dev; lag_definer = kzalloc(sizeof(*lag_definer), GFP_KERNEL); if (!lag_definer) return ERR_PTR(-ENOMEM); @@ -341,12 +349,15 @@ free_lag_definer: static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev, struct mlx5_lag_definer *lag_definer) { - struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; - int idx; - int i; - int j; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + struct mlx5_core_dev *dev; + int idx, i, j; - for (i = 0; i < ldev->ports; i++) { + if (first_idx < 0) + return; + + dev = ldev->pf[first_idx].dev; + mlx5_ldev_for_each(i, first_idx, ldev) { for (j = 0; j < ldev->buckets; j++) { idx = i * ldev->buckets + j; mlx5_del_flow_rules(lag_definer->rules[idx]); @@ -501,10 +512,15 @@ static void mlx5_lag_set_outer_ttc_params(struct mlx5_lag *ldev, static int mlx5_lag_create_ttc_table(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; struct ttc_params ttc_params = {}; + struct mlx5_core_dev *dev; + + if (first_idx < 0) + return -EINVAL; + dev = ldev->pf[first_idx].dev; mlx5_lag_set_outer_ttc_params(ldev, &ttc_params); port_sel->outer.ttc = mlx5_create_ttc_table(dev, &ttc_params); return PTR_ERR_OR_ZERO(port_sel->outer.ttc); @@ -512,10 +528,15 @@ static int mlx5_lag_create_ttc_table(struct mlx5_lag *ldev) static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; struct ttc_params ttc_params = {}; + struct mlx5_core_dev *dev; + if (first_idx < 0) + return -EINVAL; + + dev = ldev->pf[first_idx].dev; mlx5_lag_set_inner_ttc_params(ldev, &ttc_params); port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params); return PTR_ERR_OR_ZERO(port_sel->inner.ttc); @@ -530,7 +551,7 @@ int mlx5_lag_port_sel_create(struct mlx5_lag *ldev, set_tt_map(port_sel, hash_type); err = mlx5_lag_create_definers(ldev, hash_type, ports); if (err) - return err; + goto clear_port_sel; if (port_sel->tunnel) { err = mlx5_lag_create_inner_ttc_table(ldev); @@ -549,6 +570,8 @@ destroy_inner: mlx5_destroy_ttc_table(port_sel->inner.ttc); destroy_definers: mlx5_lag_destroy_definers(ldev); +clear_port_sel: + memset(port_sel, 0, sizeof(*port_sel)); return err; } @@ -565,7 +588,7 @@ static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { for (j = 0; j < ldev->buckets; j++) { idx = i * ldev->buckets + j; if (ldev->v2p_map[idx] == ports[idx]) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 4822d01123b4..214d732d18e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -43,6 +43,8 @@ #include <linux/cpufeature.h> #endif /* CONFIG_X86 */ +#define MLX5_RT_CLOCK_IDENTITY_SIZE MLX5_FLD_SZ_BYTES(mrtcq_reg, rt_clock_identity) + enum { MLX5_PIN_MODE_IN = 0x0, MLX5_PIN_MODE_OUT = 0x1, @@ -77,6 +79,56 @@ enum { MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX = 200000, }; +struct mlx5_clock_dev_state { + struct mlx5_core_dev *mdev; + struct mlx5_devcom_comp_dev *compdev; + struct mlx5_nb pps_nb; + struct work_struct out_work; +}; + +struct mlx5_clock_priv { + struct mlx5_clock clock; + struct mlx5_core_dev *mdev; + struct mutex lock; /* protect mdev and used in PTP callbacks */ + struct mlx5_core_dev *event_mdev; +}; + +static struct mlx5_clock_priv *clock_priv(struct mlx5_clock *clock) +{ + return container_of(clock, struct mlx5_clock_priv, clock); +} + +static void mlx5_clock_lockdep_assert(struct mlx5_clock *clock) +{ + if (!clock->shared) + return; + + lockdep_assert(lockdep_is_held(&clock_priv(clock)->lock)); +} + +static struct mlx5_core_dev *mlx5_clock_mdev_get(struct mlx5_clock *clock) +{ + mlx5_clock_lockdep_assert(clock); + + return clock_priv(clock)->mdev; +} + +static void mlx5_clock_lock(struct mlx5_clock *clock) +{ + if (!clock->shared) + return; + + mutex_lock(&clock_priv(clock)->lock); +} + +static void mlx5_clock_unlock(struct mlx5_clock *clock) +{ + if (!clock->shared) + return; + + mutex_unlock(&clock_priv(clock)->lock); +} + static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev) { return (mlx5_is_real_time_rq(mdev) || mlx5_is_real_time_sq(mdev)); @@ -94,6 +146,22 @@ static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev) return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify); } +static int mlx5_clock_identity_get(struct mlx5_core_dev *mdev, + u8 identify[MLX5_RT_CLOCK_IDENTITY_SIZE]) +{ + u32 out[MLX5_ST_SZ_DW(mrtcq_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mrtcq_reg)] = {}; + int err; + + err = mlx5_core_access_reg(mdev, in, sizeof(in), + out, sizeof(out), MLX5_REG_MRTCQ, 0, 0); + if (!err) + memcpy(identify, MLX5_ADDR_OF(mrtcq_reg, out, rt_clock_identity), + MLX5_RT_CLOCK_IDENTITY_SIZE); + + return err; +} + static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz) { /* Optimal shift constant leads to corrections above just 1 scaled ppm. @@ -119,21 +187,30 @@ static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz) ilog2((U32_MAX / NSEC_PER_MSEC) * dev_freq_khz)); } +static s32 mlx5_clock_getmaxphase(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ? + MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX : + MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX; +} + static s32 mlx5_ptp_getmaxphase(struct ptp_clock_info *ptp) { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct mlx5_core_dev *mdev; + s32 ret; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + ret = mlx5_clock_getmaxphase(mdev); + mlx5_clock_unlock(clock); - return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ? - MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX : - MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX; + return ret; } static bool mlx5_is_mtutc_time_adj_cap(struct mlx5_core_dev *mdev, s64 delta) { - s64 max = mlx5_ptp_getmaxphase(&mdev->clock.ptp_info); + s64 max = mlx5_clock_getmaxphase(mdev); if (delta < -max || delta > max) return false; @@ -209,7 +286,7 @@ static int mlx5_mtctr_syncdevicetime(ktime_t *device_time, if (real_time_mode) *device_time = ns_to_ktime(REAL_TIME_TO_NS(device >> 32, device & U32_MAX)); else - *device_time = mlx5_timecounter_cyc2time(&mdev->clock, device); + *device_time = mlx5_timecounter_cyc2time(mdev->clock, device); return 0; } @@ -220,16 +297,23 @@ static int mlx5_ptp_getcrosststamp(struct ptp_clock_info *ptp, struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct system_time_snapshot history_begin = {0}; struct mlx5_core_dev *mdev; + int err; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); - if (!mlx5_is_ptm_source_time_available(mdev)) - return -EBUSY; + if (!mlx5_is_ptm_source_time_available(mdev)) { + err = -EBUSY; + goto unlock; + } ktime_get_snapshot(&history_begin); - return get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev, - &history_begin, cts); + err = get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev, + &history_begin, cts); +unlock: + mlx5_clock_unlock(clock); + return err; } #endif /* CONFIG_X86 */ @@ -259,12 +343,11 @@ static u64 mlx5_read_time(struct mlx5_core_dev *dev, (u64)timer_l | (u64)timer_h1 << 32; } -static u64 read_internal_timer(const struct cyclecounter *cc) +static u64 read_internal_timer(struct cyclecounter *cc) { struct mlx5_timer *timer = container_of(cc, struct mlx5_timer, cycles); struct mlx5_clock *clock = container_of(timer, struct mlx5_clock, timer); - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, - clock); + struct mlx5_core_dev *mdev = mlx5_clock_mdev_get(clock); return mlx5_read_time(mdev, NULL, false) & cc->mask; } @@ -272,7 +355,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc) static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) { struct mlx5_ib_clock_info *clock_info = mdev->clock_info; - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; struct mlx5_timer *timer; u32 sign; @@ -295,12 +378,10 @@ static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) static void mlx5_pps_out(struct work_struct *work) { - struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps, - out_work); - struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock, - pps_info); - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, - clock); + struct mlx5_clock_dev_state *clock_state = container_of(work, struct mlx5_clock_dev_state, + out_work); + struct mlx5_core_dev *mdev = clock_state->mdev; + struct mlx5_clock *clock = mdev->clock; u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; unsigned long flags; int i; @@ -322,17 +403,17 @@ static void mlx5_pps_out(struct work_struct *work) } } -static void mlx5_timestamp_overflow(struct work_struct *work) +static long mlx5_timestamp_overflow(struct ptp_clock_info *ptp_info) { - struct delayed_work *dwork = to_delayed_work(work); struct mlx5_core_dev *mdev; struct mlx5_timer *timer; struct mlx5_clock *clock; unsigned long flags; - timer = container_of(dwork, struct mlx5_timer, overflow_work); - clock = container_of(timer, struct mlx5_clock, timer); - mdev = container_of(clock, struct mlx5_core_dev, clock); + clock = container_of(ptp_info, struct mlx5_clock, ptp_info); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + timer = &clock->timer; if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) goto out; @@ -343,7 +424,8 @@ static void mlx5_timestamp_overflow(struct work_struct *work) write_sequnlock_irqrestore(&clock->lock, flags); out: - schedule_delayed_work(&timer->overflow_work, timer->overflow_period); + mlx5_clock_unlock(clock); + return timer->overflow_period; } static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev, @@ -362,15 +444,12 @@ static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev, return mlx5_set_mtutc(mdev, in, sizeof(in)); } -static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) +static int mlx5_clock_settime(struct mlx5_core_dev *mdev, struct mlx5_clock *clock, + const struct timespec64 *ts) { - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct mlx5_timer *timer = &clock->timer; - struct mlx5_core_dev *mdev; unsigned long flags; - mdev = container_of(clock, struct mlx5_core_dev, clock); - if (mlx5_modify_mtutc_allowed(mdev)) { int err = mlx5_ptp_settime_real_time(mdev, ts); @@ -386,6 +465,20 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 return 0; } +static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; + int err; + + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + err = mlx5_clock_settime(mdev, clock, ts); + mlx5_clock_unlock(clock); + + return err; +} + static struct timespec64 mlx5_ptp_gettimex_real_time(struct mlx5_core_dev *mdev, struct ptp_system_timestamp *sts) @@ -405,7 +498,8 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, struct mlx5_core_dev *mdev; u64 cycles, ns; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); if (mlx5_real_time_mode(mdev)) { *ts = mlx5_ptp_gettimex_real_time(mdev, sts); goto out; @@ -415,6 +509,7 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, ns = mlx5_timecounter_cyc2time(clock, cycles); *ts = ns_to_timespec64(ns); out: + mlx5_clock_unlock(clock); return 0; } @@ -445,14 +540,16 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) struct mlx5_timer *timer = &clock->timer; struct mlx5_core_dev *mdev; unsigned long flags; + int err = 0; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); if (mlx5_modify_mtutc_allowed(mdev)) { - int err = mlx5_ptp_adjtime_real_time(mdev, delta); + err = mlx5_ptp_adjtime_real_time(mdev, delta); if (err) - return err; + goto unlock; } write_seqlock_irqsave(&clock->lock, flags); @@ -460,17 +557,23 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); - return 0; +unlock: + mlx5_clock_unlock(clock); + return err; } static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta) { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct mlx5_core_dev *mdev; + int err; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + err = mlx5_ptp_adjtime_real_time(mdev, delta); + mlx5_clock_unlock(clock); - return mlx5_ptp_adjtime_real_time(mdev, delta); + return err; } static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm) @@ -499,15 +602,17 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) struct mlx5_timer *timer = &clock->timer; struct mlx5_core_dev *mdev; unsigned long flags; + int err = 0; u32 mult; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); if (mlx5_modify_mtutc_allowed(mdev)) { - int err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm); + err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm); if (err) - return err; + goto unlock; } mult = (u32)adjust_by_scaled_ppm(timer->nominal_c_mult, scaled_ppm); @@ -517,8 +622,11 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) timer->cycles.mult = mult; mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); + ptp_schedule_worker(clock->ptp, timer->overflow_period); - return 0; +unlock: + mlx5_clock_unlock(clock); + return err; } static int mlx5_extts_configure(struct ptp_clock_info *ptp, @@ -527,25 +635,14 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); - struct mlx5_core_dev *mdev = - container_of(clock, struct mlx5_core_dev, clock); u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + struct mlx5_core_dev *mdev; u32 field_select = 0; u8 pin_mode = 0; u8 pattern = 0; int pin = -1; int err = 0; - if (!MLX5_PPS_CAP(mdev)) - return -EOPNOTSUPP; - - /* Reject requests with unsupported flags */ - if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | - PTP_RISING_EDGE | - PTP_FALLING_EDGE | - PTP_STRICT_FLAGS)) - return -EOPNOTSUPP; - /* Reject requests to enable time stamping on both edges. */ if ((rq->extts.flags & PTP_STRICT_FLAGS) && (rq->extts.flags & PTP_ENABLE_FEATURE) && @@ -569,6 +666,14 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, field_select = MLX5_MTPPS_FS_ENABLE; } + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + + if (!MLX5_PPS_CAP(mdev)) { + err = -EOPNOTSUPP; + goto unlock; + } + MLX5_SET(mtpps_reg, in, pin, pin); MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); MLX5_SET(mtpps_reg, in, pattern, pattern); @@ -577,15 +682,23 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, err = mlx5_set_mtpps(mdev, in, sizeof(in)); if (err) - return err; + goto unlock; + + err = mlx5_set_mtppse(mdev, pin, 0, MLX5_EVENT_MODE_REPETETIVE & on); + if (err) + goto unlock; + + clock->pps_info.pin_armed[pin] = on; + clock_priv(clock)->event_mdev = mdev; - return mlx5_set_mtppse(mdev, pin, 0, - MLX5_EVENT_MODE_REPETETIVE & on); +unlock: + mlx5_clock_unlock(clock); + return err; } static u64 find_target_cycles(struct mlx5_core_dev *mdev, s64 target_ns) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; u64 cycles_now, cycles_delta; u64 nsec_now, nsec_delta; struct mlx5_timer *timer; @@ -644,7 +757,7 @@ static int mlx5_perout_conf_out_pulse_duration(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq, u32 *out_pulse_duration_ns) { - struct mlx5_pps *pps_info = &mdev->clock.pps_info; + struct mlx5_pps *pps_info = &mdev->clock->pps_info; u32 out_pulse_duration; struct timespec64 ts; @@ -677,7 +790,7 @@ static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clo u32 *field_select, u32 *out_pulse_duration_ns, u64 *period, u64 *time_stamp) { - struct mlx5_pps *pps_info = &mdev->clock.pps_info; + struct mlx5_pps *pps_info = &mdev->clock->pps_info; struct ptp_clock_time *time = &rq->perout.start; struct timespec64 ts; @@ -700,38 +813,24 @@ static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clo return 0; } -static bool mlx5_perout_verify_flags(struct mlx5_core_dev *mdev, unsigned int flags) -{ - return ((!mlx5_npps_real_time_supported(mdev) && flags) || - (mlx5_npps_real_time_supported(mdev) && flags & ~PTP_PEROUT_DUTY_CYCLE)); -} - static int mlx5_perout_configure(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on) { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); - struct mlx5_core_dev *mdev = - container_of(clock, struct mlx5_core_dev, clock); - bool rt_mode = mlx5_real_time_mode(mdev); u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; u32 out_pulse_duration_ns = 0; + struct mlx5_core_dev *mdev; u32 field_select = 0; u64 npps_period = 0; u64 time_stamp = 0; u8 pin_mode = 0; u8 pattern = 0; + bool rt_mode; int pin = -1; int err = 0; - if (!MLX5_PPS_CAP(mdev)) - return -EOPNOTSUPP; - - /* Reject requests with unsupported flags */ - if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) - return -EOPNOTSUPP; - if (rq->perout.index >= clock->ptp_info.n_pins) return -EINVAL; @@ -740,14 +839,23 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, if (pin < 0) return -EBUSY; - if (on) { - bool rt_mode = mlx5_real_time_mode(mdev); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + rt_mode = mlx5_real_time_mode(mdev); + if (!MLX5_PPS_CAP(mdev)) { + err = -EOPNOTSUPP; + goto unlock; + } + + if (on) { pin_mode = MLX5_PIN_MODE_OUT; pattern = MLX5_OUT_PATTERN_PERIODIC; - if (rt_mode && rq->perout.start.sec > U32_MAX) - return -EINVAL; + if (rt_mode && rq->perout.start.sec > U32_MAX) { + err = -EINVAL; + goto unlock; + } field_select |= MLX5_MTPPS_FS_PIN_MODE | MLX5_MTPPS_FS_PATTERN | @@ -760,7 +868,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, else err = perout_conf_1pps(mdev, rq, &time_stamp, rt_mode); if (err) - return err; + goto unlock; } MLX5_SET(mtpps_reg, in, pin, pin); @@ -773,13 +881,16 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, MLX5_SET(mtpps_reg, in, out_pulse_duration_ns, out_pulse_duration_ns); err = mlx5_set_mtpps(mdev, in, sizeof(in)); if (err) - return err; + goto unlock; if (rt_mode) - return 0; + goto unlock; - return mlx5_set_mtppse(mdev, pin, 0, - MLX5_EVENT_MODE_REPETETIVE & on); + err = mlx5_set_mtppse(mdev, pin, 0, MLX5_EVENT_MODE_REPETETIVE & on); + +unlock: + mlx5_clock_unlock(clock); + return err; } static int mlx5_pps_configure(struct ptp_clock_info *ptp, @@ -852,6 +963,7 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = { .settime64 = mlx5_ptp_settime, .enable = NULL, .verify = NULL, + .do_aux_work = mlx5_timestamp_overflow, }; static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin, @@ -865,10 +977,8 @@ static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin, mtpps_size, MLX5_REG_MTPPS, 0, 0); } -static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) +static int mlx5_get_pps_pin_mode(struct mlx5_core_dev *mdev, u8 pin) { - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); - u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {}; u8 mode; int err; @@ -887,8 +997,9 @@ static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) return PTP_PF_NONE; } -static void mlx5_init_pin_config(struct mlx5_clock *clock) +static void mlx5_init_pin_config(struct mlx5_core_dev *mdev) { + struct mlx5_clock *clock = mdev->clock; int i; if (!clock->ptp_info.n_pins) @@ -904,20 +1015,27 @@ static void mlx5_init_pin_config(struct mlx5_clock *clock) clock->ptp_info.verify = mlx5_ptp_verify; clock->ptp_info.pps = 1; + clock->ptp_info.supported_extts_flags = PTP_RISING_EDGE | + PTP_FALLING_EDGE | + PTP_STRICT_FLAGS; + + if (mlx5_npps_real_time_supported(mdev)) + clock->ptp_info.supported_perout_flags = PTP_PEROUT_DUTY_CYCLE; + for (i = 0; i < clock->ptp_info.n_pins; i++) { snprintf(clock->ptp_info.pin_config[i].name, sizeof(clock->ptp_info.pin_config[i].name), "mlx5_pps%d", i); clock->ptp_info.pin_config[i].index = i; - clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i); + clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(mdev, i); clock->ptp_info.pin_config[i].chan = 0; } } static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + struct mlx5_clock *clock = mdev->clock; mlx5_query_mtpps(mdev, out, sizeof(out)); @@ -967,16 +1085,16 @@ static u64 perout_conf_next_event_timer(struct mlx5_core_dev *mdev, static int mlx5_pps_event(struct notifier_block *nb, unsigned long type, void *data) { - struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb); + struct mlx5_clock_dev_state *clock_state = mlx5_nb_cof(nb, struct mlx5_clock_dev_state, + pps_nb); + struct mlx5_core_dev *mdev = clock_state->mdev; + struct mlx5_clock *clock = mdev->clock; struct ptp_clock_event ptp_event; struct mlx5_eqe *eqe = data; int pin = eqe->data.pps.pin; - struct mlx5_core_dev *mdev; unsigned long flags; u64 ns; - mdev = container_of(clock, struct mlx5_core_dev, clock); - switch (clock->ptp_info.pin_config[pin].func) { case PTP_PF_EXTTS: ptp_event.index = pin; @@ -996,11 +1114,15 @@ static int mlx5_pps_event(struct notifier_block *nb, ptp_clock_event(clock->ptp, &ptp_event); break; case PTP_PF_PEROUT: + if (clock->shared) { + mlx5_core_warn(mdev, " Received unexpected PPS out event\n"); + break; + } ns = perout_conf_next_event_timer(mdev, clock); write_seqlock_irqsave(&clock->lock, flags); clock->pps_info.start[pin] = ns; write_sequnlock_irqrestore(&clock->lock, flags); - schedule_work(&clock->pps_info.out_work); + schedule_work(&clock_state->out_work); break; default: mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n", @@ -1012,7 +1134,7 @@ static int mlx5_pps_event(struct notifier_block *nb, static void mlx5_timecounter_init(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; struct mlx5_timer *timer = &clock->timer; u32 dev_freq; @@ -1028,10 +1150,10 @@ static void mlx5_timecounter_init(struct mlx5_core_dev *mdev) ktime_to_ns(ktime_get_real())); } -static void mlx5_init_overflow_period(struct mlx5_clock *clock) +static void mlx5_init_overflow_period(struct mlx5_core_dev *mdev) { - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); struct mlx5_ib_clock_info *clock_info = mdev->clock_info; + struct mlx5_clock *clock = mdev->clock; struct mlx5_timer *timer = &clock->timer; u64 overflow_cycles; u64 frac = 0; @@ -1052,12 +1174,11 @@ static void mlx5_init_overflow_period(struct mlx5_clock *clock) do_div(ns, NSEC_PER_SEC / HZ); timer->overflow_period = ns; - INIT_DELAYED_WORK(&timer->overflow_work, mlx5_timestamp_overflow); - if (timer->overflow_period) - schedule_delayed_work(&timer->overflow_work, 0); - else + if (!timer->overflow_period) { + timer->overflow_period = HZ; mlx5_core_warn(mdev, - "invalid overflow period, overflow_work is not scheduled\n"); + "invalid overflow period, overflow_work is scheduled once per second\n"); + } if (clock_info) clock_info->overflow_period = timer->overflow_period; @@ -1065,7 +1186,7 @@ static void mlx5_init_overflow_period(struct mlx5_clock *clock) static void mlx5_init_clock_info(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; struct mlx5_ib_clock_info *info; struct mlx5_timer *timer; @@ -1088,7 +1209,7 @@ static void mlx5_init_clock_info(struct mlx5_core_dev *mdev) static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {}; u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; u8 log_max_freq_adjustment = 0; @@ -1107,7 +1228,7 @@ static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev) static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; /* Configure the PHC */ clock->ptp_info = mlx5_ptp_clock_info; @@ -1123,38 +1244,30 @@ static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) mlx5_timecounter_init(mdev); mlx5_init_clock_info(mdev); - mlx5_init_overflow_period(clock); + mlx5_init_overflow_period(mdev); if (mlx5_real_time_mode(mdev)) { struct timespec64 ts; ktime_get_real_ts64(&ts); - mlx5_ptp_settime(&clock->ptp_info, &ts); + mlx5_clock_settime(mdev, clock, &ts); } } static void mlx5_init_pps(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; - if (!MLX5_PPS_CAP(mdev)) return; mlx5_get_pps_caps(mdev); - mlx5_init_pin_config(clock); + mlx5_init_pin_config(mdev); } -void mlx5_init_clock(struct mlx5_core_dev *mdev) +static void mlx5_init_clock_dev(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; - - if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) { - mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n"); - return; - } + struct mlx5_clock *clock = mdev->clock; seqlock_init(&clock->lock); - INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); /* Initialize the device clock */ mlx5_init_timer_clock(mdev); @@ -1163,33 +1276,27 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) mlx5_init_pps(mdev); clock->ptp = ptp_clock_register(&clock->ptp_info, - &mdev->pdev->dev); + clock->shared ? NULL : &mdev->pdev->dev); if (IS_ERR(clock->ptp)) { - mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n", + mlx5_core_warn(mdev, "%sptp_clock_register failed %ld\n", + clock->shared ? "shared clock " : "", PTR_ERR(clock->ptp)); clock->ptp = NULL; } - MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT); - mlx5_eq_notifier_register(mdev, &clock->pps_nb); + if (clock->ptp) + ptp_schedule_worker(clock->ptp, 0); } -void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) +static void mlx5_destroy_clock_dev(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; - - if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) - return; + struct mlx5_clock *clock = mdev->clock; - mlx5_eq_notifier_unregister(mdev, &clock->pps_nb); if (clock->ptp) { ptp_clock_unregister(clock->ptp); clock->ptp = NULL; } - cancel_work_sync(&clock->pps_info.out_work); - cancel_delayed_work_sync(&clock->timer.overflow_work); - if (mdev->clock_info) { free_page((unsigned long)mdev->clock_info); mdev->clock_info = NULL; @@ -1197,3 +1304,248 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) kfree(clock->ptp_info.pin_config); } + +static void mlx5_clock_free(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock_priv *cpriv = clock_priv(mdev->clock); + + mlx5_destroy_clock_dev(mdev); + mutex_destroy(&cpriv->lock); + kfree(cpriv); + mdev->clock = NULL; +} + +static int mlx5_clock_alloc(struct mlx5_core_dev *mdev, bool shared) +{ + struct mlx5_clock_priv *cpriv; + struct mlx5_clock *clock; + + cpriv = kzalloc(sizeof(*cpriv), GFP_KERNEL); + if (!cpriv) + return -ENOMEM; + + mutex_init(&cpriv->lock); + cpriv->mdev = mdev; + clock = &cpriv->clock; + clock->shared = shared; + mdev->clock = clock; + mlx5_clock_lock(clock); + mlx5_init_clock_dev(mdev); + mlx5_clock_unlock(clock); + + if (!clock->shared) + return 0; + + if (!clock->ptp) { + mlx5_core_warn(mdev, "failed to create ptp dev shared by multiple functions"); + mlx5_clock_free(mdev); + return -EINVAL; + } + + return 0; +} + +static void mlx5_shared_clock_register(struct mlx5_core_dev *mdev, u64 key) +{ + struct mlx5_core_dev *peer_dev, *next = NULL; + struct mlx5_devcom_comp_dev *pos; + + mdev->clock_state->compdev = mlx5_devcom_register_component(mdev->priv.devc, + MLX5_DEVCOM_SHARED_CLOCK, + key, NULL, mdev); + if (IS_ERR(mdev->clock_state->compdev)) + return; + + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) { + if (peer_dev->clock) { + next = peer_dev; + break; + } + } + + if (next) { + mdev->clock = next->clock; + /* clock info is shared among all the functions using the same clock */ + mdev->clock_info = next->clock_info; + } else { + mlx5_clock_alloc(mdev, true); + } + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); + + if (!mdev->clock) { + mlx5_devcom_unregister_component(mdev->clock_state->compdev); + mdev->clock_state->compdev = NULL; + } +} + +static void mlx5_shared_clock_unregister(struct mlx5_core_dev *mdev) +{ + struct mlx5_core_dev *peer_dev, *next = NULL; + struct mlx5_clock *clock = mdev->clock; + struct mlx5_devcom_comp_dev *pos; + + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) { + if (peer_dev->clock && peer_dev != mdev) { + next = peer_dev; + break; + } + } + + if (next) { + struct mlx5_clock_priv *cpriv = clock_priv(clock); + + mlx5_clock_lock(clock); + if (mdev == cpriv->mdev) + cpriv->mdev = next; + mlx5_clock_unlock(clock); + } else { + mlx5_clock_free(mdev); + } + + mdev->clock = NULL; + mdev->clock_info = NULL; + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); + + mlx5_devcom_unregister_component(mdev->clock_state->compdev); +} + +static void mlx5_clock_arm_pps_in_event(struct mlx5_clock *clock, + struct mlx5_core_dev *new_mdev, + struct mlx5_core_dev *old_mdev) +{ + struct ptp_clock_info *ptp_info = &clock->ptp_info; + struct mlx5_clock_priv *cpriv = clock_priv(clock); + int i; + + for (i = 0; i < ptp_info->n_pins; i++) { + if (ptp_info->pin_config[i].func != PTP_PF_EXTTS || + !clock->pps_info.pin_armed[i]) + continue; + + if (new_mdev) { + mlx5_set_mtppse(new_mdev, i, 0, MLX5_EVENT_MODE_REPETETIVE); + cpriv->event_mdev = new_mdev; + } else { + cpriv->event_mdev = NULL; + } + + if (old_mdev) + mlx5_set_mtppse(old_mdev, i, 0, MLX5_EVENT_MODE_DISABLE); + } +} + +void mlx5_clock_load(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = mdev->clock; + struct mlx5_clock_priv *cpriv; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + INIT_WORK(&mdev->clock_state->out_work, mlx5_pps_out); + MLX5_NB_INIT(&mdev->clock_state->pps_nb, mlx5_pps_event, PPS_EVENT); + mlx5_eq_notifier_register(mdev, &mdev->clock_state->pps_nb); + + if (!clock->shared) { + mlx5_clock_arm_pps_in_event(clock, mdev, NULL); + return; + } + + cpriv = clock_priv(clock); + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_clock_lock(clock); + if (mdev == cpriv->mdev && mdev != cpriv->event_mdev) + mlx5_clock_arm_pps_in_event(clock, mdev, cpriv->event_mdev); + mlx5_clock_unlock(clock); + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); +} + +void mlx5_clock_unload(struct mlx5_core_dev *mdev) +{ + struct mlx5_core_dev *peer_dev, *next = NULL; + struct mlx5_clock *clock = mdev->clock; + struct mlx5_devcom_comp_dev *pos; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + if (!clock->shared) { + mlx5_clock_arm_pps_in_event(clock, NULL, mdev); + goto out; + } + + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) { + if (peer_dev->clock && peer_dev != mdev) { + next = peer_dev; + break; + } + } + + mlx5_clock_lock(clock); + if (mdev == clock_priv(clock)->event_mdev) + mlx5_clock_arm_pps_in_event(clock, next, mdev); + mlx5_clock_unlock(clock); + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); + +out: + mlx5_eq_notifier_unregister(mdev, &mdev->clock_state->pps_nb); + cancel_work_sync(&mdev->clock_state->out_work); +} + +static struct mlx5_clock null_clock; + +int mlx5_init_clock(struct mlx5_core_dev *mdev) +{ + u8 identity[MLX5_RT_CLOCK_IDENTITY_SIZE]; + struct mlx5_clock_dev_state *clock_state; + u64 key; + int err; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) { + mdev->clock = &null_clock; + mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n"); + return 0; + } + + clock_state = kzalloc(sizeof(*clock_state), GFP_KERNEL); + if (!clock_state) + return -ENOMEM; + clock_state->mdev = mdev; + mdev->clock_state = clock_state; + + if (MLX5_CAP_MCAM_REG3(mdev, mrtcq) && mlx5_real_time_mode(mdev)) { + if (mlx5_clock_identity_get(mdev, identity)) { + mlx5_core_warn(mdev, "failed to get rt clock identity, create ptp dev per function\n"); + } else { + memcpy(&key, &identity, sizeof(key)); + mlx5_shared_clock_register(mdev, key); + } + } + + if (!mdev->clock) { + err = mlx5_clock_alloc(mdev, false); + if (err) { + kfree(clock_state); + mdev->clock_state = NULL; + return err; + } + } + + return 0; +} + +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + if (mdev->clock->shared) + mlx5_shared_clock_unregister(mdev); + else + mlx5_clock_free(mdev); + kfree(mdev->clock_state); + mdev->clock_state = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h index bd95b9f8d143..c18a652c0faa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -33,6 +33,35 @@ #ifndef __LIB_CLOCK_H__ #define __LIB_CLOCK_H__ +#include <linux/ptp_clock_kernel.h> + +#define MAX_PIN_NUM 8 +struct mlx5_pps { + u8 pin_caps[MAX_PIN_NUM]; + u64 start[MAX_PIN_NUM]; + u8 enabled; + u64 min_npps_period; + u64 min_out_pulse_duration_ns; + bool pin_armed[MAX_PIN_NUM]; +}; + +struct mlx5_timer { + struct cyclecounter cycles; + struct timecounter tc; + u32 nominal_c_mult; + unsigned long overflow_period; +}; + +struct mlx5_clock { + seqlock_t lock; + struct hwtstamp_config hwtstamp_config; + struct ptp_clock *ptp; + struct ptp_clock_info ptp_info; + struct mlx5_pps pps_info; + struct mlx5_timer timer; + bool shared; +}; + static inline bool mlx5_is_real_time_rq(struct mlx5_core_dev *mdev) { u8 rq_ts_format_cap = MLX5_CAP_GEN(mdev, rq_ts_format); @@ -54,12 +83,14 @@ static inline bool mlx5_is_real_time_sq(struct mlx5_core_dev *mdev) typedef ktime_t (*cqe_ts_to_ns)(struct mlx5_clock *, u64); #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) -void mlx5_init_clock(struct mlx5_core_dev *mdev); +int mlx5_init_clock(struct mlx5_core_dev *mdev); void mlx5_cleanup_clock(struct mlx5_core_dev *mdev); +void mlx5_clock_load(struct mlx5_core_dev *mdev); +void mlx5_clock_unload(struct mlx5_core_dev *mdev); static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { - return mdev->clock.ptp ? ptp_clock_index(mdev->clock.ptp) : -1; + return mdev->clock->ptp ? ptp_clock_index(mdev->clock->ptp) : -1; } static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, @@ -87,8 +118,10 @@ static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock, return ns_to_ktime(time); } #else -static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {} +static inline int mlx5_init_clock(struct mlx5_core_dev *mdev) { return 0; } static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {} +static inline void mlx5_clock_load(struct mlx5_core_dev *mdev) {} +static inline void mlx5_clock_unload(struct mlx5_core_dev *mdev) {} static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { return -1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index d58032dd0df7..c79699b94a02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -11,6 +11,7 @@ enum mlx5_devcom_component { MLX5_DEVCOM_MPV, MLX5_DEVCOM_HCA_PORTS, MLX5_DEVCOM_SD_GROUP, + MLX5_DEVCOM_SHARED_CLOCK, MLX5_DEVCOM_NUM_COMPONENTS, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c index 7c5516b0a844..8115071c34a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c @@ -30,7 +30,7 @@ struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev) dm = kzalloc(sizeof(*dm), GFP_KERNEL); if (!dm) - return ERR_PTR(-ENOMEM); + return NULL; spin_lock_init(&dm->lock); @@ -96,7 +96,7 @@ err_modify_hdr: err_steering: kfree(dm); - return ERR_PTR(-ENOMEM); + return NULL; } void mlx5_dm_cleanup(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index a80ecb672f33..0a3c260af377 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -161,7 +161,8 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? FT_TBL_SZ : POOL_NEXT_SIZE; + sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? + FT_TBL_SZ : MLX5_FS_MAX_POOL_SIZE; ft_attr.max_fte = sz; /* We use chains_default_ft(chains) as the table's next_ft till @@ -196,6 +197,11 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, ns = mlx5_get_flow_namespace(chains->dev, chains->ns); } + if (!ns) { + mlx5_core_warn(chains->dev, "Failed to get flow namespace\n"); + return ERR_PTR(-EOPNOTSUPP); + } + ft_attr.autogroup.num_reserved_entries = 2; ft_attr.autogroup.max_num_groups = chains->group_num; ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); @@ -699,7 +705,7 @@ mlx5_chains_create_global_table(struct mlx5_fs_chains *chains) goto err_ignore; } - chain = mlx5_chains_get_chain_range(chains), + chain = mlx5_chains_get_chain_range(chains); prio = mlx5_chains_get_prio_range(chains); level = mlx5_chains_get_level_range(chains); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c index 9f13cea16446..ca9ecec358b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -61,6 +61,25 @@ static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc) } } +static const char *mlx5_traffic_types_names[MLX5_NUM_TT] = { + [MLX5_TT_IPV4_TCP] = "TT_IPV4_TCP", + [MLX5_TT_IPV6_TCP] = "TT_IPV6_TCP", + [MLX5_TT_IPV4_UDP] = "TT_IPV4_UDP", + [MLX5_TT_IPV6_UDP] = "TT_IPV6_UDP", + [MLX5_TT_IPV4_IPSEC_AH] = "TT_IPV4_IPSEC_AH", + [MLX5_TT_IPV6_IPSEC_AH] = "TT_IPV6_IPSEC_AH", + [MLX5_TT_IPV4_IPSEC_ESP] = "TT_IPV4_IPSEC_ESP", + [MLX5_TT_IPV6_IPSEC_ESP] = "TT_IPV6_IPSEC_ESP", + [MLX5_TT_IPV4] = "TT_IPV4", + [MLX5_TT_IPV6] = "TT_IPV6", + [MLX5_TT_ANY] = "TT_ANY" +}; + +const char *mlx5_ttc_get_name(enum mlx5_traffic_types tt) +{ + return mlx5_traffic_types_names[tt]; +} + struct mlx5_etype_proto { u16 etype; u8 proto; @@ -618,10 +637,6 @@ struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, bool use_l4_type; int err; - ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); - if (!ttc) - return ERR_PTR(-ENOMEM); - switch (params->ns_type) { case MLX5_FLOW_NAMESPACE_PORT_SEL: use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) && @@ -635,7 +650,16 @@ struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, return ERR_PTR(-EINVAL); } + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + ns = mlx5_get_flow_namespace(dev, params->ns_type); + if (!ns) { + kvfree(ttc); + return ERR_PTR(-EOPNOTSUPP); + } + groups = use_l4_type ? &inner_ttc_groups[TTC_GROUPS_USE_L4_TYPE] : &inner_ttc_groups[TTC_GROUPS_DEFAULT]; @@ -691,10 +715,6 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, bool use_l4_type; int err; - ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); - if (!ttc) - return ERR_PTR(-ENOMEM); - switch (params->ns_type) { case MLX5_FLOW_NAMESPACE_PORT_SEL: use_l4_type = MLX5_CAP_GEN_2(dev, pcc_ifa2) && @@ -708,7 +728,16 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, return ERR_PTR(-EINVAL); } + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + ns = mlx5_get_flow_namespace(dev, params->ns_type); + if (!ns) { + kvfree(ttc); + return ERR_PTR(-EOPNOTSUPP); + } + groups = use_l4_type ? &ttc_groups[TTC_GROUPS_USE_L4_TYPE] : &ttc_groups[TTC_GROUPS_DEFAULT]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h index 92eea6bea310..ab9434fe3ae6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h @@ -49,6 +49,7 @@ struct ttc_params { struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT]; }; +const char *mlx5_ttc_get_name(enum mlx5_traffic_types tt); struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc); struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c index 4a078113e292..762d55ba9e51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c @@ -497,7 +497,7 @@ static int macsec_fs_tx_create(struct mlx5_macsec_fs *macsec_fs) memset(&dest, 0, sizeof(struct mlx5_flow_destination)); memset(&flow_act, 0, sizeof(flow_act)); dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(tx_tables->check_miss_rule_counter); + dest.counter = tx_tables->check_miss_rule_counter; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; rule = mlx5_add_flow_rules(tx_tables->ft_check, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { @@ -519,7 +519,7 @@ static int macsec_fs_tx_create(struct mlx5_macsec_fs *macsec_fs) flow_act.flags = FLOW_ACT_NO_APPEND; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | MLX5_FLOW_CONTEXT_ACTION_COUNT; dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(tx_tables->check_rule_counter); + dest.counter = tx_tables->check_rule_counter; rule = mlx5_add_flow_rules(tx_tables->ft_check, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -1200,7 +1200,7 @@ static int macsec_fs_rx_create_check_decap_rule(struct mlx5_macsec_fs *macsec_fs flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | MLX5_FLOW_CONTEXT_ACTION_COUNT; roce_dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - roce_dest[dstn].counter_id = mlx5_fc_id(rx_tables->check_rule_counter); + roce_dest[dstn].counter = rx_tables->check_rule_counter; rule = mlx5_add_flow_rules(rx_tables->ft_check, spec, flow_act, roce_dest, dstn + 1); if (IS_ERR(rule)) { @@ -1592,7 +1592,7 @@ static int macsec_fs_rx_create(struct mlx5_macsec_fs *macsec_fs) memset(&flow_act, 0, sizeof(flow_act)); dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(rx_tables->check_miss_rule_counter); + dest.counter = rx_tables->check_miss_rule_counter; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; rule = mlx5_add_flow_rules(rx_tables->ft_check, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 37d5f445598c..b111ccd03b02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -45,11 +45,6 @@ int mlx5_crdump_enable(struct mlx5_core_dev *dev); void mlx5_crdump_disable(struct mlx5_core_dev *dev); int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data); -static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) -{ - return devlink_net(priv_to_devlink(dev)); -} - static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev *mdev) { return mdev->mlx5e_res.uplink_netdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c new file mode 100644 index 000000000000..47fe215f66bf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/st.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved + */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/device.h> + +#include "mlx5_core.h" +#include "lib/mlx5.h" + +struct mlx5_st_idx_data { + refcount_t usecount; + u16 tag; +}; + +struct mlx5_st { + /* serialize access upon alloc/free flows */ + struct mutex lock; + struct xa_limit index_limit; + struct xarray idx_xa; /* key == index, value == struct mlx5_st_idx_data */ +}; + +struct mlx5_st *mlx5_st_create(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + struct mlx5_st *st; + u16 num_entries; + int ret; + + if (!MLX5_CAP_GEN(dev, mkey_pcie_tph)) + return NULL; + +#ifdef CONFIG_MLX5_SF + if (mlx5_core_is_sf(dev)) + return dev->priv.parent_mdev->st; +#endif + + /* Checking whether the device is capable */ + if (!pdev->tph_cap) + return NULL; + + num_entries = pcie_tph_get_st_table_size(pdev); + /* We need a reserved entry for non TPH cases */ + if (num_entries < 2) + return NULL; + + /* The OS doesn't support ST */ + ret = pcie_enable_tph(pdev, PCI_TPH_ST_DS_MODE); + if (ret) + return NULL; + + st = kzalloc(sizeof(*st), GFP_KERNEL); + if (!st) + goto end; + + mutex_init(&st->lock); + xa_init_flags(&st->idx_xa, XA_FLAGS_ALLOC); + /* entry 0 is reserved for non TPH cases */ + st->index_limit.min = MLX5_MKC_PCIE_TPH_NO_STEERING_TAG_INDEX + 1; + st->index_limit.max = num_entries - 1; + + return st; + +end: + pcie_disable_tph(dev->pdev); + return NULL; +} + +void mlx5_st_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_st *st = dev->st; + + if (mlx5_core_is_sf(dev) || !st) + return; + + pcie_disable_tph(dev->pdev); + WARN_ON_ONCE(!xa_empty(&st->idx_xa)); + kfree(st); +} + +int mlx5_st_alloc_index(struct mlx5_core_dev *dev, enum tph_mem_type mem_type, + unsigned int cpu_uid, u16 *st_index) +{ + struct mlx5_st_idx_data *idx_data; + struct mlx5_st *st = dev->st; + unsigned long index; + u32 xa_id; + u16 tag; + int ret; + + if (!st) + return -EOPNOTSUPP; + + ret = pcie_tph_get_cpu_st(dev->pdev, mem_type, cpu_uid, &tag); + if (ret) + return ret; + + mutex_lock(&st->lock); + + xa_for_each(&st->idx_xa, index, idx_data) { + if (tag == idx_data->tag) { + refcount_inc(&idx_data->usecount); + *st_index = index; + goto end; + } + } + + idx_data = kzalloc(sizeof(*idx_data), GFP_KERNEL); + if (!idx_data) { + ret = -ENOMEM; + goto end; + } + + refcount_set(&idx_data->usecount, 1); + idx_data->tag = tag; + + ret = xa_alloc(&st->idx_xa, &xa_id, idx_data, st->index_limit, GFP_KERNEL); + if (ret) + goto clean_idx_data; + + ret = pcie_tph_set_st_entry(dev->pdev, xa_id, tag); + if (ret) + goto clean_idx_xa; + + *st_index = xa_id; + goto end; + +clean_idx_xa: + xa_erase(&st->idx_xa, xa_id); +clean_idx_data: + kfree(idx_data); +end: + mutex_unlock(&st->lock); + return ret; +} +EXPORT_SYMBOL_GPL(mlx5_st_alloc_index); + +int mlx5_st_dealloc_index(struct mlx5_core_dev *dev, u16 st_index) +{ + struct mlx5_st_idx_data *idx_data; + struct mlx5_st *st = dev->st; + int ret = 0; + + if (!st) + return -EOPNOTSUPP; + + mutex_lock(&st->lock); + idx_data = xa_load(&st->idx_xa, st_index); + if (WARN_ON_ONCE(!idx_data)) { + ret = -EINVAL; + goto end; + } + + if (refcount_dec_and_test(&idx_data->usecount)) { + xa_erase(&st->idx_xa, st_index); + /* We leave PCI config space as was before, no mkey will refer to it */ + } + +end: + mutex_unlock(&st->lock); + return ret; +} +EXPORT_SYMBOL_GPL(mlx5_st_dealloc_index); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 220a9ac75c8b..8517d4e5d5ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -368,6 +368,10 @@ int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_ty u16 opmod = (cap_type << 1) | (cap_mode & 0x01); int err; + if (WARN_ON(!dev->caps.hca[cap_type])) + /* this cap_type must be added to mlx5_hca_caps_alloc() */ + return -EINVAL; + memset(in, 0, sizeof(in)); out = kzalloc(out_sz, GFP_KERNEL); if (!out) @@ -664,6 +668,10 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_current_uc_list, ilog2(max_uc_list)); + /* enable absolute native port num */ + if (MLX5_CAP_GEN_MAX(dev, abs_native_port_num)) + MLX5_SET(cmd_hca_cap, set_hca_cap, abs_native_port_num, 1); + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); } @@ -941,9 +949,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, mlx5_pci_vsc_init(dev); - err = pci_enable_ptm(pdev, NULL); - if (err) - mlx5_core_info(dev, "PTM is not supported by PCIe\n"); + pci_enable_ptm(pdev, NULL); return 0; @@ -1032,7 +1038,11 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) mlx5_init_reserved_gids(dev); - mlx5_init_clock(dev); + err = mlx5_init_clock(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize hardware clock\n"); + goto err_tables_cleanup; + } dev->vxlan = mlx5_vxlan_create(dev); dev->geneve = mlx5_geneve_create(dev); @@ -1040,7 +1050,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) err = mlx5_init_rl_table(dev); if (err) { mlx5_core_err(dev, "Failed to init rate limiting\n"); - goto err_tables_cleanup; + goto err_clock_cleanup; } err = mlx5_mpfs_init(dev); @@ -1092,9 +1102,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) } dev->dm = mlx5_dm_create(dev); - if (IS_ERR(dev->dm)) - mlx5_core_warn(dev, "Failed to init device memory %ld\n", PTR_ERR(dev->dm)); - + dev->st = mlx5_st_create(dev); dev->tracer = mlx5_fw_tracer_create(dev); dev->hv_vhca = mlx5_hv_vhca_create(dev); dev->rsc_dump = mlx5_rsc_dump_create(dev); @@ -1117,10 +1125,11 @@ err_mpfs_cleanup: mlx5_mpfs_cleanup(dev); err_rl_cleanup: mlx5_cleanup_rl_table(dev); -err_tables_cleanup: +err_clock_cleanup: mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_clock(dev); +err_tables_cleanup: mlx5_cleanup_reserved_gids(dev); mlx5_cq_debugfs_cleanup(dev); mlx5_fw_reset_cleanup(dev); @@ -1142,6 +1151,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_rsc_dump_destroy(dev); mlx5_hv_vhca_destroy(dev->hv_vhca); mlx5_fw_tracer_destroy(dev->tracer); + mlx5_st_destroy(dev); mlx5_dm_cleanup(dev); mlx5_fs_core_free(dev); mlx5_sf_table_cleanup(dev); @@ -1199,24 +1209,24 @@ static int mlx5_function_enable(struct mlx5_core_dev *dev, bool boot, u64 timeou dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP); - mlx5_start_health_poll(dev); - err = mlx5_core_enable_hca(dev, 0); if (err) { mlx5_core_err(dev, "enable hca failed\n"); - goto stop_health_poll; + goto err_cmd_cleanup; } + mlx5_start_health_poll(dev); + err = mlx5_core_set_issi(dev); if (err) { mlx5_core_err(dev, "failed to set issi\n"); - goto err_disable_hca; + goto stop_health_poll; } err = mlx5_satisfy_startup_pages(dev, 1); if (err) { mlx5_core_err(dev, "failed to allocate boot pages\n"); - goto err_disable_hca; + goto stop_health_poll; } err = mlx5_tout_query_dtor(dev); @@ -1229,10 +1239,9 @@ static int mlx5_function_enable(struct mlx5_core_dev *dev, bool boot, u64 timeou reclaim_boot_pages: mlx5_reclaim_startup_pages(dev); -err_disable_hca: - mlx5_core_disable_hca(dev, 0); stop_health_poll: mlx5_stop_health_poll(dev, boot); + mlx5_core_disable_hca(dev, 0); err_cmd_cleanup: mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_disable(dev); @@ -1243,8 +1252,8 @@ err_cmd_cleanup: static void mlx5_function_disable(struct mlx5_core_dev *dev, bool boot) { mlx5_reclaim_startup_pages(dev); - mlx5_core_disable_hca(dev, 0); mlx5_stop_health_poll(dev, boot); + mlx5_core_disable_hca(dev, 0); mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); mlx5_cmd_disable(dev); } @@ -1353,6 +1362,8 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_eq_table; } + mlx5_clock_load(dev); + err = mlx5_fw_tracer_init(dev->tracer); if (err) { mlx5_core_err(dev, "Failed to init FW tracer %d\n", err); @@ -1436,6 +1447,7 @@ err_fpga_start: mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_reset_events_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); + mlx5_clock_unload(dev); mlx5_eq_table_destroy(dev); err_eq_table: mlx5_irq_table_destroy(dev); @@ -1462,6 +1474,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_reset_events_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); + mlx5_clock_unload(dev); mlx5_eq_table_destroy(dev); mlx5_irq_table_destroy(dev); mlx5_pagealloc_stop(dev); @@ -1788,6 +1801,8 @@ static const int types[] = { MLX5_CAP_MACSEC, MLX5_CAP_ADV_VIRTUALIZATION, MLX5_CAP_CRYPTO, + MLX5_CAP_SHAMPO, + MLX5_CAP_ADV_RDMA, }; static void mlx5_hca_caps_free(struct mlx5_core_dev *dev) @@ -2241,6 +2256,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */ { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */ { PCI_VDEVICE(MELLANOX, 0x1025) }, /* ConnectX-9 */ + { PCI_VDEVICE(MELLANOX, 0x1027) }, /* ConnectX-10 */ { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 99de67c3aa74..9d3504f5abfa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -114,6 +114,26 @@ struct mlx5_cmd_alias_obj_create_attr { u8 access_key[ACCESS_KEY_LEN]; }; +struct mlx5_port_eth_proto { + u32 cap; + u32 admin; + u32 oper; +}; + +struct mlx5_module_eeprom_query_params { + u16 size; + u16 offset; + u16 i2c_address; + u32 page; + u32 bank; + u32 module_number; +}; + +struct mlx5_link_info { + u32 speed; + u32 lanes; +}; + static inline void mlx5_printk(struct mlx5_core_dev *dev, int level, const char *format, ...) { struct device *device = dev->device; @@ -280,6 +300,89 @@ int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev); void mlx5_dm_cleanup(struct mlx5_core_dev *dev); +#ifdef CONFIG_PCIE_TPH +struct mlx5_st *mlx5_st_create(struct mlx5_core_dev *dev); +void mlx5_st_destroy(struct mlx5_core_dev *dev); +#else +static inline struct mlx5_st * +mlx5_st_create(struct mlx5_core_dev *dev) { return NULL; } +static inline void mlx5_st_destroy(struct mlx5_core_dev *dev) { return; } +#endif + +void mlx5_toggle_port_link(struct mlx5_core_dev *dev); +int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status status); +int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, + enum mlx5_port_status *status); +int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration); + +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); +int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause); +int mlx5_query_port_pause(struct mlx5_core_dev *dev, + u32 *rx_pause, u32 *tx_pause); + +int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx); +int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, + u8 *pfc_en_rx); + +int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev, + u16 stall_critical_watermark, + u16 stall_minor_watermark); +int mlx5_query_port_stall_watermark(struct mlx5_core_dev *dev, + u16 *stall_critical_watermark, + u16 *stall_minor_watermark); + +int mlx5_max_tc(struct mlx5_core_dev *mdev); +int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc); +int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, + u8 prio, u8 *tc); +int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group); +int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, + u8 tc, u8 *tc_group); +int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw); +int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, + u8 tc, u8 *bw_pct); +int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_unit); +int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, + u8 *max_bw_value, + u8 *max_bw_unit); +int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode); +int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode); + +int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen); +int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen); +int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable); +void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, + bool *enabled); +int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, + u16 offset, u16 size, u8 *data); +int +mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, + struct mlx5_module_eeprom_query_params *params, + u8 *data); + +int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out); +int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in); +int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state); +int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state); +int mlx5_query_port_buffer_ownership(struct mlx5_core_dev *mdev, + u8 *buffer_ownership); +int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio); +int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio); + +int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, + struct mlx5_port_eth_proto *eproto); +bool mlx5_ptys_ext_supported(struct mlx5_core_dev *mdev); +const struct mlx5_link_info *mlx5_port_ptys2info(struct mlx5_core_dev *mdev, + u32 eth_proto_oper, + bool force_legacy); +u32 mlx5_port_info2linkmodes(struct mlx5_core_dev *mdev, + struct mlx5_link_info *info, + bool force_legacy); +int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); + #define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) && \ MLX5_CAP_GEN((mdev), pps_modify) && \ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ @@ -346,6 +449,8 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap #define mlx5_vport_get_other_func_general_cap(dev, vport, out) \ mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL) +int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id); + static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); @@ -401,4 +506,17 @@ static inline int mlx5_max_eq_cap_get(const struct mlx5_core_dev *dev) return 1 << MLX5_CAP_GEN(dev, log_max_eq); } + +static inline bool mlx5_pcie_cong_event_supported(struct mlx5_core_dev *dev) +{ + u64 features = MLX5_CAP_GEN_2_64(dev, general_obj_types_127_64); + + if (!(features & MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT)) + return false; + + if (dev->sd) + return false; + + return true; +} #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h index 0881e961d8b1..586688da9940 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h @@ -10,12 +10,15 @@ struct mlx5_irq; struct cpu_rmap; +struct mlx5_irq_pool; int mlx5_irq_table_init(struct mlx5_core_dev *dev); void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); int mlx5_irq_table_create(struct mlx5_core_dev *dev); void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev); +struct mlx5_irq_pool * +mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev *dev); int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table); int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table); struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev); @@ -38,7 +41,6 @@ struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq); int mlx5_irq_get_index(struct mlx5_irq *irq); int mlx5_irq_get_irq(const struct mlx5_irq *irq); -struct mlx5_irq_pool; #ifdef CONFIG_MLX5_SF struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, struct cpumask *used_cpus, u16 vecidx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 972e8e9df585..9bc9bd83c232 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -291,7 +291,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr, u32 function) static int alloc_system_page(struct mlx5_core_dev *dev, u32 function) { struct device *device = mlx5_core_dma_dev(dev); - int nid = dev_to_node(device); + int nid = dev->priv.numa_node; struct page *page; u64 zero_addr = 1; u64 addr; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 7db9cab9bedf..692ef9c2f729 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -148,7 +148,7 @@ out: * Free the IRQ and other resources such as rmap from the system. * BUT doesn't free or remove reference from mlx5. * This function is very important for the shutdown flow, where we need to - * cleanup system resoruces but keep mlx5 objects alive, + * cleanup system resources but keep mlx5 objects alive, * see mlx5_irq_table_free_irqs(). */ static void mlx5_system_free_irq(struct mlx5_irq *irq) @@ -378,6 +378,11 @@ int mlx5_irq_get_index(struct mlx5_irq *irq) return irq->map.index; } +struct mlx5_irq_pool *mlx5_irq_get_pool(struct mlx5_irq *irq) +{ + return irq->pool; +} + /* irq_pool API */ /* requesting an irq from a given pool according to given index */ @@ -405,18 +410,20 @@ static struct mlx5_irq_pool *sf_ctrl_irq_pool_get(struct mlx5_irq_table *irq_tab return irq_table->sf_ctrl_pool; } -static struct mlx5_irq_pool *sf_irq_pool_get(struct mlx5_irq_table *irq_table) +static struct mlx5_irq_pool * +sf_comp_irq_pool_get(struct mlx5_irq_table *irq_table) { return irq_table->sf_comp_pool; } -struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev) +struct mlx5_irq_pool * +mlx5_irq_table_get_comp_irq_pool(struct mlx5_core_dev *dev) { struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev); struct mlx5_irq_pool *pool = NULL; if (mlx5_core_is_sf(dev)) - pool = sf_irq_pool_get(irq_table); + pool = sf_comp_irq_pool_get(irq_table); /* In some configs, there won't be a pool of SFs IRQs. Hence, returning * the PF IRQs pool in case the SF pool doesn't exist. @@ -463,26 +470,32 @@ void mlx5_ctrl_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *ctrl_irq) struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev) { struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev); - struct irq_affinity_desc af_desc; + struct irq_affinity_desc *af_desc; struct mlx5_irq *irq; - cpumask_copy(&af_desc.mask, cpu_online_mask); - af_desc.is_managed = false; + af_desc = kvzalloc(sizeof(*af_desc), GFP_KERNEL); + if (!af_desc) + return ERR_PTR(-ENOMEM); + + cpumask_copy(&af_desc->mask, cpu_online_mask); + af_desc->is_managed = false; if (!mlx5_irq_pool_is_sf_pool(pool)) { /* In case we are allocating a control IRQ from a pci device's pool. * This can happen also for a SF if the SFs pool is empty. */ if (!pool->xa_num_irqs.max) { - cpumask_clear(&af_desc.mask); + cpumask_clear(&af_desc->mask); /* In case we only have a single IRQ for PF/VF */ - cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask); + cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc->mask); } /* Allocate the IRQ in index 0. The vector was already allocated */ - irq = irq_pool_request_vector(pool, 0, &af_desc, NULL); + irq = irq_pool_request_vector(pool, 0, af_desc, NULL); } else { - irq = mlx5_irq_affinity_request(dev, pool, &af_desc); + irq = mlx5_irq_affinity_request(dev, pool, af_desc); } + kvfree(af_desc); + return irq; } @@ -541,16 +554,26 @@ struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu, { struct mlx5_irq_table *table = mlx5_irq_table_get(dev); struct mlx5_irq_pool *pool = table->pcif_pool; - struct irq_affinity_desc af_desc; int offset = MLX5_IRQ_VEC_COMP_BASE; + struct irq_affinity_desc *af_desc; + struct mlx5_irq *irq; + + af_desc = kvzalloc(sizeof(*af_desc), GFP_KERNEL); + if (!af_desc) + return ERR_PTR(-ENOMEM); if (!pool->xa_num_irqs.max) offset = 0; - af_desc.is_managed = false; - cpumask_clear(&af_desc.mask); - cpumask_set_cpu(cpu, &af_desc.mask); - return mlx5_irq_request(dev, vecidx + offset, &af_desc, rmap); + af_desc->is_managed = false; + cpumask_clear(&af_desc->mask); + cpumask_set_cpu(cpu, &af_desc->mask); + + irq = mlx5_irq_request(dev, vecidx + offset, af_desc, rmap); + + kvfree(af_desc); + + return irq; } static struct mlx5_irq_pool * @@ -572,7 +595,7 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name, pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ; pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ; mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d", - name, size, start); + name ? name : "mlx5_pcif_pool", size, start); return pool; } @@ -581,7 +604,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool) struct mlx5_irq *irq; unsigned long index; - /* There are cases in which we are destrying the irq_table before + /* There are cases in which we are destroying the irq_table before * freeing all the IRQs, fast teardown for example. Hence, free the irqs * which might not have been freed. */ @@ -610,7 +633,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec, if (!mlx5_sf_max_functions(dev)) return 0; if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) { - mlx5_core_dbg(dev, "Not enught IRQs for SFs. SF may run at lower performance\n"); + mlx5_core_dbg(dev, "Not enough IRQs for SFs. SF may run at lower performance\n"); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h index c4d377f8df30..cc064425fe16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h @@ -28,7 +28,6 @@ struct mlx5_irq_pool { struct mlx5_core_dev *dev; }; -struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev); static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool) { return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf")); @@ -40,5 +39,6 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, int mlx5_irq_get_locked(struct mlx5_irq *irq); int mlx5_irq_read_locked(struct mlx5_irq *irq); int mlx5_irq_put(struct mlx5_irq *irq); +struct mlx5_irq_pool *mlx5_irq_get_pool(struct mlx5_irq *irq); #endif /* __PCI_IRQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 50931584132b..2d7adf7444ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -196,7 +196,6 @@ void mlx5_toggle_port_link(struct mlx5_core_dev *dev) if (ps == MLX5_PORT_UP) mlx5_set_port_admin_status(dev, MLX5_PORT_UP); } -EXPORT_SYMBOL_GPL(mlx5_toggle_port_link); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status) @@ -210,7 +209,6 @@ int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PAOS, 0, 1); } -EXPORT_SYMBOL_GPL(mlx5_set_port_admin_status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status) @@ -227,7 +225,6 @@ int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, *status = MLX5_GET(paos_reg, out, admin_status); return 0; } -EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status); static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu, u16 *max_mtu, u16 *oper_mtu, u8 port) @@ -257,7 +254,6 @@ int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port) return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PMTU, 0, 1); } -EXPORT_SYMBOL_GPL(mlx5_set_port_mtu); void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port) @@ -447,7 +443,6 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, return mlx5_query_mcia(dev, &query, data); } -EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom); int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, struct mlx5_module_eeprom_query_params *params, @@ -467,7 +462,6 @@ int mlx5_query_module_eeprom_by_page(struct mlx5_core_dev *dev, return mlx5_query_mcia(dev, params, data); } -EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom_by_page); static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, int pvlc_size, u8 local_port) @@ -518,7 +512,6 @@ int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PFCC, 0, 1); } -EXPORT_SYMBOL_GPL(mlx5_set_port_pause); int mlx5_query_port_pause(struct mlx5_core_dev *dev, u32 *rx_pause, u32 *tx_pause) @@ -538,7 +531,6 @@ int mlx5_query_port_pause(struct mlx5_core_dev *dev, return 0; } -EXPORT_SYMBOL_GPL(mlx5_query_port_pause); int mlx5_set_port_stall_watermark(struct mlx5_core_dev *dev, u16 stall_critical_watermark, @@ -597,7 +589,6 @@ int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx) return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PFCC, 0, 1); } -EXPORT_SYMBOL_GPL(mlx5_set_port_pfc); int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) { @@ -616,7 +607,6 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) return 0; } -EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); int mlx5_max_tc(struct mlx5_core_dev *mdev) { @@ -667,7 +657,6 @@ int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc) return 0; } -EXPORT_SYMBOL_GPL(mlx5_set_port_prio_tc); int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, u8 prio, u8 *tc) @@ -689,7 +678,6 @@ int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev, return err; } -EXPORT_SYMBOL_GPL(mlx5_query_port_prio_tc); static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, int inlen) @@ -728,7 +716,6 @@ int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group) return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); } -EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group); int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, u8 tc, u8 *tc_group) @@ -749,7 +736,6 @@ int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev, return 0; } -EXPORT_SYMBOL_GPL(mlx5_query_port_tc_group); int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw) { @@ -763,7 +749,6 @@ int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw) return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); } -EXPORT_SYMBOL_GPL(mlx5_set_port_tc_bw_alloc); int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 tc, u8 *bw_pct) @@ -784,7 +769,6 @@ int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev, return 0; } -EXPORT_SYMBOL_GPL(mlx5_query_port_tc_bw_alloc); int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, u8 *max_bw_value, @@ -808,7 +792,6 @@ int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, return mlx5_set_port_qetcr_reg(mdev, in, sizeof(in)); } -EXPORT_SYMBOL_GPL(mlx5_modify_port_ets_rate_limit); int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, u8 *max_bw_value, @@ -834,7 +817,6 @@ int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev, return 0; } -EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit); int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode) { @@ -845,7 +827,6 @@ int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode) MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode); return mlx5_cmd_exec_in(mdev, set_wol_rol, in); } -EXPORT_SYMBOL_GPL(mlx5_set_port_wol); int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) { @@ -860,7 +841,6 @@ int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) return err; } -EXPORT_SYMBOL_GPL(mlx5_query_port_wol); int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen) { @@ -988,6 +968,26 @@ int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state) return err; } +int mlx5_query_port_buffer_ownership(struct mlx5_core_dev *mdev, + u8 *buffer_ownership) +{ + u32 out[MLX5_ST_SZ_DW(pfcc_reg)] = {}; + int err; + + if (!MLX5_CAP_PCAM_FEATURE(mdev, buffer_ownership)) { + *buffer_ownership = MLX5_BUF_OWNERSHIP_UNKNOWN; + return 0; + } + + err = mlx5_query_pfcc_reg(mdev, out, sizeof(out)); + if (err) + return err; + + *buffer_ownership = MLX5_GET(pfcc_reg, out, buf_ownership); + + return 0; +} + int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio) { int sz = MLX5_ST_SZ_BYTES(qpdpm_reg); @@ -1058,53 +1058,57 @@ out: } /* speed in units of 1Mb */ -static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = { - [MLX5E_1000BASE_CX_SGMII] = 1000, - [MLX5E_1000BASE_KX] = 1000, - [MLX5E_10GBASE_CX4] = 10000, - [MLX5E_10GBASE_KX4] = 10000, - [MLX5E_10GBASE_KR] = 10000, - [MLX5E_20GBASE_KR2] = 20000, - [MLX5E_40GBASE_CR4] = 40000, - [MLX5E_40GBASE_KR4] = 40000, - [MLX5E_56GBASE_R4] = 56000, - [MLX5E_10GBASE_CR] = 10000, - [MLX5E_10GBASE_SR] = 10000, - [MLX5E_10GBASE_ER] = 10000, - [MLX5E_40GBASE_SR4] = 40000, - [MLX5E_40GBASE_LR4] = 40000, - [MLX5E_50GBASE_SR2] = 50000, - [MLX5E_100GBASE_CR4] = 100000, - [MLX5E_100GBASE_SR4] = 100000, - [MLX5E_100GBASE_KR4] = 100000, - [MLX5E_100GBASE_LR4] = 100000, - [MLX5E_100BASE_TX] = 100, - [MLX5E_1000BASE_T] = 1000, - [MLX5E_10GBASE_T] = 10000, - [MLX5E_25GBASE_CR] = 25000, - [MLX5E_25GBASE_KR] = 25000, - [MLX5E_25GBASE_SR] = 25000, - [MLX5E_50GBASE_CR2] = 50000, - [MLX5E_50GBASE_KR2] = 50000, +static const struct mlx5_link_info mlx5e_link_info[MLX5E_LINK_MODES_NUMBER] = { + [MLX5E_1000BASE_CX_SGMII] = {.speed = 1000, .lanes = 1}, + [MLX5E_1000BASE_KX] = {.speed = 1000, .lanes = 1}, + [MLX5E_10GBASE_CX4] = {.speed = 10000, .lanes = 4}, + [MLX5E_10GBASE_KX4] = {.speed = 10000, .lanes = 4}, + [MLX5E_10GBASE_KR] = {.speed = 10000, .lanes = 1}, + [MLX5E_20GBASE_KR2] = {.speed = 20000, .lanes = 2}, + [MLX5E_40GBASE_CR4] = {.speed = 40000, .lanes = 4}, + [MLX5E_40GBASE_KR4] = {.speed = 40000, .lanes = 4}, + [MLX5E_56GBASE_R4] = {.speed = 56000, .lanes = 4}, + [MLX5E_10GBASE_CR] = {.speed = 10000, .lanes = 1}, + [MLX5E_10GBASE_SR] = {.speed = 10000, .lanes = 1}, + [MLX5E_10GBASE_ER] = {.speed = 10000, .lanes = 1}, + [MLX5E_40GBASE_SR4] = {.speed = 40000, .lanes = 4}, + [MLX5E_40GBASE_LR4] = {.speed = 40000, .lanes = 4}, + [MLX5E_50GBASE_SR2] = {.speed = 50000, .lanes = 2}, + [MLX5E_100GBASE_CR4] = {.speed = 100000, .lanes = 4}, + [MLX5E_100GBASE_SR4] = {.speed = 100000, .lanes = 4}, + [MLX5E_100GBASE_KR4] = {.speed = 100000, .lanes = 4}, + [MLX5E_100GBASE_LR4] = {.speed = 100000, .lanes = 4}, + [MLX5E_100BASE_TX] = {.speed = 100, .lanes = 1}, + [MLX5E_1000BASE_T] = {.speed = 1000, .lanes = 1}, + [MLX5E_10GBASE_T] = {.speed = 10000, .lanes = 1}, + [MLX5E_25GBASE_CR] = {.speed = 25000, .lanes = 1}, + [MLX5E_25GBASE_KR] = {.speed = 25000, .lanes = 1}, + [MLX5E_25GBASE_SR] = {.speed = 25000, .lanes = 1}, + [MLX5E_50GBASE_CR2] = {.speed = 50000, .lanes = 2}, + [MLX5E_50GBASE_KR2] = {.speed = 50000, .lanes = 2}, }; -static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { - [MLX5E_SGMII_100M] = 100, - [MLX5E_1000BASE_X_SGMII] = 1000, - [MLX5E_5GBASE_R] = 5000, - [MLX5E_10GBASE_XFI_XAUI_1] = 10000, - [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = 40000, - [MLX5E_25GAUI_1_25GBASE_CR_KR] = 25000, - [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = 50000, - [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = 50000, - [MLX5E_CAUI_4_100GBASE_CR4_KR4] = 100000, - [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = 100000, - [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = 200000, - [MLX5E_400GAUI_8_400GBASE_CR8] = 400000, - [MLX5E_100GAUI_1_100GBASE_CR_KR] = 100000, - [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000, - [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000, - [MLX5E_800GAUI_8_800GBASE_CR8_KR8] = 800000, +static const struct mlx5_link_info +mlx5e_ext_link_info[MLX5E_EXT_LINK_MODES_NUMBER] = { + [MLX5E_SGMII_100M] = {.speed = 100, .lanes = 1}, + [MLX5E_1000BASE_X_SGMII] = {.speed = 1000, .lanes = 1}, + [MLX5E_5GBASE_R] = {.speed = 5000, .lanes = 1}, + [MLX5E_10GBASE_XFI_XAUI_1] = {.speed = 10000, .lanes = 1}, + [MLX5E_40GBASE_XLAUI_4_XLPPI_4] = {.speed = 40000, .lanes = 4}, + [MLX5E_25GAUI_1_25GBASE_CR_KR] = {.speed = 25000, .lanes = 1}, + [MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2] = {.speed = 50000, .lanes = 2}, + [MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR] = {.speed = 50000, .lanes = 1}, + [MLX5E_CAUI_4_100GBASE_CR4_KR4] = {.speed = 100000, .lanes = 4}, + [MLX5E_100GAUI_2_100GBASE_CR2_KR2] = {.speed = 100000, .lanes = 2}, + [MLX5E_200GAUI_4_200GBASE_CR4_KR4] = {.speed = 200000, .lanes = 4}, + [MLX5E_400GAUI_8_400GBASE_CR8] = {.speed = 400000, .lanes = 8}, + [MLX5E_100GAUI_1_100GBASE_CR_KR] = {.speed = 100000, .lanes = 1}, + [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = {.speed = 200000, .lanes = 2}, + [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = {.speed = 400000, .lanes = 4}, + [MLX5E_800GAUI_8_800GBASE_CR8_KR8] = {.speed = 800000, .lanes = 8}, + [MLX5E_200GAUI_1_200GBASE_CR1_KR1] = {.speed = 200000, .lanes = 1}, + [MLX5E_400GAUI_2_400GBASE_CR2_KR2] = {.speed = 400000, .lanes = 2}, + [MLX5E_800GAUI_4_800GBASE_CR4_KR4] = {.speed = 800000, .lanes = 4}, }; int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, @@ -1142,54 +1146,61 @@ bool mlx5_ptys_ext_supported(struct mlx5_core_dev *mdev) return !!eproto.cap; } -static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev, - const u32 **arr, u32 *size, - bool force_legacy) +static void mlx5e_port_get_link_mode_info_arr(struct mlx5_core_dev *mdev, + const struct mlx5_link_info **arr, + u32 *size, + bool force_legacy) { bool ext = force_legacy ? false : mlx5_ptys_ext_supported(mdev); - *size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) : - ARRAY_SIZE(mlx5e_link_speed); - *arr = ext ? mlx5e_ext_link_speed : mlx5e_link_speed; + *size = ext ? ARRAY_SIZE(mlx5e_ext_link_info) : + ARRAY_SIZE(mlx5e_link_info); + *arr = ext ? mlx5e_ext_link_info : mlx5e_link_info; } -u32 mlx5_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper, - bool force_legacy) +const struct mlx5_link_info *mlx5_port_ptys2info(struct mlx5_core_dev *mdev, + u32 eth_proto_oper, + bool force_legacy) { unsigned long temp = eth_proto_oper; - const u32 *table; - u32 speed = 0; + const struct mlx5_link_info *table; u32 max_size; int i; - mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy); + mlx5e_port_get_link_mode_info_arr(mdev, &table, &max_size, + force_legacy); i = find_first_bit(&temp, max_size); if (i < max_size) - speed = table[i]; - return speed; + return &table[i]; + + return NULL; } -u32 mlx5_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, - bool force_legacy) +u32 mlx5_port_info2linkmodes(struct mlx5_core_dev *mdev, + struct mlx5_link_info *info, + bool force_legacy) { + const struct mlx5_link_info *table; u32 link_modes = 0; - const u32 *table; u32 max_size; int i; - mlx5e_port_get_speed_arr(mdev, &table, &max_size, force_legacy); + mlx5e_port_get_link_mode_info_arr(mdev, &table, &max_size, + force_legacy); for (i = 0; i < max_size; ++i) { - if (table[i] == speed) - link_modes |= MLX5E_PROT_MASK(i); + if (table[i].speed == info->speed) { + if (!info->lanes || table[i].lanes == info->lanes) + link_modes |= MLX5E_PROT_MASK(i); + } } return link_modes; } int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) { + const struct mlx5_link_info *table; struct mlx5_port_eth_proto eproto; u32 max_speed = 0; - const u32 *table; u32 max_size; bool ext; int err; @@ -1200,10 +1211,10 @@ int mlx5_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) if (err) return err; - mlx5e_port_get_speed_arr(mdev, &table, &max_size, false); + mlx5e_port_get_link_mode_info_arr(mdev, &table, &max_size, false); for (i = 0; i < max_size; ++i) if (eproto.cap & MLX5E_PROT_MASK(i)) - max_speed = max(max_speed, table[i]); + max_speed = max(max_speed, table[i].speed); *speed = max_speed; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index a42f6cd99b74..5c552b71e371 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -118,8 +118,8 @@ static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid * static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev) { + u8 mac[ETH_ALEN] = {}; union ib_gid gid; - u8 mac[ETH_ALEN]; mlx5_rdma_make_default_gid(dev, &gid); return mlx5_core_roce_gid_set(dev, 0, @@ -140,17 +140,17 @@ void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) mlx5_nic_vport_disable_roce(dev); } -void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { int err; if (!MLX5_CAP_GEN(dev, roce)) - return; + return 0; err = mlx5_nic_vport_enable_roce(dev); if (err) { mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err); - return; + return err; } err = mlx5_rdma_add_roce_addr(dev); @@ -165,10 +165,11 @@ void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) goto del_roce_addr; } - return; + return err; del_roce_addr: mlx5_rdma_del_roce_addr(dev); disable_roce: mlx5_nic_vport_disable_roce(dev); + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h index 750cff2a71a4..3d9e76c3d42f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h @@ -8,12 +8,12 @@ #ifdef CONFIG_MLX5_ESWITCH -void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ -static inline void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) {} +static inline int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {} #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index e393391966e0..39a209b9b684 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -56,6 +56,8 @@ bool mlx5_qos_tsar_type_supported(struct mlx5_core_dev *dev, int type, u8 hierar return cap & TSAR_TYPE_CAP_MASK_ROUND_ROBIN; case TSAR_ELEMENT_TSAR_TYPE_ETS: return cap & TSAR_TYPE_CAP_MASK_ETS; + case TSAR_ELEMENT_TSAR_TYPE_TC_ARB: + return cap & TSAR_TYPE_CAP_MASK_TC_ARB; } return false; @@ -87,6 +89,8 @@ bool mlx5_qos_element_type_supported(struct mlx5_core_dev *dev, int type, u8 hie return cap & ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; case SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP: return cap & ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT: + return cap & ELEMENT_TYPE_CAP_MASK_RATE_LIMIT; } return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index a96be98be032..0864ba625c07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -257,6 +257,7 @@ static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, return 0; esw_err: + mlx5_sf_function_id_erase(table, sf); mlx5_sf_free(table, sf); return err; } @@ -284,7 +285,7 @@ mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_ NL_SET_ERR_MSG_MOD(extack, "External controller is unsupported"); return -EOPNOTSUPP; } - if (new_attr->pfnum != mlx5_get_dev_index(dev)) { + if (new_attr->pfnum != PCI_FUNC(dev->pdev->devfn)) { NL_SET_ERR_MSG_MOD(extack, "Invalid pfnum supplied"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c index a897cdc60fdb..396804369b00 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c @@ -11,31 +11,29 @@ /* This is the longest supported action sequence for FDB table: * DECAP, POP_VLAN, MODIFY, CTR, ASO, PUSH_VLAN, MODIFY, ENCAP, Term. */ -static const u32 action_order_arr[MLX5HWS_TABLE_TYPE_MAX][MLX5HWS_ACTION_TYP_MAX] = { - [MLX5HWS_TABLE_TYPE_FDB] = { - BIT(MLX5HWS_ACTION_TYP_REMOVE_HEADER) | - BIT(MLX5HWS_ACTION_TYP_REFORMAT_TNL_L2_TO_L2) | - BIT(MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2), - BIT(MLX5HWS_ACTION_TYP_POP_VLAN), - BIT(MLX5HWS_ACTION_TYP_POP_VLAN), - BIT(MLX5HWS_ACTION_TYP_MODIFY_HDR), - BIT(MLX5HWS_ACTION_TYP_PUSH_VLAN), - BIT(MLX5HWS_ACTION_TYP_PUSH_VLAN), - BIT(MLX5HWS_ACTION_TYP_INSERT_HEADER) | - BIT(MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2) | - BIT(MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3), - BIT(MLX5HWS_ACTION_TYP_CTR), - BIT(MLX5HWS_ACTION_TYP_TAG), - BIT(MLX5HWS_ACTION_TYP_ASO_METER), - BIT(MLX5HWS_ACTION_TYP_MODIFY_HDR), - BIT(MLX5HWS_ACTION_TYP_TBL) | - BIT(MLX5HWS_ACTION_TYP_VPORT) | - BIT(MLX5HWS_ACTION_TYP_DROP) | - BIT(MLX5HWS_ACTION_TYP_SAMPLER) | - BIT(MLX5HWS_ACTION_TYP_RANGE) | - BIT(MLX5HWS_ACTION_TYP_DEST_ARRAY), - BIT(MLX5HWS_ACTION_TYP_LAST), - }, +static const u32 action_order_arr[MLX5HWS_ACTION_TYP_MAX] = { + BIT(MLX5HWS_ACTION_TYP_REMOVE_HEADER) | + BIT(MLX5HWS_ACTION_TYP_REFORMAT_TNL_L2_TO_L2) | + BIT(MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2), + BIT(MLX5HWS_ACTION_TYP_POP_VLAN), + BIT(MLX5HWS_ACTION_TYP_POP_VLAN), + BIT(MLX5HWS_ACTION_TYP_MODIFY_HDR), + BIT(MLX5HWS_ACTION_TYP_PUSH_VLAN), + BIT(MLX5HWS_ACTION_TYP_PUSH_VLAN), + BIT(MLX5HWS_ACTION_TYP_INSERT_HEADER) | + BIT(MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2) | + BIT(MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3), + BIT(MLX5HWS_ACTION_TYP_CTR), + BIT(MLX5HWS_ACTION_TYP_TAG), + BIT(MLX5HWS_ACTION_TYP_ASO_METER), + BIT(MLX5HWS_ACTION_TYP_MODIFY_HDR), + BIT(MLX5HWS_ACTION_TYP_TBL) | + BIT(MLX5HWS_ACTION_TYP_VPORT) | + BIT(MLX5HWS_ACTION_TYP_DROP) | + BIT(MLX5HWS_ACTION_TYP_SAMPLER) | + BIT(MLX5HWS_ACTION_TYP_RANGE) | + BIT(MLX5HWS_ACTION_TYP_DEST_ARRAY), + BIT(MLX5HWS_ACTION_TYP_LAST), }; static const char * const mlx5hws_action_type_str[] = { @@ -74,6 +72,11 @@ enum mlx5hws_action_type mlx5hws_action_get_type(struct mlx5hws_action *action) return action->type; } +struct mlx5_core_dev *mlx5hws_action_get_dev(struct mlx5hws_action *action) +{ + return action->ctx->mdev; +} + static int hws_action_get_shared_stc_nic(struct mlx5hws_context *ctx, enum mlx5hws_context_shared_stc_type stc_type, u8 tbl_type) @@ -83,8 +86,8 @@ static int hws_action_get_shared_stc_nic(struct mlx5hws_context *ctx, int ret; mutex_lock(&ctx->ctrl_lock); - if (ctx->common_res[tbl_type].shared_stc[stc_type]) { - ctx->common_res[tbl_type].shared_stc[stc_type]->refcount++; + if (ctx->common_res.shared_stc[stc_type]) { + ctx->common_res.shared_stc[stc_type]->refcount++; mutex_unlock(&ctx->ctrl_lock); return 0; } @@ -124,8 +127,8 @@ static int hws_action_get_shared_stc_nic(struct mlx5hws_context *ctx, goto free_shared_stc; } - ctx->common_res[tbl_type].shared_stc[stc_type] = shared_stc; - ctx->common_res[tbl_type].shared_stc[stc_type]->refcount = 1; + ctx->common_res.shared_stc[stc_type] = shared_stc; + ctx->common_res.shared_stc[stc_type]->refcount = 1; mutex_unlock(&ctx->ctrl_lock); @@ -178,16 +181,16 @@ static void hws_action_put_shared_stc(struct mlx5hws_action *action, } mutex_lock(&ctx->ctrl_lock); - if (--ctx->common_res[tbl_type].shared_stc[stc_type]->refcount) { + if (--ctx->common_res.shared_stc[stc_type]->refcount) { mutex_unlock(&ctx->ctrl_lock); return; } - shared_stc = ctx->common_res[tbl_type].shared_stc[stc_type]; + shared_stc = ctx->common_res.shared_stc[stc_type]; mlx5hws_action_free_single_stc(ctx, tbl_type, &shared_stc->stc_chunk); kfree(shared_stc); - ctx->common_res[tbl_type].shared_stc[stc_type] = NULL; + ctx->common_res.shared_stc[stc_type] = NULL; mutex_unlock(&ctx->ctrl_lock); } @@ -206,10 +209,10 @@ bool mlx5hws_action_check_combo(struct mlx5hws_context *ctx, enum mlx5hws_action_type *user_actions, enum mlx5hws_table_type table_type) { - const u32 *order_arr = action_order_arr[table_type]; + const u32 *order_arr = action_order_arr; + bool valid_combo; u8 order_idx = 0; u8 user_idx = 0; - bool valid_combo; if (table_type >= MLX5HWS_TABLE_TYPE_MAX) { mlx5hws_err(ctx, "Invalid table_type %d", table_type); @@ -240,6 +243,7 @@ hws_action_fixup_stc_attr(struct mlx5hws_context *ctx, enum mlx5hws_table_type table_type, bool is_mirror) { + struct mlx5hws_pool *pool; bool use_fixup = false; u32 fw_tbl_type; u32 base_id; @@ -255,13 +259,11 @@ hws_action_fixup_stc_attr(struct mlx5hws_context *ctx, use_fixup = true; break; } + pool = stc_attr->ste_table.ste_pool; if (!is_mirror) - base_id = mlx5hws_pool_chunk_get_base_id(stc_attr->ste_table.ste_pool, - &stc_attr->ste_table.ste); + base_id = mlx5hws_pool_get_base_id(pool); else - base_id = - mlx5hws_pool_chunk_get_base_mirror_id(stc_attr->ste_table.ste_pool, - &stc_attr->ste_table.ste); + base_id = mlx5hws_pool_get_base_mirror_id(pool); *fixup_stc_attr = *stc_attr; fixup_stc_attr->ste_table.ste_obj_id = base_id; @@ -321,8 +323,8 @@ int mlx5hws_action_alloc_single_stc(struct mlx5hws_context *ctx, __must_hold(&ctx->ctrl_lock) { struct mlx5hws_cmd_stc_modify_attr cleanup_stc_attr = {0}; - struct mlx5hws_pool *stc_pool = ctx->stc_pool[table_type]; struct mlx5hws_cmd_stc_modify_attr fixup_stc_attr = {0}; + struct mlx5hws_pool *stc_pool = ctx->stc_pool; bool use_fixup; u32 obj_0_id; int ret; @@ -339,7 +341,7 @@ __must_hold(&ctx->ctrl_lock) if (!mlx5hws_context_cap_dynamic_reparse(ctx)) stc_attr->reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; - obj_0_id = mlx5hws_pool_chunk_get_base_id(stc_pool, stc); + obj_0_id = mlx5hws_pool_get_base_id(stc_pool); /* According to table/action limitation change the stc_attr */ use_fixup = hws_action_fixup_stc_attr(ctx, stc_attr, &fixup_stc_attr, table_type, false); @@ -355,7 +357,7 @@ __must_hold(&ctx->ctrl_lock) if (table_type == MLX5HWS_TABLE_TYPE_FDB) { u32 obj_1_id; - obj_1_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, stc); + obj_1_id = mlx5hws_pool_get_base_mirror_id(stc_pool); use_fixup = hws_action_fixup_stc_attr(ctx, stc_attr, &fixup_stc_attr, @@ -387,19 +389,19 @@ void mlx5hws_action_free_single_stc(struct mlx5hws_context *ctx, struct mlx5hws_pool_chunk *stc) __must_hold(&ctx->ctrl_lock) { - struct mlx5hws_pool *stc_pool = ctx->stc_pool[table_type]; struct mlx5hws_cmd_stc_modify_attr stc_attr = {0}; + struct mlx5hws_pool *stc_pool = ctx->stc_pool; u32 obj_id; /* Modify the STC not to point to an object */ stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_DROP; stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; stc_attr.stc_offset = stc->offset; - obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, stc); + obj_id = mlx5hws_pool_get_base_id(stc_pool); mlx5hws_cmd_stc_modify(ctx->mdev, obj_id, &stc_attr); if (table_type == MLX5HWS_TABLE_TYPE_FDB) { - obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, stc); + obj_id = mlx5hws_pool_get_base_mirror_id(stc_pool); mlx5hws_cmd_stc_modify(ctx->mdev, obj_id, &stc_attr); } @@ -473,6 +475,7 @@ static void hws_action_fill_stc_attr(struct mlx5hws_action *action, break; case MLX5HWS_ACTION_TYP_TBL: case MLX5HWS_ACTION_TYP_DEST_ARRAY: + case MLX5HWS_ACTION_TYP_SAMPLER: attr->action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_FT; attr->action_offset = MLX5HWS_ACTION_OFFSET_HIT; attr->dest_table_id = obj_id; @@ -561,7 +564,7 @@ hws_action_create_stcs(struct mlx5hws_action *action, u32 obj_id) if (action->flags & MLX5HWS_ACTION_FLAG_HWS_FDB) { ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, MLX5HWS_TABLE_TYPE_FDB, - &action->stc[MLX5HWS_TABLE_TYPE_FDB]); + &action->stc); if (ret) goto out_err; } @@ -585,7 +588,7 @@ hws_action_destroy_stcs(struct mlx5hws_action *action) if (action->flags & MLX5HWS_ACTION_FLAG_HWS_FDB) mlx5hws_action_free_single_stc(ctx, MLX5HWS_TABLE_TYPE_FDB, - &action->stc[MLX5HWS_TABLE_TYPE_FDB]); + &action->stc); mutex_unlock(&ctx->ctrl_lock); } @@ -1187,14 +1190,15 @@ hws_action_create_modify_header_hws(struct mlx5hws_action *action, struct mlx5hws_action_mh_pattern *pattern, u32 log_bulk_size) { + u16 num_actions, max_mh_actions = 0, hw_max_actions; struct mlx5hws_context *ctx = action->ctx; - u16 num_actions, max_mh_actions = 0; int i, ret, size_in_bytes; u32 pat_id, arg_id = 0; __be64 *new_pattern; size_t pat_max_sz; pat_max_sz = MLX5HWS_ARG_CHUNK_SIZE_MAX * MLX5HWS_ARG_DATA_SIZE; + hw_max_actions = pat_max_sz / MLX5HWS_MODIFY_ACTION_SIZE; size_in_bytes = pat_max_sz * sizeof(__be64); new_pattern = kcalloc(num_of_patterns, size_in_bytes, GFP_KERNEL); if (!new_pattern) @@ -1204,16 +1208,20 @@ hws_action_create_modify_header_hws(struct mlx5hws_action *action, for (i = 0; i < num_of_patterns; i++) { size_t new_num_actions; size_t cur_num_actions; - u32 nope_location; + u32 nop_locations; cur_num_actions = pattern[i].sz / MLX5HWS_MODIFY_ACTION_SIZE; - mlx5hws_pat_calc_nope(pattern[i].data, cur_num_actions, - pat_max_sz / MLX5HWS_MODIFY_ACTION_SIZE, - &new_num_actions, &nope_location, - &new_pattern[i * pat_max_sz]); + ret = mlx5hws_pat_calc_nop(pattern[i].data, cur_num_actions, + hw_max_actions, &new_num_actions, + &nop_locations, + &new_pattern[i * pat_max_sz]); + if (ret) { + mlx5hws_err(ctx, "Too many actions after nop insertion\n"); + goto free_new_pat; + } - action[i].modify_header.nope_locations = nope_location; + action[i].modify_header.nop_locations = nop_locations; action[i].modify_header.num_of_actions = new_num_actions; max_mh_actions = max(max_mh_actions, new_num_actions); @@ -1260,7 +1268,7 @@ hws_action_create_modify_header_hws(struct mlx5hws_action *action, MLX5_GET(set_action_in, pattern[i].data, action_type); } else { /* Multiple modify actions require a pattern */ - if (unlikely(action[i].modify_header.nope_locations)) { + if (unlikely(action[i].modify_header.nop_locations)) { size_t pattern_sz; pattern_sz = action[i].modify_header.num_of_actions * @@ -1350,20 +1358,17 @@ free_action: } struct mlx5hws_action * -mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, - size_t num_dest, +mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, size_t num_dest, struct mlx5hws_action_dest_attr *dests, - bool ignore_flow_level, - u32 flow_source, - u32 flags) + bool ignore_flow_level, u32 flags) { struct mlx5hws_cmd_set_fte_dest *dest_list = NULL; struct mlx5hws_cmd_ft_create_attr ft_attr = {0}; struct mlx5hws_cmd_set_fte_attr fte_attr = {0}; struct mlx5hws_cmd_forward_tbl *fw_island; struct mlx5hws_action *action; - u32 i /*, packet_reformat_id*/; - int ret; + int ret, last_dest_idx = -1; + u32 i; if (num_dest <= 1) { mlx5hws_err(ctx, "Action must have multiple dests\n"); @@ -1393,11 +1398,8 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, dest_list[i].destination_id = dests[i].dest->dest_obj.obj_id; fte_attr.action_flags |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; fte_attr.ignore_flow_level = ignore_flow_level; - /* ToDo: In SW steering we have a handling of 'go to WIRE' - * destination here by upper layer setting 'is_wire_ft' flag - * if the destination is wire. - * This is because uplink should be last dest in the list. - */ + if (dests[i].is_wire_ft) + last_dest_idx = i; break; case MLX5HWS_ACTION_TYP_VPORT: dest_list[i].destination_type = MLX5_FLOW_DESTINATION_TYPE_VPORT; @@ -1421,6 +1423,9 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, } } + if (last_dest_idx != -1) + swap(dest_list[last_dest_idx], dest_list[num_dest - 1]); + fte_attr.dests_num = num_dest; fte_attr.dests = dest_list; @@ -1576,17 +1581,15 @@ hws_action_create_dest_match_range_definer(struct mlx5hws_context *ctx) return definer; } -static struct mlx5hws_matcher_action_ste * +static struct mlx5hws_range_action_table * hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, struct mlx5hws_definer *definer, u32 miss_ft_id) { struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0}; - struct mlx5hws_action_default_stc *default_stc; - struct mlx5hws_matcher_action_ste *table_ste; + struct mlx5hws_range_action_table *table_ste; struct mlx5hws_pool_attr pool_attr = {0}; struct mlx5hws_pool *ste_pool, *stc_pool; - struct mlx5hws_pool_chunk *ste; u32 *rtc_0_id, *rtc_1_id; u32 obj_id; int ret; @@ -1605,7 +1608,6 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, pool_attr.table_type = MLX5HWS_TABLE_TYPE_FDB; pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; - pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL; pool_attr.alloc_log_sz = 1; table_ste->pool = mlx5hws_pool_create(ctx, &pool_attr); if (!table_ste->pool) { @@ -1617,8 +1619,6 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, rtc_0_id = &table_ste->rtc_0_id; rtc_1_id = &table_ste->rtc_1_id; ste_pool = table_ste->pool; - ste = &table_ste->ste; - ste->order = 1; rtc_attr.log_size = 0; rtc_attr.log_depth = 0; @@ -1630,18 +1630,16 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, rtc_attr.fw_gen_wqe = true; rtc_attr.is_scnd_range = true; - obj_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); + obj_id = mlx5hws_pool_get_base_id(ste_pool); rtc_attr.pd = ctx->pd_num; rtc_attr.ste_base = obj_id; - rtc_attr.ste_offset = ste->offset; rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx); rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(MLX5HWS_TABLE_TYPE_FDB, false); /* STC is a single resource (obj_id), use any STC for the ID */ - stc_pool = ctx->stc_pool[MLX5HWS_TABLE_TYPE_FDB]; - default_stc = ctx->common_res[MLX5HWS_TABLE_TYPE_FDB].default_stc; - obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit); + stc_pool = ctx->stc_pool; + obj_id = mlx5hws_pool_get_base_id(stc_pool); rtc_attr.stc_base = obj_id; ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_0_id); @@ -1651,11 +1649,11 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, } /* Create mirror RTC */ - obj_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); + obj_id = mlx5hws_pool_get_base_mirror_id(ste_pool); rtc_attr.ste_base = obj_id; rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(MLX5HWS_TABLE_TYPE_FDB, true); - obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, &default_stc->default_hit); + obj_id = mlx5hws_pool_get_base_mirror_id(stc_pool); rtc_attr.stc_base = obj_id; ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_1_id); @@ -1678,9 +1676,9 @@ free_ste: return NULL; } -static void -hws_action_destroy_dest_match_range_table(struct mlx5hws_context *ctx, - struct mlx5hws_matcher_action_ste *table_ste) +static void hws_action_destroy_dest_match_range_table( + struct mlx5hws_context *ctx, + struct mlx5hws_range_action_table *table_ste) { mutex_lock(&ctx->ctrl_lock); @@ -1692,12 +1690,11 @@ hws_action_destroy_dest_match_range_table(struct mlx5hws_context *ctx, mutex_unlock(&ctx->ctrl_lock); } -static int -hws_action_create_dest_match_range_fill_table(struct mlx5hws_context *ctx, - struct mlx5hws_matcher_action_ste *table_ste, - struct mlx5hws_action *hit_ft_action, - struct mlx5hws_definer *range_definer, - u32 min, u32 max) +static int hws_action_create_dest_match_range_fill_table( + struct mlx5hws_context *ctx, + struct mlx5hws_range_action_table *table_ste, + struct mlx5hws_action *hit_ft_action, + struct mlx5hws_definer *range_definer, u32 min, u32 max) { struct mlx5hws_wqe_gta_data_seg_ste match_wqe_data = {0}; struct mlx5hws_wqe_gta_data_seg_ste range_wqe_data = {0}; @@ -1731,7 +1728,7 @@ hws_action_create_dest_match_range_fill_table(struct mlx5hws_context *ctx, ste_attr.used_id_rtc_0 = &used_rtc_0_id; ste_attr.used_id_rtc_1 = &used_rtc_1_id; - common_res = &ctx->common_res[MLX5HWS_TABLE_TYPE_FDB]; + common_res = &ctx->common_res; /* init an empty match STE which will always hit */ ste_attr.wqe_ctrl = &wqe_ctrl; @@ -1750,7 +1747,7 @@ hws_action_create_dest_match_range_fill_table(struct mlx5hws_context *ctx, wqe_ctrl.stc_ix[MLX5HWS_ACTION_STC_IDX_CTRL] |= htonl(MLX5HWS_ACTION_STC_IDX_LAST_COMBO2 << 29); wqe_ctrl.stc_ix[MLX5HWS_ACTION_STC_IDX_HIT] = - htonl(hit_ft_action->stc[MLX5HWS_TABLE_TYPE_FDB].offset); + htonl(hit_ft_action->stc.offset); wqe_data_arr = (__force __be32 *)&range_wqe_data; @@ -1793,7 +1790,7 @@ mlx5hws_action_create_dest_match_range(struct mlx5hws_context *ctx, u32 min, u32 max, u32 flags) { struct mlx5hws_cmd_stc_modify_attr stc_attr = {0}; - struct mlx5hws_matcher_action_ste *table_ste; + struct mlx5hws_range_action_table *table_ste; struct mlx5hws_action *hit_ft_action; struct mlx5hws_definer *definer; struct mlx5hws_action *action; @@ -1838,12 +1835,11 @@ mlx5hws_action_create_dest_match_range(struct mlx5hws_context *ctx, stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE; stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; - stc_attr.ste_table.ste = table_ste->ste; stc_attr.ste_table.ste_pool = table_ste->pool; stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer; ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, MLX5HWS_TABLE_TYPE_FDB, - &action->stc[MLX5HWS_TABLE_TYPE_FDB]); + &action->stc); if (ret) goto error_unlock; @@ -1875,7 +1871,50 @@ struct mlx5hws_action * mlx5hws_action_create_flow_sampler(struct mlx5hws_context *ctx, u32 sampler_id, u32 flags) { - mlx5hws_err(ctx, "Flow sampler action - unsupported\n"); + struct mlx5hws_cmd_ft_create_attr ft_attr = {0}; + struct mlx5hws_cmd_set_fte_attr fte_attr = {0}; + struct mlx5hws_cmd_forward_tbl *fw_island; + struct mlx5hws_cmd_set_fte_dest dest; + struct mlx5hws_action *action; + int ret; + + if (flags != (MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED)) { + mlx5hws_err(ctx, "Unsupported flags for flow sampler\n"); + return NULL; + } + + ft_attr.type = FS_FT_FDB; + ft_attr.level = ctx->caps->fdb_ft.max_level - 1; + + dest.destination_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; + dest.destination_id = sampler_id; + + fte_attr.dests_num = 1; + fte_attr.dests = &dest; + fte_attr.action_flags = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + fte_attr.ignore_flow_level = 1; + + fw_island = mlx5hws_cmd_forward_tbl_create(ctx->mdev, &ft_attr, &fte_attr); + if (!fw_island) + return NULL; + + action = hws_action_create_generic(ctx, flags, + MLX5HWS_ACTION_TYP_SAMPLER); + if (!action) + goto destroy_fw_island; + + ret = hws_action_create_stcs(action, fw_island->ft_id); + if (ret) + goto free_action; + + action->flow_sampler.fw_island = fw_island; + + return action; + +free_action: + kfree(action); +destroy_fw_island: + mlx5hws_cmd_forward_tbl_destroy(ctx->mdev, fw_island); return NULL; } @@ -1914,6 +1953,11 @@ static void hws_action_destroy_hws(struct mlx5hws_action *action) } kfree(action->dest_array.dest_list); break; + case MLX5HWS_ACTION_TYP_SAMPLER: + hws_action_destroy_stcs(action); + mlx5hws_cmd_forward_tbl_destroy(action->ctx->mdev, + action->flow_sampler.fw_island); + break; case MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2: case MLX5HWS_ACTION_TYP_MODIFY_HDR: shared_arg = false; @@ -1970,8 +2014,8 @@ __must_hold(&ctx->ctrl_lock) struct mlx5hws_action_default_stc *default_stc; int ret; - if (ctx->common_res[tbl_type].default_stc) { - ctx->common_res[tbl_type].default_stc->refcount++; + if (ctx->common_res.default_stc) { + ctx->common_res.default_stc->refcount++; return 0; } @@ -2023,8 +2067,8 @@ __must_hold(&ctx->ctrl_lock) goto free_nop_dw7; } - ctx->common_res[tbl_type].default_stc = default_stc; - ctx->common_res[tbl_type].default_stc->refcount++; + ctx->common_res.default_stc = default_stc; + ctx->common_res.default_stc->refcount++; return 0; @@ -2046,9 +2090,7 @@ __must_hold(&ctx->ctrl_lock) { struct mlx5hws_action_default_stc *default_stc; - default_stc = ctx->common_res[tbl_type].default_stc; - - default_stc = ctx->common_res[tbl_type].default_stc; + default_stc = ctx->common_res.default_stc; if (--default_stc->refcount) return; @@ -2058,28 +2100,30 @@ __must_hold(&ctx->ctrl_lock) mlx5hws_action_free_single_stc(ctx, tbl_type, &default_stc->nop_dw5); mlx5hws_action_free_single_stc(ctx, tbl_type, &default_stc->nop_ctr); kfree(default_stc); - ctx->common_res[tbl_type].default_stc = NULL; + ctx->common_res.default_stc = NULL; } static void hws_action_modify_write(struct mlx5hws_send_engine *queue, u32 arg_idx, u8 *arg_data, u16 num_of_actions, - u32 nope_locations) + u32 nop_locations) { u8 *new_arg_data = NULL; int i, j; - if (unlikely(nope_locations)) { + if (unlikely(nop_locations)) { new_arg_data = kcalloc(num_of_actions, MLX5HWS_MODIFY_ACTION_SIZE, GFP_KERNEL); if (unlikely(!new_arg_data)) return; - for (i = 0, j = 0; i < num_of_actions; i++, j++) { - memcpy(&new_arg_data[j], arg_data, MLX5HWS_MODIFY_ACTION_SIZE); - if (BIT(i) & nope_locations) + for (i = 0, j = 0; j < num_of_actions; i++, j++) { + if (BIT(i) & nop_locations) j++; + memcpy(&new_arg_data[j * MLX5HWS_MODIFY_ACTION_SIZE], + &arg_data[i * MLX5HWS_MODIFY_ACTION_SIZE], + MLX5HWS_MODIFY_ACTION_SIZE); } } @@ -2150,8 +2194,7 @@ hws_action_apply_stc(struct mlx5hws_actions_apply_data *apply, { struct mlx5hws_action *action = apply->rule_action[action_idx].action; - apply->wqe_ctrl->stc_ix[stc_idx] = - htonl(action->stc[apply->tbl_type].offset); + apply->wqe_ctrl->stc_ix[stc_idx] = htonl(action->stc.offset); } static void @@ -2176,12 +2219,13 @@ hws_action_setter_modify_header(struct mlx5hws_actions_apply_data *apply, struct mlx5hws_action *action; u32 arg_sz, arg_idx; u8 *single_action; + u8 max_actions; __be32 stc_idx; rule_action = &apply->rule_action[setter->idx_double]; action = rule_action->action; - stc_idx = htonl(action->stc[apply->tbl_type].offset); + stc_idx = htonl(action->stc.offset); apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW6] = stc_idx; apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = 0; @@ -2203,21 +2247,23 @@ hws_action_setter_modify_header(struct mlx5hws_actions_apply_data *apply, apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = *(__be32 *)MLX5_ADDR_OF(set_action_in, single_action, data); - } else { - /* Argument offset multiple with number of args per these actions */ - arg_sz = mlx5hws_arg_get_arg_size(action->modify_header.max_num_of_actions); - arg_idx = rule_action->modify_header.offset * arg_sz; - - apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(arg_idx); - - if (!(action->flags & MLX5HWS_ACTION_FLAG_SHARED)) { - apply->require_dep = 1; - hws_action_modify_write(apply->queue, - action->modify_header.arg_id + arg_idx, - rule_action->modify_header.data, - action->modify_header.num_of_actions, - action->modify_header.nope_locations); - } + return; + } + + /* Argument offset multiple with number of args per these actions */ + max_actions = action->modify_header.max_num_of_actions; + arg_sz = mlx5hws_arg_get_arg_size(max_actions); + arg_idx = rule_action->modify_header.offset * arg_sz; + + apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(arg_idx); + + if (!(action->flags & MLX5HWS_ACTION_FLAG_SHARED)) { + apply->require_dep = 1; + hws_action_modify_write(apply->queue, + action->modify_header.arg_id + arg_idx, + rule_action->modify_header.data, + action->modify_header.num_of_actions, + action->modify_header.nop_locations); } } @@ -2240,7 +2286,7 @@ hws_action_setter_insert_ptr(struct mlx5hws_actions_apply_data *apply, apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW6] = 0; apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(arg_idx); - stc_idx = htonl(action->stc[apply->tbl_type].offset); + stc_idx = htonl(action->stc.offset); apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW6] = stc_idx; apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = 0; @@ -2272,7 +2318,7 @@ hws_action_setter_tnl_l3_to_l2(struct mlx5hws_actions_apply_data *apply, apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW6] = 0; apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(arg_idx); - stc_idx = htonl(action->stc[apply->tbl_type].offset); + stc_idx = htonl(action->stc.offset); apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW6] = stc_idx; apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = 0; @@ -2434,6 +2480,7 @@ int mlx5hws_action_template_process(struct mlx5hws_action_template *at) case MLX5HWS_ACTION_TYP_DROP: case MLX5HWS_ACTION_TYP_TBL: case MLX5HWS_ACTION_TYP_DEST_ARRAY: + case MLX5HWS_ACTION_TYP_SAMPLER: case MLX5HWS_ACTION_TYP_VPORT: case MLX5HWS_ACTION_TYP_MISS: /* Hit action */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h index e8f562c31826..55a079fdd08f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h @@ -70,12 +70,12 @@ struct mlx5hws_action_default_stc { struct mlx5hws_pool_chunk nop_dw6; struct mlx5hws_pool_chunk nop_dw7; struct mlx5hws_pool_chunk default_hit; - u32 refcount; + u32 refcount; /* protected by context ctrl lock */ }; struct mlx5hws_action_shared_stc { struct mlx5hws_pool_chunk stc_chunk; - u32 refcount; + u32 refcount; /* protected by context ctrl lock */ }; struct mlx5hws_actions_apply_data { @@ -118,19 +118,25 @@ struct mlx5hws_action_template { u8 only_term; }; +struct mlx5hws_range_action_table { + struct mlx5hws_pool *pool; + u32 rtc_0_id; + u32 rtc_1_id; +}; + struct mlx5hws_action { u8 type; u8 flags; struct mlx5hws_context *ctx; union { struct { - struct mlx5hws_pool_chunk stc[MLX5HWS_TABLE_TYPE_MAX]; + struct mlx5hws_pool_chunk stc; union { struct { u32 pat_id; u32 arg_id; __be64 single_action; - u32 nope_locations; + u32 nop_locations; u8 num_of_patterns; u8 single_action_type; u8 num_of_actions; @@ -166,6 +172,9 @@ struct mlx5hws_action { struct mlx5hws_cmd_set_fte_dest *dest_list; } dest_array; struct { + struct mlx5hws_cmd_forward_tbl *fw_island; + } flow_sampler; + struct { u8 type; u8 start_anchor; u8 end_anchor; @@ -183,7 +192,7 @@ struct mlx5hws_action { size_t size; } remove_header; struct { - struct mlx5hws_matcher_action_ste *table_ste; + struct mlx5hws_range_action_table *table_ste; struct mlx5hws_action *hit_ft_action; struct mlx5hws_definer *definer; } range; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c new file mode 100644 index 000000000000..5766a9c82f96 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c @@ -0,0 +1,467 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */ + +#include "internal.h" + +static const char * +hws_pool_opt_to_str(enum mlx5hws_pool_optimize opt) +{ + switch (opt) { + case MLX5HWS_POOL_OPTIMIZE_NONE: + return "rx-and-tx"; + case MLX5HWS_POOL_OPTIMIZE_ORIG: + return "rx-only"; + case MLX5HWS_POOL_OPTIMIZE_MIRROR: + return "tx-only"; + default: + return "unknown"; + } +} + +static int +hws_action_ste_table_create_pool(struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_table *action_tbl, + enum mlx5hws_pool_optimize opt, size_t log_sz) +{ + struct mlx5hws_pool_attr pool_attr = { 0 }; + + pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; + pool_attr.table_type = MLX5HWS_TABLE_TYPE_FDB; + pool_attr.flags = MLX5HWS_POOL_FLAG_BUDDY; + pool_attr.opt_type = opt; + pool_attr.alloc_log_sz = log_sz; + + action_tbl->pool = mlx5hws_pool_create(ctx, &pool_attr); + if (!action_tbl->pool) { + mlx5hws_err(ctx, "Failed to allocate STE pool\n"); + return -EINVAL; + } + + return 0; +} + +static int hws_action_ste_table_create_single_rtc( + struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_table *action_tbl, + enum mlx5hws_pool_optimize opt, size_t log_sz, bool tx) +{ + struct mlx5hws_cmd_rtc_create_attr rtc_attr = { 0 }; + u32 *rtc_id; + + rtc_attr.log_depth = 0; + rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET; + /* Action STEs use the default always hit definer. */ + rtc_attr.match_definer_0 = ctx->caps->trivial_match_definer; + rtc_attr.is_frst_jumbo = false; + rtc_attr.miss_ft_id = 0; + rtc_attr.pd = ctx->pd_num; + rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx); + + if (tx) { + rtc_attr.table_type = FS_FT_FDB_TX; + rtc_attr.ste_base = + mlx5hws_pool_get_base_mirror_id(action_tbl->pool); + rtc_attr.stc_base = + mlx5hws_pool_get_base_mirror_id(ctx->stc_pool); + rtc_attr.log_size = + opt == MLX5HWS_POOL_OPTIMIZE_ORIG ? 0 : log_sz; + rtc_id = &action_tbl->rtc_1_id; + } else { + rtc_attr.table_type = FS_FT_FDB_RX; + rtc_attr.ste_base = mlx5hws_pool_get_base_id(action_tbl->pool); + rtc_attr.stc_base = mlx5hws_pool_get_base_id(ctx->stc_pool); + rtc_attr.log_size = + opt == MLX5HWS_POOL_OPTIMIZE_MIRROR ? 0 : log_sz; + rtc_id = &action_tbl->rtc_0_id; + } + + return mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_id); +} + +static int +hws_action_ste_table_create_rtcs(struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_table *action_tbl, + enum mlx5hws_pool_optimize opt, size_t log_sz) +{ + int err; + + err = hws_action_ste_table_create_single_rtc(ctx, action_tbl, opt, + log_sz, false); + if (err) + return err; + + err = hws_action_ste_table_create_single_rtc(ctx, action_tbl, opt, + log_sz, true); + if (err) { + mlx5hws_cmd_rtc_destroy(ctx->mdev, action_tbl->rtc_0_id); + return err; + } + + return 0; +} + +static void +hws_action_ste_table_destroy_rtcs(struct mlx5hws_action_ste_table *action_tbl) +{ + mlx5hws_cmd_rtc_destroy(action_tbl->pool->ctx->mdev, + action_tbl->rtc_1_id); + mlx5hws_cmd_rtc_destroy(action_tbl->pool->ctx->mdev, + action_tbl->rtc_0_id); +} + +static int +hws_action_ste_table_create_stc(struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_table *action_tbl) +{ + struct mlx5hws_cmd_stc_modify_attr stc_attr = { 0 }; + + stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; + stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE; + stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; + stc_attr.ste_table.ste_pool = action_tbl->pool; + stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer; + + return mlx5hws_action_alloc_single_stc(ctx, &stc_attr, + MLX5HWS_TABLE_TYPE_FDB, + &action_tbl->stc); +} + +static struct mlx5hws_action_ste_table * +hws_action_ste_table_alloc(struct mlx5hws_action_ste_pool_element *parent_elem) +{ + enum mlx5hws_pool_optimize opt = parent_elem->opt; + struct mlx5hws_context *ctx = parent_elem->ctx; + struct mlx5hws_action_ste_table *action_tbl; + size_t log_sz; + int err; + + log_sz = min(parent_elem->log_sz ? + parent_elem->log_sz + + MLX5HWS_ACTION_STE_TABLE_STEP_LOG_SZ : + MLX5HWS_ACTION_STE_TABLE_INIT_LOG_SZ, + MLX5HWS_ACTION_STE_TABLE_MAX_LOG_SZ); + + action_tbl = kzalloc(sizeof(*action_tbl), GFP_KERNEL); + if (!action_tbl) + return ERR_PTR(-ENOMEM); + + err = hws_action_ste_table_create_pool(ctx, action_tbl, opt, log_sz); + if (err) + goto free_tbl; + + err = hws_action_ste_table_create_rtcs(ctx, action_tbl, opt, log_sz); + if (err) + goto destroy_pool; + + err = hws_action_ste_table_create_stc(ctx, action_tbl); + if (err) + goto destroy_rtcs; + + action_tbl->parent_elem = parent_elem; + INIT_LIST_HEAD(&action_tbl->list_node); + action_tbl->last_used = jiffies; + list_add(&action_tbl->list_node, &parent_elem->available); + parent_elem->log_sz = log_sz; + + mlx5hws_dbg(ctx, + "Allocated %s action STE table log_sz %zu; STEs (%d, %d); RTCs (%d, %d); STC %d\n", + hws_pool_opt_to_str(opt), log_sz, + mlx5hws_pool_get_base_id(action_tbl->pool), + mlx5hws_pool_get_base_mirror_id(action_tbl->pool), + action_tbl->rtc_0_id, action_tbl->rtc_1_id, + action_tbl->stc.offset); + + return action_tbl; + +destroy_rtcs: + hws_action_ste_table_destroy_rtcs(action_tbl); +destroy_pool: + mlx5hws_pool_destroy(action_tbl->pool); +free_tbl: + kfree(action_tbl); + + return ERR_PTR(err); +} + +static void +hws_action_ste_table_destroy(struct mlx5hws_action_ste_table *action_tbl) +{ + struct mlx5hws_context *ctx = action_tbl->parent_elem->ctx; + + mlx5hws_dbg(ctx, + "Destroying %s action STE table: STEs (%d, %d); RTCs (%d, %d); STC %d\n", + hws_pool_opt_to_str(action_tbl->parent_elem->opt), + mlx5hws_pool_get_base_id(action_tbl->pool), + mlx5hws_pool_get_base_mirror_id(action_tbl->pool), + action_tbl->rtc_0_id, action_tbl->rtc_1_id, + action_tbl->stc.offset); + + mlx5hws_action_free_single_stc(ctx, MLX5HWS_TABLE_TYPE_FDB, + &action_tbl->stc); + hws_action_ste_table_destroy_rtcs(action_tbl); + mlx5hws_pool_destroy(action_tbl->pool); + + list_del(&action_tbl->list_node); + kfree(action_tbl); +} + +static int +hws_action_ste_pool_element_init(struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_pool_element *elem, + enum mlx5hws_pool_optimize opt) +{ + elem->ctx = ctx; + elem->opt = opt; + INIT_LIST_HEAD(&elem->available); + INIT_LIST_HEAD(&elem->full); + + return 0; +} + +static void hws_action_ste_pool_element_destroy( + struct mlx5hws_action_ste_pool_element *elem) +{ + struct mlx5hws_action_ste_table *action_tbl, *p; + + /* This should be empty, but attempt to free its elements anyway. */ + list_for_each_entry_safe(action_tbl, p, &elem->full, list_node) + hws_action_ste_table_destroy(action_tbl); + + list_for_each_entry_safe(action_tbl, p, &elem->available, list_node) + hws_action_ste_table_destroy(action_tbl); +} + +static int hws_action_ste_pool_init(struct mlx5hws_context *ctx, + struct mlx5hws_action_ste_pool *pool) +{ + enum mlx5hws_pool_optimize opt; + int err; + + mutex_init(&pool->lock); + + /* Rules which are added for both RX and TX must use the same action STE + * indices for both. If we were to use a single table, then RX-only and + * TX-only rules would waste the unused entries. Thus, we use separate + * table sets for the three cases. + */ + for (opt = MLX5HWS_POOL_OPTIMIZE_NONE; opt < MLX5HWS_POOL_OPTIMIZE_MAX; + opt++) { + err = hws_action_ste_pool_element_init(ctx, &pool->elems[opt], + opt); + if (err) + goto destroy_elems; + pool->elems[opt].parent_pool = pool; + } + + return 0; + +destroy_elems: + while (opt-- > MLX5HWS_POOL_OPTIMIZE_NONE) + hws_action_ste_pool_element_destroy(&pool->elems[opt]); + + return err; +} + +static void hws_action_ste_pool_destroy(struct mlx5hws_action_ste_pool *pool) +{ + int opt; + + for (opt = MLX5HWS_POOL_OPTIMIZE_MAX - 1; + opt >= MLX5HWS_POOL_OPTIMIZE_NONE; opt--) + hws_action_ste_pool_element_destroy(&pool->elems[opt]); +} + +static void hws_action_ste_pool_element_collect_stale( + struct mlx5hws_action_ste_pool_element *elem, struct list_head *cleanup) +{ + struct mlx5hws_action_ste_table *action_tbl, *p; + unsigned long expire_time, now; + + expire_time = secs_to_jiffies(MLX5HWS_ACTION_STE_POOL_EXPIRE_SECONDS); + now = jiffies; + + list_for_each_entry_safe(action_tbl, p, &elem->available, list_node) { + if (mlx5hws_pool_full(action_tbl->pool) && + time_before(action_tbl->last_used + expire_time, now)) + list_move(&action_tbl->list_node, cleanup); + } +} + +static void hws_action_ste_table_cleanup_list(struct list_head *cleanup) +{ + struct mlx5hws_action_ste_table *action_tbl, *p; + + list_for_each_entry_safe(action_tbl, p, cleanup, list_node) + hws_action_ste_table_destroy(action_tbl); +} + +static void hws_action_ste_pool_cleanup(struct work_struct *work) +{ + enum mlx5hws_pool_optimize opt; + struct mlx5hws_context *ctx; + LIST_HEAD(cleanup); + int i; + + ctx = container_of(work, struct mlx5hws_context, + action_ste_cleanup.work); + + for (i = 0; i < ctx->queues; i++) { + struct mlx5hws_action_ste_pool *p = &ctx->action_ste_pool[i]; + + mutex_lock(&p->lock); + for (opt = MLX5HWS_POOL_OPTIMIZE_NONE; + opt < MLX5HWS_POOL_OPTIMIZE_MAX; opt++) + hws_action_ste_pool_element_collect_stale( + &p->elems[opt], &cleanup); + mutex_unlock(&p->lock); + } + + hws_action_ste_table_cleanup_list(&cleanup); + + schedule_delayed_work(&ctx->action_ste_cleanup, + secs_to_jiffies( + MLX5HWS_ACTION_STE_POOL_CLEANUP_SECONDS)); +} + +int mlx5hws_action_ste_pool_init(struct mlx5hws_context *ctx) +{ + struct mlx5hws_action_ste_pool *pool; + size_t queues = ctx->queues; + int i, err; + + pool = kcalloc(queues, sizeof(*pool), GFP_KERNEL); + if (!pool) + return -ENOMEM; + + for (i = 0; i < queues; i++) { + err = hws_action_ste_pool_init(ctx, &pool[i]); + if (err) + goto free_pool; + } + + ctx->action_ste_pool = pool; + + INIT_DELAYED_WORK(&ctx->action_ste_cleanup, + hws_action_ste_pool_cleanup); + schedule_delayed_work( + &ctx->action_ste_cleanup, + secs_to_jiffies(MLX5HWS_ACTION_STE_POOL_CLEANUP_SECONDS)); + + return 0; + +free_pool: + while (i--) + hws_action_ste_pool_destroy(&pool[i]); + kfree(pool); + + return err; +} + +void mlx5hws_action_ste_pool_uninit(struct mlx5hws_context *ctx) +{ + size_t queues = ctx->queues; + int i; + + cancel_delayed_work_sync(&ctx->action_ste_cleanup); + + for (i = 0; i < queues; i++) + hws_action_ste_pool_destroy(&ctx->action_ste_pool[i]); + + kfree(ctx->action_ste_pool); +} + +static struct mlx5hws_action_ste_pool_element * +hws_action_ste_choose_elem(struct mlx5hws_action_ste_pool *pool, + bool skip_rx, bool skip_tx) +{ + if (skip_rx) + return &pool->elems[MLX5HWS_POOL_OPTIMIZE_MIRROR]; + + if (skip_tx) + return &pool->elems[MLX5HWS_POOL_OPTIMIZE_ORIG]; + + return &pool->elems[MLX5HWS_POOL_OPTIMIZE_NONE]; +} + +static int +hws_action_ste_table_chunk_alloc(struct mlx5hws_action_ste_table *action_tbl, + struct mlx5hws_action_ste_chunk *chunk) +{ + int err; + + err = mlx5hws_pool_chunk_alloc(action_tbl->pool, &chunk->ste); + if (err) + return err; + + chunk->action_tbl = action_tbl; + action_tbl->last_used = jiffies; + + return 0; +} + +int mlx5hws_action_ste_chunk_alloc(struct mlx5hws_action_ste_pool *pool, + bool skip_rx, bool skip_tx, + struct mlx5hws_action_ste_chunk *chunk) +{ + struct mlx5hws_action_ste_pool_element *elem; + struct mlx5hws_action_ste_table *action_tbl; + bool found; + int err; + + if (skip_rx && skip_tx) + return -EINVAL; + + mutex_lock(&pool->lock); + + elem = hws_action_ste_choose_elem(pool, skip_rx, skip_tx); + + mlx5hws_dbg(elem->ctx, + "Allocating action STEs skip_rx %d skip_tx %d order %d\n", + skip_rx, skip_tx, chunk->ste.order); + + found = false; + list_for_each_entry(action_tbl, &elem->available, list_node) { + if (!hws_action_ste_table_chunk_alloc(action_tbl, chunk)) { + found = true; + break; + } + } + + if (!found) { + action_tbl = hws_action_ste_table_alloc(elem); + if (IS_ERR(action_tbl)) { + err = PTR_ERR(action_tbl); + goto out; + } + + err = hws_action_ste_table_chunk_alloc(action_tbl, chunk); + if (err) + goto out; + } + + if (mlx5hws_pool_empty(action_tbl->pool)) + list_move(&action_tbl->list_node, &elem->full); + + err = 0; + +out: + mutex_unlock(&pool->lock); + + return err; +} + +void mlx5hws_action_ste_chunk_free(struct mlx5hws_action_ste_chunk *chunk) +{ + struct mutex *lock = &chunk->action_tbl->parent_elem->parent_pool->lock; + + mlx5hws_dbg(chunk->action_tbl->pool->ctx, + "Freeing action STEs offset %d order %d\n", + chunk->ste.offset, chunk->ste.order); + + mutex_lock(lock); + mlx5hws_pool_chunk_free(chunk->action_tbl->pool, &chunk->ste); + chunk->action_tbl->last_used = jiffies; + list_move(&chunk->action_tbl->list_node, + &chunk->action_tbl->parent_elem->available); + mutex_unlock(lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h new file mode 100644 index 000000000000..a8ba97359e31 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */ + +#ifndef ACTION_STE_POOL_H_ +#define ACTION_STE_POOL_H_ + +#define MLX5HWS_ACTION_STE_TABLE_INIT_LOG_SZ 10 +#define MLX5HWS_ACTION_STE_TABLE_STEP_LOG_SZ 1 +#define MLX5HWS_ACTION_STE_TABLE_MAX_LOG_SZ 20 + +#define MLX5HWS_ACTION_STE_POOL_CLEANUP_SECONDS 300 +#define MLX5HWS_ACTION_STE_POOL_EXPIRE_SECONDS 300 + +struct mlx5hws_action_ste_pool_element; + +struct mlx5hws_action_ste_table { + struct mlx5hws_action_ste_pool_element *parent_elem; + /* Wraps the RTC and STE range for this given action. */ + struct mlx5hws_pool *pool; + /* Match STEs use this STC to jump to this pool's RTC. */ + struct mlx5hws_pool_chunk stc; + u32 rtc_0_id; + u32 rtc_1_id; + struct list_head list_node; + unsigned long last_used; +}; + +struct mlx5hws_action_ste_pool_element { + struct mlx5hws_context *ctx; + struct mlx5hws_action_ste_pool *parent_pool; + size_t log_sz; /* Size of the largest table so far. */ + enum mlx5hws_pool_optimize opt; + struct list_head available; + struct list_head full; +}; + +/* Central repository of action STEs. The context contains one of these pools + * per queue. + */ +struct mlx5hws_action_ste_pool { + /* Protects the entire pool. We have one pool per queue and only one + * operation can be active per rule at a given time. Thus this lock + * protects solely against concurrent garbage collection and we expect + * very little contention. + */ + struct mutex lock; + struct mlx5hws_action_ste_pool_element elems[MLX5HWS_POOL_OPTIMIZE_MAX]; +}; + +/* A chunk of STEs and the table it was allocated from. Used by rules. */ +struct mlx5hws_action_ste_chunk { + struct mlx5hws_action_ste_table *action_tbl; + struct mlx5hws_pool_chunk ste; +}; + +int mlx5hws_action_ste_pool_init(struct mlx5hws_context *ctx); + +void mlx5hws_action_ste_pool_uninit(struct mlx5hws_context *ctx); + +/* Callers are expected to fill chunk->ste.order. On success, this function + * populates chunk->tbl and chunk->ste.offset. + */ +int mlx5hws_action_ste_chunk_alloc(struct mlx5hws_action_ste_pool *pool, + bool skip_rx, bool skip_tx, + struct mlx5hws_action_ste_chunk *chunk); + +void mlx5hws_action_ste_chunk_free(struct mlx5hws_action_ste_chunk *chunk); + +#endif /* ACTION_STE_POOL_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index baacf662c0ab..adeccc588e5d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -46,10 +46,14 @@ static void hws_bwc_unlock_all_queues(struct mlx5hws_context *ctx) } } -static void hws_bwc_matcher_init_attr(struct mlx5hws_matcher_attr *attr, +static void hws_bwc_matcher_init_attr(struct mlx5hws_bwc_matcher *bwc_matcher, u32 priority, - u8 size_log) + u8 size_log_rx, u8 size_log_tx, + struct mlx5hws_matcher_attr *attr) { + struct mlx5hws_bwc_matcher *first_matcher = + bwc_matcher->complex_first_bwc_matcher; + memset(attr, 0, sizeof(*attr)); attr->priority = priority; @@ -58,9 +62,165 @@ static void hws_bwc_matcher_init_attr(struct mlx5hws_matcher_attr *attr, attr->optimize_flow_src = MLX5HWS_MATCHER_FLOW_SRC_ANY; attr->insert_mode = MLX5HWS_MATCHER_INSERT_BY_HASH; attr->distribute_mode = MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH; - attr->rule.num_log = size_log; + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_RX].rule.num_log = size_log_rx; + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX].rule.num_log = size_log_tx; attr->resizable = true; attr->max_num_of_at_attach = MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM; + + attr->isolated_matcher_end_ft_id = + first_matcher ? first_matcher->matcher->end_ft_id : 0; +} + +static int +hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_matcher *matcher = bwc_matcher->matcher; + int drain_error = 0, move_error = 0, poll_error = 0; + u16 bwc_queues = mlx5hws_bwc_queues(ctx); + struct mlx5hws_rule_attr rule_attr; + struct mlx5hws_bwc_rule *bwc_rule; + struct mlx5hws_send_engine *queue; + struct list_head *rules_list; + u32 pending_rules; + int i, ret = 0; + bool drain; + + mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr); + + for (i = 0; i < bwc_queues; i++) { + if (list_empty(&bwc_matcher->rules[i])) + continue; + + pending_rules = 0; + rule_attr.queue_id = mlx5hws_bwc_get_queue_id(ctx, i); + rules_list = &bwc_matcher->rules[i]; + + list_for_each_entry(bwc_rule, rules_list, list_node) { + ret = mlx5hws_matcher_resize_rule_move(matcher, + bwc_rule->rule, + &rule_attr); + if (unlikely(ret)) { + if (!move_error) { + mlx5hws_err(ctx, + "Moving BWC rule: move failed (%d), attempting to move rest of the rules\n", + ret); + move_error = ret; + } + /* Rule wasn't queued, no need to poll */ + continue; + } + + pending_rules++; + drain = pending_rules >= + hws_bwc_get_burst_th(ctx, rule_attr.queue_id); + ret = mlx5hws_bwc_queue_poll(ctx, + rule_attr.queue_id, + &pending_rules, + drain); + if (unlikely(ret)) { + if (ret == -ETIMEDOUT) { + mlx5hws_err(ctx, + "Moving BWC rule: timeout polling for completions (%d), aborting rehash\n", + ret); + return ret; + } + if (!poll_error) { + mlx5hws_err(ctx, + "Moving BWC rule: polling for completions failed (%d), attempting to move rest of the rules\n", + ret); + poll_error = ret; + } + } + } + + if (pending_rules) { + queue = &ctx->send_queue[rule_attr.queue_id]; + mlx5hws_send_engine_flush_queue(queue); + ret = mlx5hws_bwc_queue_poll(ctx, + rule_attr.queue_id, + &pending_rules, + true); + if (unlikely(ret)) { + if (ret == -ETIMEDOUT) { + mlx5hws_err(ctx, + "Moving bwc rule: timeout draining completions (%d), aborting rehash\n", + ret); + return ret; + } + if (!drain_error) { + mlx5hws_err(ctx, + "Moving bwc rule: drain failed (%d), attempting to move rest of the rules\n", + ret); + drain_error = ret; + } + } + } + } + + /* Return the first error that happened */ + if (unlikely(move_error)) + return move_error; + if (unlikely(poll_error)) + return poll_error; + if (unlikely(drain_error)) + return drain_error; + + return ret; +} + +static int hws_bwc_matcher_move_all(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + if (!bwc_matcher->complex) + return hws_bwc_matcher_move_all_simple(bwc_matcher); + + return mlx5hws_bwc_matcher_move_all_complex(bwc_matcher); +} + +static int hws_bwc_matcher_move(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_matcher_attr matcher_attr = {0}; + struct mlx5hws_matcher *old_matcher; + struct mlx5hws_matcher *new_matcher; + int ret; + + hws_bwc_matcher_init_attr(bwc_matcher, + bwc_matcher->priority, + bwc_matcher->rx_size.size_log, + bwc_matcher->tx_size.size_log, + &matcher_attr); + + old_matcher = bwc_matcher->matcher; + new_matcher = mlx5hws_matcher_create(old_matcher->tbl, + &bwc_matcher->mt, 1, + bwc_matcher->at, + bwc_matcher->num_of_at, + &matcher_attr); + if (!new_matcher) { + mlx5hws_err(ctx, "Rehash error: matcher creation failed\n"); + return -ENOMEM; + } + + ret = mlx5hws_matcher_resize_set_target(old_matcher, new_matcher); + if (ret) { + mlx5hws_err(ctx, "Rehash error: failed setting resize target\n"); + return ret; + } + + ret = hws_bwc_matcher_move_all(bwc_matcher); + if (ret) + mlx5hws_err(ctx, "Rehash error: moving rules failed, attempting to remove the old matcher\n"); + + /* Error during rehash can't be rolled back. + * The best option here is to allow the rehash to complete and remove + * the old matcher - can't leave the matcher in the 'in_resize' state. + */ + + bwc_matcher->matcher = new_matcher; + mlx5hws_matcher_destroy(old_matcher); + + return ret; } int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, @@ -83,12 +243,19 @@ int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, for (i = 0; i < bwc_queues; i++) INIT_LIST_HEAD(&bwc_matcher->rules[i]); - hws_bwc_matcher_init_attr(&attr, + hws_bwc_matcher_init_attr(bwc_matcher, priority, - MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG); + bwc_matcher->rx_size.size_log, + bwc_matcher->tx_size.size_log, + &attr); bwc_matcher->priority = priority; - bwc_matcher->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; + + bwc_matcher->size_of_at_array = MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM; + bwc_matcher->at = kcalloc(bwc_matcher->size_of_at_array, + sizeof(*bwc_matcher->at), GFP_KERNEL); + if (!bwc_matcher->at) + goto free_bwc_matcher_rules; /* create dummy action template */ bwc_matcher->at[0] = @@ -96,7 +263,7 @@ int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, action_types : init_action_types); if (!bwc_matcher->at[0]) { mlx5hws_err(table->ctx, "BWC matcher: failed creating action template\n"); - goto free_bwc_matcher_rules; + goto free_bwc_matcher_at_array; } bwc_matcher->num_of_at = 1; @@ -126,12 +293,28 @@ free_mt: mlx5hws_match_template_destroy(bwc_matcher->mt); free_at: mlx5hws_action_template_destroy(bwc_matcher->at[0]); +free_bwc_matcher_at_array: + kfree(bwc_matcher->at); free_bwc_matcher_rules: kfree(bwc_matcher->rules); err: return -EINVAL; } +static void +hws_bwc_matcher_init_size_rxtx(struct mlx5hws_bwc_matcher_size *size) +{ + size->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; + atomic_set(&size->num_of_rules, 0); + atomic_set(&size->rehash_required, false); +} + +static void hws_bwc_matcher_init_size(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + hws_bwc_matcher_init_size_rxtx(&bwc_matcher->rx_size); + hws_bwc_matcher_init_size_rxtx(&bwc_matcher->tx_size); +} + struct mlx5hws_bwc_matcher * mlx5hws_bwc_matcher_create(struct mlx5hws_table *table, u32 priority, @@ -152,6 +335,8 @@ mlx5hws_bwc_matcher_create(struct mlx5hws_table *table, if (!bwc_matcher) return NULL; + hws_bwc_matcher_init_size(bwc_matcher); + /* Check if the required match params can be all matched * in single STE, otherwise complex matcher is needed. */ @@ -190,6 +375,7 @@ int mlx5hws_bwc_matcher_destroy_simple(struct mlx5hws_bwc_matcher *bwc_matcher) for (i = 0; i < bwc_matcher->num_of_at; i++) mlx5hws_action_template_destroy(bwc_matcher->at[i]); + kfree(bwc_matcher->at); mlx5hws_match_template_destroy(bwc_matcher->mt); kfree(bwc_matcher->rules); @@ -199,22 +385,30 @@ int mlx5hws_bwc_matcher_destroy_simple(struct mlx5hws_bwc_matcher *bwc_matcher) int mlx5hws_bwc_matcher_destroy(struct mlx5hws_bwc_matcher *bwc_matcher) { - if (bwc_matcher->num_of_rules) + u32 rx_rules = atomic_read(&bwc_matcher->rx_size.num_of_rules); + u32 tx_rules = atomic_read(&bwc_matcher->tx_size.num_of_rules); + + if (rx_rules || tx_rules) mlx5hws_err(bwc_matcher->matcher->tbl->ctx, - "BWC matcher destroy: matcher still has %d rules\n", - bwc_matcher->num_of_rules); + "BWC matcher destroy: matcher still has %u RX and %u TX rules\n", + rx_rules, tx_rules); - mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); + if (bwc_matcher->complex) + mlx5hws_bwc_matcher_destroy_complex(bwc_matcher); + else + mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); kfree(bwc_matcher); return 0; } -static int hws_bwc_queue_poll(struct mlx5hws_context *ctx, - u16 queue_id, - u32 *pending_rules, - bool drain) +int mlx5hws_bwc_queue_poll(struct mlx5hws_context *ctx, + u16 queue_id, + u32 *pending_rules, + bool drain) { + unsigned long timeout = jiffies + + secs_to_jiffies(MLX5HWS_BWC_POLLING_TIMEOUT); struct mlx5hws_flow_op_result comp[MLX5HWS_BWC_MATCHER_REHASH_BURST_TH]; u16 burst_th = hws_bwc_get_burst_th(ctx, queue_id); bool got_comp = *pending_rules >= burst_th; @@ -250,6 +444,11 @@ static int hws_bwc_queue_poll(struct mlx5hws_context *ctx, } got_comp = !!ret; + + if (unlikely(!got_comp && time_after(jiffies, timeout))) { + mlx5hws_err(ctx, "BWC poll error: polling queue %d - TIMEOUT\n", queue_id); + return -ETIMEDOUT; + } } return err; @@ -309,16 +508,12 @@ static void hws_bwc_rule_list_add(struct mlx5hws_bwc_rule *bwc_rule, u16 idx) { struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; - bwc_matcher->num_of_rules++; bwc_rule->bwc_queue_idx = idx; list_add(&bwc_rule->list_node, &bwc_matcher->rules[idx]); } static void hws_bwc_rule_list_remove(struct mlx5hws_bwc_rule *bwc_rule) { - struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; - - bwc_matcher->num_of_rules--; list_del_init(&bwc_rule->list_node); } @@ -334,28 +529,102 @@ hws_bwc_rule_destroy_hws_sync(struct mlx5hws_bwc_rule *bwc_rule, struct mlx5hws_rule_attr *rule_attr) { struct mlx5hws_context *ctx = bwc_rule->bwc_matcher->matcher->tbl->ctx; - struct mlx5hws_flow_op_result completion; + u32 expected_completions = 1; int ret; ret = hws_bwc_rule_destroy_hws_async(bwc_rule, rule_attr); if (unlikely(ret)) return ret; - do { - ret = mlx5hws_send_queue_poll(ctx, rule_attr->queue_id, &completion, 1); - } while (ret != 1); + ret = mlx5hws_bwc_queue_poll(ctx, rule_attr->queue_id, + &expected_completions, true); + if (unlikely(ret)) + return ret; - if (unlikely(completion.status != MLX5HWS_FLOW_OP_SUCCESS || - (bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETED && - bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETING))) { - mlx5hws_err(ctx, "Failed destroying BWC rule: completion %d, rule status %d\n", - completion.status, bwc_rule->rule->status); + if (unlikely(bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETED && + bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETING)) { + mlx5hws_err(ctx, "Failed destroying BWC rule: rule status %d\n", + bwc_rule->rule->status); return -EINVAL; } return 0; } +static void hws_bwc_rule_cnt_dec(struct mlx5hws_bwc_rule *bwc_rule) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + + if (!bwc_rule->skip_rx) + atomic_dec(&bwc_matcher->rx_size.num_of_rules); + if (!bwc_rule->skip_tx) + atomic_dec(&bwc_matcher->tx_size.num_of_rules); +} + +static int +hws_bwc_matcher_rehash_shrink(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_bwc_matcher_size *rx_size = &bwc_matcher->rx_size; + struct mlx5hws_bwc_matcher_size *tx_size = &bwc_matcher->tx_size; + + /* It is possible that another thread has added a rule. + * Need to check again if we really need rehash/shrink. + */ + if (atomic_read(&rx_size->num_of_rules) || + atomic_read(&tx_size->num_of_rules)) + return 0; + + /* If the current matcher RX/TX size is already at its initial size. */ + if (rx_size->size_log == MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG && + tx_size->size_log == MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG) + return 0; + + /* Now we've done all the checking - do the shrinking: + * - reset match RTC size to the initial size + * - create new matcher + * - move the rules, which will not do anything as the matcher is empty + * - destroy the old matcher + */ + + rx_size->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; + tx_size->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG; + + return hws_bwc_matcher_move(bwc_matcher); +} + +static int hws_bwc_rule_cnt_dec_with_shrink(struct mlx5hws_bwc_rule *bwc_rule, + u16 bwc_queue_idx) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mutex *queue_lock; /* Protect the queue */ + int ret; + + hws_bwc_rule_cnt_dec(bwc_rule); + + if (atomic_read(&bwc_matcher->rx_size.num_of_rules) || + atomic_read(&bwc_matcher->tx_size.num_of_rules)) + return 0; + + /* Matcher has no more rules - shrink it to save ICM. */ + + queue_lock = hws_bwc_get_queue_lock(ctx, bwc_queue_idx); + mutex_unlock(queue_lock); + + hws_bwc_lock_all_queues(ctx); + ret = hws_bwc_matcher_rehash_shrink(bwc_matcher); + hws_bwc_unlock_all_queues(ctx); + + mutex_lock(queue_lock); + + if (unlikely(ret)) + mlx5hws_err(ctx, + "BWC rule deletion: shrinking empty matcher failed (%d)\n", + ret); + + return ret; +} + int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule) { struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; @@ -373,6 +642,7 @@ int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule) ret = hws_bwc_rule_destroy_hws_sync(bwc_rule, &attr); hws_bwc_rule_list_remove(bwc_rule); + hws_bwc_rule_cnt_dec_with_shrink(bwc_rule, idx); mutex_unlock(queue_lock); @@ -381,9 +651,13 @@ int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule) int mlx5hws_bwc_rule_destroy(struct mlx5hws_bwc_rule *bwc_rule) { - int ret; + bool is_complex = !!bwc_rule->bwc_matcher->complex; + int ret = 0; - ret = mlx5hws_bwc_rule_destroy_simple(bwc_rule); + if (is_complex) + ret = mlx5hws_bwc_rule_destroy_complex(bwc_rule); + else + ret = mlx5hws_bwc_rule_destroy_simple(bwc_rule); mlx5hws_bwc_rule_free(bwc_rule); return ret; @@ -423,9 +697,8 @@ hws_bwc_rule_create_sync(struct mlx5hws_bwc_rule *bwc_rule, if (unlikely(ret)) return ret; - ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true); - - return ret; + return mlx5hws_bwc_queue_poll(ctx, rule_attr->queue_id, + &expected_completions, true); } static int @@ -446,7 +719,8 @@ hws_bwc_rule_update_sync(struct mlx5hws_bwc_rule *bwc_rule, if (unlikely(ret)) return ret; - ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true); + ret = mlx5hws_bwc_queue_poll(ctx, rule_attr->queue_id, + &expected_completions, true); if (unlikely(ret)) mlx5hws_err(ctx, "Failed updating BWC rule (%d)\n", ret); @@ -454,23 +728,27 @@ hws_bwc_rule_update_sync(struct mlx5hws_bwc_rule *bwc_rule, } static bool -hws_bwc_matcher_size_maxed_out(struct mlx5hws_bwc_matcher *bwc_matcher) +hws_bwc_matcher_size_maxed_out(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_bwc_matcher_size *size) { struct mlx5hws_cmd_query_caps *caps = bwc_matcher->matcher->tbl->ctx->caps; - return bwc_matcher->size_log + MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH >= - caps->ste_alloc_log_max - 1; + /* check the match RTC size */ + return (size->size_log + MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH + + MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP) > + (caps->ste_alloc_log_max - 1); } static bool hws_bwc_matcher_rehash_size_needed(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_bwc_matcher_size *size, u32 num_of_rules) { - if (unlikely(hws_bwc_matcher_size_maxed_out(bwc_matcher))) + if (unlikely(hws_bwc_matcher_size_maxed_out(bwc_matcher, size))) return false; if (unlikely((num_of_rules * 100 / MLX5HWS_BWC_MATCHER_REHASH_PERCENT_TH) >= - (1UL << bwc_matcher->size_log))) + (1UL << size->size_log))) return true; return false; @@ -496,6 +774,23 @@ hws_bwc_matcher_extend_at(struct mlx5hws_bwc_matcher *bwc_matcher, struct mlx5hws_rule_action rule_actions[]) { enum mlx5hws_action_type action_types[MLX5HWS_BWC_MAX_ACTS]; + void *p; + + if (unlikely(bwc_matcher->num_of_at >= bwc_matcher->size_of_at_array)) { + if (bwc_matcher->size_of_at_array >= MLX5HWS_MATCHER_MAX_AT) + return -ENOMEM; + bwc_matcher->size_of_at_array *= 2; + p = krealloc(bwc_matcher->at, + bwc_matcher->size_of_at_array * + sizeof(*bwc_matcher->at), + __GFP_ZERO | GFP_KERNEL); + if (!p) { + bwc_matcher->size_of_at_array /= 2; + return -ENOMEM; + } + + bwc_matcher->at = p; + } hws_bwc_rule_actions_to_action_types(rule_actions, action_types); @@ -510,20 +805,21 @@ hws_bwc_matcher_extend_at(struct mlx5hws_bwc_matcher *bwc_matcher, } static int -hws_bwc_matcher_extend_size(struct mlx5hws_bwc_matcher *bwc_matcher) +hws_bwc_matcher_extend_size(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_bwc_matcher_size *size) { struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; struct mlx5hws_cmd_query_caps *caps = ctx->caps; - if (unlikely(hws_bwc_matcher_size_maxed_out(bwc_matcher))) { + if (unlikely(hws_bwc_matcher_size_maxed_out(bwc_matcher, size))) { mlx5hws_err(ctx, "Can't resize matcher: depth exceeds limit %d\n", caps->rtc_log_depth_max); return -ENOMEM; } - bwc_matcher->size_log = - min(bwc_matcher->size_log + MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP, - caps->ste_alloc_log_max - MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH); + size->size_log = min(size->size_log + MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP, + caps->ste_alloc_log_max - + MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH); return 0; } @@ -556,182 +852,171 @@ hws_bwc_matcher_find_at(struct mlx5hws_bwc_matcher *bwc_matcher, return -1; } -static int hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) +static int +hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher) { - struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; - u16 bwc_queues = mlx5hws_bwc_queues(ctx); - struct mlx5hws_bwc_rule **bwc_rules; - struct mlx5hws_rule_attr rule_attr; - u32 *pending_rules; - int i, j, ret = 0; - bool all_done; - u16 burst_th; + bool need_rx_rehash, need_tx_rehash; + int ret; - mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr); + need_rx_rehash = atomic_read(&bwc_matcher->rx_size.rehash_required); + need_tx_rehash = atomic_read(&bwc_matcher->tx_size.rehash_required); - pending_rules = kcalloc(bwc_queues, sizeof(*pending_rules), GFP_KERNEL); - if (!pending_rules) - return -ENOMEM; + /* It is possible that another rule has already performed rehash. + * Need to check again if we really need rehash. + */ + if (!need_rx_rehash && !need_tx_rehash) + return 0; - bwc_rules = kcalloc(bwc_queues, sizeof(*bwc_rules), GFP_KERNEL); - if (!bwc_rules) { - ret = -ENOMEM; - goto free_pending_rules; + /* If the current matcher RX/TX size is already at its max size, + * it can't be rehashed. + */ + if (need_rx_rehash && + hws_bwc_matcher_size_maxed_out(bwc_matcher, + &bwc_matcher->rx_size)) { + atomic_set(&bwc_matcher->rx_size.rehash_required, false); + need_rx_rehash = false; } - - for (i = 0; i < bwc_queues; i++) { - if (list_empty(&bwc_matcher->rules[i])) - bwc_rules[i] = NULL; - else - bwc_rules[i] = list_first_entry(&bwc_matcher->rules[i], - struct mlx5hws_bwc_rule, - list_node); + if (need_tx_rehash && + hws_bwc_matcher_size_maxed_out(bwc_matcher, + &bwc_matcher->tx_size)) { + atomic_set(&bwc_matcher->tx_size.rehash_required, false); + need_tx_rehash = false; } - do { - all_done = true; - - for (i = 0; i < bwc_queues; i++) { - rule_attr.queue_id = mlx5hws_bwc_get_queue_id(ctx, i); - burst_th = hws_bwc_get_burst_th(ctx, rule_attr.queue_id); - - for (j = 0; j < burst_th && bwc_rules[i]; j++) { - rule_attr.burst = !!((j + 1) % burst_th); - ret = mlx5hws_matcher_resize_rule_move(bwc_matcher->matcher, - bwc_rules[i]->rule, - &rule_attr); - if (unlikely(ret)) { - mlx5hws_err(ctx, - "Moving BWC rule failed during rehash (%d)\n", - ret); - goto free_bwc_rules; - } - - all_done = false; - pending_rules[i]++; - bwc_rules[i] = list_is_last(&bwc_rules[i]->list_node, - &bwc_matcher->rules[i]) ? - NULL : list_next_entry(bwc_rules[i], list_node); + /* If both RX and TX rehash flags are now off, it means that whatever + * we wanted to rehash is now at its max size - no rehash can be done. + * Return and try adding the rule again - perhaps there was some change. + */ + if (!need_rx_rehash && !need_tx_rehash) + return 0; - ret = hws_bwc_queue_poll(ctx, rule_attr.queue_id, - &pending_rules[i], false); - if (unlikely(ret)) - goto free_bwc_rules; - } - } - } while (!all_done); + /* Now we're done all the checking - do the rehash: + * - extend match RTC size + * - create new matcher + * - move all the rules to the new matcher + * - destroy the old matcher + */ + atomic_set(&bwc_matcher->rx_size.rehash_required, false); + atomic_set(&bwc_matcher->tx_size.rehash_required, false); - /* drain all the bwc queues */ - for (i = 0; i < bwc_queues; i++) { - if (pending_rules[i]) { - u16 queue_id = mlx5hws_bwc_get_queue_id(ctx, i); - - mlx5hws_send_engine_flush_queue(&ctx->send_queue[queue_id]); - ret = hws_bwc_queue_poll(ctx, queue_id, - &pending_rules[i], true); - if (unlikely(ret)) - goto free_bwc_rules; - } + if (need_rx_rehash) { + ret = hws_bwc_matcher_extend_size(bwc_matcher, + &bwc_matcher->rx_size); + if (ret) + return ret; } -free_bwc_rules: - kfree(bwc_rules); -free_pending_rules: - kfree(pending_rules); - - return ret; -} + if (need_tx_rehash) { + ret = hws_bwc_matcher_extend_size(bwc_matcher, + &bwc_matcher->tx_size); + if (ret) + return ret; + } -static int hws_bwc_matcher_move_all(struct mlx5hws_bwc_matcher *bwc_matcher) -{ - return hws_bwc_matcher_move_all_simple(bwc_matcher); + return hws_bwc_matcher_move(bwc_matcher); } -static int hws_bwc_matcher_move(struct mlx5hws_bwc_matcher *bwc_matcher) +static int hws_bwc_rule_get_at_idx(struct mlx5hws_bwc_rule *bwc_rule, + struct mlx5hws_rule_action rule_actions[], + u16 bwc_queue_idx) { + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; - struct mlx5hws_matcher_attr matcher_attr = {0}; - struct mlx5hws_matcher *old_matcher; - struct mlx5hws_matcher *new_matcher; - int ret; + struct mutex *queue_lock; /* Protect the queue */ + int at_idx, ret; - hws_bwc_matcher_init_attr(&matcher_attr, - bwc_matcher->priority, - bwc_matcher->size_log); + /* check if rehash needed due to missing action template */ + at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions); + if (likely(at_idx >= 0)) + return at_idx; - old_matcher = bwc_matcher->matcher; - new_matcher = mlx5hws_matcher_create(old_matcher->tbl, - &bwc_matcher->mt, 1, - bwc_matcher->at, - bwc_matcher->num_of_at, - &matcher_attr); - if (!new_matcher) { - mlx5hws_err(ctx, "Rehash error: matcher creation failed\n"); - return -ENOMEM; - } + /* we need to extend BWC matcher action templates array */ + queue_lock = hws_bwc_get_queue_lock(ctx, bwc_queue_idx); + mutex_unlock(queue_lock); + hws_bwc_lock_all_queues(ctx); - ret = mlx5hws_matcher_resize_set_target(old_matcher, new_matcher); - if (ret) { - mlx5hws_err(ctx, "Rehash error: failed setting resize target\n"); - return ret; + /* check again - perhaps other thread already did extend_at */ + at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions); + if (at_idx >= 0) + goto out; + + ret = hws_bwc_matcher_extend_at(bwc_matcher, rule_actions); + if (unlikely(ret)) { + mlx5hws_err(ctx, "BWC rule: failed extending AT (%d)", ret); + at_idx = -EINVAL; + goto out; } - ret = hws_bwc_matcher_move_all(bwc_matcher); - if (ret) { - mlx5hws_err(ctx, "Rehash error: moving rules failed\n"); - return -ENOMEM; + /* action templates array was extended, we need the last idx */ + at_idx = bwc_matcher->num_of_at - 1; + ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher, + bwc_matcher->at[at_idx]); + if (unlikely(ret)) { + mlx5hws_err(ctx, "BWC rule: failed attaching new AT (%d)", ret); + at_idx = -EINVAL; + goto out; } - bwc_matcher->matcher = new_matcher; - mlx5hws_matcher_destroy(old_matcher); +out: + hws_bwc_unlock_all_queues(ctx); + mutex_lock(queue_lock); + return at_idx; +} - return 0; +static void hws_bwc_rule_cnt_inc_rxtx(struct mlx5hws_bwc_rule *bwc_rule, + struct mlx5hws_bwc_matcher_size *size) +{ + u32 num_of_rules = atomic_inc_return(&size->num_of_rules); + + if (unlikely(hws_bwc_matcher_rehash_size_needed(bwc_rule->bwc_matcher, + size, num_of_rules))) + atomic_set(&size->rehash_required, true); } -static int -hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher) +static void hws_bwc_rule_cnt_inc(struct mlx5hws_bwc_rule *bwc_rule) { + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + + if (!bwc_rule->skip_rx) + hws_bwc_rule_cnt_inc_rxtx(bwc_rule, &bwc_matcher->rx_size); + if (!bwc_rule->skip_tx) + hws_bwc_rule_cnt_inc_rxtx(bwc_rule, &bwc_matcher->tx_size); +} + +static int hws_bwc_rule_cnt_inc_with_rehash(struct mlx5hws_bwc_rule *bwc_rule, + u16 bwc_queue_idx) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mutex *queue_lock; /* Protect the queue */ int ret; - /* If the current matcher size is already at its max size, we can't - * do the rehash. Skip it and try adding the rule again - perhaps - * there was some change. - */ - if (hws_bwc_matcher_size_maxed_out(bwc_matcher)) - return 0; + hws_bwc_rule_cnt_inc(bwc_rule); - /* It is possible that other rule has already performed rehash. - * Need to check again if we really need rehash. - * If the reason for rehash was size, but not any more - skip rehash. - */ - if (!hws_bwc_matcher_rehash_size_needed(bwc_matcher, bwc_matcher->num_of_rules)) + if (!atomic_read(&bwc_matcher->rx_size.rehash_required) && + !atomic_read(&bwc_matcher->tx_size.rehash_required)) return 0; - /* Now we're done all the checking - do the rehash: - * - extend match RTC size - * - create new matcher - * - move all the rules to the new matcher - * - destroy the old matcher - */ + queue_lock = hws_bwc_get_queue_lock(ctx, bwc_queue_idx); + mutex_unlock(queue_lock); - ret = hws_bwc_matcher_extend_size(bwc_matcher); - if (ret) - return ret; + hws_bwc_lock_all_queues(ctx); + ret = hws_bwc_matcher_rehash_size(bwc_matcher); + hws_bwc_unlock_all_queues(ctx); - return hws_bwc_matcher_move(bwc_matcher); -} + mutex_lock(queue_lock); -static int -hws_bwc_matcher_rehash_at(struct mlx5hws_bwc_matcher *bwc_matcher) -{ - /* Rehash by action template doesn't require any additional checking. - * The bwc_matcher already contains the new action template. - * Just do the usual rehash: - * - create new matcher - * - move all the rules to the new matcher - * - destroy the old matcher - */ - return hws_bwc_matcher_move(bwc_matcher); + if (likely(!ret)) + return 0; + + /* Failed to rehash. Print a diagnostic and rollback the counters. */ + mlx5hws_err(ctx, + "BWC rule insertion: rehash to sizes [%d, %d] failed (%d)\n", + bwc_matcher->rx_size.size_log, + bwc_matcher->tx_size.size_log, ret); + hws_bwc_rule_cnt_dec(bwc_rule); + + return ret; } int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, @@ -744,7 +1029,6 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; struct mlx5hws_rule_attr rule_attr; struct mutex *queue_lock; /* Protect the queue */ - u32 num_of_rules; int ret = 0; int at_idx; @@ -754,67 +1038,18 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, mutex_lock(queue_lock); - /* check if rehash needed due to missing action template */ - at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions); + at_idx = hws_bwc_rule_get_at_idx(bwc_rule, rule_actions, bwc_queue_idx); if (unlikely(at_idx < 0)) { - /* we need to extend BWC matcher action templates array */ mutex_unlock(queue_lock); - hws_bwc_lock_all_queues(ctx); - - ret = hws_bwc_matcher_extend_at(bwc_matcher, rule_actions); - if (unlikely(ret)) { - hws_bwc_unlock_all_queues(ctx); - return ret; - } - - /* action templates array was extended, we need the last idx */ - at_idx = bwc_matcher->num_of_at - 1; - - ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher, - bwc_matcher->at[at_idx]); - if (unlikely(ret)) { - /* Action template attach failed, possibly due to - * requiring more action STEs. - * Need to attempt creating new matcher with all - * the action templates, including the new one. - */ - ret = hws_bwc_matcher_rehash_at(bwc_matcher); - if (unlikely(ret)) { - mlx5hws_action_template_destroy(bwc_matcher->at[at_idx]); - bwc_matcher->at[at_idx] = NULL; - bwc_matcher->num_of_at--; - - hws_bwc_unlock_all_queues(ctx); - - mlx5hws_err(ctx, - "BWC rule insertion: rehash AT failed (%d)\n", ret); - return ret; - } - } - - hws_bwc_unlock_all_queues(ctx); - mutex_lock(queue_lock); + mlx5hws_err(ctx, "BWC rule create: failed getting AT (%d)", + ret); + return -EINVAL; } - /* check if number of rules require rehash */ - num_of_rules = bwc_matcher->num_of_rules; - - if (unlikely(hws_bwc_matcher_rehash_size_needed(bwc_matcher, num_of_rules))) { + ret = hws_bwc_rule_cnt_inc_with_rehash(bwc_rule, bwc_queue_idx); + if (unlikely(ret)) { mutex_unlock(queue_lock); - - hws_bwc_lock_all_queues(ctx); - ret = hws_bwc_matcher_rehash_size(bwc_matcher); - hws_bwc_unlock_all_queues(ctx); - - if (ret) { - mlx5hws_err(ctx, "BWC rule insertion: rehash size [%d -> %d] failed (%d)\n", - bwc_matcher->size_log - MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP, - bwc_matcher->size_log, - ret); - return ret; - } - - mutex_lock(queue_lock); + return ret; } ret = hws_bwc_rule_create_sync(bwc_rule, @@ -828,12 +1063,29 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, return 0; /* rule inserted successfully */ } + /* Rule insertion could fail due to queue being full, timeout, or + * matcher in resize. In such cases, no point in trying to rehash. + */ + if (ret == -EBUSY || ret == -ETIMEDOUT || ret == -EAGAIN) { + mutex_unlock(queue_lock); + mlx5hws_err(ctx, + "BWC rule insertion failed - %s (%d)\n", + ret == -EBUSY ? "queue is full" : + ret == -ETIMEDOUT ? "timeout" : + ret == -EAGAIN ? "matcher in resize" : "N/A", + ret); + hws_bwc_rule_cnt_dec(bwc_rule); + return ret; + } + /* At this point the rule wasn't added. * It could be because there was collision, or some other problem. - * If we don't dive deeper than API, the only thing we know is that - * the status of completion is RTE_FLOW_OP_ERROR. * Try rehash by size and insert rule again - last chance. */ + if (!bwc_rule->skip_rx) + atomic_set(&bwc_matcher->rx_size.rehash_required, true); + if (!bwc_rule->skip_tx) + atomic_set(&bwc_matcher->tx_size.rehash_required, true); mutex_unlock(queue_lock); @@ -843,6 +1095,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, if (ret) { mlx5hws_err(ctx, "BWC rule insertion: rehash failed (%d)\n", ret); + hws_bwc_rule_cnt_dec(bwc_rule); return ret; } @@ -858,6 +1111,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, if (unlikely(ret)) { mutex_unlock(queue_lock); mlx5hws_err(ctx, "BWC rule insertion failed (%d)\n", ret); + hws_bwc_rule_cnt_dec(bwc_rule); return ret; } @@ -887,13 +1141,24 @@ mlx5hws_bwc_rule_create(struct mlx5hws_bwc_matcher *bwc_matcher, if (unlikely(!bwc_rule)) return NULL; + bwc_rule->flow_source = flow_source; + mlx5hws_rule_skip(bwc_matcher->matcher, flow_source, + &bwc_rule->skip_rx, &bwc_rule->skip_tx); + bwc_queue_idx = hws_bwc_gen_queue_idx(ctx); - ret = mlx5hws_bwc_rule_create_simple(bwc_rule, - params->match_buf, - rule_actions, - flow_source, - bwc_queue_idx); + if (bwc_matcher->complex) + ret = mlx5hws_bwc_rule_create_complex(bwc_rule, + params, + flow_source, + rule_actions, + bwc_queue_idx); + else + ret = mlx5hws_bwc_rule_create_simple(bwc_rule, + params->match_buf, + rule_actions, + flow_source, + bwc_queue_idx); if (unlikely(ret)) { mlx5hws_bwc_rule_free(bwc_rule); return NULL; @@ -915,57 +1180,17 @@ hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule, idx = bwc_rule->bwc_queue_idx; - mlx5hws_bwc_rule_fill_attr(bwc_matcher, idx, 0, &rule_attr); + mlx5hws_bwc_rule_fill_attr(bwc_matcher, idx, bwc_rule->flow_source, + &rule_attr); queue_lock = hws_bwc_get_queue_lock(ctx, idx); mutex_lock(queue_lock); - /* check if rehash needed due to missing action template */ - at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions); + at_idx = hws_bwc_rule_get_at_idx(bwc_rule, rule_actions, idx); if (unlikely(at_idx < 0)) { - /* we need to extend BWC matcher action templates array */ mutex_unlock(queue_lock); - hws_bwc_lock_all_queues(ctx); - - /* check again - perhaps other thread already did extend_at */ - at_idx = hws_bwc_matcher_find_at(bwc_matcher, rule_actions); - if (likely(at_idx < 0)) { - ret = hws_bwc_matcher_extend_at(bwc_matcher, rule_actions); - if (unlikely(ret)) { - hws_bwc_unlock_all_queues(ctx); - mlx5hws_err(ctx, "BWC rule update: failed extending AT (%d)", ret); - return -EINVAL; - } - - /* action templates array was extended, we need the last idx */ - at_idx = bwc_matcher->num_of_at - 1; - - ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher, - bwc_matcher->at[at_idx]); - if (unlikely(ret)) { - /* Action template attach failed, possibly due to - * requiring more action STEs. - * Need to attempt creating new matcher with all - * the action templates, including the new one. - */ - ret = hws_bwc_matcher_rehash_at(bwc_matcher); - if (unlikely(ret)) { - mlx5hws_action_template_destroy(bwc_matcher->at[at_idx]); - bwc_matcher->at[at_idx] = NULL; - bwc_matcher->num_of_at--; - - hws_bwc_unlock_all_queues(ctx); - - mlx5hws_err(ctx, - "BWC rule update: rehash AT failed (%d)\n", - ret); - return ret; - } - } - } - - hws_bwc_unlock_all_queues(ctx); - mutex_lock(queue_lock); + mlx5hws_err(ctx, "BWC rule update: failed getting AT\n"); + return -EINVAL; } ret = hws_bwc_rule_update_sync(bwc_rule, @@ -991,5 +1216,10 @@ int mlx5hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule, return -EINVAL; } - return hws_bwc_rule_action_update(bwc_rule, rule_actions); + /* For complex rule, the update should happen on the second matcher */ + if (bwc_rule->isolated_bwc_rule) + return hws_bwc_rule_action_update(bwc_rule->isolated_bwc_rule, + rule_actions); + else + return hws_bwc_rule_action_update(bwc_rule, rule_actions); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h index 0b745968e21e..af391d70c14f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h @@ -8,25 +8,47 @@ #define MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP 1 #define MLX5HWS_BWC_MATCHER_REHASH_PERCENT_TH 70 #define MLX5HWS_BWC_MATCHER_REHASH_BURST_TH 32 -#define MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM 255 + +/* Max number of AT attach operations for the same matcher. + * When the limit is reached, a larger buffer is allocated for the ATs. + */ +#define MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM 8 #define MLX5HWS_BWC_MAX_ACTS 16 +#define MLX5HWS_BWC_POLLING_TIMEOUT 60 + +struct mlx5hws_bwc_matcher_complex_data; + +struct mlx5hws_bwc_matcher_size { + u8 size_log; + atomic_t num_of_rules; + atomic_t rehash_required; +}; + struct mlx5hws_bwc_matcher { struct mlx5hws_matcher *matcher; struct mlx5hws_match_template *mt; - struct mlx5hws_action_template *at[MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM]; + struct mlx5hws_action_template **at; + struct mlx5hws_bwc_matcher_complex_data *complex; + struct mlx5hws_bwc_matcher *complex_first_bwc_matcher; u8 num_of_at; - u16 priority; - u8 size_log; - u32 num_of_rules; /* atomically accessed */ + u8 size_of_at_array; + u32 priority; + struct mlx5hws_bwc_matcher_size rx_size; + struct mlx5hws_bwc_matcher_size tx_size; struct list_head *rules; }; struct mlx5hws_bwc_rule { struct mlx5hws_bwc_matcher *bwc_matcher; struct mlx5hws_rule *rule; + struct mlx5hws_bwc_rule *isolated_bwc_rule; + struct mlx5hws_bwc_complex_rule_hash_node *complex_hash_node; + u32 flow_source; u16 bwc_queue_idx; + bool skip_rx; + bool skip_tx; struct list_head list_node; }; @@ -57,12 +79,19 @@ void mlx5hws_bwc_rule_fill_attr(struct mlx5hws_bwc_matcher *bwc_matcher, u32 flow_source, struct mlx5hws_rule_attr *rule_attr); +int mlx5hws_bwc_queue_poll(struct mlx5hws_context *ctx, + u16 queue_id, + u32 *pending_rules, + bool drain); + static inline u16 mlx5hws_bwc_queues(struct mlx5hws_context *ctx) { /* Besides the control queue, half of the queues are - * reguler HWS queues, and the other half are BWC queues. + * regular HWS queues, and the other half are BWC queues. */ - return (ctx->queues - 1) / 2; + if (mlx5hws_context_bwc_supported(ctx)) + return (ctx->queues - 1) / 2; + return 0; } static inline u16 mlx5hws_bwc_get_queue_id(struct mlx5hws_context *ctx, u16 idx) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c index c00010ca86bd..14e79579c719 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c @@ -3,6 +3,22 @@ #include "internal.h" +#define HWS_CLEAR_MATCH_PARAM(mask, field) \ + MLX5_SET(fte_match_param, (mask)->match_buf, field, 0) + +#define HWS_SZ_MATCH_PARAM (MLX5_ST_SZ_DW_MATCH_PARAM * 4) + +static const struct rhashtable_params hws_refcount_hash = { + .key_len = sizeof_field(struct mlx5hws_bwc_complex_rule_hash_node, + match_buf), + .key_offset = offsetof(struct mlx5hws_bwc_complex_rule_hash_node, + match_buf), + .head_offset = offsetof(struct mlx5hws_bwc_complex_rule_hash_node, + hash_node), + .automatic_shrinking = true, + .min_size = 1, +}; + bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, u8 match_criteria_enable, struct mlx5hws_match_parameters *mask) @@ -39,6 +55,8 @@ bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, } else { mlx5hws_err(ctx, "Failed to calculate matcher definer layout\n"); } + } else { + kfree(mt->fc); } mlx5hws_match_template_destroy(mt); @@ -46,20 +64,1093 @@ bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, return is_complex; } +static void +hws_bwc_matcher_complex_params_clear_fld(struct mlx5hws_context *ctx, + enum mlx5hws_definer_fname fname, + struct mlx5hws_match_parameters *mask) +{ + struct mlx5hws_cmd_query_caps *caps = ctx->caps; + + switch (fname) { + case MLX5HWS_DEFINER_FNAME_ETH_TYPE_O: + case MLX5HWS_DEFINER_FNAME_ETH_TYPE_I: + case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_O: + case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_I: + case MLX5HWS_DEFINER_FNAME_IP_VERSION_O: + case MLX5HWS_DEFINER_FNAME_IP_VERSION_I: + /* Because of the strict requirements for IP address matching + * that require ethtype/ip_version matching as well, don't clear + * these fields - have them in both parts of the complex matcher + */ + break; + case MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.smac_47_16); + break; + case MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.smac_47_16); + break; + case MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.smac_15_0); + break; + case MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.smac_15_0); + break; + case MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.dmac_47_16); + break; + case MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.dmac_47_16); + break; + case MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.dmac_15_0); + break; + case MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.dmac_15_0); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_TYPE_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.cvlan_tag); + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.svlan_tag); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_TYPE_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.cvlan_tag); + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.svlan_tag); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_FIRST_PRIO_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.first_prio); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_FIRST_PRIO_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.first_prio); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_CFI_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.first_cfi); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_CFI_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.first_cfi); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_ID_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.first_vid); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_ID_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.first_vid); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_TYPE_O: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.outer_second_cvlan_tag); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.outer_second_svlan_tag); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_TYPE_I: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.inner_second_cvlan_tag); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.inner_second_svlan_tag); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_PRIO_O: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.outer_second_prio); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_PRIO_I: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.inner_second_prio); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_CFI_O: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.outer_second_cfi); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_CFI_I: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.inner_second_cfi); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_ID_O: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.outer_second_vid); + break; + case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_ID_I: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.inner_second_vid); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_IHL_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ipv4_ihl); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_IHL_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ipv4_ihl); + break; + case MLX5HWS_DEFINER_FNAME_IP_DSCP_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ip_dscp); + break; + case MLX5HWS_DEFINER_FNAME_IP_DSCP_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ip_dscp); + break; + case MLX5HWS_DEFINER_FNAME_IP_ECN_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ip_ecn); + break; + case MLX5HWS_DEFINER_FNAME_IP_ECN_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ip_ecn); + break; + case MLX5HWS_DEFINER_FNAME_IP_TTL_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ttl_hoplimit); + break; + case MLX5HWS_DEFINER_FNAME_IP_TTL_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ttl_hoplimit); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_DST_O: + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_SRC_O: + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_DST_I: + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV4_SRC_I: + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IP_FRAG_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.frag); + break; + case MLX5HWS_DEFINER_FNAME_IP_FRAG_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.frag); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_FLOW_LABEL_O: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.outer_ipv6_flow_label); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_FLOW_LABEL_I: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.inner_ipv6_flow_label); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_O: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_O: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_O: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_O: + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_95_64); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_63_32); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_O: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_O: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_O: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_O: + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_95_64); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_63_32); + HWS_CLEAR_MATCH_PARAM(mask, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_I: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_I: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_I: + case MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_I: + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_95_64); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_63_32); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_I: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_I: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_I: + case MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_I: + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_95_64); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_63_32); + HWS_CLEAR_MATCH_PARAM(mask, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); + break; + case MLX5HWS_DEFINER_FNAME_IP_PROTOCOL_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ip_protocol); + break; + case MLX5HWS_DEFINER_FNAME_IP_PROTOCOL_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ip_protocol); + break; + case MLX5HWS_DEFINER_FNAME_L4_SPORT_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.tcp_sport); + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.udp_sport); + break; + case MLX5HWS_DEFINER_FNAME_L4_SPORT_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.tcp_dport); + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.udp_dport); + break; + case MLX5HWS_DEFINER_FNAME_L4_DPORT_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.tcp_dport); + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.udp_dport); + break; + case MLX5HWS_DEFINER_FNAME_L4_DPORT_I: + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.tcp_dport); + HWS_CLEAR_MATCH_PARAM(mask, inner_headers.udp_dport); + break; + case MLX5HWS_DEFINER_FNAME_TCP_FLAGS_O: + HWS_CLEAR_MATCH_PARAM(mask, outer_headers.tcp_flags); + break; + case MLX5HWS_DEFINER_FNAME_TCP_ACK_NUM: + case MLX5HWS_DEFINER_FNAME_TCP_SEQ_NUM: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.outer_tcp_seq_num); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.outer_tcp_ack_num); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.inner_tcp_seq_num); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.inner_tcp_ack_num); + break; + case MLX5HWS_DEFINER_FNAME_GTP_TEID: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_teid); + break; + case MLX5HWS_DEFINER_FNAME_GTP_MSG_TYPE: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_msg_type); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_msg_flags); + break; + case MLX5HWS_DEFINER_FNAME_GTPU_FIRST_EXT_DW0: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.gtpu_first_ext_dw_0); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_dw_0); + break; + case MLX5HWS_DEFINER_FNAME_GTPU_DW2: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_dw_2); + break; + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_0: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_1: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_2: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_3: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_4: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_5: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_6: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_7: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_2.outer_first_mpls_over_gre); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_2.outer_first_mpls_over_udp); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.geneve_tlv_option_0_data); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_4.prog_sample_field_id_0); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_4.prog_sample_field_value_0); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_4.prog_sample_field_value_1); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_4.prog_sample_field_id_2); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_4.prog_sample_field_value_2); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_4.prog_sample_field_id_3); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_4.prog_sample_field_value_3); + break; + case MLX5HWS_DEFINER_FNAME_VXLAN_VNI: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.vxlan_vni); + break; + case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_FLAGS: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.outer_vxlan_gpe_flags); + break; + case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_RSVD0: + break; + case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_PROTO: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.outer_vxlan_gpe_next_protocol); + break; + case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_VNI: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.outer_vxlan_gpe_vni); + break; + case MLX5HWS_DEFINER_FNAME_GENEVE_OPT_LEN: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.geneve_opt_len); + break; + case MLX5HWS_DEFINER_FNAME_GENEVE_OAM: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.geneve_oam); + break; + case MLX5HWS_DEFINER_FNAME_GENEVE_PROTO: + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.geneve_protocol_type); + break; + case MLX5HWS_DEFINER_FNAME_GENEVE_VNI: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.geneve_vni); + break; + case MLX5HWS_DEFINER_FNAME_SOURCE_QP: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.source_sqn); + break; + case MLX5HWS_DEFINER_FNAME_SOURCE_GVMI: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.source_port); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.source_eswitch_owner_vhca_id); + break; + case MLX5HWS_DEFINER_FNAME_REG_0: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_0); + break; + case MLX5HWS_DEFINER_FNAME_REG_1: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_1); + break; + case MLX5HWS_DEFINER_FNAME_REG_2: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_2); + break; + case MLX5HWS_DEFINER_FNAME_REG_3: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_3); + break; + case MLX5HWS_DEFINER_FNAME_REG_4: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_4); + break; + case MLX5HWS_DEFINER_FNAME_REG_5: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_5); + break; + case MLX5HWS_DEFINER_FNAME_REG_7: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_7); + break; + case MLX5HWS_DEFINER_FNAME_REG_A: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_a); + break; + case MLX5HWS_DEFINER_FNAME_GRE_C: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_c_present); + break; + case MLX5HWS_DEFINER_FNAME_GRE_K: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_k_present); + break; + case MLX5HWS_DEFINER_FNAME_GRE_S: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_s_present); + break; + case MLX5HWS_DEFINER_FNAME_GRE_PROTOCOL: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_protocol); + break; + case MLX5HWS_DEFINER_FNAME_GRE_OPT_KEY: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_key.key); + break; + case MLX5HWS_DEFINER_FNAME_ICMP_DW1: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmp_header_data); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmp_type); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmp_code); + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters_3.icmpv6_header_data); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmpv6_type); + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmpv6_code); + break; + case MLX5HWS_DEFINER_FNAME_MPLS0_O: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.outer_first_mpls); + break; + case MLX5HWS_DEFINER_FNAME_MPLS0_I: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.inner_first_mpls); + break; + case MLX5HWS_DEFINER_FNAME_TNL_HDR_0: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_0); + break; + case MLX5HWS_DEFINER_FNAME_TNL_HDR_1: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_1); + break; + case MLX5HWS_DEFINER_FNAME_TNL_HDR_2: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_2); + break; + case MLX5HWS_DEFINER_FNAME_TNL_HDR_3: + HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_3); + break; + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER0_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER1_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER2_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER3_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER4_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER5_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER6_OK: + case MLX5HWS_DEFINER_FNAME_FLEX_PARSER7_OK: + /* assuming this is flex parser for geneve option */ + if ((fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER0_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 0) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER1_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 1) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER2_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 2) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER3_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 3) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER4_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 4) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER5_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 5) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER6_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 6) || + (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER7_OK && + ctx->caps->flex_parser_id_geneve_tlv_option_0 != 7)) { + mlx5hws_err(ctx, + "Complex params: unsupported field %s (%d), flex parser ID for geneve is %d\n", + mlx5hws_definer_fname_to_str(fname), fname, + caps->flex_parser_id_geneve_tlv_option_0); + break; + } + HWS_CLEAR_MATCH_PARAM(mask, + misc_parameters.geneve_tlv_option_0_exist); + break; + case MLX5HWS_DEFINER_FNAME_REG_6: + default: + mlx5hws_err(ctx, "Complex params: unsupported field %s (%d)\n", + mlx5hws_definer_fname_to_str(fname), fname); + break; + } +} + +static bool +hws_bwc_matcher_complex_params_comb_is_valid(struct mlx5hws_definer_fc *fc, + int fc_sz, + u32 combination_num) +{ + bool m1[MLX5HWS_DEFINER_FNAME_MAX] = {0}; + bool m2[MLX5HWS_DEFINER_FNAME_MAX] = {0}; + bool is_first_matcher; + int i; + + for (i = 0; i < fc_sz; i++) { + is_first_matcher = !(combination_num & BIT(i)); + if (is_first_matcher) + m1[fc[i].fname] = true; + else + m2[fc[i].fname] = true; + } + + /* Not all the fields can be split into separate matchers. + * Some should be together on the same matcher. + * For example, IPv6 parts - the whole IPv6 address should be on the + * same matcher in order for us to deduce if it's IPv6 or IPv4 address. + */ + if (m1[MLX5HWS_DEFINER_FNAME_IP_FRAG_O] && + (m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_O] || + m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_O] || + m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_O] || + m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_O])) + return false; + + if (m2[MLX5HWS_DEFINER_FNAME_IP_FRAG_O] && + (m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_O] || + m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_O] || + m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_O] || + m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_O])) + return false; + + if (m1[MLX5HWS_DEFINER_FNAME_IP_FRAG_I] && + (m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_I] || + m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_I] || + m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_I] || + m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_I])) + return false; + + if (m2[MLX5HWS_DEFINER_FNAME_IP_FRAG_I] && + (m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_I] || + m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_I] || + m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_I] || + m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_I])) + return false; + + /* Don't split outer IPv6 dest address. */ + if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_O]) && + (m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_O])) + return false; + + /* Don't split outer IPv6 source address. */ + if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_O] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_O]) && + (m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_O] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_O])) + return false; + + /* Don't split inner IPv6 dest address. */ + if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_I]) && + (m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_I])) + return false; + + /* Don't split inner IPv6 source address. */ + if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_I] || + m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_I]) && + (m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_I] || + m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_I])) + return false; + + /* Don't split GRE parameters. */ + if ((m1[MLX5HWS_DEFINER_FNAME_GRE_C] || + m1[MLX5HWS_DEFINER_FNAME_GRE_K] || + m1[MLX5HWS_DEFINER_FNAME_GRE_S] || + m1[MLX5HWS_DEFINER_FNAME_GRE_PROTOCOL]) && + (m2[MLX5HWS_DEFINER_FNAME_GRE_C] || + m2[MLX5HWS_DEFINER_FNAME_GRE_K] || + m2[MLX5HWS_DEFINER_FNAME_GRE_S] || + m2[MLX5HWS_DEFINER_FNAME_GRE_PROTOCOL])) + return false; + + /* Don't split TCP ack/seq numbers. */ + if ((m1[MLX5HWS_DEFINER_FNAME_TCP_ACK_NUM] || + m1[MLX5HWS_DEFINER_FNAME_TCP_SEQ_NUM]) && + (m2[MLX5HWS_DEFINER_FNAME_TCP_ACK_NUM] || + m2[MLX5HWS_DEFINER_FNAME_TCP_SEQ_NUM])) + return false; + + /* Don't split flex parser. */ + if ((m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_0] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_1] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_2] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_3] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_4] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_5] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_6] || + m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_7]) && + (m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_0] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_1] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_2] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_3] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_4] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_5] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_6] || + m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_7])) + return false; + + return true; +} + +static void +hws_bwc_matcher_complex_params_comb_create(struct mlx5hws_context *ctx, + struct mlx5hws_match_parameters *m, + struct mlx5hws_match_parameters *m1, + struct mlx5hws_match_parameters *m2, + struct mlx5hws_definer_fc *fc, + int fc_sz, + u32 combination_num) +{ + bool is_first_matcher; + int i; + + memcpy(m1->match_buf, m->match_buf, m->match_sz); + memcpy(m2->match_buf, m->match_buf, m->match_sz); + + for (i = 0; i < fc_sz; i++) { + is_first_matcher = !(combination_num & BIT(i)); + hws_bwc_matcher_complex_params_clear_fld(ctx, + fc[i].fname, + is_first_matcher ? + m2 : m1); + } + + MLX5_SET(fte_match_param, m2->match_buf, + misc_parameters_2.metadata_reg_c_6, -1); +} + +static void +hws_bwc_matcher_complex_params_destroy(struct mlx5hws_match_parameters *mask_1, + struct mlx5hws_match_parameters *mask_2) +{ + kfree(mask_1->match_buf); + kfree(mask_2->match_buf); +} + +static int +hws_bwc_matcher_complex_params_create(struct mlx5hws_context *ctx, + u8 match_criteria, + struct mlx5hws_match_parameters *mask, + struct mlx5hws_match_parameters *mask_1, + struct mlx5hws_match_parameters *mask_2) +{ + struct mlx5hws_definer_fc *fc; + u32 num_of_combinations; + int fc_sz = 0; + int res = 0; + u32 i; + + if (MLX5_GET(fte_match_param, mask->match_buf, + misc_parameters_2.metadata_reg_c_6)) { + mlx5hws_err(ctx, "Complex matcher: REG_C_6 matching is reserved\n"); + res = -EINVAL; + goto out; + } + + mask_1->match_buf = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), + GFP_KERNEL); + mask_2->match_buf = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), + GFP_KERNEL); + if (!mask_1->match_buf || !mask_2->match_buf) { + mlx5hws_err(ctx, "Complex matcher: failed to allocate match_param\n"); + res = -ENOMEM; + goto free_params; + } + + mask_1->match_sz = mask->match_sz; + mask_2->match_sz = mask->match_sz; + + fc = mlx5hws_definer_conv_match_params_to_compressed_fc(ctx, + match_criteria, + mask->match_buf, + &fc_sz); + if (!fc) { + res = -ENOMEM; + goto free_params; + } + + if (fc_sz >= sizeof(num_of_combinations) * BITS_PER_BYTE) { + mlx5hws_err(ctx, + "Complex matcher: too many match parameters (%d)\n", + fc_sz); + res = -EINVAL; + goto free_fc; + } + + /* We have list of all the match fields from the match parameter. + * Now try all the possibilities of splitting them into two match + * buffers and look for the supported combination. + */ + num_of_combinations = 1 << fc_sz; + + /* Start from combination at index 1 - we know that 0 is unsupported */ + for (i = 1; i < num_of_combinations; i++) { + if (!hws_bwc_matcher_complex_params_comb_is_valid(fc, fc_sz, i)) + continue; + + hws_bwc_matcher_complex_params_comb_create(ctx, + mask, mask_1, mask_2, + fc, fc_sz, i); + /* We now have two separate sets of match params. + * Check if each of them can be used in its own matcher. + */ + if (!mlx5hws_bwc_match_params_is_complex(ctx, + match_criteria, + mask_1) && + !mlx5hws_bwc_match_params_is_complex(ctx, + match_criteria, + mask_2)) + break; + } + + if (i == num_of_combinations) { + /* We've scanned all the combinations, but to no avail */ + mlx5hws_err(ctx, "Complex matcher: couldn't find match params combination\n"); + res = -EINVAL; + goto free_fc; + } + + kfree(fc); + return 0; + +free_fc: + kfree(fc); +free_params: + hws_bwc_matcher_complex_params_destroy(mask_1, mask_2); +out: + return res; +} + +static int +hws_bwc_isolated_table_create(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_table *table) +{ + struct mlx5hws_cmd_ft_modify_attr ft_attr = {0}; + struct mlx5hws_context *ctx = table->ctx; + struct mlx5hws_table_attr tbl_attr = {0}; + struct mlx5hws_table *isolated_tbl; + int ret = 0; + + tbl_attr.type = table->type; + tbl_attr.level = table->level; + + bwc_matcher->complex->isolated_tbl = + mlx5hws_table_create(ctx, &tbl_attr); + isolated_tbl = bwc_matcher->complex->isolated_tbl; + if (!isolated_tbl) + return -EINVAL; + + /* Set the default miss of the isolated table to + * point to the end anchor of the original matcher. + */ + mlx5hws_cmd_set_attr_connect_miss_tbl(ctx, + isolated_tbl->fw_ft_type, + isolated_tbl->type, + &ft_attr); + ft_attr.table_miss_id = bwc_matcher->matcher->end_ft_id; + + ret = mlx5hws_cmd_flow_table_modify(ctx->mdev, + &ft_attr, + isolated_tbl->ft_id); + if (ret) { + mlx5hws_err(ctx, "Failed setting isolated tbl default miss\n"); + goto destroy_tbl; + } + + return 0; + +destroy_tbl: + mlx5hws_table_destroy(isolated_tbl); + return ret; +} + +static void hws_bwc_isolated_table_destroy(struct mlx5hws_table *isolated_tbl) +{ + /* This table is isolated - no table is pointing to it, no need to + * disconnect it from anywhere, it won't affect any other table's miss. + */ + mlx5hws_table_destroy(isolated_tbl); +} + +static int +hws_bwc_isolated_matcher_create(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_table *table, + u8 match_criteria_enable, + struct mlx5hws_match_parameters *mask) +{ + struct mlx5hws_table *isolated_tbl = bwc_matcher->complex->isolated_tbl; + struct mlx5hws_bwc_matcher *isolated_bwc_matcher; + struct mlx5hws_context *ctx = table->ctx; + int ret; + + isolated_bwc_matcher = kzalloc(sizeof(*bwc_matcher), GFP_KERNEL); + if (!isolated_bwc_matcher) + return -ENOMEM; + + bwc_matcher->complex->isolated_bwc_matcher = isolated_bwc_matcher; + + /* Isolated BWC matcher needs access to the first BWC matcher */ + isolated_bwc_matcher->complex_first_bwc_matcher = bwc_matcher; + + /* Isolated matcher needs to match on REG_C_6, + * so make sure its criteria bit is on. + */ + match_criteria_enable |= MLX5HWS_DEFINER_MATCH_CRITERIA_MISC2; + + ret = mlx5hws_bwc_matcher_create_simple(isolated_bwc_matcher, + isolated_tbl, + 0, + match_criteria_enable, + mask, + NULL); + if (ret) { + mlx5hws_err(ctx, "Complex matcher: failed creating isolated BWC matcher\n"); + goto free_matcher; + } + + return 0; + +free_matcher: + kfree(bwc_matcher->complex->isolated_bwc_matcher); + return ret; +} + +static void +hws_bwc_isolated_matcher_destroy(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); + kfree(bwc_matcher); +} + +static int +hws_bwc_isolated_actions_create(struct mlx5hws_bwc_matcher *bwc_matcher, + struct mlx5hws_table *table) +{ + struct mlx5hws_table *isolated_tbl = bwc_matcher->complex->isolated_tbl; + u8 modify_hdr_action[MLX5_ST_SZ_BYTES(set_action_in)] = {0}; + struct mlx5hws_context *ctx = table->ctx; + struct mlx5hws_action_mh_pattern ptrn; + int ret = 0; + + /* Create action to jump to isolated table */ + + bwc_matcher->complex->action_go_to_tbl = + mlx5hws_action_create_dest_table(ctx, + isolated_tbl, + MLX5HWS_ACTION_FLAG_HWS_FDB); + if (!bwc_matcher->complex->action_go_to_tbl) { + mlx5hws_err(ctx, "Complex matcher: failed to create go-to-tbl action\n"); + return -EINVAL; + } + + /* Create modify header action to set REG_C_6 */ + + MLX5_SET(set_action_in, modify_hdr_action, + action_type, MLX5_MODIFICATION_TYPE_SET); + MLX5_SET(set_action_in, modify_hdr_action, + field, MLX5_MODI_META_REG_C_6); + MLX5_SET(set_action_in, modify_hdr_action, + length, 0); /* zero means length of 32 */ + MLX5_SET(set_action_in, modify_hdr_action, offset, 0); + MLX5_SET(set_action_in, modify_hdr_action, data, 0); + + ptrn.data = (void *)modify_hdr_action; + ptrn.sz = MLX5HWS_ACTION_DOUBLE_SIZE; + + bwc_matcher->complex->action_metadata = + mlx5hws_action_create_modify_header(ctx, 1, &ptrn, 0, + MLX5HWS_ACTION_FLAG_HWS_FDB); + if (!bwc_matcher->complex->action_metadata) { + ret = -EINVAL; + goto destroy_action_go_to_tbl; + } + + /* Create last action */ + + bwc_matcher->complex->action_last = + mlx5hws_action_create_last(ctx, MLX5HWS_ACTION_FLAG_HWS_FDB); + if (!bwc_matcher->complex->action_last) { + mlx5hws_err(ctx, "Complex matcher: failed to create last action\n"); + ret = -EINVAL; + goto destroy_action_metadata; + } + + return 0; + +destroy_action_metadata: + mlx5hws_action_destroy(bwc_matcher->complex->action_metadata); +destroy_action_go_to_tbl: + mlx5hws_action_destroy(bwc_matcher->complex->action_go_to_tbl); + return ret; +} + +static void +hws_bwc_isolated_actions_destroy(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + mlx5hws_action_destroy(bwc_matcher->complex->action_last); + mlx5hws_action_destroy(bwc_matcher->complex->action_metadata); + mlx5hws_action_destroy(bwc_matcher->complex->action_go_to_tbl); +} + int mlx5hws_bwc_matcher_create_complex(struct mlx5hws_bwc_matcher *bwc_matcher, struct mlx5hws_table *table, u32 priority, u8 match_criteria_enable, struct mlx5hws_match_parameters *mask) { - mlx5hws_err(table->ctx, "Complex matcher is not supported yet\n"); - return -EOPNOTSUPP; + enum mlx5hws_action_type complex_init_action_types[3]; + struct mlx5hws_bwc_matcher *isolated_bwc_matcher; + struct mlx5hws_match_parameters mask_1 = {0}; + struct mlx5hws_match_parameters mask_2 = {0}; + struct mlx5hws_context *ctx = table->ctx; + int ret; + + ret = hws_bwc_matcher_complex_params_create(table->ctx, + match_criteria_enable, + mask, &mask_1, &mask_2); + if (ret) + goto err; + + bwc_matcher->complex = + kzalloc(sizeof(*bwc_matcher->complex), GFP_KERNEL); + if (!bwc_matcher->complex) { + ret = -ENOMEM; + goto free_masks; + } + + ret = rhashtable_init(&bwc_matcher->complex->refcount_hash, + &hws_refcount_hash); + if (ret) { + mlx5hws_err(ctx, "Complex matcher: failed to initialize rhashtable\n"); + goto free_complex; + } + + mutex_init(&bwc_matcher->complex->hash_lock); + ida_init(&bwc_matcher->complex->metadata_ida); + + /* Create initial action template for the first matcher. + * Usually the initial AT is just dummy, but in case of complex + * matcher we know exactly which actions should it have. + */ + + complex_init_action_types[0] = MLX5HWS_ACTION_TYP_MODIFY_HDR; + complex_init_action_types[1] = MLX5HWS_ACTION_TYP_TBL; + complex_init_action_types[2] = MLX5HWS_ACTION_TYP_LAST; + + /* Create the first matcher */ + + ret = mlx5hws_bwc_matcher_create_simple(bwc_matcher, + table, + priority, + match_criteria_enable, + &mask_1, + complex_init_action_types); + if (ret) + goto destroy_ida; + + /* Create isolated table to hold the second isolated matcher */ + + ret = hws_bwc_isolated_table_create(bwc_matcher, table); + if (ret) { + mlx5hws_err(ctx, "Complex matcher: failed creating isolated table\n"); + goto destroy_first_matcher; + } + + /* Now create the second BWC matcher - the isolated one */ + + ret = hws_bwc_isolated_matcher_create(bwc_matcher, table, + match_criteria_enable, &mask_2); + if (ret) { + mlx5hws_err(ctx, "Complex matcher: failed creating isolated matcher\n"); + goto destroy_isolated_tbl; + } + + /* Create action for isolated matcher's rules */ + + ret = hws_bwc_isolated_actions_create(bwc_matcher, table); + if (ret) { + mlx5hws_err(ctx, "Complex matcher: failed creating isolated actions\n"); + goto destroy_isolated_matcher; + } + + hws_bwc_matcher_complex_params_destroy(&mask_1, &mask_2); + return 0; + +destroy_isolated_matcher: + isolated_bwc_matcher = bwc_matcher->complex->isolated_bwc_matcher; + hws_bwc_isolated_matcher_destroy(isolated_bwc_matcher); +destroy_isolated_tbl: + hws_bwc_isolated_table_destroy(bwc_matcher->complex->isolated_tbl); +destroy_first_matcher: + mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); +destroy_ida: + ida_destroy(&bwc_matcher->complex->metadata_ida); + mutex_destroy(&bwc_matcher->complex->hash_lock); + rhashtable_destroy(&bwc_matcher->complex->refcount_hash); +free_complex: + kfree(bwc_matcher->complex); + bwc_matcher->complex = NULL; +free_masks: + hws_bwc_matcher_complex_params_destroy(&mask_1, &mask_2); +err: + return ret; } void mlx5hws_bwc_matcher_destroy_complex(struct mlx5hws_bwc_matcher *bwc_matcher) { - /* nothing to do here */ + struct mlx5hws_bwc_matcher *isolated_bwc_matcher = + bwc_matcher->complex->isolated_bwc_matcher; + + hws_bwc_isolated_actions_destroy(bwc_matcher); + hws_bwc_isolated_matcher_destroy(isolated_bwc_matcher); + hws_bwc_isolated_table_destroy(bwc_matcher->complex->isolated_tbl); + mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); + ida_destroy(&bwc_matcher->complex->metadata_ida); + mutex_destroy(&bwc_matcher->complex->hash_lock); + rhashtable_destroy(&bwc_matcher->complex->refcount_hash); + kfree(bwc_matcher->complex); + bwc_matcher->complex = NULL; +} + +static void +hws_bwc_matcher_complex_hash_lock(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + mutex_lock(&bwc_matcher->complex->hash_lock); +} + +static void +hws_bwc_matcher_complex_hash_unlock(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + mutex_unlock(&bwc_matcher->complex->hash_lock); +} + +static int +hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, + struct mlx5hws_match_parameters *params) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_bwc_complex_rule_hash_node *node, *old_node; + struct rhashtable *refcount_hash; + int ret, i; + + bwc_rule->complex_hash_node = NULL; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (unlikely(!node)) + return -ENOMEM; + + ret = ida_alloc(&bwc_matcher->complex->metadata_ida, GFP_KERNEL); + if (ret < 0) + goto err_free_node; + node->tag = ret; + + refcount_set(&node->refcount, 1); + + /* Clear match buffer - turn off all the unrelated fields + * in accordance with the match params mask for the first + * matcher out of the two parts of the complex matcher. + * The resulting mask is the key for the hash. + */ + for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++) + node->match_buf[i] = params->match_buf[i] & + bwc_matcher->mt->match_param[i]; + + refcount_hash = &bwc_matcher->complex->refcount_hash; + old_node = rhashtable_lookup_get_insert_fast(refcount_hash, + &node->hash_node, + hws_refcount_hash); + if (IS_ERR(old_node)) { + ret = PTR_ERR(old_node); + goto err_free_ida; + } + + if (old_node) { + /* Rule with the same tag already exists - update refcount */ + refcount_inc(&old_node->refcount); + /* Let the new rule use the same tag as the existing rule. + * Note that we don't have any indication for the rule creation + * process that a rule with similar matching params already + * exists - no harm done when this rule is be overwritten by + * the same STE. + * There's some performance advantage in skipping such cases, + * so this is left for future optimizations. + */ + ida_free(&bwc_matcher->complex->metadata_ida, node->tag); + kfree(node); + node = old_node; + } + + bwc_rule->complex_hash_node = node; + return 0; + +err_free_ida: + ida_free(&bwc_matcher->complex->metadata_ida, node->tag); +err_free_node: + kfree(node); + return ret; +} + +static void +hws_bwc_rule_complex_hash_node_put(struct mlx5hws_bwc_rule *bwc_rule, + bool *is_last_rule) +{ + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_bwc_complex_rule_hash_node *node; + + if (is_last_rule) + *is_last_rule = false; + + node = bwc_rule->complex_hash_node; + if (refcount_dec_and_test(&node->refcount)) { + rhashtable_remove_fast(&bwc_matcher->complex->refcount_hash, + &node->hash_node, + hws_refcount_hash); + ida_free(&bwc_matcher->complex->metadata_ida, node->tag); + kfree(node); + if (is_last_rule) + *is_last_rule = true; + } + + bwc_rule->complex_hash_node = NULL; } int mlx5hws_bwc_rule_create_complex(struct mlx5hws_bwc_rule *bwc_rule, @@ -68,19 +1159,286 @@ int mlx5hws_bwc_rule_create_complex(struct mlx5hws_bwc_rule *bwc_rule, struct mlx5hws_rule_action rule_actions[], u16 bwc_queue_idx) { - mlx5hws_err(bwc_rule->bwc_matcher->matcher->tbl->ctx, - "Complex rule is not supported yet\n"); - return -EOPNOTSUPP; + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + u8 modify_hdr_action[MLX5_ST_SZ_BYTES(set_action_in)] = {0}; + struct mlx5hws_rule_action rule_actions_1[3] = {0}; + struct mlx5hws_bwc_matcher *isolated_bwc_matcher; + u32 *match_buf_2; + u32 metadata_val; + int ret = 0; + + isolated_bwc_matcher = bwc_matcher->complex->isolated_bwc_matcher; + bwc_rule->isolated_bwc_rule = + mlx5hws_bwc_rule_alloc(isolated_bwc_matcher); + if (unlikely(!bwc_rule->isolated_bwc_rule)) + return -ENOMEM; + + hws_bwc_matcher_complex_hash_lock(bwc_matcher); + + /* Get a new hash node for this complex rule. + * If this is a unique set of match params for the first matcher, + * we will get a new hash node with newly allocated IDA. + * Otherwise we will get an existing node with IDA and updated refcount. + */ + ret = hws_bwc_rule_complex_hash_node_get(bwc_rule, params); + if (unlikely(ret)) { + mlx5hws_err(ctx, "Complex rule: failed getting RHT node for this rule\n"); + goto free_isolated_rule; + } + + /* No need to clear match buffer's fields in accordance to what + * will actually be matched on first and second matchers. + * Both matchers were created with the appropriate masks + * and each of them holds the appropriate field copy array, + * so rule creation will use only the fields that will be copied + * in accordance with setters in field copy array. + * We do, however, need to temporary allocate match buffer + * for the second (isolated) rule in order to not modify + * user's match params buffer. + */ + + match_buf_2 = kmemdup(params->match_buf, + MLX5_ST_SZ_BYTES(fte_match_param), + GFP_KERNEL); + if (unlikely(!match_buf_2)) { + mlx5hws_err(ctx, "Complex rule: failed allocating match_buf\n"); + ret = -ENOMEM; + goto hash_node_put; + } + + /* On 2nd matcher, use unique 32-bit ID as a matching tag */ + metadata_val = bwc_rule->complex_hash_node->tag; + MLX5_SET(fte_match_param, match_buf_2, + misc_parameters_2.metadata_reg_c_6, metadata_val); + + /* Isolated rule's rule_actions contain all the original actions */ + ret = mlx5hws_bwc_rule_create_simple(bwc_rule->isolated_bwc_rule, + match_buf_2, + rule_actions, + flow_source, + bwc_queue_idx); + kfree(match_buf_2); + if (unlikely(ret)) { + mlx5hws_err(ctx, + "Complex rule: failed creating isolated BWC rule (%d)\n", + ret); + goto hash_node_put; + } + + /* First rule's rule_actions contain setting metadata and + * jump to isolated table that contains the second matcher. + * Set metadata value to a unique value for this rule. + */ + + MLX5_SET(set_action_in, modify_hdr_action, + action_type, MLX5_MODIFICATION_TYPE_SET); + MLX5_SET(set_action_in, modify_hdr_action, + field, MLX5_MODI_META_REG_C_6); + MLX5_SET(set_action_in, modify_hdr_action, + length, 0); /* zero means length of 32 */ + MLX5_SET(set_action_in, modify_hdr_action, + offset, 0); + MLX5_SET(set_action_in, modify_hdr_action, + data, metadata_val); + + rule_actions_1[0].action = bwc_matcher->complex->action_metadata; + rule_actions_1[0].modify_header.offset = 0; + rule_actions_1[0].modify_header.data = modify_hdr_action; + + rule_actions_1[1].action = bwc_matcher->complex->action_go_to_tbl; + rule_actions_1[2].action = bwc_matcher->complex->action_last; + + ret = mlx5hws_bwc_rule_create_simple(bwc_rule, + params->match_buf, + rule_actions_1, + flow_source, + bwc_queue_idx); + + if (unlikely(ret)) { + mlx5hws_err(ctx, + "Complex rule: failed creating first BWC rule (%d)\n", + ret); + goto destroy_isolated_rule; + } + + hws_bwc_matcher_complex_hash_unlock(bwc_matcher); + + return 0; + +destroy_isolated_rule: + mlx5hws_bwc_rule_destroy_simple(bwc_rule->isolated_bwc_rule); +hash_node_put: + hws_bwc_rule_complex_hash_node_put(bwc_rule, NULL); +free_isolated_rule: + hws_bwc_matcher_complex_hash_unlock(bwc_matcher); + mlx5hws_bwc_rule_free(bwc_rule->isolated_bwc_rule); + return ret; } int mlx5hws_bwc_rule_destroy_complex(struct mlx5hws_bwc_rule *bwc_rule) { - return 0; + struct mlx5hws_context *ctx = bwc_rule->bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_bwc_rule *isolated_bwc_rule; + int ret_isolated, ret; + bool is_last_rule; + + hws_bwc_matcher_complex_hash_lock(bwc_rule->bwc_matcher); + + hws_bwc_rule_complex_hash_node_put(bwc_rule, &is_last_rule); + bwc_rule->rule->skip_delete = !is_last_rule; + + ret = mlx5hws_bwc_rule_destroy_simple(bwc_rule); + if (unlikely(ret)) + mlx5hws_err(ctx, "BWC complex rule: failed destroying first rule\n"); + + isolated_bwc_rule = bwc_rule->isolated_bwc_rule; + ret_isolated = mlx5hws_bwc_rule_destroy_simple(isolated_bwc_rule); + if (unlikely(ret_isolated)) + mlx5hws_err(ctx, "BWC complex rule: failed destroying second (isolated) rule\n"); + + hws_bwc_matcher_complex_hash_unlock(bwc_rule->bwc_matcher); + + mlx5hws_bwc_rule_free(isolated_bwc_rule); + + return ret || ret_isolated; } -int mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) +static void +hws_bwc_matcher_clear_hash_rtcs(struct mlx5hws_bwc_matcher *bwc_matcher) { - mlx5hws_err(bwc_matcher->matcher->tbl->ctx, - "Moving complex rule is not supported yet\n"); - return -EOPNOTSUPP; + struct mlx5hws_bwc_complex_rule_hash_node *node; + struct rhashtable_iter iter; + + rhashtable_walk_enter(&bwc_matcher->complex->refcount_hash, &iter); + rhashtable_walk_start(&iter); + + while ((node = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(node)) + continue; + node->rtc_valid = false; + } + + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); +} + +int +mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_matcher *matcher = bwc_matcher->matcher; + u16 bwc_queues = mlx5hws_bwc_queues(ctx); + struct mlx5hws_bwc_rule *tmp_bwc_rule; + struct mlx5hws_rule_attr rule_attr; + struct mlx5hws_table *isolated_tbl; + int move_error = 0, poll_error = 0; + struct mlx5hws_rule *tmp_rule; + struct list_head *rules_list; + u32 expected_completions = 1; + u32 end_ft_id; + int i, ret; + + /* We are rehashing the matcher that is the first part of the complex + * matcher. Need to update the isolated matcher to point to the end_ft + * of this new matcher. This needs to be done before moving any rules + * to prevent possible steering loops. + */ + isolated_tbl = bwc_matcher->complex->isolated_tbl; + end_ft_id = bwc_matcher->matcher->resize_dst->end_ft_id; + ret = mlx5hws_matcher_update_end_ft_isolated(isolated_tbl, end_ft_id); + if (ret) { + mlx5hws_err(ctx, + "Failed updating end_ft of isolated matcher (%d)\n", + ret); + return ret; + } + + hws_bwc_matcher_clear_hash_rtcs(bwc_matcher); + + mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr); + + for (i = 0; i < bwc_queues; i++) { + rules_list = &bwc_matcher->rules[i]; + if (list_empty(rules_list)) + continue; + + rule_attr.queue_id = mlx5hws_bwc_get_queue_id(ctx, i); + + list_for_each_entry(tmp_bwc_rule, rules_list, list_node) { + /* Check if a rule with similar tag has already + * been moved. + */ + if (tmp_bwc_rule->complex_hash_node->rtc_valid) { + /* This rule is a duplicate of rule with similar + * tag that has already been moved earlier. + * Just update this rule's RTCs. + */ + tmp_bwc_rule->rule->rtc_0 = + tmp_bwc_rule->complex_hash_node->rtc_0; + tmp_bwc_rule->rule->rtc_1 = + tmp_bwc_rule->complex_hash_node->rtc_1; + tmp_bwc_rule->rule->matcher = + tmp_bwc_rule->rule->matcher->resize_dst; + continue; + } + + /* First time we're moving rule with this tag. + * Move it for real. + */ + tmp_rule = tmp_bwc_rule->rule; + tmp_rule->skip_delete = false; + ret = mlx5hws_matcher_resize_rule_move(matcher, + tmp_rule, + &rule_attr); + if (unlikely(ret)) { + if (!move_error) { + mlx5hws_err(ctx, + "Moving complex BWC rule: move failed (%d), attempting to move rest of the rules\n", + ret); + move_error = ret; + } + /* Rule wasn't queued, no need to poll */ + continue; + } + + expected_completions = 1; + ret = mlx5hws_bwc_queue_poll(ctx, + rule_attr.queue_id, + &expected_completions, + true); + if (unlikely(ret)) { + if (ret == -ETIMEDOUT) { + mlx5hws_err(ctx, + "Moving complex BWC rule: timeout polling for completions (%d), aborting rehash\n", + ret); + return ret; + } + if (!poll_error) { + mlx5hws_err(ctx, + "Moving complex BWC rule: polling for completions failed (%d), attempting to move rest of the rules\n", + ret); + poll_error = ret; + } + } + + /* Done moving the rule to the new matcher, + * now update RTCs for all the duplicated rules. + */ + tmp_bwc_rule->complex_hash_node->rtc_0 = + tmp_bwc_rule->rule->rtc_0; + tmp_bwc_rule->complex_hash_node->rtc_1 = + tmp_bwc_rule->rule->rtc_1; + + tmp_bwc_rule->complex_hash_node->rtc_valid = true; + } + } + + /* Return the first error that happened */ + if (unlikely(move_error)) + return move_error; + if (unlikely(poll_error)) + return poll_error; + + return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h index 340f0688e394..a6887c7e39d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h @@ -4,6 +4,27 @@ #ifndef HWS_BWC_COMPLEX_H_ #define HWS_BWC_COMPLEX_H_ +struct mlx5hws_bwc_complex_rule_hash_node { + u32 match_buf[MLX5_ST_SZ_DW_MATCH_PARAM]; + u32 tag; + refcount_t refcount; + bool rtc_valid; + u32 rtc_0; + u32 rtc_1; + struct rhash_head hash_node; +}; + +struct mlx5hws_bwc_matcher_complex_data { + struct mlx5hws_table *isolated_tbl; + struct mlx5hws_bwc_matcher *isolated_bwc_matcher; + struct mlx5hws_action *action_metadata; + struct mlx5hws_action *action_go_to_tbl; + struct mlx5hws_action *action_last; + struct rhashtable refcount_hash; + struct mutex hash_lock; /* Protect the refcount rhashtable */ + struct ida metadata_ida; +}; + bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, u8 match_criteria_enable, struct mlx5hws_match_parameters *mask); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c index c00c138c3366..0bdcab2e5cf3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c @@ -55,6 +55,7 @@ int mlx5hws_cmd_flow_table_create(struct mlx5_core_dev *mdev, MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); MLX5_SET(create_flow_table_in, in, table_type, ft_attr->type); + MLX5_SET(create_flow_table_in, in, uid, ft_attr->uid); ft_ctx = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context); MLX5_SET(flow_table_context, ft_ctx, level, ft_attr->level); @@ -130,12 +131,6 @@ int mlx5hws_cmd_flow_table_destroy(struct mlx5_core_dev *mdev, return mlx5_cmd_exec_in(mdev, destroy_flow_table, in); } -void mlx5hws_cmd_alias_flow_table_destroy(struct mlx5_core_dev *mdev, - u32 table_id) -{ - hws_cmd_general_obj_destroy(mdev, MLX5_OBJ_TYPE_FT_ALIAS, table_id); -} - static int hws_cmd_flow_group_create(struct mlx5_core_dev *mdev, struct mlx5hws_cmd_fg_attr *fg_attr, u32 *group_id) @@ -257,6 +252,12 @@ int mlx5hws_cmd_set_fte(struct mlx5_core_dev *mdev, dest->ext_reformat_id); } break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER: + MLX5_SET(dest_format, in_dests, + destination_type, ifc_dest_type); + MLX5_SET(dest_format, in_dests, destination_id, + dest->destination_id); + break; default: ret = -EOPNOTSUPP; goto out; @@ -359,7 +360,7 @@ void mlx5hws_cmd_set_attr_connect_miss_tbl(struct mlx5hws_context *ctx, ft_attr->type = fw_ft_type; ft_attr->table_miss_action = MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION_GOTO_TBL; - default_miss_tbl = ctx->common_res[type].default_miss->ft_id; + default_miss_tbl = ctx->common_res.default_miss->ft_id; if (!default_miss_tbl) { pr_warn("HWS: no flow table ID for default miss\n"); return; @@ -406,7 +407,6 @@ int mlx5hws_cmd_rtc_create(struct mlx5_core_dev *mdev, MLX5_SET(rtc, attr, match_definer_1, rtc_attr->match_definer_1); MLX5_SET(rtc, attr, stc_id, rtc_attr->stc_base); MLX5_SET(rtc, attr, ste_table_base_id, rtc_attr->ste_base); - MLX5_SET(rtc, attr, ste_table_offset, rtc_attr->ste_offset); MLX5_SET(rtc, attr, miss_flow_table_id, rtc_attr->miss_ft_id); MLX5_SET(rtc, attr, reparse_mode, rtc_attr->reparse_mode); @@ -622,12 +622,12 @@ int mlx5hws_cmd_arg_create(struct mlx5_core_dev *mdev, u32 pd, u32 *arg_id) { + u32 in[MLX5_ST_SZ_DW(create_modify_header_arg_in)] = {0}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; - u32 in[MLX5_ST_SZ_DW(create_arg_in)] = {0}; void *attr; int ret; - attr = MLX5_ADDR_OF(create_arg_in, in, hdr); + attr = MLX5_ADDR_OF(create_modify_header_arg_in, in, hdr); MLX5_SET(general_obj_in_cmd_hdr, attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, @@ -635,8 +635,8 @@ int mlx5hws_cmd_arg_create(struct mlx5_core_dev *mdev, MLX5_SET(general_obj_in_cmd_hdr, attr, op_param.create.log_obj_range, log_obj_range); - attr = MLX5_ADDR_OF(create_arg_in, in, arg); - MLX5_SET(arg, attr, access_pd, pd); + attr = MLX5_ADDR_OF(create_modify_header_arg_in, in, arg); + MLX5_SET(modify_header_arg, attr, access_pd, pd); ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (ret) { @@ -812,7 +812,7 @@ int mlx5hws_cmd_packet_reformat_create(struct mlx5_core_dev *mdev, struct mlx5hws_cmd_packet_reformat_create_attr *attr, u32 *reformat_id) { - u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_out)] = {0}; + u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {0}; size_t insz, cmd_data_sz, cmd_total_sz; void *prctx; void *pdata; @@ -845,7 +845,7 @@ int mlx5hws_cmd_packet_reformat_create(struct mlx5_core_dev *mdev, goto out; } - *reformat_id = MLX5_GET(alloc_packet_reformat_out, out, packet_reformat_id); + *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id); out: kfree(in); return ret; @@ -854,13 +854,13 @@ out: int mlx5hws_cmd_packet_reformat_destroy(struct mlx5_core_dev *mdev, u32 reformat_id) { - u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {0}; int ret; - MLX5_SET(dealloc_packet_reformat_in, in, opcode, + MLX5_SET(dealloc_packet_reformat_context_in, in, opcode, MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); - MLX5_SET(dealloc_packet_reformat_in, in, + MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id, reformat_id); ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); @@ -889,73 +889,6 @@ int mlx5hws_cmd_sq_modify_rdy(struct mlx5_core_dev *mdev, u32 sqn) return ret; } -int mlx5hws_cmd_allow_other_vhca_access(struct mlx5_core_dev *mdev, - struct mlx5hws_cmd_allow_other_vhca_access_attr *attr) -{ - u32 out[MLX5_ST_SZ_DW(allow_other_vhca_access_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(allow_other_vhca_access_in)] = {0}; - void *key; - int ret; - - MLX5_SET(allow_other_vhca_access_in, - in, opcode, MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS); - MLX5_SET(allow_other_vhca_access_in, - in, object_type_to_be_accessed, attr->obj_type); - MLX5_SET(allow_other_vhca_access_in, - in, object_id_to_be_accessed, attr->obj_id); - - key = MLX5_ADDR_OF(allow_other_vhca_access_in, in, access_key); - memcpy(key, attr->access_key, sizeof(attr->access_key)); - - ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); - if (ret) - mlx5_core_err(mdev, "Failed to execute ALLOW_OTHER_VHCA_ACCESS command\n"); - - return ret; -} - -int mlx5hws_cmd_alias_obj_create(struct mlx5_core_dev *mdev, - struct mlx5hws_cmd_alias_obj_create_attr *alias_attr, - u32 *obj_id) -{ - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; - u32 in[MLX5_ST_SZ_DW(create_alias_obj_in)] = {0}; - void *attr; - void *key; - int ret; - - attr = MLX5_ADDR_OF(create_alias_obj_in, in, hdr); - MLX5_SET(general_obj_in_cmd_hdr, - attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, - attr, obj_type, alias_attr->obj_type); - MLX5_SET(general_obj_in_cmd_hdr, attr, op_param.create.alias_object, 1); - - attr = MLX5_ADDR_OF(create_alias_obj_in, in, alias_ctx); - MLX5_SET(alias_context, attr, vhca_id_to_be_accessed, alias_attr->vhca_id); - MLX5_SET(alias_context, attr, object_id_to_be_accessed, alias_attr->obj_id); - - key = MLX5_ADDR_OF(alias_context, attr, access_key); - memcpy(key, alias_attr->access_key, sizeof(alias_attr->access_key)); - - ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); - if (ret) { - mlx5_core_err(mdev, "Failed to create ALIAS OBJ\n"); - goto out; - } - - *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); -out: - return ret; -} - -int mlx5hws_cmd_alias_obj_destroy(struct mlx5_core_dev *mdev, - u16 obj_type, - u32 obj_id) -{ - return hws_cmd_general_obj_destroy(mdev, obj_type, obj_id); -} - int mlx5hws_cmd_generate_wqe(struct mlx5_core_dev *mdev, struct mlx5hws_cmd_generate_wqe_attr *attr, struct mlx5_cqe64 *ret_cqe) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h index 434f62b0904e..122ccc671628 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h @@ -36,6 +36,7 @@ struct mlx5hws_cmd_set_fte_attr { struct mlx5hws_cmd_ft_create_attr { u8 type; u8 level; + u16 uid; bool rtc_valid; bool decap_en; bool reformat_en; @@ -63,14 +64,13 @@ struct mlx5hws_cmd_forward_tbl { u8 type; u32 ft_id; u32 fg_id; - u32 refcount; + u32 refcount; /* protected by context ctrl lock */ }; struct mlx5hws_cmd_rtc_create_attr { u32 pd; u32 stc_base; u32 ste_base; - u32 ste_offset; u32 miss_ft_id; bool fw_gen_wqe; u8 update_index_mode; @@ -258,9 +258,6 @@ int mlx5hws_cmd_flow_table_query(struct mlx5_core_dev *mdev, int mlx5hws_cmd_flow_table_destroy(struct mlx5_core_dev *mdev, u8 fw_ft_type, u32 table_id); -void mlx5hws_cmd_alias_flow_table_destroy(struct mlx5_core_dev *mdev, - u32 table_id); - int mlx5hws_cmd_rtc_create(struct mlx5_core_dev *mdev, struct mlx5hws_cmd_rtc_create_attr *rtc_attr, u32 *rtc_id); @@ -334,14 +331,6 @@ mlx5hws_cmd_forward_tbl_create(struct mlx5_core_dev *mdev, void mlx5hws_cmd_forward_tbl_destroy(struct mlx5_core_dev *mdev, struct mlx5hws_cmd_forward_tbl *tbl); -int mlx5hws_cmd_alias_obj_create(struct mlx5_core_dev *mdev, - struct mlx5hws_cmd_alias_obj_create_attr *alias_attr, - u32 *obj_id); - -int mlx5hws_cmd_alias_obj_destroy(struct mlx5_core_dev *mdev, - u16 obj_type, - u32 obj_id); - int mlx5hws_cmd_sq_modify_rdy(struct mlx5_core_dev *mdev, u32 sqn); int mlx5hws_cmd_query_caps(struct mlx5_core_dev *mdev, @@ -352,9 +341,6 @@ void mlx5hws_cmd_set_attr_connect_miss_tbl(struct mlx5hws_context *ctx, enum mlx5hws_table_type type, struct mlx5hws_cmd_ft_modify_attr *ft_attr); -int mlx5hws_cmd_allow_other_vhca_access(struct mlx5_core_dev *mdev, - struct mlx5hws_cmd_allow_other_vhca_access_attr *attr); - int mlx5hws_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_function, u16 vport_number, u16 *gvmi); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c index fd48b05e91e0..428dae869706 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c @@ -23,7 +23,6 @@ static int hws_context_pools_init(struct mlx5hws_context *ctx) struct mlx5hws_pool_attr pool_attr = {0}; u8 max_log_sz; int ret; - int i; ret = mlx5hws_pat_init_pattern_cache(&ctx->pattern_cache); if (ret) @@ -35,27 +34,20 @@ static int hws_context_pools_init(struct mlx5hws_context *ctx) /* Create an STC pool per FT type */ pool_attr.pool_type = MLX5HWS_POOL_TYPE_STC; - pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STC_POOL; max_log_sz = min(MLX5HWS_POOL_STC_LOG_SZ, ctx->caps->stc_alloc_log_max); pool_attr.alloc_log_sz = max(max_log_sz, ctx->caps->stc_alloc_log_gran); - for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++) { - pool_attr.table_type = i; - ctx->stc_pool[i] = mlx5hws_pool_create(ctx, &pool_attr); - if (!ctx->stc_pool[i]) { - mlx5hws_err(ctx, "Failed to allocate STC pool [%d]", i); - ret = -ENOMEM; - goto free_stc_pools; - } + pool_attr.table_type = MLX5HWS_TABLE_TYPE_FDB; + ctx->stc_pool = mlx5hws_pool_create(ctx, &pool_attr); + if (!ctx->stc_pool) { + mlx5hws_err(ctx, "Failed to allocate STC pool\n"); + ret = -ENOMEM; + goto uninit_cache; } return 0; -free_stc_pools: - for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++) - if (ctx->stc_pool[i]) - mlx5hws_pool_destroy(ctx->stc_pool[i]); - +uninit_cache: mlx5hws_definer_uninit_cache(ctx->definer_cache); uninit_pat_cache: mlx5hws_pat_uninit_pattern_cache(ctx->pattern_cache); @@ -64,12 +56,8 @@ uninit_pat_cache: static void hws_context_pools_uninit(struct mlx5hws_context *ctx) { - int i; - - for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++) { - if (ctx->stc_pool[i]) - mlx5hws_pool_destroy(ctx->stc_pool[i]); - } + if (ctx->stc_pool) + mlx5hws_pool_destroy(ctx->stc_pool); mlx5hws_definer_uninit_cache(ctx->definer_cache); mlx5hws_pat_uninit_pattern_cache(ctx->pattern_cache); @@ -161,17 +149,25 @@ static int hws_context_init_hws(struct mlx5hws_context *ctx, if (ret) goto uninit_pd; - if (attr->bwc) - ctx->flags |= MLX5HWS_CONTEXT_FLAG_BWC_SUPPORT; + /* Context has support for backward compatible API, + * and does not have support for native HWS API. + */ + ctx->flags |= MLX5HWS_CONTEXT_FLAG_BWC_SUPPORT; ret = mlx5hws_send_queues_open(ctx, attr->queues, attr->queue_size); if (ret) goto pools_uninit; + ret = mlx5hws_action_ste_pool_init(ctx); + if (ret) + goto close_queues; + INIT_LIST_HEAD(&ctx->tbl_list); return 0; +close_queues: + mlx5hws_send_queues_close(ctx); pools_uninit: hws_context_pools_uninit(ctx); uninit_pd: @@ -184,6 +180,7 @@ static void hws_context_uninit_hws(struct mlx5hws_context *ctx) if (!(ctx->flags & MLX5HWS_CONTEXT_FLAG_HWS_SUPPORT)) return; + mlx5hws_action_ste_pool_uninit(ctx); mlx5hws_send_queues_close(ctx); hws_context_pools_uninit(ctx); hws_context_uninit_pd(ctx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h index 47f5cc8de73f..3f8938c73dc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h @@ -8,6 +8,7 @@ enum mlx5hws_context_flags { MLX5HWS_CONTEXT_FLAG_HWS_SUPPORT = 1 << 0, MLX5HWS_CONTEXT_FLAG_PRIVATE_PD = 1 << 1, MLX5HWS_CONTEXT_FLAG_BWC_SUPPORT = 1 << 2, + MLX5HWS_CONTEXT_FLAG_NATIVE_SUPPORT = 1 << 3, }; enum mlx5hws_context_shared_stc_type { @@ -37,8 +38,10 @@ struct mlx5hws_context { struct mlx5_core_dev *mdev; struct mlx5hws_cmd_query_caps *caps; u32 pd_num; - struct mlx5hws_pool *stc_pool[MLX5HWS_TABLE_TYPE_MAX]; - struct mlx5hws_context_common_res common_res[MLX5HWS_TABLE_TYPE_MAX]; + struct mlx5hws_pool *stc_pool; + struct mlx5hws_action_ste_pool *action_ste_pool; /* One per queue */ + struct delayed_work action_ste_cleanup; + struct mlx5hws_context_common_res common_res; struct mlx5hws_pattern_cache *pattern_cache; struct mlx5hws_definer_cache *definer_cache; struct mutex ctrl_lock; /* control lock to protect the whole context */ @@ -58,6 +61,11 @@ static inline bool mlx5hws_context_bwc_supported(struct mlx5hws_context *ctx) return ctx->flags & MLX5HWS_CONTEXT_FLAG_BWC_SUPPORT; } +static inline bool mlx5hws_context_native_supported(struct mlx5hws_context *ctx) +{ + return ctx->flags & MLX5HWS_CONTEXT_FLAG_NATIVE_SUPPORT; +} + bool mlx5hws_context_cap_dynamic_reparse(struct mlx5hws_context *ctx); u8 mlx5hws_context_get_reparse_mode(struct mlx5hws_context *ctx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c index 5b200b4bc1a8..2ec8cb10139a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c @@ -99,17 +99,19 @@ hws_debug_dump_matcher_attr(struct seq_file *f, struct mlx5hws_matcher *matcher) { struct mlx5hws_matcher_attr *attr = &matcher->attr; - seq_printf(f, "%d,0x%llx,%d,%d,%d,%d,%d,%d,%d,%d\n", + seq_printf(f, "%d,0x%llx,%d,%d,%d,%d,%d,%d,%d,%d,-1,-1,%d,%d\n", MLX5HWS_DEBUG_RES_TYPE_MATCHER_ATTR, HWS_PTR_TO_ID(matcher), attr->priority, attr->mode, - attr->table.sz_row_log, - attr->table.sz_col_log, + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_RX].table.sz_row_log, + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_RX].table.sz_col_log, attr->optimize_using_rule_idx, attr->optimize_flow_src, attr->insert_mode, - attr->distribute_mode); + attr->distribute_mode, + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX].table.sz_row_log, + attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX].table.sz_col_log); return 0; } @@ -118,8 +120,6 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma { enum mlx5hws_table_type tbl_type = matcher->tbl->type; struct mlx5hws_cmd_ft_query_attr ft_attr = {0}; - struct mlx5hws_pool_chunk *ste; - struct mlx5hws_pool *ste_pool; u64 icm_addr_0 = 0; u64 icm_addr_1 = 0; u32 ste_0_id = -1; @@ -134,13 +134,9 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma matcher->end_ft_id, matcher->col_matcher ? HWS_PTR_TO_ID(matcher->col_matcher) : 0); - ste = &matcher->match_ste.ste; - ste_pool = matcher->match_ste.pool; - if (ste_pool) { - ste_0_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); - if (tbl_type == MLX5HWS_TABLE_TYPE_FDB) - ste_1_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); - } + ste_0_id = matcher->match_ste.ste_0_base; + if (tbl_type == MLX5HWS_TABLE_TYPE_FDB) + ste_1_id = matcher->match_ste.ste_1_base; seq_printf(f, ",%d,%d,%d,%d", matcher->match_ste.rtc_0_id, @@ -148,19 +144,6 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma matcher->match_ste.rtc_1_id, (int)ste_1_id); - ste = &matcher->action_ste[0].ste; - ste_pool = matcher->action_ste[0].pool; - if (ste_pool) { - ste_0_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); - if (tbl_type == MLX5HWS_TABLE_TYPE_FDB) - ste_1_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); - else - ste_1_id = -1; - } else { - ste_0_id = -1; - ste_1_id = -1; - } - ft_attr.type = matcher->tbl->fw_ft_type; ret = mlx5hws_cmd_flow_table_query(matcher->tbl->ctx->mdev, matcher->end_ft_id, @@ -170,12 +153,7 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma if (ret) return ret; - seq_printf(f, ",%d,%d,%d,%d,%d,0x%llx,0x%llx\n", - matcher->action_ste[0].rtc_0_id, - (int)ste_0_id, - matcher->action_ste[0].rtc_1_id, - (int)ste_1_id, - 0, + seq_printf(f, ",-1,-1,-1,-1,0,0x%llx,0x%llx\n", mlx5hws_debug_icm_to_idx(icm_addr_0), mlx5hws_debug_icm_to_idx(icm_addr_1)); @@ -368,9 +346,10 @@ static int hws_debug_dump_context_info(struct seq_file *f, struct mlx5hws_contex static int hws_debug_dump_context_stc_resource(struct seq_file *f, struct mlx5hws_context *ctx, - u32 tbl_type, struct mlx5hws_pool_resource *resource) { + u32 tbl_type = MLX5HWS_TABLE_TYPE_BASE + MLX5HWS_TABLE_TYPE_FDB; + seq_printf(f, "%d,0x%llx,%u,%u\n", MLX5HWS_DEBUG_RES_TYPE_CONTEXT_STC, HWS_PTR_TO_ID(ctx), @@ -382,40 +361,65 @@ static int hws_debug_dump_context_stc_resource(struct seq_file *f, static int hws_debug_dump_context_stc(struct seq_file *f, struct mlx5hws_context *ctx) { - struct mlx5hws_pool *stc_pool; - u32 table_type; + struct mlx5hws_pool *stc_pool = ctx->stc_pool; int ret; - int i; - for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++) { - stc_pool = ctx->stc_pool[i]; - table_type = MLX5HWS_TABLE_TYPE_BASE + i; + if (!stc_pool) + return 0; - if (!stc_pool) - continue; + if (stc_pool->resource) { + ret = hws_debug_dump_context_stc_resource(f, ctx, + stc_pool->resource); + if (ret) + return ret; + } - if (stc_pool->resource[0]) { - ret = hws_debug_dump_context_stc_resource(f, ctx, table_type, - stc_pool->resource[0]); - if (ret) - return ret; - } + if (stc_pool->mirror_resource) { + struct mlx5hws_pool_resource *res = stc_pool->mirror_resource; - if (i == MLX5HWS_TABLE_TYPE_FDB && stc_pool->mirror_resource[0]) { - ret = hws_debug_dump_context_stc_resource(f, ctx, table_type, - stc_pool->mirror_resource[0]); - if (ret) - return ret; - } + ret = hws_debug_dump_context_stc_resource(f, ctx, res); + if (ret) + return ret; } return 0; } +static void +hws_debug_dump_action_ste_table(struct seq_file *f, + struct mlx5hws_action_ste_table *action_tbl) +{ + int ste_0_id = mlx5hws_pool_get_base_id(action_tbl->pool); + int ste_1_id = mlx5hws_pool_get_base_mirror_id(action_tbl->pool); + + seq_printf(f, "%d,0x%llx,%d,%d,%d,%d\n", + MLX5HWS_DEBUG_RES_TYPE_ACTION_STE_TABLE, + HWS_PTR_TO_ID(action_tbl), + action_tbl->rtc_0_id, ste_0_id, + action_tbl->rtc_1_id, ste_1_id); +} + +static void hws_debug_dump_action_ste_pool(struct seq_file *f, + struct mlx5hws_action_ste_pool *pool) +{ + struct mlx5hws_action_ste_table *action_tbl; + enum mlx5hws_pool_optimize opt; + + mutex_lock(&pool->lock); + for (opt = MLX5HWS_POOL_OPTIMIZE_NONE; opt < MLX5HWS_POOL_OPTIMIZE_MAX; + opt++) { + list_for_each_entry(action_tbl, &pool->elems[opt].available, + list_node) { + hws_debug_dump_action_ste_table(f, action_tbl); + } + } + mutex_unlock(&pool->lock); +} + static int hws_debug_dump_context(struct seq_file *f, struct mlx5hws_context *ctx) { struct mlx5hws_table *tbl; - int ret; + int ret, i; ret = hws_debug_dump_context_info(f, ctx); if (ret) @@ -435,6 +439,9 @@ static int hws_debug_dump_context(struct seq_file *f, struct mlx5hws_context *ct return ret; } + for (i = 0; i < ctx->queues; i++) + hws_debug_dump_action_ste_pool(f, &ctx->action_ste_pool[i]); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h index e44e7ae28f93..89c396f9f266 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h @@ -26,6 +26,8 @@ enum mlx5hws_debug_res_type { MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_HASH_DEFINER = 4205, MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_RANGE_DEFINER = 4206, MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_COMPARE_MATCH_DEFINER = 4207, + + MLX5HWS_DEBUG_RES_TYPE_ACTION_STE_TABLE = 4300, }; static inline u64 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c index 8fe96eb76baf..c6436c3a7a83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c @@ -70,7 +70,7 @@ u32 second_dw_mask = (mask) & ((1 << _bit_off) - 1); \ _HWS_SET32(p, (v) >> _bit_off, byte_off, 0, (mask) >> _bit_off); \ _HWS_SET32(p, (v) & second_dw_mask, (byte_off) + DW_SIZE, \ - (bit_off) % BITS_IN_DW, second_dw_mask); \ + (bit_off + BITS_IN_DW) % BITS_IN_DW, second_dw_mask); \ } else { \ _HWS_SET32(p, v, byte_off, (bit_off), (mask)); \ } \ @@ -158,6 +158,218 @@ struct mlx5hws_definer_conv_data { u32 match_flags; }; +#define HWS_DEFINER_ENTRY(name)[MLX5HWS_DEFINER_FNAME_##name] = #name + +static const char * const hws_definer_fname_to_str[] = { + HWS_DEFINER_ENTRY(ETH_SMAC_47_16_O), + HWS_DEFINER_ENTRY(ETH_SMAC_47_16_I), + HWS_DEFINER_ENTRY(ETH_SMAC_15_0_O), + HWS_DEFINER_ENTRY(ETH_SMAC_15_0_I), + HWS_DEFINER_ENTRY(ETH_DMAC_47_16_O), + HWS_DEFINER_ENTRY(ETH_DMAC_47_16_I), + HWS_DEFINER_ENTRY(ETH_DMAC_15_0_O), + HWS_DEFINER_ENTRY(ETH_DMAC_15_0_I), + HWS_DEFINER_ENTRY(ETH_TYPE_O), + HWS_DEFINER_ENTRY(ETH_TYPE_I), + HWS_DEFINER_ENTRY(ETH_L3_TYPE_O), + HWS_DEFINER_ENTRY(ETH_L3_TYPE_I), + HWS_DEFINER_ENTRY(VLAN_TYPE_O), + HWS_DEFINER_ENTRY(VLAN_TYPE_I), + HWS_DEFINER_ENTRY(VLAN_FIRST_PRIO_O), + HWS_DEFINER_ENTRY(VLAN_FIRST_PRIO_I), + HWS_DEFINER_ENTRY(VLAN_CFI_O), + HWS_DEFINER_ENTRY(VLAN_CFI_I), + HWS_DEFINER_ENTRY(VLAN_ID_O), + HWS_DEFINER_ENTRY(VLAN_ID_I), + HWS_DEFINER_ENTRY(VLAN_SECOND_TYPE_O), + HWS_DEFINER_ENTRY(VLAN_SECOND_TYPE_I), + HWS_DEFINER_ENTRY(VLAN_SECOND_PRIO_O), + HWS_DEFINER_ENTRY(VLAN_SECOND_PRIO_I), + HWS_DEFINER_ENTRY(VLAN_SECOND_CFI_O), + HWS_DEFINER_ENTRY(VLAN_SECOND_CFI_I), + HWS_DEFINER_ENTRY(VLAN_SECOND_ID_O), + HWS_DEFINER_ENTRY(VLAN_SECOND_ID_I), + HWS_DEFINER_ENTRY(IPV4_IHL_O), + HWS_DEFINER_ENTRY(IPV4_IHL_I), + HWS_DEFINER_ENTRY(IP_DSCP_O), + HWS_DEFINER_ENTRY(IP_DSCP_I), + HWS_DEFINER_ENTRY(IP_ECN_O), + HWS_DEFINER_ENTRY(IP_ECN_I), + HWS_DEFINER_ENTRY(IP_TTL_O), + HWS_DEFINER_ENTRY(IP_TTL_I), + HWS_DEFINER_ENTRY(IPV4_DST_O), + HWS_DEFINER_ENTRY(IPV4_DST_I), + HWS_DEFINER_ENTRY(IPV4_SRC_O), + HWS_DEFINER_ENTRY(IPV4_SRC_I), + HWS_DEFINER_ENTRY(IP_VERSION_O), + HWS_DEFINER_ENTRY(IP_VERSION_I), + HWS_DEFINER_ENTRY(IP_FRAG_O), + HWS_DEFINER_ENTRY(IP_FRAG_I), + HWS_DEFINER_ENTRY(IP_LEN_O), + HWS_DEFINER_ENTRY(IP_LEN_I), + HWS_DEFINER_ENTRY(IP_TOS_O), + HWS_DEFINER_ENTRY(IP_TOS_I), + HWS_DEFINER_ENTRY(IPV6_FLOW_LABEL_O), + HWS_DEFINER_ENTRY(IPV6_FLOW_LABEL_I), + HWS_DEFINER_ENTRY(IPV6_DST_127_96_O), + HWS_DEFINER_ENTRY(IPV6_DST_95_64_O), + HWS_DEFINER_ENTRY(IPV6_DST_63_32_O), + HWS_DEFINER_ENTRY(IPV6_DST_31_0_O), + HWS_DEFINER_ENTRY(IPV6_DST_127_96_I), + HWS_DEFINER_ENTRY(IPV6_DST_95_64_I), + HWS_DEFINER_ENTRY(IPV6_DST_63_32_I), + HWS_DEFINER_ENTRY(IPV6_DST_31_0_I), + HWS_DEFINER_ENTRY(IPV6_SRC_127_96_O), + HWS_DEFINER_ENTRY(IPV6_SRC_95_64_O), + HWS_DEFINER_ENTRY(IPV6_SRC_63_32_O), + HWS_DEFINER_ENTRY(IPV6_SRC_31_0_O), + HWS_DEFINER_ENTRY(IPV6_SRC_127_96_I), + HWS_DEFINER_ENTRY(IPV6_SRC_95_64_I), + HWS_DEFINER_ENTRY(IPV6_SRC_63_32_I), + HWS_DEFINER_ENTRY(IPV6_SRC_31_0_I), + HWS_DEFINER_ENTRY(IP_PROTOCOL_O), + HWS_DEFINER_ENTRY(IP_PROTOCOL_I), + HWS_DEFINER_ENTRY(L4_SPORT_O), + HWS_DEFINER_ENTRY(L4_SPORT_I), + HWS_DEFINER_ENTRY(L4_DPORT_O), + HWS_DEFINER_ENTRY(L4_DPORT_I), + HWS_DEFINER_ENTRY(TCP_FLAGS_I), + HWS_DEFINER_ENTRY(TCP_FLAGS_O), + HWS_DEFINER_ENTRY(TCP_SEQ_NUM), + HWS_DEFINER_ENTRY(TCP_ACK_NUM), + HWS_DEFINER_ENTRY(GTP_TEID), + HWS_DEFINER_ENTRY(GTP_MSG_TYPE), + HWS_DEFINER_ENTRY(GTP_EXT_FLAG), + HWS_DEFINER_ENTRY(GTP_NEXT_EXT_HDR), + HWS_DEFINER_ENTRY(GTP_EXT_HDR_PDU), + HWS_DEFINER_ENTRY(GTP_EXT_HDR_QFI), + HWS_DEFINER_ENTRY(GTPU_DW0), + HWS_DEFINER_ENTRY(GTPU_FIRST_EXT_DW0), + HWS_DEFINER_ENTRY(GTPU_DW2), + HWS_DEFINER_ENTRY(FLEX_PARSER_0), + HWS_DEFINER_ENTRY(FLEX_PARSER_1), + HWS_DEFINER_ENTRY(FLEX_PARSER_2), + HWS_DEFINER_ENTRY(FLEX_PARSER_3), + HWS_DEFINER_ENTRY(FLEX_PARSER_4), + HWS_DEFINER_ENTRY(FLEX_PARSER_5), + HWS_DEFINER_ENTRY(FLEX_PARSER_6), + HWS_DEFINER_ENTRY(FLEX_PARSER_7), + HWS_DEFINER_ENTRY(VPORT_REG_C_0), + HWS_DEFINER_ENTRY(VXLAN_FLAGS), + HWS_DEFINER_ENTRY(VXLAN_VNI), + HWS_DEFINER_ENTRY(VXLAN_GPE_FLAGS), + HWS_DEFINER_ENTRY(VXLAN_GPE_RSVD0), + HWS_DEFINER_ENTRY(VXLAN_GPE_PROTO), + HWS_DEFINER_ENTRY(VXLAN_GPE_VNI), + HWS_DEFINER_ENTRY(VXLAN_GPE_RSVD1), + HWS_DEFINER_ENTRY(GENEVE_OPT_LEN), + HWS_DEFINER_ENTRY(GENEVE_OAM), + HWS_DEFINER_ENTRY(GENEVE_PROTO), + HWS_DEFINER_ENTRY(GENEVE_VNI), + HWS_DEFINER_ENTRY(SOURCE_QP), + HWS_DEFINER_ENTRY(SOURCE_GVMI), + HWS_DEFINER_ENTRY(REG_0), + HWS_DEFINER_ENTRY(REG_1), + HWS_DEFINER_ENTRY(REG_2), + HWS_DEFINER_ENTRY(REG_3), + HWS_DEFINER_ENTRY(REG_4), + HWS_DEFINER_ENTRY(REG_5), + HWS_DEFINER_ENTRY(REG_6), + HWS_DEFINER_ENTRY(REG_7), + HWS_DEFINER_ENTRY(REG_8), + HWS_DEFINER_ENTRY(REG_9), + HWS_DEFINER_ENTRY(REG_10), + HWS_DEFINER_ENTRY(REG_11), + HWS_DEFINER_ENTRY(REG_A), + HWS_DEFINER_ENTRY(REG_B), + HWS_DEFINER_ENTRY(GRE_KEY_PRESENT), + HWS_DEFINER_ENTRY(GRE_C), + HWS_DEFINER_ENTRY(GRE_K), + HWS_DEFINER_ENTRY(GRE_S), + HWS_DEFINER_ENTRY(GRE_PROTOCOL), + HWS_DEFINER_ENTRY(GRE_OPT_KEY), + HWS_DEFINER_ENTRY(GRE_OPT_SEQ), + HWS_DEFINER_ENTRY(GRE_OPT_CHECKSUM), + HWS_DEFINER_ENTRY(INTEGRITY_O), + HWS_DEFINER_ENTRY(INTEGRITY_I), + HWS_DEFINER_ENTRY(ICMP_DW1), + HWS_DEFINER_ENTRY(ICMP_DW2), + HWS_DEFINER_ENTRY(ICMP_DW3), + HWS_DEFINER_ENTRY(IPSEC_SPI), + HWS_DEFINER_ENTRY(IPSEC_SEQUENCE_NUMBER), + HWS_DEFINER_ENTRY(IPSEC_SYNDROME), + HWS_DEFINER_ENTRY(MPLS0_O), + HWS_DEFINER_ENTRY(MPLS1_O), + HWS_DEFINER_ENTRY(MPLS2_O), + HWS_DEFINER_ENTRY(MPLS3_O), + HWS_DEFINER_ENTRY(MPLS4_O), + HWS_DEFINER_ENTRY(MPLS0_I), + HWS_DEFINER_ENTRY(MPLS1_I), + HWS_DEFINER_ENTRY(MPLS2_I), + HWS_DEFINER_ENTRY(MPLS3_I), + HWS_DEFINER_ENTRY(MPLS4_I), + HWS_DEFINER_ENTRY(FLEX_PARSER0_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER1_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER2_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER3_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER4_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER5_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER6_OK), + HWS_DEFINER_ENTRY(FLEX_PARSER7_OK), + HWS_DEFINER_ENTRY(OKS2_MPLS0_O), + HWS_DEFINER_ENTRY(OKS2_MPLS1_O), + HWS_DEFINER_ENTRY(OKS2_MPLS2_O), + HWS_DEFINER_ENTRY(OKS2_MPLS3_O), + HWS_DEFINER_ENTRY(OKS2_MPLS4_O), + HWS_DEFINER_ENTRY(OKS2_MPLS0_I), + HWS_DEFINER_ENTRY(OKS2_MPLS1_I), + HWS_DEFINER_ENTRY(OKS2_MPLS2_I), + HWS_DEFINER_ENTRY(OKS2_MPLS3_I), + HWS_DEFINER_ENTRY(OKS2_MPLS4_I), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_0), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_1), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_2), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_3), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_4), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_5), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_6), + HWS_DEFINER_ENTRY(GENEVE_OPT_OK_7), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_0), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_1), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_2), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_3), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_4), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_5), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_6), + HWS_DEFINER_ENTRY(GENEVE_OPT_DW_7), + HWS_DEFINER_ENTRY(IB_L4_OPCODE), + HWS_DEFINER_ENTRY(IB_L4_QPN), + HWS_DEFINER_ENTRY(IB_L4_A), + HWS_DEFINER_ENTRY(RANDOM_NUM), + HWS_DEFINER_ENTRY(PTYPE_L2_O), + HWS_DEFINER_ENTRY(PTYPE_L2_I), + HWS_DEFINER_ENTRY(PTYPE_L3_O), + HWS_DEFINER_ENTRY(PTYPE_L3_I), + HWS_DEFINER_ENTRY(PTYPE_L4_O), + HWS_DEFINER_ENTRY(PTYPE_L4_I), + HWS_DEFINER_ENTRY(PTYPE_L4_EXT_O), + HWS_DEFINER_ENTRY(PTYPE_L4_EXT_I), + HWS_DEFINER_ENTRY(PTYPE_FRAG_O), + HWS_DEFINER_ENTRY(PTYPE_FRAG_I), + HWS_DEFINER_ENTRY(TNL_HDR_0), + HWS_DEFINER_ENTRY(TNL_HDR_1), + HWS_DEFINER_ENTRY(TNL_HDR_2), + HWS_DEFINER_ENTRY(TNL_HDR_3), + [MLX5HWS_DEFINER_FNAME_MAX] = "DEFINER_FNAME_UNKNOWN", +}; + +const char *mlx5hws_definer_fname_to_str(enum mlx5hws_definer_fname fname) +{ + if (fname > MLX5HWS_DEFINER_FNAME_MAX) + fname = MLX5HWS_DEFINER_FNAME_MAX; + return hws_definer_fname_to_str[fname]; +} + static void hws_definer_ones_set(struct mlx5hws_definer_fc *fc, void *match_param, @@ -500,7 +712,8 @@ hws_definer_check_match_flags(struct mlx5hws_definer_conv_data *cd) return 0; err_conflict: - mlx5hws_err(cd->ctx, "Invalid definer fields combination\n"); + mlx5hws_err(cd->ctx, "Invalid definer fields combination: match_flags = 0x%08x\n", + cd->match_flags); return -EINVAL; } @@ -508,18 +721,33 @@ static int hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, u32 *match_param) { - bool is_s_ipv6, is_d_ipv6, smac_set, dmac_set; + bool is_ipv6, smac_set, dmac_set, ip_addr_set, ip_ver_set; struct mlx5hws_definer_fc *fc = cd->fc; struct mlx5hws_definer_fc *curr_fc; u32 *s_ipv6, *d_ipv6; if (HWS_IS_FLD_SET_SZ(match_param, outer_headers.l4_type, 0x2) || - HWS_IS_FLD_SET_SZ(match_param, outer_headers.reserved_at_c2, 0xe) || - HWS_IS_FLD_SET_SZ(match_param, outer_headers.reserved_at_c4, 0x4)) { + HWS_IS_FLD_SET_SZ(match_param, outer_headers.l4_type_ext, 0x4) || + HWS_IS_FLD_SET_SZ(match_param, outer_headers.reserved_at_c6, 0xa) || + HWS_IS_FLD_SET_SZ(match_param, outer_headers.reserved_at_d4, 0x4)) { mlx5hws_err(cd->ctx, "Unsupported outer parameters set\n"); return -EINVAL; } + ip_addr_set = HWS_IS_FLD_SET_SZ(match_param, + outer_headers.src_ipv4_src_ipv6, + 0x80) || + HWS_IS_FLD_SET_SZ(match_param, + outer_headers.dst_ipv4_dst_ipv6, 0x80); + ip_ver_set = HWS_IS_FLD_SET(match_param, outer_headers.ip_version) || + HWS_IS_FLD_SET(match_param, outer_headers.ethertype); + + if (ip_addr_set && !ip_ver_set) { + mlx5hws_err(cd->ctx, + "Unsupported match on IP address without version or ethertype\n"); + return -EINVAL; + } + /* L2 Check ethertype */ HWS_SET_HDR(fc, match_param, ETH_TYPE_O, outer_headers.ethertype, @@ -558,6 +786,9 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, HWS_SET_HDR(fc, match_param, IP_PROTOCOL_O, outer_headers.ip_protocol, eth_l3_outer.protocol_next_header); + HWS_SET_HDR(fc, match_param, IP_VERSION_O, + outer_headers.ip_version, + eth_l3_outer.ip_version); HWS_SET_HDR(fc, match_param, IP_TTL_O, outer_headers.ttl_hoplimit, eth_l3_outer.time_to_live_hop_limit); @@ -569,10 +800,16 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, outer_headers.dst_ipv4_dst_ipv6.ipv6_layout); /* Assume IPv6 is used if ipv6 bits are set */ - is_s_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2]; - is_d_ipv6 = d_ipv6[0] || d_ipv6[1] || d_ipv6[2]; + is_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2] || + d_ipv6[0] || d_ipv6[1] || d_ipv6[2]; + + /* IHL is an IPv4-specific field. */ + if (is_ipv6 && HWS_IS_FLD_SET(match_param, outer_headers.ipv4_ihl)) { + mlx5hws_err(cd->ctx, "Unsupported match on IPv6 address and IPv4 IHL\n"); + return -EINVAL; + } - if (is_s_ipv6) { + if (is_ipv6) { /* Handle IPv6 source address */ HWS_SET_HDR(fc, match_param, IPV6_SRC_127_96_O, outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96, @@ -586,13 +823,6 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, HWS_SET_HDR(fc, match_param, IPV6_SRC_31_0_O, outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, ipv6_src_outer.ipv6_address_31_0); - } else { - /* Handle IPv4 source address */ - HWS_SET_HDR(fc, match_param, IPV4_SRC_O, - outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, - ipv4_src_dest_outer.source_address); - } - if (is_d_ipv6) { /* Handle IPv6 destination address */ HWS_SET_HDR(fc, match_param, IPV6_DST_127_96_O, outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96, @@ -607,6 +837,10 @@ hws_definer_conv_outer(struct mlx5hws_definer_conv_data *cd, outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, ipv6_dst_outer.ipv6_address_31_0); } else { + /* Handle IPv4 source address */ + HWS_SET_HDR(fc, match_param, IPV4_SRC_O, + outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, + ipv4_src_dest_outer.source_address); /* Handle IPv4 destination address */ HWS_SET_HDR(fc, match_param, IPV4_DST_O, outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, @@ -664,18 +898,33 @@ static int hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd, u32 *match_param) { - bool is_s_ipv6, is_d_ipv6, smac_set, dmac_set; + bool is_ipv6, smac_set, dmac_set, ip_addr_set, ip_ver_set; struct mlx5hws_definer_fc *fc = cd->fc; struct mlx5hws_definer_fc *curr_fc; u32 *s_ipv6, *d_ipv6; if (HWS_IS_FLD_SET_SZ(match_param, inner_headers.l4_type, 0x2) || - HWS_IS_FLD_SET_SZ(match_param, inner_headers.reserved_at_c2, 0xe) || - HWS_IS_FLD_SET_SZ(match_param, inner_headers.reserved_at_c4, 0x4)) { + HWS_IS_FLD_SET_SZ(match_param, inner_headers.l4_type_ext, 0x4) || + HWS_IS_FLD_SET_SZ(match_param, inner_headers.reserved_at_c6, 0xa) || + HWS_IS_FLD_SET_SZ(match_param, inner_headers.reserved_at_d4, 0x4)) { mlx5hws_err(cd->ctx, "Unsupported inner parameters set\n"); return -EINVAL; } + ip_addr_set = HWS_IS_FLD_SET_SZ(match_param, + inner_headers.src_ipv4_src_ipv6, + 0x80) || + HWS_IS_FLD_SET_SZ(match_param, + inner_headers.dst_ipv4_dst_ipv6, 0x80); + ip_ver_set = HWS_IS_FLD_SET(match_param, inner_headers.ip_version) || + HWS_IS_FLD_SET(match_param, inner_headers.ethertype); + + if (ip_addr_set && !ip_ver_set) { + mlx5hws_err(cd->ctx, + "Unsupported match on IP address without version or ethertype\n"); + return -EINVAL; + } + /* L2 Check ethertype */ HWS_SET_HDR(fc, match_param, ETH_TYPE_I, inner_headers.ethertype, @@ -727,10 +976,16 @@ hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd, inner_headers.dst_ipv4_dst_ipv6.ipv6_layout); /* Assume IPv6 is used if ipv6 bits are set */ - is_s_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2]; - is_d_ipv6 = d_ipv6[0] || d_ipv6[1] || d_ipv6[2]; + is_ipv6 = s_ipv6[0] || s_ipv6[1] || s_ipv6[2] || + d_ipv6[0] || d_ipv6[1] || d_ipv6[2]; - if (is_s_ipv6) { + /* IHL is an IPv4-specific field. */ + if (is_ipv6 && HWS_IS_FLD_SET(match_param, inner_headers.ipv4_ihl)) { + mlx5hws_err(cd->ctx, "Unsupported match on IPv6 address and IPv4 IHL\n"); + return -EINVAL; + } + + if (is_ipv6) { /* Handle IPv6 source address */ HWS_SET_HDR(fc, match_param, IPV6_SRC_127_96_I, inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96, @@ -744,13 +999,6 @@ hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd, HWS_SET_HDR(fc, match_param, IPV6_SRC_31_0_I, inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, ipv6_src_inner.ipv6_address_31_0); - } else { - /* Handle IPv4 source address */ - HWS_SET_HDR(fc, match_param, IPV4_SRC_I, - inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, - ipv4_src_dest_inner.source_address); - } - if (is_d_ipv6) { /* Handle IPv6 destination address */ HWS_SET_HDR(fc, match_param, IPV6_DST_127_96_I, inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96, @@ -765,6 +1013,10 @@ hws_definer_conv_inner(struct mlx5hws_definer_conv_data *cd, inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, ipv6_dst_inner.ipv6_address_31_0); } else { + /* Handle IPv4 source address */ + HWS_SET_HDR(fc, match_param, IPV4_SRC_I, + inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0, + ipv4_src_dest_inner.source_address); /* Handle IPv4 destination address */ HWS_SET_HDR(fc, match_param, IPV4_DST_I, inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0, @@ -1029,7 +1281,8 @@ hws_definer_conv_misc2(struct mlx5hws_definer_conv_data *cd, struct mlx5hws_definer_fc *curr_fc; if (HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.reserved_at_1a0, 0x8) || - HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.reserved_at_1b8, 0x8) || + HWS_IS_FLD_SET_SZ(match_param, + misc_parameters_2.ipsec_next_header, 0x8) || HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.reserved_at_1c0, 0x40) || HWS_IS_FLD_SET(match_param, misc_parameters_2.macsec_syndrome) || HWS_IS_FLD_SET(match_param, misc_parameters_2.ipsec_syndrome)) { @@ -1985,8 +2238,7 @@ int mlx5hws_definer_get_obj(struct mlx5hws_context *ctx, continue; /* Reuse definer and set LRU (move to be first in the list) */ - list_del_init(&cached_definer->list_node); - list_add(&cached_definer->list_node, &cache->list_head); + list_move(&cached_definer->list_node, &cache->list_head); cached_definer->refcount++; return cached_definer->definer.obj_id; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h index 9432d5084def..62da55389331 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h @@ -785,7 +785,7 @@ struct mlx5hws_definer_cache { struct mlx5hws_definer_cache_item { struct mlx5hws_definer definer; - u32 refcount; + u32 refcount; /* protected by context ctrl lock */ struct list_head list_node; }; @@ -831,4 +831,6 @@ mlx5hws_definer_conv_match_params_to_compressed_fc(struct mlx5hws_context *ctx, u32 *match_param, int *fc_sz); +const char *mlx5hws_definer_fname_to_str(enum mlx5hws_definer_fname fname); + #endif /* HWS_DEFINER_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c new file mode 100644 index 000000000000..131e74b2b774 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -0,0 +1,1646 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */ + +#include <linux/mlx5/vport.h> +#include <mlx5_core.h> +#include <fs_core.h> +#include <fs_cmd.h> +#include "fs_hws_pools.h" +#include "mlx5hws.h" + +#define MLX5HWS_CTX_MAX_NUM_OF_QUEUES 16 +#define MLX5HWS_CTX_QUEUE_SIZE 256 + +static struct mlx5hws_action * +mlx5_fs_create_action_remove_header_vlan(struct mlx5hws_context *ctx); +static void +mlx5_fs_destroy_pr_pool(struct mlx5_fs_pool *pool, struct xarray *pr_pools, + unsigned long index); +static void +mlx5_fs_destroy_mh_pool(struct mlx5_fs_pool *pool, struct xarray *mh_pools, + unsigned long index); + +static int mlx5_fs_init_hws_actions_pool(struct mlx5_core_dev *dev, + struct mlx5_fs_hws_context *fs_ctx) +{ + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; + struct mlx5_fs_hws_actions_pool *hws_pool = &fs_ctx->hws_pool; + struct mlx5hws_action_reformat_header reformat_hdr = {}; + struct mlx5hws_context *ctx = fs_ctx->hws_ctx; + enum mlx5hws_action_type action_type; + int err = -ENOSPC; + + hws_pool->tag_action = mlx5hws_action_create_tag(ctx, flags); + if (!hws_pool->tag_action) + return err; + hws_pool->pop_vlan_action = mlx5hws_action_create_pop_vlan(ctx, flags); + if (!hws_pool->pop_vlan_action) + goto destroy_tag; + hws_pool->push_vlan_action = mlx5hws_action_create_push_vlan(ctx, flags); + if (!hws_pool->push_vlan_action) + goto destroy_pop_vlan; + hws_pool->drop_action = mlx5hws_action_create_dest_drop(ctx, flags); + if (!hws_pool->drop_action) + goto destroy_push_vlan; + action_type = MLX5HWS_ACTION_TYP_REFORMAT_TNL_L2_TO_L2; + hws_pool->decapl2_action = + mlx5hws_action_create_reformat(ctx, action_type, 1, + &reformat_hdr, 0, flags); + if (!hws_pool->decapl2_action) + goto destroy_drop; + hws_pool->remove_hdr_vlan_action = + mlx5_fs_create_action_remove_header_vlan(ctx); + if (!hws_pool->remove_hdr_vlan_action) + goto destroy_decapl2; + err = mlx5_fs_hws_pr_pool_init(&hws_pool->insert_hdr_pool, dev, 0, + MLX5HWS_ACTION_TYP_INSERT_HEADER); + if (err) + goto destroy_remove_hdr; + err = mlx5_fs_hws_pr_pool_init(&hws_pool->dl3tnltol2_pool, dev, 0, + MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2); + if (err) + goto cleanup_insert_hdr; + xa_init(&hws_pool->el2tol3tnl_pools); + xa_init(&hws_pool->el2tol2tnl_pools); + xa_init(&hws_pool->mh_pools); + xa_init(&hws_pool->table_dests); + xa_init(&hws_pool->vport_dests); + xa_init(&hws_pool->vport_vhca_dests); + xa_init(&hws_pool->aso_meters); + xa_init(&hws_pool->sample_dests); + return 0; + +cleanup_insert_hdr: + mlx5_fs_hws_pr_pool_cleanup(&hws_pool->insert_hdr_pool); +destroy_remove_hdr: + mlx5hws_action_destroy(hws_pool->remove_hdr_vlan_action); +destroy_decapl2: + mlx5hws_action_destroy(hws_pool->decapl2_action); +destroy_drop: + mlx5hws_action_destroy(hws_pool->drop_action); +destroy_push_vlan: + mlx5hws_action_destroy(hws_pool->push_vlan_action); +destroy_pop_vlan: + mlx5hws_action_destroy(hws_pool->pop_vlan_action); +destroy_tag: + mlx5hws_action_destroy(hws_pool->tag_action); + return err; +} + +static void mlx5_fs_cleanup_hws_actions_pool(struct mlx5_fs_hws_context *fs_ctx) +{ + struct mlx5_fs_hws_actions_pool *hws_pool = &fs_ctx->hws_pool; + struct mlx5_fs_hws_data *fs_hws_data; + struct mlx5hws_action *action; + struct mlx5_fs_pool *pool; + unsigned long i; + + xa_for_each(&hws_pool->sample_dests, i, fs_hws_data) + kfree(fs_hws_data); + xa_destroy(&hws_pool->sample_dests); + xa_for_each(&hws_pool->aso_meters, i, fs_hws_data) + kfree(fs_hws_data); + xa_destroy(&hws_pool->aso_meters); + xa_for_each(&hws_pool->vport_vhca_dests, i, action) + mlx5hws_action_destroy(action); + xa_destroy(&hws_pool->vport_vhca_dests); + xa_for_each(&hws_pool->vport_dests, i, action) + mlx5hws_action_destroy(action); + xa_destroy(&hws_pool->vport_dests); + xa_destroy(&hws_pool->table_dests); + xa_for_each(&hws_pool->mh_pools, i, pool) + mlx5_fs_destroy_mh_pool(pool, &hws_pool->mh_pools, i); + xa_destroy(&hws_pool->mh_pools); + xa_for_each(&hws_pool->el2tol2tnl_pools, i, pool) + mlx5_fs_destroy_pr_pool(pool, &hws_pool->el2tol2tnl_pools, i); + xa_destroy(&hws_pool->el2tol2tnl_pools); + xa_for_each(&hws_pool->el2tol3tnl_pools, i, pool) + mlx5_fs_destroy_pr_pool(pool, &hws_pool->el2tol3tnl_pools, i); + xa_destroy(&hws_pool->el2tol3tnl_pools); + mlx5_fs_hws_pr_pool_cleanup(&hws_pool->dl3tnltol2_pool); + mlx5_fs_hws_pr_pool_cleanup(&hws_pool->insert_hdr_pool); + mlx5hws_action_destroy(hws_pool->remove_hdr_vlan_action); + mlx5hws_action_destroy(hws_pool->decapl2_action); + mlx5hws_action_destroy(hws_pool->drop_action); + mlx5hws_action_destroy(hws_pool->push_vlan_action); + mlx5hws_action_destroy(hws_pool->pop_vlan_action); + mlx5hws_action_destroy(hws_pool->tag_action); +} + +static int mlx5_cmd_hws_create_ns(struct mlx5_flow_root_namespace *ns) +{ + struct mlx5hws_context_attr hws_ctx_attr = {}; + int err; + + hws_ctx_attr.queues = min_t(int, num_online_cpus(), + MLX5HWS_CTX_MAX_NUM_OF_QUEUES); + hws_ctx_attr.queue_size = MLX5HWS_CTX_QUEUE_SIZE; + + ns->fs_hws_context.hws_ctx = + mlx5hws_context_open(ns->dev, &hws_ctx_attr); + if (!ns->fs_hws_context.hws_ctx) { + mlx5_core_err(ns->dev, "Failed to create hws flow namespace\n"); + return -EINVAL; + } + err = mlx5_fs_init_hws_actions_pool(ns->dev, &ns->fs_hws_context); + if (err) { + mlx5_core_err(ns->dev, "Failed to init hws actions pool\n"); + mlx5hws_context_close(ns->fs_hws_context.hws_ctx); + return err; + } + return 0; +} + +static int mlx5_cmd_hws_destroy_ns(struct mlx5_flow_root_namespace *ns) +{ + mlx5_fs_cleanup_hws_actions_pool(&ns->fs_hws_context); + return mlx5hws_context_close(ns->fs_hws_context.hws_ctx); +} + +static int mlx5_cmd_hws_set_peer(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_root_namespace *peer_ns, + u16 peer_vhca_id) +{ + struct mlx5hws_context *peer_ctx = NULL; + + if (peer_ns) + peer_ctx = peer_ns->fs_hws_context.hws_ctx; + mlx5hws_context_set_peer(ns->fs_hws_context.hws_ctx, peer_ctx, + peer_vhca_id); + return 0; +} + +static int mlx5_fs_set_ft_default_miss(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + struct mlx5hws_table *next_tbl; + int err; + + if (!ns->fs_hws_context.hws_ctx) + return -EINVAL; + + /* if no change required, return */ + if (!next_ft && !ft->fs_hws_table.miss_ft_set) + return 0; + + next_tbl = next_ft ? next_ft->fs_hws_table.hws_table : NULL; + err = mlx5hws_table_set_default_miss(ft->fs_hws_table.hws_table, next_tbl); + if (err) { + mlx5_core_err(ns->dev, "Failed setting FT default miss (%d)\n", err); + return err; + } + ft->fs_hws_table.miss_ft_set = !!next_tbl; + return 0; +} + +static int mlx5_fs_add_flow_table_dest_action(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft) +{ + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; + struct mlx5_fs_hws_context *fs_ctx = &ns->fs_hws_context; + struct mlx5hws_action *dest_ft_action; + struct xarray *dests_xa; + int err; + + dest_ft_action = mlx5hws_action_create_dest_table_num(fs_ctx->hws_ctx, + ft->id, flags); + if (!dest_ft_action) { + mlx5_core_err(ns->dev, "Failed creating dest table action\n"); + return -ENOMEM; + } + + dests_xa = &fs_ctx->hws_pool.table_dests; + err = xa_insert(dests_xa, ft->id, dest_ft_action, GFP_KERNEL); + if (err) + mlx5hws_action_destroy(dest_ft_action); + return err; +} + +static int mlx5_fs_del_flow_table_dest_action(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft) +{ + struct mlx5_fs_hws_context *fs_ctx = &ns->fs_hws_context; + struct mlx5hws_action *dest_ft_action; + struct xarray *dests_xa; + int err; + + dests_xa = &fs_ctx->hws_pool.table_dests; + dest_ft_action = xa_erase(dests_xa, ft->id); + if (!dest_ft_action) { + mlx5_core_err(ns->dev, "Failed to erase dest ft action\n"); + return -ENOENT; + } + + err = mlx5hws_action_destroy(dest_ft_action); + if (err) + mlx5_core_err(ns->dev, "Failed to destroy dest ft action\n"); + return err; +} + +static int mlx5_cmd_hws_create_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table_attr *ft_attr, + struct mlx5_flow_table *next_ft) +{ + struct mlx5hws_context *ctx = ns->fs_hws_context.hws_ctx; + struct mlx5hws_table_attr tbl_attr = {}; + struct mlx5hws_table *tbl; + int err; + + if (mlx5_fs_cmd_is_fw_term_table(ft)) { + err = mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft, ft_attr, + next_ft); + if (err) + return err; + err = mlx5_fs_add_flow_table_dest_action(ns, ft); + if (err) + mlx5_fs_cmd_get_fw_cmds()->destroy_flow_table(ns, ft); + return err; + } + + if (ns->table_type != FS_FT_FDB) { + mlx5_core_err(ns->dev, "Table type %d not supported for HWS\n", + ns->table_type); + return -EOPNOTSUPP; + } + + tbl_attr.type = MLX5HWS_TABLE_TYPE_FDB; + tbl_attr.level = ft_attr->level; + tbl_attr.uid = ft_attr->uid; + tbl = mlx5hws_table_create(ctx, &tbl_attr); + if (!tbl) { + mlx5_core_err(ns->dev, "Failed creating hws flow_table\n"); + return -EINVAL; + } + + ft->fs_hws_table.hws_table = tbl; + ft->id = mlx5hws_table_get_id(tbl); + + if (next_ft) { + err = mlx5_fs_set_ft_default_miss(ns, ft, next_ft); + if (err) + goto destroy_table; + } + + ft->max_fte = INT_MAX; + + err = mlx5_fs_add_flow_table_dest_action(ns, ft); + if (err) + goto clear_ft_miss; + return 0; + +clear_ft_miss: + mlx5_fs_set_ft_default_miss(ns, ft, NULL); +destroy_table: + mlx5hws_table_destroy(tbl); + ft->fs_hws_table.hws_table = NULL; + return err; +} + +static int mlx5_cmd_hws_destroy_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft) +{ + int err; + + err = mlx5_fs_del_flow_table_dest_action(ns, ft); + if (err) + mlx5_core_err(ns->dev, "Failed to remove dest action (%d)\n", err); + + if (mlx5_fs_cmd_is_fw_term_table(ft)) + return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_table(ns, ft); + + err = mlx5_fs_set_ft_default_miss(ns, ft, NULL); + if (err) + mlx5_core_err(ns->dev, "Failed to disconnect next table (%d)\n", err); + + err = mlx5hws_table_destroy(ft->fs_hws_table.hws_table); + if (err) + mlx5_core_err(ns->dev, "Failed to destroy flow_table (%d)\n", err); + + return err; +} + +static int mlx5_cmd_hws_modify_flow_table(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_table *next_ft) +{ + if (mlx5_fs_cmd_is_fw_term_table(ft)) + return mlx5_fs_cmd_get_fw_cmds()->modify_flow_table(ns, ft, next_ft); + + return mlx5_fs_set_ft_default_miss(ns, ft, next_ft); +} + +static int mlx5_cmd_hws_update_root_ft(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + u32 underlay_qpn, + bool disconnect) +{ + return mlx5_fs_cmd_get_fw_cmds()->update_root_ft(ns, ft, underlay_qpn, + disconnect); +} + +static int mlx5_cmd_hws_create_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, u32 *in, + struct mlx5_flow_group *fg) +{ + struct mlx5hws_match_parameters mask; + struct mlx5hws_bwc_matcher *matcher; + u8 match_criteria_enable; + u32 priority; + + if (mlx5_fs_cmd_is_fw_term_table(ft)) + return mlx5_fs_cmd_get_fw_cmds()->create_flow_group(ns, ft, in, fg); + + mask.match_buf = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + mask.match_sz = sizeof(fg->mask.match_criteria); + + match_criteria_enable = MLX5_GET(create_flow_group_in, in, + match_criteria_enable); + priority = MLX5_GET(create_flow_group_in, in, start_flow_index); + matcher = mlx5hws_bwc_matcher_create(ft->fs_hws_table.hws_table, + priority, match_criteria_enable, + &mask); + if (!matcher) { + mlx5_core_err(ns->dev, "Failed creating matcher\n"); + return -EINVAL; + } + + fg->fs_hws_matcher.matcher = matcher; + return 0; +} + +static int mlx5_cmd_hws_destroy_flow_group(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *fg) +{ + if (mlx5_fs_cmd_is_fw_term_table(ft)) + return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_group(ns, ft, fg); + + return mlx5hws_bwc_matcher_destroy(fg->fs_hws_matcher.matcher); +} + +static struct mlx5hws_action * +mlx5_fs_get_dest_action_ft(struct mlx5_fs_hws_context *fs_ctx, + struct mlx5_flow_rule *dst) +{ + return xa_load(&fs_ctx->hws_pool.table_dests, dst->dest_attr.ft->id); +} + +static struct mlx5hws_action * +mlx5_fs_get_dest_action_table_num(struct mlx5_fs_hws_context *fs_ctx, + struct mlx5_flow_rule *dst) +{ + u32 table_num = dst->dest_attr.ft_num; + + return xa_load(&fs_ctx->hws_pool.table_dests, table_num); +} + +static struct mlx5hws_action * +mlx5_fs_create_dest_action_table_num(struct mlx5_fs_hws_context *fs_ctx, + struct mlx5_flow_rule *dst) +{ + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; + struct mlx5hws_context *ctx = fs_ctx->hws_ctx; + u32 table_num = dst->dest_attr.ft_num; + + return mlx5hws_action_create_dest_table_num(ctx, table_num, flags); +} + +static struct mlx5hws_action * +mlx5_fs_get_dest_action_vport(struct mlx5_fs_hws_context *fs_ctx, + struct mlx5_flow_rule *dst, + bool is_dest_type_uplink) +{ + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; + struct mlx5_flow_destination *dest_attr = &dst->dest_attr; + struct mlx5hws_context *ctx = fs_ctx->hws_ctx; + struct mlx5hws_action *dest; + struct xarray *dests_xa; + bool vhca_id_valid; + unsigned long idx; + u16 vport_num; + int err; + + vhca_id_valid = is_dest_type_uplink || + (dest_attr->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID); + vport_num = is_dest_type_uplink ? MLX5_VPORT_UPLINK : dest_attr->vport.num; + if (vhca_id_valid) { + dests_xa = &fs_ctx->hws_pool.vport_vhca_dests; + idx = (unsigned long)dest_attr->vport.vhca_id << 16 | vport_num; + } else { + dests_xa = &fs_ctx->hws_pool.vport_dests; + idx = vport_num; + } +dest_load: + dest = xa_load(dests_xa, idx); + if (dest) + return dest; + + dest = mlx5hws_action_create_dest_vport(ctx, vport_num, vhca_id_valid, + dest_attr->vport.vhca_id, flags); + + err = xa_insert(dests_xa, idx, dest, GFP_KERNEL); + if (err) { + mlx5hws_action_destroy(dest); + dest = NULL; + + if (err == -EBUSY) + /* xarray entry was already stored by another thread */ + goto dest_load; + } + + return dest; +} + +static struct mlx5hws_action * +mlx5_fs_create_dest_action_range(struct mlx5hws_context *ctx, + struct mlx5_flow_rule *dst) +{ + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; + struct mlx5_flow_destination *dest_attr = &dst->dest_attr; + + return mlx5hws_action_create_dest_match_range(ctx, + dest_attr->range.field, + dest_attr->range.hit_ft, + dest_attr->range.miss_ft, + dest_attr->range.min, + dest_attr->range.max, + flags); +} + +static struct mlx5_fs_hws_data * +mlx5_fs_get_cached_hws_data(struct xarray *cache_xa, unsigned long index) +{ + struct mlx5_fs_hws_data *fs_hws_data; + int err; + + xa_lock(cache_xa); + fs_hws_data = xa_load(cache_xa, index); + if (!fs_hws_data) { + fs_hws_data = kzalloc(sizeof(*fs_hws_data), GFP_ATOMIC); + if (!fs_hws_data) { + xa_unlock(cache_xa); + return NULL; + } + refcount_set(&fs_hws_data->hws_action_refcount, 0); + mutex_init(&fs_hws_data->lock); + err = __xa_insert(cache_xa, index, fs_hws_data, GFP_ATOMIC); + if (err) { + kfree(fs_hws_data); + xa_unlock(cache_xa); + return NULL; + } + } + xa_unlock(cache_xa); + + return fs_hws_data; +} + +static struct mlx5hws_action * +mlx5_fs_get_action_aso_meter(struct mlx5_fs_hws_context *fs_ctx, + struct mlx5_exe_aso *exe_aso) +{ + struct mlx5_fs_hws_create_action_ctx create_ctx; + struct mlx5hws_context *ctx = fs_ctx->hws_ctx; + struct mlx5_fs_hws_data *meter_hws_data; + u32 id = exe_aso->base_id; + struct xarray *meters_xa; + + meters_xa = &fs_ctx->hws_pool.aso_meters; + meter_hws_data = mlx5_fs_get_cached_hws_data(meters_xa, id); + if (!meter_hws_data) + return NULL; + + create_ctx.hws_ctx = ctx; + create_ctx.actions_type = MLX5HWS_ACTION_TYP_ASO_METER; + create_ctx.id = id; + create_ctx.return_reg_id = exe_aso->return_reg_id; + + return mlx5_fs_get_hws_action(meter_hws_data, &create_ctx); +} + +static void mlx5_fs_put_action_aso_meter(struct mlx5_fs_hws_context *fs_ctx, + struct mlx5_exe_aso *exe_aso) +{ + struct mlx5_fs_hws_data *meter_hws_data; + struct xarray *meters_xa; + + meters_xa = &fs_ctx->hws_pool.aso_meters; + meter_hws_data = xa_load(meters_xa, exe_aso->base_id); + if (!meter_hws_data) + return; + return mlx5_fs_put_hws_action(meter_hws_data); +} + +static struct mlx5hws_action * +mlx5_fs_get_dest_action_sampler(struct mlx5_fs_hws_context *fs_ctx, + struct mlx5_flow_rule *dst) +{ + struct mlx5_fs_hws_create_action_ctx create_ctx; + struct mlx5hws_context *ctx = fs_ctx->hws_ctx; + struct mlx5_fs_hws_data *sampler_hws_data; + u32 id = dst->dest_attr.sampler_id; + struct xarray *sampler_xa; + + sampler_xa = &fs_ctx->hws_pool.sample_dests; + sampler_hws_data = mlx5_fs_get_cached_hws_data(sampler_xa, id); + if (!sampler_hws_data) + return NULL; + + create_ctx.hws_ctx = ctx; + create_ctx.actions_type = MLX5HWS_ACTION_TYP_SAMPLER; + create_ctx.id = id; + + return mlx5_fs_get_hws_action(sampler_hws_data, &create_ctx); +} + +static void mlx5_fs_put_dest_action_sampler(struct mlx5_fs_hws_context *fs_ctx, + u32 sampler_id) +{ + struct mlx5_fs_hws_data *sampler_hws_data; + struct xarray *sampler_xa; + + sampler_xa = &fs_ctx->hws_pool.sample_dests; + sampler_hws_data = xa_load(sampler_xa, sampler_id); + if (!sampler_hws_data) + return; + + mlx5_fs_put_hws_action(sampler_hws_data); +} + +static struct mlx5hws_action * +mlx5_fs_create_action_dest_array(struct mlx5hws_context *ctx, + struct mlx5hws_action_dest_attr *dests, + u32 num_of_dests, bool ignore_flow_level) +{ + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; + + return mlx5hws_action_create_dest_array(ctx, num_of_dests, dests, + ignore_flow_level, flags); +} + +static struct mlx5hws_action * +mlx5_fs_get_action_push_vlan(struct mlx5_fs_hws_context *fs_ctx) +{ + return fs_ctx->hws_pool.push_vlan_action; +} + +static u32 mlx5_fs_calc_vlan_hdr(struct mlx5_fs_vlan *vlan) +{ + u16 n_ethtype = vlan->ethtype; + u8 prio = vlan->prio; + u16 vid = vlan->vid; + + return (u32)n_ethtype << 16 | (u32)(prio) << 12 | (u32)vid; +} + +static struct mlx5hws_action * +mlx5_fs_get_action_pop_vlan(struct mlx5_fs_hws_context *fs_ctx) +{ + return fs_ctx->hws_pool.pop_vlan_action; +} + +static struct mlx5hws_action * +mlx5_fs_get_action_decap_tnl_l2_to_l2(struct mlx5_fs_hws_context *fs_ctx) +{ + return fs_ctx->hws_pool.decapl2_action; +} + +static struct mlx5hws_action * +mlx5_fs_get_dest_action_drop(struct mlx5_fs_hws_context *fs_ctx) +{ + return fs_ctx->hws_pool.drop_action; +} + +static struct mlx5hws_action * +mlx5_fs_get_action_tag(struct mlx5_fs_hws_context *fs_ctx) +{ + return fs_ctx->hws_pool.tag_action; +} + +static struct mlx5hws_action * +mlx5_fs_create_action_last(struct mlx5hws_context *ctx) +{ + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; + + return mlx5hws_action_create_last(ctx, flags); +} + +static struct mlx5hws_action * +mlx5_fs_create_hws_action(struct mlx5_fs_hws_create_action_ctx *create_ctx) +{ + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; + + switch (create_ctx->actions_type) { + case MLX5HWS_ACTION_TYP_CTR: + return mlx5hws_action_create_counter(create_ctx->hws_ctx, + create_ctx->id, flags); + case MLX5HWS_ACTION_TYP_ASO_METER: + return mlx5hws_action_create_aso_meter(create_ctx->hws_ctx, + create_ctx->id, + create_ctx->return_reg_id, + flags); + case MLX5HWS_ACTION_TYP_SAMPLER: + return mlx5hws_action_create_flow_sampler(create_ctx->hws_ctx, + create_ctx->id, flags); + default: + return NULL; + } +} + +struct mlx5hws_action * +mlx5_fs_get_hws_action(struct mlx5_fs_hws_data *fs_hws_data, + struct mlx5_fs_hws_create_action_ctx *create_ctx) +{ + /* try avoid locking if not necessary */ + if (refcount_inc_not_zero(&fs_hws_data->hws_action_refcount)) + return fs_hws_data->hws_action; + + mutex_lock(&fs_hws_data->lock); + if (refcount_inc_not_zero(&fs_hws_data->hws_action_refcount)) { + mutex_unlock(&fs_hws_data->lock); + return fs_hws_data->hws_action; + } + fs_hws_data->hws_action = mlx5_fs_create_hws_action(create_ctx); + if (!fs_hws_data->hws_action) { + mutex_unlock(&fs_hws_data->lock); + return NULL; + } + refcount_set(&fs_hws_data->hws_action_refcount, 1); + mutex_unlock(&fs_hws_data->lock); + + return fs_hws_data->hws_action; +} + +void mlx5_fs_put_hws_action(struct mlx5_fs_hws_data *fs_hws_data) +{ + if (!fs_hws_data) + return; + + /* try avoid locking if not necessary */ + if (refcount_dec_not_one(&fs_hws_data->hws_action_refcount)) + return; + + mutex_lock(&fs_hws_data->lock); + if (!refcount_dec_and_test(&fs_hws_data->hws_action_refcount)) { + mutex_unlock(&fs_hws_data->lock); + return; + } + mlx5hws_action_destroy(fs_hws_data->hws_action); + fs_hws_data->hws_action = NULL; + mutex_unlock(&fs_hws_data->lock); +} + +static void mlx5_fs_destroy_fs_action(struct mlx5_flow_root_namespace *ns, + struct mlx5_fs_hws_rule_action *fs_action) +{ + struct mlx5_fs_hws_context *fs_ctx = &ns->fs_hws_context; + + switch (mlx5hws_action_get_type(fs_action->action)) { + case MLX5HWS_ACTION_TYP_CTR: + mlx5_fc_put_hws_action(fs_action->counter); + break; + case MLX5HWS_ACTION_TYP_ASO_METER: + mlx5_fs_put_action_aso_meter(fs_ctx, fs_action->exe_aso); + break; + case MLX5HWS_ACTION_TYP_SAMPLER: + mlx5_fs_put_dest_action_sampler(fs_ctx, fs_action->sampler_id); + break; + default: + mlx5hws_action_destroy(fs_action->action); + } +} + +static void +mlx5_fs_destroy_fs_actions(struct mlx5_flow_root_namespace *ns, + struct mlx5_fs_hws_rule_action **fs_actions, + int *num_fs_actions) +{ + int i; + + /* Free in reverse order to handle action dependencies */ + for (i = *num_fs_actions - 1; i >= 0; i--) + mlx5_fs_destroy_fs_action(ns, *fs_actions + i); + *num_fs_actions = 0; + kfree(*fs_actions); + *fs_actions = NULL; +} + +/* Splits FTE's actions into cached, rule and destination actions. + * The cached and destination actions are saved on the fte hws rule. + * The rule actions are returned as a parameter, together with their count. + * We want to support a rule with 32 destinations, which means we need to + * account for 32 destinations plus usually a counter plus one more action + * for a multi-destination flow table. + * 32 is SW limitation for array size, keep. HWS limitation is 16M STEs per matcher + */ +#define MLX5_FLOW_CONTEXT_ACTION_MAX 34 +static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + struct fs_fte *fte, + struct mlx5hws_rule_action **ractions) +{ + struct mlx5_flow_act *fte_action = &fte->act_dests.action; + struct mlx5_fs_hws_context *fs_ctx = &ns->fs_hws_context; + struct mlx5hws_action_dest_attr *dest_actions; + struct mlx5hws_context *ctx = fs_ctx->hws_ctx; + struct mlx5_fs_hws_rule_action *fs_actions; + struct mlx5_core_dev *dev = ns->dev; + struct mlx5hws_action *dest_action; + struct mlx5hws_action *tmp_action; + struct mlx5_fs_hws_pr *pr_data; + struct mlx5_fs_hws_mh *mh_data; + bool delay_encap_set = false; + struct mlx5_flow_rule *dst; + int num_dest_actions = 0; + int num_fs_actions = 0; + int num_actions = 0; + int err; + + *ractions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(**ractions), + GFP_KERNEL); + if (!*ractions) { + err = -ENOMEM; + goto out_err; + } + + fs_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, + sizeof(*fs_actions), GFP_KERNEL); + if (!fs_actions) { + err = -ENOMEM; + goto free_actions_alloc; + } + + dest_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, + sizeof(*dest_actions), GFP_KERNEL); + if (!dest_actions) { + err = -ENOMEM; + goto free_fs_actions_alloc; + } + + /* The order of the actions are must to be kept, only the following + * order is supported by HW steering: + * HWS: decap -> remove_hdr -> pop_vlan -> modify header -> push_vlan + * -> reformat (insert_hdr/encap) -> ctr -> tag -> aso + * -> drop -> FWD:tbl/vport/sampler/tbl_num/range -> dest_array -> last + */ + if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { + tmp_action = mlx5_fs_get_action_decap_tnl_l2_to_l2(fs_ctx); + if (!tmp_action) { + err = -ENOMEM; + goto free_dest_actions_alloc; + } + (*ractions)[num_actions++].action = tmp_action; + } + + if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { + int reformat_type = fte_action->pkt_reformat->reformat_type; + + if (fte_action->pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_FW) { + mlx5_core_err(dev, "FW-owned reformat can't be used in HWS rule\n"); + err = -EINVAL; + goto free_actions; + } + + if (reformat_type == MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2) { + pr_data = fte_action->pkt_reformat->fs_hws_action.pr_data; + (*ractions)[num_actions].reformat.offset = pr_data->offset; + (*ractions)[num_actions].reformat.hdr_idx = pr_data->hdr_idx; + (*ractions)[num_actions].reformat.data = pr_data->data; + (*ractions)[num_actions++].action = + fte_action->pkt_reformat->fs_hws_action.hws_action; + } else if (reformat_type == MLX5_REFORMAT_TYPE_REMOVE_HDR) { + (*ractions)[num_actions++].action = + fte_action->pkt_reformat->fs_hws_action.hws_action; + } else { + delay_encap_set = true; + } + } + + if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) { + tmp_action = mlx5_fs_get_action_pop_vlan(fs_ctx); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + (*ractions)[num_actions++].action = tmp_action; + } + + if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2) { + tmp_action = mlx5_fs_get_action_pop_vlan(fs_ctx); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + (*ractions)[num_actions++].action = tmp_action; + } + + if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + mh_data = fte_action->modify_hdr->fs_hws_action.mh_data; + (*ractions)[num_actions].modify_header.offset = mh_data->offset; + (*ractions)[num_actions].modify_header.data = mh_data->data; + (*ractions)[num_actions++].action = + fte_action->modify_hdr->fs_hws_action.hws_action; + } + + if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + tmp_action = mlx5_fs_get_action_push_vlan(fs_ctx); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + (*ractions)[num_actions].push_vlan.vlan_hdr = + htonl(mlx5_fs_calc_vlan_hdr(&fte_action->vlan[0])); + (*ractions)[num_actions++].action = tmp_action; + } + + if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) { + tmp_action = mlx5_fs_get_action_push_vlan(fs_ctx); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + (*ractions)[num_actions].push_vlan.vlan_hdr = + htonl(mlx5_fs_calc_vlan_hdr(&fte_action->vlan[1])); + (*ractions)[num_actions++].action = tmp_action; + } + + if (delay_encap_set) { + pr_data = fte_action->pkt_reformat->fs_hws_action.pr_data; + (*ractions)[num_actions].reformat.offset = pr_data->offset; + (*ractions)[num_actions].reformat.data = pr_data->data; + (*ractions)[num_actions++].action = + fte_action->pkt_reformat->fs_hws_action.hws_action; + } + + if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + list_for_each_entry(dst, &fte->node.children, node.list) { + struct mlx5_fc *counter; + + if (dst->dest_attr.type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + + counter = dst->dest_attr.counter; + tmp_action = mlx5_fc_get_hws_action(ctx, counter); + if (!tmp_action) { + err = -EINVAL; + goto free_actions; + } + + (*ractions)[num_actions].counter.offset = + mlx5_fc_id(counter) - mlx5_fc_get_base_id(counter); + (*ractions)[num_actions++].action = tmp_action; + fs_actions[num_fs_actions].action = tmp_action; + fs_actions[num_fs_actions++].counter = counter; + } + } + + if (fte->act_dests.flow_context.flow_tag) { + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + tmp_action = mlx5_fs_get_action_tag(fs_ctx); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + (*ractions)[num_actions].tag.value = fte->act_dests.flow_context.flow_tag; + (*ractions)[num_actions++].action = tmp_action; + } + + if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) { + if (fte_action->exe_aso.type != MLX5_EXE_ASO_FLOW_METER || + num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + + tmp_action = mlx5_fs_get_action_aso_meter(fs_ctx, + &fte_action->exe_aso); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + (*ractions)[num_actions].aso_meter.offset = + fte_action->exe_aso.flow_meter.meter_idx; + (*ractions)[num_actions].aso_meter.init_color = + fte_action->exe_aso.flow_meter.init_color; + (*ractions)[num_actions++].action = tmp_action; + fs_actions[num_fs_actions].action = tmp_action; + fs_actions[num_fs_actions++].exe_aso = &fte_action->exe_aso; + } + + if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_DROP) { + dest_action = mlx5_fs_get_dest_action_drop(fs_ctx); + if (!dest_action) { + err = -ENOMEM; + goto free_actions; + } + dest_actions[num_dest_actions++].dest = dest_action; + } + + if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + list_for_each_entry(dst, &fte->node.children, node.list) { + struct mlx5_flow_destination *attr = &dst->dest_attr; + bool type_uplink = + attr->type == MLX5_FLOW_DESTINATION_TYPE_UPLINK; + + if (num_fs_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + num_dest_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + if (attr->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + switch (attr->type) { + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + dest_action = mlx5_fs_get_dest_action_ft(fs_ctx, dst); + if (dst->dest_attr.ft->flags & + MLX5_FLOW_TABLE_UPLINK_VPORT) + dest_actions[num_dest_actions].is_wire_ft = true; + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM: + dest_action = mlx5_fs_get_dest_action_table_num(fs_ctx, + dst); + if (dest_action) + break; + dest_action = mlx5_fs_create_dest_action_table_num(fs_ctx, + dst); + fs_actions[num_fs_actions++].action = dest_action; + break; + case MLX5_FLOW_DESTINATION_TYPE_RANGE: + dest_action = mlx5_fs_create_dest_action_range(ctx, dst); + fs_actions[num_fs_actions++].action = dest_action; + break; + case MLX5_FLOW_DESTINATION_TYPE_UPLINK: + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + dest_action = mlx5_fs_get_dest_action_vport(fs_ctx, dst, + type_uplink); + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER: + dest_action = + mlx5_fs_get_dest_action_sampler(fs_ctx, + dst); + fs_actions[num_fs_actions].action = dest_action; + fs_actions[num_fs_actions++].sampler_id = + dst->dest_attr.sampler_id; + break; + default: + err = -EOPNOTSUPP; + goto free_actions; + } + if (!dest_action) { + err = -ENOMEM; + goto free_actions; + } + dest_actions[num_dest_actions++].dest = dest_action; + } + } + + if (num_dest_actions == 1) { + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + (*ractions)[num_actions++].action = dest_actions->dest; + } else if (num_dest_actions > 1) { + bool ignore_flow_level; + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + num_fs_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + ignore_flow_level = + !!(fte_action->flags & FLOW_ACT_IGNORE_FLOW_LEVEL); + tmp_action = + mlx5_fs_create_action_dest_array(ctx, dest_actions, + num_dest_actions, + ignore_flow_level); + if (!tmp_action) { + err = -EOPNOTSUPP; + goto free_actions; + } + fs_actions[num_fs_actions++].action = tmp_action; + (*ractions)[num_actions++].action = tmp_action; + } + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || + num_fs_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -EOPNOTSUPP; + goto free_actions; + } + + tmp_action = mlx5_fs_create_action_last(ctx); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_actions[num_fs_actions++].action = tmp_action; + (*ractions)[num_actions++].action = tmp_action; + + kfree(dest_actions); + + /* Actions created specifically for this rule will be destroyed + * once rule is deleted. + */ + fte->fs_hws_rule.num_fs_actions = num_fs_actions; + fte->fs_hws_rule.hws_fs_actions = fs_actions; + + return 0; + +free_actions: + mlx5_fs_destroy_fs_actions(ns, &fs_actions, &num_fs_actions); +free_dest_actions_alloc: + kfree(dest_actions); +free_fs_actions_alloc: + kfree(fs_actions); +free_actions_alloc: + kfree(*ractions); + *ractions = NULL; +out_err: + return err; +} + +static int mlx5_cmd_hws_create_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + struct fs_fte *fte) +{ + struct mlx5hws_match_parameters params; + struct mlx5hws_rule_action *ractions; + struct mlx5hws_bwc_rule *rule; + int err = 0; + + if (mlx5_fs_cmd_is_fw_term_table(ft)) + return mlx5_fs_cmd_get_fw_cmds()->create_fte(ns, ft, group, fte); + + err = mlx5_fs_fte_get_hws_actions(ns, ft, group, fte, &ractions); + if (err) + goto out_err; + + params.match_sz = sizeof(fte->val); + params.match_buf = fte->val; + + rule = mlx5hws_bwc_rule_create(group->fs_hws_matcher.matcher, ¶ms, + fte->act_dests.flow_context.flow_source, + ractions); + kfree(ractions); + if (!rule) { + err = -EINVAL; + goto free_actions; + } + + fte->fs_hws_rule.bwc_rule = rule; + return 0; + +free_actions: + mlx5_fs_destroy_fs_actions(ns, &fte->fs_hws_rule.hws_fs_actions, + &fte->fs_hws_rule.num_fs_actions); +out_err: + mlx5_core_err(ns->dev, "Failed to create hws rule err(%d)\n", err); + return err; +} + +static int mlx5_cmd_hws_delete_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct fs_fte *fte) +{ + struct mlx5_fs_hws_rule *rule = &fte->fs_hws_rule; + int err; + + if (mlx5_fs_cmd_is_fw_term_table(ft)) + return mlx5_fs_cmd_get_fw_cmds()->delete_fte(ns, ft, fte); + + err = mlx5hws_bwc_rule_destroy(rule->bwc_rule); + rule->bwc_rule = NULL; + + mlx5_fs_destroy_fs_actions(ns, &rule->hws_fs_actions, + &rule->num_fs_actions); + + return err; +} + +static int mlx5_cmd_hws_update_fte(struct mlx5_flow_root_namespace *ns, + struct mlx5_flow_table *ft, + struct mlx5_flow_group *group, + int modify_mask, + struct fs_fte *fte) +{ + int allowed_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST) | + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); + struct mlx5_fs_hws_rule_action *saved_hws_fs_actions; + struct mlx5hws_rule_action *ractions; + int saved_num_fs_actions; + int ret; + + if (mlx5_fs_cmd_is_fw_term_table(ft)) + return mlx5_fs_cmd_get_fw_cmds()->update_fte(ns, ft, group, + modify_mask, fte); + + if ((modify_mask & ~allowed_mask) != 0) + return -EINVAL; + + saved_hws_fs_actions = fte->fs_hws_rule.hws_fs_actions; + saved_num_fs_actions = fte->fs_hws_rule.num_fs_actions; + + ret = mlx5_fs_fte_get_hws_actions(ns, ft, group, fte, &ractions); + if (ret) + return ret; + + ret = mlx5hws_bwc_rule_action_update(fte->fs_hws_rule.bwc_rule, ractions); + kfree(ractions); + if (ret) + goto restore_actions; + + mlx5_fs_destroy_fs_actions(ns, &saved_hws_fs_actions, + &saved_num_fs_actions); + return ret; + +restore_actions: + mlx5_fs_destroy_fs_actions(ns, &fte->fs_hws_rule.hws_fs_actions, + &fte->fs_hws_rule.num_fs_actions); + fte->fs_hws_rule.hws_fs_actions = saved_hws_fs_actions; + fte->fs_hws_rule.num_fs_actions = saved_num_fs_actions; + return ret; +} + +static struct mlx5hws_action * +mlx5_fs_create_action_remove_header_vlan(struct mlx5hws_context *ctx) +{ + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; + struct mlx5hws_action_remove_header_attr remove_hdr_vlan = {}; + + /* MAC anchor not supported in HWS reformat, use VLAN anchor */ + remove_hdr_vlan.anchor = MLX5_REFORMAT_CONTEXT_ANCHOR_VLAN_START; + remove_hdr_vlan.offset = 0; + remove_hdr_vlan.size = sizeof(struct vlan_hdr); + return mlx5hws_action_create_remove_header(ctx, &remove_hdr_vlan, flags); +} + +static struct mlx5hws_action * +mlx5_fs_get_action_remove_header_vlan(struct mlx5_fs_hws_context *fs_ctx, + struct mlx5_pkt_reformat_params *params) +{ + if (!params || + params->param_0 != MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START || + params->param_1 != offsetof(struct vlan_ethhdr, h_vlan_proto) || + params->size != sizeof(struct vlan_hdr)) + return NULL; + + return fs_ctx->hws_pool.remove_hdr_vlan_action; +} + +static int +mlx5_fs_verify_insert_header_params(struct mlx5_core_dev *mdev, + struct mlx5_pkt_reformat_params *params) +{ + if ((!params->data && params->size) || (params->data && !params->size) || + MLX5_CAP_GEN_2(mdev, max_reformat_insert_size) < params->size || + MLX5_CAP_GEN_2(mdev, max_reformat_insert_offset) < params->param_1) { + mlx5_core_err(mdev, "Invalid reformat params for INSERT_HDR\n"); + return -EINVAL; + } + if (params->param_0 != MLX5_FS_INSERT_HDR_VLAN_ANCHOR || + params->param_1 != MLX5_FS_INSERT_HDR_VLAN_OFFSET || + params->size != MLX5_FS_INSERT_HDR_VLAN_SIZE) { + mlx5_core_err(mdev, "Only vlan insert header supported\n"); + return -EOPNOTSUPP; + } + return 0; +} + +static int +mlx5_fs_verify_encap_decap_params(struct mlx5_core_dev *dev, + struct mlx5_pkt_reformat_params *params) +{ + if (params->param_0 || params->param_1) { + mlx5_core_err(dev, "Invalid reformat params\n"); + return -EINVAL; + } + return 0; +} + +static struct mlx5_fs_pool * +mlx5_fs_get_pr_encap_pool(struct mlx5_core_dev *dev, struct xarray *pr_pools, + enum mlx5hws_action_type reformat_type, size_t size) +{ + struct mlx5_fs_pool *pr_pool; + unsigned long index = size; + int err; + + pr_pool = xa_load(pr_pools, index); + if (pr_pool) + return pr_pool; + + pr_pool = kzalloc(sizeof(*pr_pool), GFP_KERNEL); + if (!pr_pool) + return ERR_PTR(-ENOMEM); + err = mlx5_fs_hws_pr_pool_init(pr_pool, dev, size, reformat_type); + if (err) + goto free_pr_pool; + err = xa_insert(pr_pools, index, pr_pool, GFP_KERNEL); + if (err) + goto cleanup_pr_pool; + return pr_pool; + +cleanup_pr_pool: + mlx5_fs_hws_pr_pool_cleanup(pr_pool); +free_pr_pool: + kfree(pr_pool); + return ERR_PTR(err); +} + +static void +mlx5_fs_destroy_pr_pool(struct mlx5_fs_pool *pool, struct xarray *pr_pools, + unsigned long index) +{ + xa_erase(pr_pools, index); + mlx5_fs_hws_pr_pool_cleanup(pool); + kfree(pool); +} + +static int +mlx5_cmd_hws_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat_params *params, + enum mlx5_flow_namespace_type namespace, + struct mlx5_pkt_reformat *pkt_reformat) +{ + struct mlx5_fs_hws_context *fs_ctx = &ns->fs_hws_context; + struct mlx5_fs_hws_actions_pool *hws_pool; + struct mlx5hws_action *hws_action = NULL; + struct mlx5_fs_hws_pr *pr_data = NULL; + struct mlx5_fs_pool *pr_pool = NULL; + struct mlx5_core_dev *dev = ns->dev; + u8 hdr_idx = 0; + int err; + + if (!params) + return -EINVAL; + + hws_pool = &fs_ctx->hws_pool; + + switch (params->type) { + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: + case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + if (mlx5_fs_verify_encap_decap_params(dev, params)) + return -EINVAL; + pr_pool = mlx5_fs_get_pr_encap_pool(dev, &hws_pool->el2tol2tnl_pools, + MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2, + params->size); + if (IS_ERR(pr_pool)) + return PTR_ERR(pr_pool); + break; + case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + if (mlx5_fs_verify_encap_decap_params(dev, params)) + return -EINVAL; + pr_pool = mlx5_fs_get_pr_encap_pool(dev, &hws_pool->el2tol3tnl_pools, + MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3, + params->size); + if (IS_ERR(pr_pool)) + return PTR_ERR(pr_pool); + break; + case MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + if (mlx5_fs_verify_encap_decap_params(dev, params)) + return -EINVAL; + pr_pool = &hws_pool->dl3tnltol2_pool; + hdr_idx = params->size == ETH_HLEN ? + MLX5_FS_DL3TNLTOL2_MAC_HDR_IDX : + MLX5_FS_DL3TNLTOL2_MAC_VLAN_HDR_IDX; + break; + case MLX5_REFORMAT_TYPE_INSERT_HDR: + err = mlx5_fs_verify_insert_header_params(dev, params); + if (err) + return err; + pr_pool = &hws_pool->insert_hdr_pool; + break; + case MLX5_REFORMAT_TYPE_REMOVE_HDR: + hws_action = mlx5_fs_get_action_remove_header_vlan(fs_ctx, params); + if (!hws_action) + mlx5_core_err(dev, "Only vlan remove header supported\n"); + break; + default: + mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n", + params->type); + return -EOPNOTSUPP; + } + + if (pr_pool) { + pr_data = mlx5_fs_hws_pr_pool_acquire_pr(pr_pool); + if (IS_ERR_OR_NULL(pr_data)) + return !pr_data ? -EINVAL : PTR_ERR(pr_data); + hws_action = pr_data->bulk->hws_action; + if (!hws_action) { + mlx5_core_err(dev, + "Failed allocating packet-reformat action\n"); + err = -EINVAL; + goto release_pr; + } + pr_data->data = kmemdup(params->data, params->size, GFP_KERNEL); + if (!pr_data->data) { + err = -ENOMEM; + goto release_pr; + } + pr_data->hdr_idx = hdr_idx; + pr_data->data_size = params->size; + pkt_reformat->fs_hws_action.pr_data = pr_data; + } + + mutex_init(&pkt_reformat->fs_hws_action.lock); + pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_HWS; + pkt_reformat->fs_hws_action.hws_action = hws_action; + return 0; + +release_pr: + if (pr_pool && pr_data) + mlx5_fs_hws_pr_pool_release_pr(pr_pool, pr_data); + return err; +} + +static void mlx5_cmd_hws_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_pkt_reformat *pkt_reformat) +{ + struct mlx5_fs_hws_actions_pool *hws_pool = &ns->fs_hws_context.hws_pool; + struct mlx5_core_dev *dev = ns->dev; + struct mlx5_fs_hws_pr *pr_data; + struct mlx5_fs_pool *pr_pool; + + if (pkt_reformat->fs_hws_action.fw_reformat_id != 0) { + struct mlx5_pkt_reformat fw_pkt_reformat = { 0 }; + + fw_pkt_reformat.id = pkt_reformat->fs_hws_action.fw_reformat_id; + mlx5_fs_cmd_get_fw_cmds()-> + packet_reformat_dealloc(ns, &fw_pkt_reformat); + pkt_reformat->fs_hws_action.fw_reformat_id = 0; + } + + if (pkt_reformat->reformat_type == MLX5_REFORMAT_TYPE_REMOVE_HDR) + return; + + if (!pkt_reformat->fs_hws_action.pr_data) { + mlx5_core_err(ns->dev, "Failed release packet-reformat\n"); + return; + } + pr_data = pkt_reformat->fs_hws_action.pr_data; + + switch (pkt_reformat->reformat_type) { + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: + case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + pr_pool = mlx5_fs_get_pr_encap_pool(dev, &hws_pool->el2tol2tnl_pools, + MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2, + pr_data->data_size); + break; + case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + pr_pool = mlx5_fs_get_pr_encap_pool(dev, &hws_pool->el2tol2tnl_pools, + MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2, + pr_data->data_size); + break; + case MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2: + pr_pool = &hws_pool->dl3tnltol2_pool; + break; + case MLX5_REFORMAT_TYPE_INSERT_HDR: + pr_pool = &hws_pool->insert_hdr_pool; + break; + default: + mlx5_core_err(ns->dev, "Unknown packet-reformat type\n"); + return; + } + if (!pkt_reformat->fs_hws_action.pr_data || IS_ERR(pr_pool)) { + mlx5_core_err(ns->dev, "Failed release packet-reformat\n"); + return; + } + kfree(pr_data->data); + mlx5_fs_hws_pr_pool_release_pr(pr_pool, pr_data); + pkt_reformat->fs_hws_action.pr_data = NULL; +} + +static struct mlx5_fs_pool * +mlx5_fs_create_mh_pool(struct mlx5_core_dev *dev, + struct mlx5hws_action_mh_pattern *pattern, + struct xarray *mh_pools, unsigned long index) +{ + struct mlx5_fs_pool *pool; + int err; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return ERR_PTR(-ENOMEM); + err = mlx5_fs_hws_mh_pool_init(pool, dev, pattern); + if (err) + goto free_pool; + err = xa_insert(mh_pools, index, pool, GFP_KERNEL); + if (err) + goto cleanup_pool; + return pool; + +cleanup_pool: + mlx5_fs_hws_mh_pool_cleanup(pool); +free_pool: + kfree(pool); + return ERR_PTR(err); +} + +static void +mlx5_fs_destroy_mh_pool(struct mlx5_fs_pool *pool, struct xarray *mh_pools, + unsigned long index) +{ + xa_erase(mh_pools, index); + mlx5_fs_hws_mh_pool_cleanup(pool); + kfree(pool); +} + +static int mlx5_cmd_hws_modify_header_alloc(struct mlx5_flow_root_namespace *ns, + u8 namespace, u8 num_actions, + void *modify_actions, + struct mlx5_modify_hdr *modify_hdr) +{ + struct mlx5_fs_hws_actions_pool *hws_pool = &ns->fs_hws_context.hws_pool; + struct mlx5hws_action_mh_pattern pattern = {}; + struct mlx5_fs_hws_mh *mh_data = NULL; + struct mlx5hws_action *hws_action; + struct mlx5_fs_pool *pool; + unsigned long i, cnt = 0; + bool known_pattern; + int err; + + pattern.sz = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) * num_actions; + pattern.data = modify_actions; + + known_pattern = false; + xa_for_each(&hws_pool->mh_pools, i, pool) { + if (mlx5_fs_hws_mh_pool_match(pool, &pattern)) { + known_pattern = true; + break; + } + cnt++; + } + + if (!known_pattern) { + pool = mlx5_fs_create_mh_pool(ns->dev, &pattern, + &hws_pool->mh_pools, cnt); + if (IS_ERR(pool)) + return PTR_ERR(pool); + } + mh_data = mlx5_fs_hws_mh_pool_acquire_mh(pool); + if (IS_ERR(mh_data)) { + err = PTR_ERR(mh_data); + goto destroy_pool; + } + hws_action = mh_data->bulk->hws_action; + mh_data->data = kmemdup(pattern.data, pattern.sz, GFP_KERNEL); + if (!mh_data->data) { + err = -ENOMEM; + goto release_mh; + } + modify_hdr->fs_hws_action.mh_data = mh_data; + modify_hdr->fs_hws_action.fs_pool = pool; + modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_SW; + modify_hdr->fs_hws_action.hws_action = hws_action; + + return 0; + +release_mh: + mlx5_fs_hws_mh_pool_release_mh(pool, mh_data); +destroy_pool: + if (!known_pattern) + mlx5_fs_destroy_mh_pool(pool, &hws_pool->mh_pools, cnt); + return err; +} + +static void mlx5_cmd_hws_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, + struct mlx5_modify_hdr *modify_hdr) +{ + struct mlx5_fs_hws_mh *mh_data; + struct mlx5_fs_pool *pool; + + if (!modify_hdr->fs_hws_action.fs_pool || !modify_hdr->fs_hws_action.mh_data) { + mlx5_core_err(ns->dev, "Failed release modify-header\n"); + return; + } + + mh_data = modify_hdr->fs_hws_action.mh_data; + kfree(mh_data->data); + pool = modify_hdr->fs_hws_action.fs_pool; + mlx5_fs_hws_mh_pool_release_mh(pool, mh_data); + modify_hdr->fs_hws_action.mh_data = NULL; +} + +int +mlx5_fs_hws_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *reformat_id) +{ + enum mlx5_flow_namespace_type ns_type = pkt_reformat->ns_type; + struct mutex *lock = &pkt_reformat->fs_hws_action.lock; + u32 *id = &pkt_reformat->fs_hws_action.fw_reformat_id; + struct mlx5_pkt_reformat fw_pkt_reformat = { 0 }; + struct mlx5_pkt_reformat_params params = { 0 }; + struct mlx5_flow_root_namespace *ns; + struct mlx5_core_dev *dev; + int ret; + + mutex_lock(lock); + + if (*id != 0) { + *reformat_id = *id; + ret = 0; + goto unlock; + } + + dev = mlx5hws_action_get_dev(pkt_reformat->fs_hws_action.hws_action); + if (!dev) { + ret = -EINVAL; + goto unlock; + } + + ns = mlx5_get_root_namespace(dev, ns_type); + if (!ns) { + ret = -EINVAL; + goto unlock; + } + + params.type = pkt_reformat->reformat_type; + params.size = pkt_reformat->fs_hws_action.pr_data->data_size; + params.data = pkt_reformat->fs_hws_action.pr_data->data; + + ret = mlx5_fs_cmd_get_fw_cmds()-> + packet_reformat_alloc(ns, ¶ms, ns_type, &fw_pkt_reformat); + if (ret) + goto unlock; + + *id = fw_pkt_reformat.id; + *reformat_id = *id; + ret = 0; + +unlock: + mutex_unlock(lock); + + return ret; +} + +static int mlx5_cmd_hws_create_match_definer(struct mlx5_flow_root_namespace *ns, + u16 format_id, u32 *match_mask) +{ + return -EOPNOTSUPP; +} + +static int mlx5_cmd_hws_destroy_match_definer(struct mlx5_flow_root_namespace *ns, + int definer_id) +{ + return -EOPNOTSUPP; +} + +static u32 mlx5_cmd_hws_get_capabilities(struct mlx5_flow_root_namespace *ns, + enum fs_flow_table_type ft_type) +{ + if (ft_type != FS_FT_FDB) + return 0; + + return MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX | + MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX | + MLX5_FLOW_STEERING_CAP_MATCH_RANGES; +} + +bool mlx5_fs_hws_is_supported(struct mlx5_core_dev *dev) +{ + return mlx5hws_is_supported(dev); +} + +static const struct mlx5_flow_cmds mlx5_flow_cmds_hws = { + .create_flow_table = mlx5_cmd_hws_create_flow_table, + .destroy_flow_table = mlx5_cmd_hws_destroy_flow_table, + .modify_flow_table = mlx5_cmd_hws_modify_flow_table, + .update_root_ft = mlx5_cmd_hws_update_root_ft, + .create_flow_group = mlx5_cmd_hws_create_flow_group, + .destroy_flow_group = mlx5_cmd_hws_destroy_flow_group, + .create_fte = mlx5_cmd_hws_create_fte, + .delete_fte = mlx5_cmd_hws_delete_fte, + .update_fte = mlx5_cmd_hws_update_fte, + .packet_reformat_alloc = mlx5_cmd_hws_packet_reformat_alloc, + .packet_reformat_dealloc = mlx5_cmd_hws_packet_reformat_dealloc, + .modify_header_alloc = mlx5_cmd_hws_modify_header_alloc, + .modify_header_dealloc = mlx5_cmd_hws_modify_header_dealloc, + .create_match_definer = mlx5_cmd_hws_create_match_definer, + .destroy_match_definer = mlx5_cmd_hws_destroy_match_definer, + .create_ns = mlx5_cmd_hws_create_ns, + .destroy_ns = mlx5_cmd_hws_destroy_ns, + .set_peer = mlx5_cmd_hws_set_peer, + .get_capabilities = mlx5_cmd_hws_get_capabilities, +}; + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_hws_cmds(void) +{ + return &mlx5_flow_cmds_hws; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h new file mode 100644 index 000000000000..b92d55b2d147 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */ + +#ifndef _MLX5_FS_HWS_ +#define _MLX5_FS_HWS_ + +#include "mlx5hws.h" +#include "fs_hws_pools.h" + +struct mlx5_fs_hws_actions_pool { + struct mlx5hws_action *tag_action; + struct mlx5hws_action *pop_vlan_action; + struct mlx5hws_action *push_vlan_action; + struct mlx5hws_action *drop_action; + struct mlx5hws_action *decapl2_action; + struct mlx5hws_action *remove_hdr_vlan_action; + struct mlx5_fs_pool insert_hdr_pool; + struct mlx5_fs_pool dl3tnltol2_pool; + struct xarray el2tol3tnl_pools; + struct xarray el2tol2tnl_pools; + struct xarray mh_pools; + struct xarray table_dests; + struct xarray vport_vhca_dests; + struct xarray vport_dests; + struct xarray aso_meters; + struct xarray sample_dests; +}; + +struct mlx5_fs_hws_context { + struct mlx5hws_context *hws_ctx; + struct mlx5_fs_hws_actions_pool hws_pool; +}; + +struct mlx5_fs_hws_table { + struct mlx5hws_table *hws_table; + bool miss_ft_set; +}; + +struct mlx5_fs_hws_action { + struct mlx5hws_action *hws_action; + struct mlx5_fs_pool *fs_pool; + struct mlx5_fs_hws_pr *pr_data; + struct mlx5_fs_hws_mh *mh_data; + u32 fw_reformat_id; + /* Protect `fw_reformat_id` against being initialized from multiple + * threads. + */ + struct mutex lock; +}; + +struct mlx5_fs_hws_matcher { + struct mlx5hws_bwc_matcher *matcher; +}; + +struct mlx5_fs_hws_rule_action { + struct mlx5hws_action *action; + union { + struct mlx5_fc *counter; + struct mlx5_exe_aso *exe_aso; + u32 sampler_id; + }; +}; + +struct mlx5_fs_hws_rule { + struct mlx5hws_bwc_rule *bwc_rule; + struct mlx5_fs_hws_rule_action *hws_fs_actions; + int num_fs_actions; +}; + +struct mlx5_fs_hws_data { + struct mlx5hws_action *hws_action; + struct mutex lock; /* protects hws_action */ + refcount_t hws_action_refcount; +}; + +struct mlx5_fs_hws_create_action_ctx { + enum mlx5hws_action_type actions_type; + struct mlx5hws_context *hws_ctx; + u32 id; + union { + u8 return_reg_id; + }; +}; + +struct mlx5hws_action * +mlx5_fs_get_hws_action(struct mlx5_fs_hws_data *fs_hws_data, + struct mlx5_fs_hws_create_action_ctx *create_ctx); +void mlx5_fs_put_hws_action(struct mlx5_fs_hws_data *fs_hws_data); + +#ifdef CONFIG_MLX5_HW_STEERING + +int +mlx5_fs_hws_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *reformat_id); + +bool mlx5_fs_hws_is_supported(struct mlx5_core_dev *dev); + +const struct mlx5_flow_cmds *mlx5_fs_cmd_get_hws_cmds(void); + +#else + +static inline int +mlx5_fs_hws_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *reformat_id) +{ + return -EOPNOTSUPP; +} + +static inline bool mlx5_fs_hws_is_supported(struct mlx5_core_dev *dev) +{ + return false; +} + +static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_hws_cmds(void) +{ + return NULL; +} + +#endif /* CONFIG_MLX5_HWS_STEERING */ +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.c new file mode 100644 index 000000000000..f1ecdba74e1f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */ + +#include <mlx5_core.h> +#include "fs_hws_pools.h" + +#define MLX5_FS_HWS_DEFAULT_BULK_LEN 65536 +#define MLX5_FS_HWS_POOL_MAX_THRESHOLD BIT(18) +#define MLX5_FS_HWS_POOL_USED_BUFF_RATIO 10 + +static struct mlx5hws_action * +mlx5_fs_dl3tnltol2_bulk_action_create(struct mlx5hws_context *ctx) +{ + struct mlx5hws_action_reformat_header reformat_hdr[2] = {}; + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB; + enum mlx5hws_action_type reformat_type; + u32 log_bulk_size; + + reformat_type = MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2; + reformat_hdr[MLX5_FS_DL3TNLTOL2_MAC_HDR_IDX].sz = ETH_HLEN; + reformat_hdr[MLX5_FS_DL3TNLTOL2_MAC_VLAN_HDR_IDX].sz = ETH_HLEN + VLAN_HLEN; + + log_bulk_size = ilog2(MLX5_FS_HWS_DEFAULT_BULK_LEN); + return mlx5hws_action_create_reformat(ctx, reformat_type, 2, + reformat_hdr, log_bulk_size, flags); +} + +static struct mlx5hws_action * +mlx5_fs_el2tol3tnl_bulk_action_create(struct mlx5hws_context *ctx, size_t data_size) +{ + struct mlx5hws_action_reformat_header reformat_hdr = {}; + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB; + enum mlx5hws_action_type reformat_type; + u32 log_bulk_size; + + reformat_type = MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3; + reformat_hdr.sz = data_size; + + log_bulk_size = ilog2(MLX5_FS_HWS_DEFAULT_BULK_LEN); + return mlx5hws_action_create_reformat(ctx, reformat_type, 1, + &reformat_hdr, log_bulk_size, flags); +} + +static struct mlx5hws_action * +mlx5_fs_el2tol2tnl_bulk_action_create(struct mlx5hws_context *ctx, size_t data_size) +{ + struct mlx5hws_action_reformat_header reformat_hdr = {}; + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB; + enum mlx5hws_action_type reformat_type; + u32 log_bulk_size; + + reformat_type = MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2; + reformat_hdr.sz = data_size; + + log_bulk_size = ilog2(MLX5_FS_HWS_DEFAULT_BULK_LEN); + return mlx5hws_action_create_reformat(ctx, reformat_type, 1, + &reformat_hdr, log_bulk_size, flags); +} + +static struct mlx5hws_action * +mlx5_fs_insert_hdr_bulk_action_create(struct mlx5hws_context *ctx) +{ + struct mlx5hws_action_insert_header insert_hdr = {}; + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB; + u32 log_bulk_size; + + log_bulk_size = ilog2(MLX5_FS_HWS_DEFAULT_BULK_LEN); + insert_hdr.hdr.sz = MLX5_FS_INSERT_HDR_VLAN_SIZE; + insert_hdr.anchor = MLX5_FS_INSERT_HDR_VLAN_ANCHOR; + insert_hdr.offset = MLX5_FS_INSERT_HDR_VLAN_OFFSET; + + return mlx5hws_action_create_insert_header(ctx, 1, &insert_hdr, + log_bulk_size, flags); +} + +static struct mlx5hws_action * +mlx5_fs_pr_bulk_action_create(struct mlx5_core_dev *dev, + struct mlx5_fs_hws_pr_pool_ctx *pr_pool_ctx) +{ + struct mlx5_flow_root_namespace *root_ns; + struct mlx5hws_context *ctx; + size_t encap_data_size; + + root_ns = mlx5_get_root_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns || root_ns->mode != MLX5_FLOW_STEERING_MODE_HMFS) + return NULL; + + ctx = root_ns->fs_hws_context.hws_ctx; + if (!ctx) + return NULL; + + encap_data_size = pr_pool_ctx->encap_data_size; + switch (pr_pool_ctx->reformat_type) { + case MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2: + return mlx5_fs_dl3tnltol2_bulk_action_create(ctx); + case MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3: + return mlx5_fs_el2tol3tnl_bulk_action_create(ctx, encap_data_size); + case MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2: + return mlx5_fs_el2tol2tnl_bulk_action_create(ctx, encap_data_size); + case MLX5HWS_ACTION_TYP_INSERT_HEADER: + return mlx5_fs_insert_hdr_bulk_action_create(ctx); + default: + return NULL; + } + return NULL; +} + +static struct mlx5_fs_bulk * +mlx5_fs_hws_pr_bulk_create(struct mlx5_core_dev *dev, void *pool_ctx) +{ + struct mlx5_fs_hws_pr_pool_ctx *pr_pool_ctx; + struct mlx5_fs_hws_pr_bulk *pr_bulk; + int bulk_len; + int i; + + if (!pool_ctx) + return NULL; + pr_pool_ctx = pool_ctx; + bulk_len = MLX5_FS_HWS_DEFAULT_BULK_LEN; + pr_bulk = kvzalloc(struct_size(pr_bulk, prs_data, bulk_len), GFP_KERNEL); + if (!pr_bulk) + return NULL; + + if (mlx5_fs_bulk_init(dev, &pr_bulk->fs_bulk, bulk_len)) + goto free_pr_bulk; + + for (i = 0; i < bulk_len; i++) { + pr_bulk->prs_data[i].bulk = pr_bulk; + pr_bulk->prs_data[i].offset = i; + } + + pr_bulk->hws_action = mlx5_fs_pr_bulk_action_create(dev, pr_pool_ctx); + if (!pr_bulk->hws_action) + goto cleanup_fs_bulk; + + return &pr_bulk->fs_bulk; + +cleanup_fs_bulk: + mlx5_fs_bulk_cleanup(&pr_bulk->fs_bulk); +free_pr_bulk: + kvfree(pr_bulk); + return NULL; +} + +static int +mlx5_fs_hws_pr_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fs_bulk *fs_bulk) +{ + struct mlx5_fs_hws_pr_bulk *pr_bulk; + + pr_bulk = container_of(fs_bulk, struct mlx5_fs_hws_pr_bulk, fs_bulk); + if (mlx5_fs_bulk_get_free_amount(fs_bulk) < fs_bulk->bulk_len) { + mlx5_core_err(dev, "Freeing bulk before all reformats were released\n"); + return -EBUSY; + } + + mlx5hws_action_destroy(pr_bulk->hws_action); + mlx5_fs_bulk_cleanup(fs_bulk); + kvfree(pr_bulk); + + return 0; +} + +static void mlx5_hws_pool_update_threshold(struct mlx5_fs_pool *hws_pool) +{ + hws_pool->threshold = min_t(int, MLX5_FS_HWS_POOL_MAX_THRESHOLD, + hws_pool->used_units / MLX5_FS_HWS_POOL_USED_BUFF_RATIO); +} + +static const struct mlx5_fs_pool_ops mlx5_fs_hws_pr_pool_ops = { + .bulk_create = mlx5_fs_hws_pr_bulk_create, + .bulk_destroy = mlx5_fs_hws_pr_bulk_destroy, + .update_threshold = mlx5_hws_pool_update_threshold, +}; + +int mlx5_fs_hws_pr_pool_init(struct mlx5_fs_pool *pr_pool, + struct mlx5_core_dev *dev, size_t encap_data_size, + enum mlx5hws_action_type reformat_type) +{ + struct mlx5_fs_hws_pr_pool_ctx *pr_pool_ctx; + + if (reformat_type != MLX5HWS_ACTION_TYP_INSERT_HEADER && + reformat_type != MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2 && + reformat_type != MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3 && + reformat_type != MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2) + return -EOPNOTSUPP; + + pr_pool_ctx = kzalloc(sizeof(*pr_pool_ctx), GFP_KERNEL); + if (!pr_pool_ctx) + return -ENOMEM; + pr_pool_ctx->reformat_type = reformat_type; + pr_pool_ctx->encap_data_size = encap_data_size; + mlx5_fs_pool_init(pr_pool, dev, &mlx5_fs_hws_pr_pool_ops, pr_pool_ctx); + return 0; +} + +void mlx5_fs_hws_pr_pool_cleanup(struct mlx5_fs_pool *pr_pool) +{ + struct mlx5_fs_hws_pr_pool_ctx *pr_pool_ctx; + + mlx5_fs_pool_cleanup(pr_pool); + pr_pool_ctx = pr_pool->pool_ctx; + if (!pr_pool_ctx) + return; + kfree(pr_pool_ctx); +} + +struct mlx5_fs_hws_pr * +mlx5_fs_hws_pr_pool_acquire_pr(struct mlx5_fs_pool *pr_pool) +{ + struct mlx5_fs_pool_index pool_index = {}; + struct mlx5_fs_hws_pr_bulk *pr_bulk; + int err; + + err = mlx5_fs_pool_acquire_index(pr_pool, &pool_index); + if (err) + return ERR_PTR(err); + pr_bulk = container_of(pool_index.fs_bulk, struct mlx5_fs_hws_pr_bulk, + fs_bulk); + return &pr_bulk->prs_data[pool_index.index]; +} + +void mlx5_fs_hws_pr_pool_release_pr(struct mlx5_fs_pool *pr_pool, + struct mlx5_fs_hws_pr *pr_data) +{ + struct mlx5_fs_bulk *fs_bulk = &pr_data->bulk->fs_bulk; + struct mlx5_fs_pool_index pool_index = {}; + struct mlx5_core_dev *dev = pr_pool->dev; + + pool_index.fs_bulk = fs_bulk; + pool_index.index = pr_data->offset; + if (mlx5_fs_pool_release_index(pr_pool, &pool_index)) + mlx5_core_warn(dev, "Attempted to release packet reformat which is not acquired\n"); +} + +struct mlx5hws_action *mlx5_fs_hws_pr_get_action(struct mlx5_fs_hws_pr *pr_data) +{ + return pr_data->bulk->hws_action; +} + +static struct mlx5hws_action * +mlx5_fs_mh_bulk_action_create(struct mlx5hws_context *ctx, + struct mlx5hws_action_mh_pattern *pattern) +{ + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB; + u32 log_bulk_size; + + log_bulk_size = ilog2(MLX5_FS_HWS_DEFAULT_BULK_LEN); + return mlx5hws_action_create_modify_header(ctx, 1, pattern, + log_bulk_size, flags); +} + +static struct mlx5_fs_bulk * +mlx5_fs_hws_mh_bulk_create(struct mlx5_core_dev *dev, void *pool_ctx) +{ + struct mlx5hws_action_mh_pattern *pattern; + struct mlx5_flow_root_namespace *root_ns; + struct mlx5_fs_hws_mh_bulk *mh_bulk; + struct mlx5hws_context *ctx; + int bulk_len; + + if (!pool_ctx) + return NULL; + + root_ns = mlx5_get_root_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns || root_ns->mode != MLX5_FLOW_STEERING_MODE_HMFS) + return NULL; + + ctx = root_ns->fs_hws_context.hws_ctx; + if (!ctx) + return NULL; + + pattern = pool_ctx; + bulk_len = MLX5_FS_HWS_DEFAULT_BULK_LEN; + mh_bulk = kvzalloc(struct_size(mh_bulk, mhs_data, bulk_len), GFP_KERNEL); + if (!mh_bulk) + return NULL; + + if (mlx5_fs_bulk_init(dev, &mh_bulk->fs_bulk, bulk_len)) + goto free_mh_bulk; + + for (int i = 0; i < bulk_len; i++) { + mh_bulk->mhs_data[i].bulk = mh_bulk; + mh_bulk->mhs_data[i].offset = i; + } + + mh_bulk->hws_action = mlx5_fs_mh_bulk_action_create(ctx, pattern); + if (!mh_bulk->hws_action) + goto cleanup_fs_bulk; + + return &mh_bulk->fs_bulk; + +cleanup_fs_bulk: + mlx5_fs_bulk_cleanup(&mh_bulk->fs_bulk); +free_mh_bulk: + kvfree(mh_bulk); + return NULL; +} + +static int +mlx5_fs_hws_mh_bulk_destroy(struct mlx5_core_dev *dev, + struct mlx5_fs_bulk *fs_bulk) +{ + struct mlx5_fs_hws_mh_bulk *mh_bulk; + + mh_bulk = container_of(fs_bulk, struct mlx5_fs_hws_mh_bulk, fs_bulk); + if (mlx5_fs_bulk_get_free_amount(fs_bulk) < fs_bulk->bulk_len) { + mlx5_core_err(dev, "Freeing bulk before all modify header were released\n"); + return -EBUSY; + } + + mlx5hws_action_destroy(mh_bulk->hws_action); + mlx5_fs_bulk_cleanup(fs_bulk); + kvfree(mh_bulk); + + return 0; +} + +static const struct mlx5_fs_pool_ops mlx5_fs_hws_mh_pool_ops = { + .bulk_create = mlx5_fs_hws_mh_bulk_create, + .bulk_destroy = mlx5_fs_hws_mh_bulk_destroy, + .update_threshold = mlx5_hws_pool_update_threshold, +}; + +int mlx5_fs_hws_mh_pool_init(struct mlx5_fs_pool *fs_hws_mh_pool, + struct mlx5_core_dev *dev, + struct mlx5hws_action_mh_pattern *pattern) +{ + struct mlx5hws_action_mh_pattern *pool_pattern; + + pool_pattern = kzalloc(sizeof(*pool_pattern), GFP_KERNEL); + if (!pool_pattern) + return -ENOMEM; + pool_pattern->data = kmemdup(pattern->data, pattern->sz, GFP_KERNEL); + if (!pool_pattern->data) { + kfree(pool_pattern); + return -ENOMEM; + } + pool_pattern->sz = pattern->sz; + mlx5_fs_pool_init(fs_hws_mh_pool, dev, &mlx5_fs_hws_mh_pool_ops, + pool_pattern); + return 0; +} + +void mlx5_fs_hws_mh_pool_cleanup(struct mlx5_fs_pool *fs_hws_mh_pool) +{ + struct mlx5hws_action_mh_pattern *pool_pattern; + + mlx5_fs_pool_cleanup(fs_hws_mh_pool); + pool_pattern = fs_hws_mh_pool->pool_ctx; + if (!pool_pattern) + return; + kfree(pool_pattern->data); + kfree(pool_pattern); +} + +struct mlx5_fs_hws_mh * +mlx5_fs_hws_mh_pool_acquire_mh(struct mlx5_fs_pool *mh_pool) +{ + struct mlx5_fs_pool_index pool_index = {}; + struct mlx5_fs_hws_mh_bulk *mh_bulk; + int err; + + err = mlx5_fs_pool_acquire_index(mh_pool, &pool_index); + if (err) + return ERR_PTR(err); + mh_bulk = container_of(pool_index.fs_bulk, struct mlx5_fs_hws_mh_bulk, + fs_bulk); + return &mh_bulk->mhs_data[pool_index.index]; +} + +void mlx5_fs_hws_mh_pool_release_mh(struct mlx5_fs_pool *mh_pool, + struct mlx5_fs_hws_mh *mh_data) +{ + struct mlx5_fs_bulk *fs_bulk = &mh_data->bulk->fs_bulk; + struct mlx5_fs_pool_index pool_index = {}; + struct mlx5_core_dev *dev = mh_pool->dev; + + pool_index.fs_bulk = fs_bulk; + pool_index.index = mh_data->offset; + if (mlx5_fs_pool_release_index(mh_pool, &pool_index)) + mlx5_core_warn(dev, "Attempted to release modify header which is not acquired\n"); +} + +bool mlx5_fs_hws_mh_pool_match(struct mlx5_fs_pool *mh_pool, + struct mlx5hws_action_mh_pattern *pattern) +{ + struct mlx5hws_action_mh_pattern *pool_pattern; + int num_actions, i; + + pool_pattern = mh_pool->pool_ctx; + if (WARN_ON_ONCE(!pool_pattern)) + return false; + + if (pattern->sz != pool_pattern->sz) + return false; + num_actions = pattern->sz / MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto); + for (i = 0; i < num_actions; i++) { + if ((__force __be32)pattern->data[i] != + (__force __be32)pool_pattern->data[i]) + return false; + } + return true; +} + +struct mlx5hws_action *mlx5_fc_get_hws_action(struct mlx5hws_context *ctx, + struct mlx5_fc *counter) +{ + struct mlx5_fs_hws_create_action_ctx create_ctx; + struct mlx5_fc_bulk *fc_bulk = counter->bulk; + + create_ctx.hws_ctx = ctx; + create_ctx.id = fc_bulk->base_id; + create_ctx.actions_type = MLX5HWS_ACTION_TYP_CTR; + + return mlx5_fs_get_hws_action(&fc_bulk->hws_data, &create_ctx); +} + +void mlx5_fc_put_hws_action(struct mlx5_fc *counter) +{ + mlx5_fs_put_hws_action(&counter->bulk->hws_data); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.h new file mode 100644 index 000000000000..34072551dd21 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */ + +#ifndef __MLX5_FS_HWS_POOLS_H__ +#define __MLX5_FS_HWS_POOLS_H__ + +#include <linux/if_vlan.h> +#include "fs_pool.h" +#include "fs_core.h" + +#define MLX5_FS_INSERT_HDR_VLAN_ANCHOR MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START +#define MLX5_FS_INSERT_HDR_VLAN_OFFSET offsetof(struct vlan_ethhdr, h_vlan_proto) +#define MLX5_FS_INSERT_HDR_VLAN_SIZE sizeof(struct vlan_hdr) + +enum { + MLX5_FS_DL3TNLTOL2_MAC_HDR_IDX = 0, + MLX5_FS_DL3TNLTOL2_MAC_VLAN_HDR_IDX, +}; + +struct mlx5_fs_hws_pr { + struct mlx5_fs_hws_pr_bulk *bulk; + u32 offset; + u8 hdr_idx; + u8 *data; + size_t data_size; +}; + +struct mlx5_fs_hws_pr_bulk { + struct mlx5_fs_bulk fs_bulk; + struct mlx5hws_action *hws_action; + struct mlx5_fs_hws_pr prs_data[]; +}; + +struct mlx5_fs_hws_pr_pool_ctx { + enum mlx5hws_action_type reformat_type; + size_t encap_data_size; +}; + +struct mlx5_fs_hws_mh { + struct mlx5_fs_hws_mh_bulk *bulk; + u32 offset; + u8 *data; +}; + +struct mlx5_fs_hws_mh_bulk { + struct mlx5_fs_bulk fs_bulk; + struct mlx5_fs_pool *mh_pool; + struct mlx5hws_action *hws_action; + struct mlx5_fs_hws_mh mhs_data[]; +}; + +int mlx5_fs_hws_pr_pool_init(struct mlx5_fs_pool *pr_pool, + struct mlx5_core_dev *dev, size_t encap_data_size, + enum mlx5hws_action_type reformat_type); +void mlx5_fs_hws_pr_pool_cleanup(struct mlx5_fs_pool *pr_pool); + +struct mlx5_fs_hws_pr *mlx5_fs_hws_pr_pool_acquire_pr(struct mlx5_fs_pool *pr_pool); +void mlx5_fs_hws_pr_pool_release_pr(struct mlx5_fs_pool *pr_pool, + struct mlx5_fs_hws_pr *pr_data); +struct mlx5hws_action *mlx5_fs_hws_pr_get_action(struct mlx5_fs_hws_pr *pr_data); +int mlx5_fs_hws_mh_pool_init(struct mlx5_fs_pool *fs_hws_mh_pool, + struct mlx5_core_dev *dev, + struct mlx5hws_action_mh_pattern *pattern); +void mlx5_fs_hws_mh_pool_cleanup(struct mlx5_fs_pool *fs_hws_mh_pool); +struct mlx5_fs_hws_mh *mlx5_fs_hws_mh_pool_acquire_mh(struct mlx5_fs_pool *mh_pool); +void mlx5_fs_hws_mh_pool_release_mh(struct mlx5_fs_pool *mh_pool, + struct mlx5_fs_hws_mh *mh_data); +bool mlx5_fs_hws_mh_pool_match(struct mlx5_fs_pool *mh_pool, + struct mlx5hws_action_mh_pattern *pattern); +struct mlx5hws_action *mlx5_fc_get_hws_action(struct mlx5hws_context *ctx, + struct mlx5_fc *counter); +void mlx5_fc_put_hws_action(struct mlx5_fc *counter); +#endif /* __MLX5_FS_HWS_POOLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h index 3c8635f286ce..21279d503117 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h @@ -17,6 +17,7 @@ #include "context.h" #include "table.h" #include "send.h" +#include "action_ste_pool.h" #include "rule.h" #include "cmd.h" #include "action.h" @@ -39,7 +40,6 @@ #define mlx5hws_dbg(ctx, arg...) mlx5_core_dbg((ctx)->mdev, ##arg) #define MLX5HWS_TABLE_TYPE_BASE 2 -#define MLX5HWS_ACTION_STE_IDX_ANY 0 static inline bool is_mem_zero(const u8 *mem, size_t size) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c index 1bb3a6f8c3cd..32f87fdf3213 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c @@ -3,25 +3,6 @@ #include "internal.h" -enum mlx5hws_matcher_rtc_type { - HWS_MATCHER_RTC_TYPE_MATCH, - HWS_MATCHER_RTC_TYPE_STE_ARRAY, - HWS_MATCHER_RTC_TYPE_MAX, -}; - -static const char * const mlx5hws_matcher_rtc_type_str[] = { - [HWS_MATCHER_RTC_TYPE_MATCH] = "MATCH", - [HWS_MATCHER_RTC_TYPE_STE_ARRAY] = "STE_ARRAY", - [HWS_MATCHER_RTC_TYPE_MAX] = "UNKNOWN", -}; - -static const char *hws_matcher_rtc_type_to_str(enum mlx5hws_matcher_rtc_type rtc_type) -{ - if (rtc_type > HWS_MATCHER_RTC_TYPE_MAX) - rtc_type = HWS_MATCHER_RTC_TYPE_MAX; - return mlx5hws_matcher_rtc_type_str[rtc_type]; -} - static bool hws_matcher_requires_col_tbl(u8 log_num_of_rules) { /* Collision table concatenation is done only for large rule tables */ @@ -42,19 +23,202 @@ static void hws_matcher_destroy_end_ft(struct mlx5hws_matcher *matcher) mlx5hws_table_destroy_default_ft(matcher->tbl, matcher->end_ft_id); } +int mlx5hws_matcher_update_end_ft_isolated(struct mlx5hws_table *tbl, + u32 miss_ft_id) +{ + struct mlx5hws_matcher *tmp_matcher; + + if (list_empty(&tbl->matchers_list)) + return -EINVAL; + + /* Update isolated_matcher_end_ft_id attribute for all + * the matchers in isolated table. + */ + list_for_each_entry(tmp_matcher, &tbl->matchers_list, list_node) + tmp_matcher->attr.isolated_matcher_end_ft_id = miss_ft_id; + + tmp_matcher = list_last_entry(&tbl->matchers_list, + struct mlx5hws_matcher, + list_node); + + return mlx5hws_table_ft_set_next_ft(tbl->ctx, + tmp_matcher->end_ft_id, + tbl->fw_ft_type, + miss_ft_id); +} + +static int hws_matcher_connect_end_ft_isolated(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_table *tbl = matcher->tbl; + u32 end_ft_id; + int ret; + + /* Reset end_ft next RTCs */ + ret = mlx5hws_table_ft_set_next_rtc(tbl->ctx, + matcher->end_ft_id, + matcher->tbl->fw_ft_type, + 0, 0); + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to reset FT's next RTCs\n"); + return ret; + } + + /* Connect isolated matcher's end_ft to the complex matcher's end FT */ + end_ft_id = matcher->attr.isolated_matcher_end_ft_id; + ret = mlx5hws_table_ft_set_next_ft(tbl->ctx, + matcher->end_ft_id, + matcher->tbl->fw_ft_type, + end_ft_id); + + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to set FT's miss_ft_id\n"); + return ret; + } + + return 0; +} + +static int hws_matcher_create_end_ft_isolated(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_table *tbl = matcher->tbl; + int ret; + + ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, + tbl, + 0, + &matcher->end_ft_id); + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to create end flow table\n"); + return ret; + } + + ret = hws_matcher_connect_end_ft_isolated(matcher); + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to connect end FT\n"); + goto destroy_default_ft; + } + + return 0; + +destroy_default_ft: + mlx5hws_table_destroy_default_ft(tbl, matcher->end_ft_id); + return ret; +} + static int hws_matcher_create_end_ft(struct mlx5hws_matcher *matcher) { struct mlx5hws_table *tbl = matcher->tbl; int ret; - ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, &matcher->end_ft_id); + if (mlx5hws_matcher_is_isolated(matcher)) + ret = hws_matcher_create_end_ft_isolated(matcher); + else + ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, + tbl, + 0, + &matcher->end_ft_id); + if (ret) { mlx5hws_err(tbl->ctx, "Failed to create matcher end flow table\n"); return ret; } + return 0; } +static int hws_matcher_connect_isolated_first(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_table *tbl = matcher->tbl; + struct mlx5hws_context *ctx = tbl->ctx; + int ret; + + /* Isolated matcher's end_ft is already pointing to the end_ft + * of the complex matcher - it was set at creation of end_ft, + * so no need to connect it. + * We still need to connect the isolated table's start FT to + * this matcher's RTC. + */ + ret = mlx5hws_table_ft_set_next_rtc(ctx, + tbl->ft_id, + tbl->fw_ft_type, + matcher->match_ste.rtc_0_id, + matcher->match_ste.rtc_1_id); + if (ret) { + mlx5hws_err(ctx, "Isolated matcher: failed to connect start FT to match RTC\n"); + return ret; + } + + /* Reset table's FT default miss (drop refcount) */ + ret = mlx5hws_table_ft_set_default_next_ft(tbl, tbl->ft_id); + if (ret) { + mlx5hws_err(ctx, "Isolated matcher: failed to reset table ft default miss\n"); + return ret; + } + + list_add(&matcher->list_node, &tbl->matchers_list); + + return ret; +} + +static int hws_matcher_connect_isolated_last(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_table *tbl = matcher->tbl; + struct mlx5hws_context *ctx = tbl->ctx; + struct mlx5hws_matcher *last; + int ret; + + last = list_last_entry(&tbl->matchers_list, + struct mlx5hws_matcher, + list_node); + + /* New matcher's end_ft is already pointing to the end_ft of + * the complex matcher. + * Connect previous matcher's end_ft to this new matcher RTC. + */ + ret = mlx5hws_table_ft_set_next_rtc(ctx, + last->end_ft_id, + tbl->fw_ft_type, + matcher->match_ste.rtc_0_id, + matcher->match_ste.rtc_1_id); + if (ret) { + mlx5hws_err(ctx, + "Isolated matcher: failed to connect matcher end_ft to new match RTC\n"); + return ret; + } + + /* Reset prev matcher FT default miss (drop refcount) */ + ret = mlx5hws_table_ft_set_default_next_ft(tbl, last->end_ft_id); + if (ret) { + mlx5hws_err(ctx, "Isolated matcher: failed to reset matcher ft default miss\n"); + return ret; + } + + /* Insert after the last matcher */ + list_add(&matcher->list_node, &last->list_node); + + return 0; +} + +static int hws_matcher_connect_isolated(struct mlx5hws_matcher *matcher) +{ + /* Isolated matcher is expected to be the only one in its table. + * However, it can have a collision matcher, and it can go through + * rehash process, in which case we will temporary have both old and + * new matchers in the isolated table. + * Check if this is the first matcher in the isolated table. + */ + if (list_empty(&matcher->tbl->matchers_list)) + return hws_matcher_connect_isolated_first(matcher); + + /* If this wasn't the first matcher, then we have 3 possible cases: + * - this is a collision matcher for the first matcher + * - this is a new rehash dest matcher + * - this is a collision matcher for the new rehash dest matcher + * The logic to add new matcher is the same for all these cases. + */ + return hws_matcher_connect_isolated_last(matcher); +} + static int hws_matcher_connect(struct mlx5hws_matcher *matcher) { struct mlx5hws_table *tbl = matcher->tbl; @@ -64,6 +228,9 @@ static int hws_matcher_connect(struct mlx5hws_matcher *matcher) struct mlx5hws_matcher *tmp_matcher; int ret; + if (mlx5hws_matcher_is_isolated(matcher)) + return hws_matcher_connect_isolated(matcher); + /* Find location in matcher list */ if (list_empty(&tbl->matchers_list)) { list_add(&matcher->list_node, &tbl->matchers_list); @@ -140,6 +307,92 @@ remove_from_list: return ret; } +static int hws_matcher_disconnect_isolated(struct mlx5hws_matcher *matcher) +{ + struct mlx5hws_matcher *first, *last, *prev, *next; + struct mlx5hws_table *tbl = matcher->tbl; + struct mlx5hws_context *ctx = tbl->ctx; + u32 end_ft_id; + int ret; + + first = list_first_entry(&tbl->matchers_list, + struct mlx5hws_matcher, + list_node); + last = list_last_entry(&tbl->matchers_list, + struct mlx5hws_matcher, + list_node); + prev = list_prev_entry(matcher, list_node); + next = list_next_entry(matcher, list_node); + + list_del_init(&matcher->list_node); + + if (first == last) { + /* This was the only matcher in the list. + * Reset isolated table FT next RTCs and connect it + * to the whole complex matcher end FT instead. + */ + ret = mlx5hws_table_ft_set_next_rtc(ctx, + tbl->ft_id, + tbl->fw_ft_type, + 0, 0); + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to reset FT's next RTCs\n"); + return ret; + } + + end_ft_id = matcher->attr.isolated_matcher_end_ft_id; + ret = mlx5hws_table_ft_set_next_ft(tbl->ctx, + tbl->ft_id, + tbl->fw_ft_type, + end_ft_id); + if (ret) { + mlx5hws_err(tbl->ctx, "Isolated matcher: failed to set FT's miss_ft_id\n"); + return ret; + } + + return 0; + } + + /* At this point we know that there are more matchers in the list */ + + if (matcher == first) { + /* We've disconnected the first matcher. + * Now update isolated table default FT. + */ + if (!next) + return -EINVAL; + return mlx5hws_table_ft_set_next_rtc(ctx, + tbl->ft_id, + tbl->fw_ft_type, + next->match_ste.rtc_0_id, + next->match_ste.rtc_1_id); + } + + if (matcher == last) { + /* If we've disconnected the last matcher - update prev + * matcher's end_ft to point to the complex matcher end_ft. + */ + if (!prev) + return -EINVAL; + return hws_matcher_connect_end_ft_isolated(prev); + } + + /* This wasn't the first or the last matcher, which means that it has + * both prev and next matchers. Note that this only happens if we're + * disconnecting collision matcher of the old matcher during rehash. + */ + if (!prev || !next || + !(matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)) + return -EINVAL; + + /* Update prev end FT to point to next match RTC */ + return mlx5hws_table_ft_set_next_rtc(ctx, + prev->end_ft_id, + tbl->fw_ft_type, + next->match_ste.rtc_0_id, + next->match_ste.rtc_1_id); +} + static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher) { struct mlx5hws_matcher *next = NULL, *prev = NULL; @@ -147,6 +400,9 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher) u32 prev_ft_id = tbl->ft_id; int ret; + if (mlx5hws_matcher_is_isolated(matcher)) + return hws_matcher_disconnect_isolated(matcher); + if (!list_is_first(&matcher->list_node, &tbl->matchers_list)) { prev = list_prev_entry(matcher, list_node); prev_ft_id = prev->end_ft_id; @@ -165,14 +421,14 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher) next->match_ste.rtc_0_id, next->match_ste.rtc_1_id); if (ret) { - mlx5hws_err(tbl->ctx, "Failed to disconnect matcher\n"); - goto matcher_reconnect; + mlx5hws_err(tbl->ctx, "Fatal error, failed to disconnect matcher\n"); + return ret; } } else { ret = mlx5hws_table_connect_to_miss_table(tbl, tbl->default_miss.miss_tbl); if (ret) { - mlx5hws_err(tbl->ctx, "Failed to disconnect last matcher\n"); - goto matcher_reconnect; + mlx5hws_err(tbl->ctx, "Fatal error, failed to disconnect last matcher\n"); + return ret; } } @@ -180,169 +436,115 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher) if (prev_ft_id == tbl->ft_id) { ret = mlx5hws_table_update_connected_miss_tables(tbl); if (ret) { - mlx5hws_err(tbl->ctx, "Fatal error, failed to update connected miss table\n"); - goto matcher_reconnect; + mlx5hws_err(tbl->ctx, + "Fatal error, failed to update connected miss table\n"); + return ret; } } ret = mlx5hws_table_ft_set_default_next_ft(tbl, prev_ft_id); if (ret) { mlx5hws_err(tbl->ctx, "Fatal error, failed to restore matcher ft default miss\n"); - goto matcher_reconnect; + return ret; } return 0; - -matcher_reconnect: - if (list_empty(&tbl->matchers_list) || !prev) - list_add(&matcher->list_node, &tbl->matchers_list); - else - /* insert after prev matcher */ - list_add(&matcher->list_node, &prev->list_node); - - return ret; } static void hws_matcher_set_rtc_attr_sz(struct mlx5hws_matcher *matcher, struct mlx5hws_cmd_rtc_create_attr *rtc_attr, - enum mlx5hws_matcher_rtc_type rtc_type, bool is_mirror) { - struct mlx5hws_pool_chunk *ste = &matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].ste; enum mlx5hws_matcher_flow_src flow_src = matcher->attr.optimize_flow_src; - bool is_match_rtc = rtc_type == HWS_MATCHER_RTC_TYPE_MATCH; if ((flow_src == MLX5HWS_MATCHER_FLOW_SRC_VPORT && !is_mirror) || (flow_src == MLX5HWS_MATCHER_FLOW_SRC_WIRE && is_mirror)) { /* Optimize FDB RTC */ rtc_attr->log_size = 0; rtc_attr->log_depth = 0; - } else { - /* Keep original values */ - rtc_attr->log_size = is_match_rtc ? matcher->attr.table.sz_row_log : ste->order; - rtc_attr->log_depth = is_match_rtc ? matcher->attr.table.sz_col_log : 0; } } -static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher, - enum mlx5hws_matcher_rtc_type rtc_type, - u8 action_ste_selector) +static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher) { struct mlx5hws_matcher_attr *attr = &matcher->attr; struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0}; struct mlx5hws_match_template *mt = matcher->mt; struct mlx5hws_context *ctx = matcher->tbl->ctx; - struct mlx5hws_action_default_stc *default_stc; - struct mlx5hws_matcher_action_ste *action_ste; + union mlx5hws_matcher_size *size_rx, *size_tx; struct mlx5hws_table *tbl = matcher->tbl; - struct mlx5hws_pool *ste_pool, *stc_pool; - struct mlx5hws_pool_chunk *ste; - u32 *rtc_0_id, *rtc_1_id; u32 obj_id; int ret; - switch (rtc_type) { - case HWS_MATCHER_RTC_TYPE_MATCH: - rtc_0_id = &matcher->match_ste.rtc_0_id; - rtc_1_id = &matcher->match_ste.rtc_1_id; - ste_pool = matcher->match_ste.pool; - ste = &matcher->match_ste.ste; - ste->order = attr->table.sz_col_log + attr->table.sz_row_log; - - rtc_attr.log_size = attr->table.sz_row_log; - rtc_attr.log_depth = attr->table.sz_col_log; - rtc_attr.is_frst_jumbo = mlx5hws_matcher_mt_is_jumbo(mt); - rtc_attr.is_scnd_range = 0; - rtc_attr.miss_ft_id = matcher->end_ft_id; - - if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) { - /* The usual Hash Table */ - rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_HASH; - - /* The first mt is used since all share the same definer */ - rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer); - } else if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) { - rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET; - rtc_attr.num_hash_definer = 1; - - if (attr->distribute_mode == MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH) { - /* Hash Split Table */ - rtc_attr.access_index_mode = MLX5_IFC_RTC_STE_ACCESS_MODE_BY_HASH; - rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer); - } else if (attr->distribute_mode == MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR) { - /* Linear Lookup Table */ - rtc_attr.access_index_mode = MLX5_IFC_RTC_STE_ACCESS_MODE_LINEAR; - rtc_attr.match_definer_0 = ctx->caps->linear_match_definer; - } - } - - /* Match pool requires implicit allocation */ - ret = mlx5hws_pool_chunk_alloc(ste_pool, ste); - if (ret) { - mlx5hws_err(ctx, "Failed to allocate STE for %s RTC", - hws_matcher_rtc_type_to_str(rtc_type)); - return ret; + size_rx = &attr->size[MLX5HWS_MATCHER_SIZE_TYPE_RX]; + size_tx = &attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX]; + + rtc_attr.log_size = size_rx->table.sz_row_log; + rtc_attr.log_depth = size_rx->table.sz_col_log; + rtc_attr.is_frst_jumbo = mlx5hws_matcher_mt_is_jumbo(mt); + rtc_attr.is_scnd_range = 0; + rtc_attr.miss_ft_id = matcher->end_ft_id; + + if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) { + /* The usual Hash Table */ + rtc_attr.update_index_mode = + MLX5_IFC_RTC_STE_UPDATE_MODE_BY_HASH; + + /* The first mt is used since all share the same definer */ + rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer); + } else if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) { + rtc_attr.update_index_mode = + MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET; + rtc_attr.num_hash_definer = 1; + + if (attr->distribute_mode == + MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH) { + /* Hash Split Table */ + rtc_attr.access_index_mode = + MLX5_IFC_RTC_STE_ACCESS_MODE_BY_HASH; + rtc_attr.match_definer_0 = + mlx5hws_definer_get_id(mt->definer); + } else if (attr->distribute_mode == + MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR) { + /* Linear Lookup Table */ + rtc_attr.access_index_mode = + MLX5_IFC_RTC_STE_ACCESS_MODE_LINEAR; + rtc_attr.match_definer_0 = + ctx->caps->linear_match_definer; } - break; - - case HWS_MATCHER_RTC_TYPE_STE_ARRAY: - action_ste = &matcher->action_ste[action_ste_selector]; - - rtc_0_id = &action_ste->rtc_0_id; - rtc_1_id = &action_ste->rtc_1_id; - ste_pool = action_ste->pool; - ste = &action_ste->ste; - ste->order = ilog2(roundup_pow_of_two(action_ste->max_stes)) + - attr->table.sz_row_log; - rtc_attr.log_size = ste->order; - rtc_attr.log_depth = 0; - rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET; - /* The action STEs use the default always hit definer */ - rtc_attr.match_definer_0 = ctx->caps->trivial_match_definer; - rtc_attr.is_frst_jumbo = false; - rtc_attr.miss_ft_id = 0; - break; - - default: - mlx5hws_err(ctx, "HWS Invalid RTC type\n"); - return -EINVAL; } - obj_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste); - rtc_attr.pd = ctx->pd_num; - rtc_attr.ste_base = obj_id; - rtc_attr.ste_offset = ste->offset; + rtc_attr.ste_base = matcher->match_ste.ste_0_base; rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx); rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, false); - hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, rtc_type, false); + hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, false); /* STC is a single resource (obj_id), use any STC for the ID */ - stc_pool = ctx->stc_pool[tbl->type]; - default_stc = ctx->common_res[tbl->type].default_stc; - obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit); + obj_id = mlx5hws_pool_get_base_id(ctx->stc_pool); rtc_attr.stc_base = obj_id; - ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_0_id); + ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, + &matcher->match_ste.rtc_0_id); if (ret) { - mlx5hws_err(ctx, "Failed to create matcher RTC of type %s", - hws_matcher_rtc_type_to_str(rtc_type)); - goto free_ste; + mlx5hws_err(ctx, "Failed to create matcher RTC\n"); + return ret; } if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) { - obj_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste); - rtc_attr.ste_base = obj_id; + rtc_attr.log_size = size_tx->table.sz_row_log; + rtc_attr.log_depth = size_tx->table.sz_col_log; + rtc_attr.ste_base = matcher->match_ste.ste_1_base; rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, true); - obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, &default_stc->default_hit); + obj_id = mlx5hws_pool_get_base_mirror_id(ctx->stc_pool); rtc_attr.stc_base = obj_id; - hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, rtc_type, true); + hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, true); - ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_1_id); + ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, + &matcher->match_ste.rtc_1_id); if (ret) { - mlx5hws_err(ctx, "Failed to create peer matcher RTC of type %s", - hws_matcher_rtc_type_to_str(rtc_type)); + mlx5hws_err(ctx, "Failed to create mirror matcher RTC\n"); goto destroy_rtc_0; } } @@ -350,47 +552,18 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher, return 0; destroy_rtc_0: - mlx5hws_cmd_rtc_destroy(ctx->mdev, *rtc_0_id); -free_ste: - if (rtc_type == HWS_MATCHER_RTC_TYPE_MATCH) - mlx5hws_pool_chunk_free(ste_pool, ste); + mlx5hws_cmd_rtc_destroy(ctx->mdev, matcher->match_ste.rtc_0_id); return ret; } -static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher, - enum mlx5hws_matcher_rtc_type rtc_type, - u8 action_ste_selector) +static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher) { - struct mlx5hws_matcher_action_ste *action_ste; - struct mlx5hws_table *tbl = matcher->tbl; - struct mlx5hws_pool_chunk *ste; - struct mlx5hws_pool *ste_pool; - u32 rtc_0_id, rtc_1_id; - - switch (rtc_type) { - case HWS_MATCHER_RTC_TYPE_MATCH: - rtc_0_id = matcher->match_ste.rtc_0_id; - rtc_1_id = matcher->match_ste.rtc_1_id; - ste_pool = matcher->match_ste.pool; - ste = &matcher->match_ste.ste; - break; - case HWS_MATCHER_RTC_TYPE_STE_ARRAY: - action_ste = &matcher->action_ste[action_ste_selector]; - rtc_0_id = action_ste->rtc_0_id; - rtc_1_id = action_ste->rtc_1_id; - ste_pool = action_ste->pool; - ste = &action_ste->ste; - break; - default: - return; - } + struct mlx5_core_dev *mdev = matcher->tbl->ctx->mdev; - if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) - mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, rtc_1_id); + if (matcher->tbl->type == MLX5HWS_TABLE_TYPE_FDB) + mlx5hws_cmd_rtc_destroy(mdev, matcher->match_ste.rtc_1_id); - mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, rtc_0_id); - if (rtc_type == HWS_MATCHER_RTC_TYPE_MATCH) - mlx5hws_pool_chunk_free(ste_pool, ste); + mlx5hws_cmd_rtc_destroy(mdev, matcher->match_ste.rtc_0_id); } static int @@ -398,43 +571,38 @@ hws_matcher_check_attr_sz(struct mlx5hws_cmd_query_caps *caps, struct mlx5hws_matcher *matcher) { struct mlx5hws_matcher_attr *attr = &matcher->attr; + struct mlx5hws_context *ctx = matcher->tbl->ctx; + union mlx5hws_matcher_size *size; + int i; - if (attr->table.sz_col_log > caps->rtc_log_depth_max) { - mlx5hws_err(matcher->tbl->ctx, "Matcher depth exceeds limit %d\n", - caps->rtc_log_depth_max); - return -EOPNOTSUPP; - } + for (i = 0; i < 2; i++) { + size = &attr->size[i]; - if (attr->table.sz_col_log + attr->table.sz_row_log > caps->ste_alloc_log_max) { - mlx5hws_err(matcher->tbl->ctx, "Total matcher size exceeds limit %d\n", - caps->ste_alloc_log_max); - return -EOPNOTSUPP; - } + if (size->table.sz_col_log > caps->rtc_log_depth_max) { + mlx5hws_err(ctx, "Matcher depth exceeds limit %d\n", + caps->rtc_log_depth_max); + return -EOPNOTSUPP; + } - if (attr->table.sz_col_log + attr->table.sz_row_log < caps->ste_alloc_log_gran) { - mlx5hws_err(matcher->tbl->ctx, "Total matcher size below limit %d\n", - caps->ste_alloc_log_gran); - return -EOPNOTSUPP; + if (size->table.sz_col_log + size->table.sz_row_log > + caps->ste_alloc_log_max) { + mlx5hws_err(ctx, + "Total matcher size exceeds limit %d\n", + caps->ste_alloc_log_max); + return -EOPNOTSUPP; + } + + if (size->table.sz_col_log + size->table.sz_row_log < + caps->ste_alloc_log_gran) { + mlx5hws_err(ctx, "Total matcher size below limit %d\n", + caps->ste_alloc_log_gran); + return -EOPNOTSUPP; + } } return 0; } -static void hws_matcher_set_pool_attr(struct mlx5hws_pool_attr *attr, - struct mlx5hws_matcher *matcher) -{ - switch (matcher->attr.optimize_flow_src) { - case MLX5HWS_MATCHER_FLOW_SRC_VPORT: - attr->opt_type = MLX5HWS_POOL_OPTIMIZE_ORIG; - break; - case MLX5HWS_MATCHER_FLOW_SRC_WIRE: - attr->opt_type = MLX5HWS_POOL_OPTIMIZE_MIRROR; - break; - default: - break; - } -} - static int hws_matcher_check_and_process_at(struct mlx5hws_matcher *matcher, struct mlx5hws_action_template *at) { @@ -458,170 +626,17 @@ static int hws_matcher_check_and_process_at(struct mlx5hws_matcher *matcher, return 0; } -static int hws_matcher_resize_init(struct mlx5hws_matcher *src_matcher) -{ - struct mlx5hws_matcher_resize_data *resize_data; - - resize_data = kzalloc(sizeof(*resize_data), GFP_KERNEL); - if (!resize_data) - return -ENOMEM; - - resize_data->max_stes = src_matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].max_stes; - - resize_data->action_ste[0].stc = src_matcher->action_ste[0].stc; - resize_data->action_ste[0].rtc_0_id = src_matcher->action_ste[0].rtc_0_id; - resize_data->action_ste[0].rtc_1_id = src_matcher->action_ste[0].rtc_1_id; - resize_data->action_ste[0].pool = src_matcher->action_ste[0].max_stes ? - src_matcher->action_ste[0].pool : - NULL; - resize_data->action_ste[1].stc = src_matcher->action_ste[1].stc; - resize_data->action_ste[1].rtc_0_id = src_matcher->action_ste[1].rtc_0_id; - resize_data->action_ste[1].rtc_1_id = src_matcher->action_ste[1].rtc_1_id; - resize_data->action_ste[1].pool = src_matcher->action_ste[1].max_stes ? - src_matcher->action_ste[1].pool : - NULL; - - /* Place the new resized matcher on the dst matcher's list */ - list_add(&resize_data->list_node, &src_matcher->resize_dst->resize_data); - - /* Move all the previous resized matchers to the dst matcher's list */ - while (!list_empty(&src_matcher->resize_data)) { - resize_data = list_first_entry(&src_matcher->resize_data, - struct mlx5hws_matcher_resize_data, - list_node); - list_del_init(&resize_data->list_node); - list_add(&resize_data->list_node, &src_matcher->resize_dst->resize_data); - } - - return 0; -} - -static void hws_matcher_resize_uninit(struct mlx5hws_matcher *matcher) -{ - struct mlx5hws_matcher_resize_data *resize_data; - - if (!mlx5hws_matcher_is_resizable(matcher)) - return; - - while (!list_empty(&matcher->resize_data)) { - resize_data = list_first_entry(&matcher->resize_data, - struct mlx5hws_matcher_resize_data, - list_node); - list_del_init(&resize_data->list_node); - - if (resize_data->max_stes) { - mlx5hws_action_free_single_stc(matcher->tbl->ctx, - matcher->tbl->type, - &resize_data->action_ste[1].stc); - mlx5hws_action_free_single_stc(matcher->tbl->ctx, - matcher->tbl->type, - &resize_data->action_ste[0].stc); - - if (matcher->tbl->type == MLX5HWS_TABLE_TYPE_FDB) { - mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, - resize_data->action_ste[1].rtc_1_id); - mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, - resize_data->action_ste[0].rtc_1_id); - } - mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, - resize_data->action_ste[1].rtc_0_id); - mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, - resize_data->action_ste[0].rtc_0_id); - if (resize_data->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].pool) { - mlx5hws_pool_destroy(resize_data->action_ste[1].pool); - mlx5hws_pool_destroy(resize_data->action_ste[0].pool); - } - } - - kfree(resize_data); - } -} - -static int -hws_matcher_bind_at_idx(struct mlx5hws_matcher *matcher, u8 action_ste_selector) -{ - struct mlx5hws_cmd_stc_modify_attr stc_attr = {0}; - struct mlx5hws_matcher_action_ste *action_ste; - struct mlx5hws_table *tbl = matcher->tbl; - struct mlx5hws_pool_attr pool_attr = {0}; - struct mlx5hws_context *ctx = tbl->ctx; - int ret; - - action_ste = &matcher->action_ste[action_ste_selector]; - - /* Allocate action STE mempool */ - pool_attr.table_type = tbl->type; - pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; - pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL; - pool_attr.alloc_log_sz = ilog2(roundup_pow_of_two(action_ste->max_stes)) + - matcher->attr.table.sz_row_log; - hws_matcher_set_pool_attr(&pool_attr, matcher); - action_ste->pool = mlx5hws_pool_create(ctx, &pool_attr); - if (!action_ste->pool) { - mlx5hws_err(ctx, "Failed to create action ste pool\n"); - return -EINVAL; - } - - /* Allocate action RTC */ - ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY, action_ste_selector); - if (ret) { - mlx5hws_err(ctx, "Failed to create action RTC\n"); - goto free_ste_pool; - } - - /* Allocate STC for jumps to STE */ - stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT; - stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE; - stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE; - stc_attr.ste_table.ste = action_ste->ste; - stc_attr.ste_table.ste_pool = action_ste->pool; - stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer; - - ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl->type, - &action_ste->stc); - if (ret) { - mlx5hws_err(ctx, "Failed to create action jump to table STC\n"); - goto free_rtc; - } - - return 0; - -free_rtc: - hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY, action_ste_selector); -free_ste_pool: - mlx5hws_pool_destroy(action_ste->pool); - return ret; -} - -static void hws_matcher_unbind_at_idx(struct mlx5hws_matcher *matcher, u8 action_ste_selector) -{ - struct mlx5hws_matcher_action_ste *action_ste; - struct mlx5hws_table *tbl = matcher->tbl; - - action_ste = &matcher->action_ste[action_ste_selector]; - - if (!action_ste->max_stes || - matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION || - mlx5hws_matcher_is_in_resize(matcher)) - return; - - mlx5hws_action_free_single_stc(tbl->ctx, tbl->type, &action_ste->stc); - hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY, action_ste_selector); - mlx5hws_pool_destroy(action_ste->pool); -} - static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher) { bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt); - struct mlx5hws_table *tbl = matcher->tbl; - struct mlx5hws_context *ctx = tbl->ctx; - u32 required_stes; - u8 max_stes = 0; + struct mlx5hws_context *ctx = matcher->tbl->ctx; + u8 required_stes, max_stes; int i, ret; if (matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION) return 0; + max_stes = 0; for (i = 0; i < matcher->num_of_at; i++) { struct mlx5hws_action_template *at = &matcher->at[i]; @@ -637,38 +652,40 @@ static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher) /* Future: Optimize reparse */ } - /* There are no additional STEs required for matcher */ - if (!max_stes) - return 0; - - matcher->action_ste[0].max_stes = max_stes; - matcher->action_ste[1].max_stes = max_stes; - - ret = hws_matcher_bind_at_idx(matcher, 0); - if (ret) - return ret; - - ret = hws_matcher_bind_at_idx(matcher, 1); - if (ret) - goto free_at_0; + matcher->num_of_action_stes = max_stes; return 0; - -free_at_0: - hws_matcher_unbind_at_idx(matcher, 0); - return ret; } -static void hws_matcher_unbind_at(struct mlx5hws_matcher *matcher) +static void hws_matcher_set_ip_version_match(struct mlx5hws_matcher *matcher) { - hws_matcher_unbind_at_idx(matcher, 1); - hws_matcher_unbind_at_idx(matcher, 0); + int i; + + for (i = 0; i < matcher->mt->fc_sz; i++) { + switch (matcher->mt->fc[i].fname) { + case MLX5HWS_DEFINER_FNAME_ETH_TYPE_O: + matcher->matches_outer_ethertype = 1; + break; + case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_O: + matcher->matches_outer_ip_version = 1; + break; + case MLX5HWS_DEFINER_FNAME_ETH_TYPE_I: + matcher->matches_inner_ethertype = 1; + break; + case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_I: + matcher->matches_inner_ip_version = 1; + break; + default: + break; + } + } } static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher) { + struct mlx5hws_cmd_ste_create_attr ste_attr = {}; struct mlx5hws_context *ctx = matcher->tbl->ctx; - struct mlx5hws_pool_attr pool_attr = {0}; + union mlx5hws_matcher_size *size; int ret; /* Calculate match, range and hash definers */ @@ -681,23 +698,41 @@ static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher) } } - /* Create an STE pool per matcher*/ - pool_attr.table_type = matcher->tbl->type; - pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE; - pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_MATCHER_STE_POOL; - pool_attr.alloc_log_sz = matcher->attr.table.sz_col_log + - matcher->attr.table.sz_row_log; - hws_matcher_set_pool_attr(&pool_attr, matcher); - - matcher->match_ste.pool = mlx5hws_pool_create(ctx, &pool_attr); - if (!matcher->match_ste.pool) { - mlx5hws_err(ctx, "Failed to allocate matcher STE pool\n"); - ret = -EOPNOTSUPP; + hws_matcher_set_ip_version_match(matcher); + + /* Create an STE range each for RX and TX. */ + ste_attr.table_type = FS_FT_FDB_RX; + size = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_RX]; + ste_attr.log_obj_range = + matcher->attr.optimize_flow_src == + MLX5HWS_MATCHER_FLOW_SRC_VPORT ? + 0 : size->table.sz_col_log + size->table.sz_row_log; + + ret = mlx5hws_cmd_ste_create(ctx->mdev, &ste_attr, + &matcher->match_ste.ste_0_base); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate RX STE range (%d)\n", ret); goto uninit_match_definer; } + ste_attr.table_type = FS_FT_FDB_TX; + size = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_TX]; + ste_attr.log_obj_range = + matcher->attr.optimize_flow_src == + MLX5HWS_MATCHER_FLOW_SRC_WIRE ? + 0 : size->table.sz_col_log + size->table.sz_row_log; + + ret = mlx5hws_cmd_ste_create(ctx->mdev, &ste_attr, + &matcher->match_ste.ste_1_base); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate TX STE range (%d)\n", ret); + goto destroy_rx_ste_range; + } + return 0; +destroy_rx_ste_range: + mlx5hws_cmd_ste_destroy(ctx->mdev, matcher->match_ste.ste_0_base); uninit_match_definer: if (!(matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)) mlx5hws_definer_mt_uninit(ctx, matcher->mt); @@ -706,9 +741,12 @@ uninit_match_definer: static void hws_matcher_unbind_mt(struct mlx5hws_matcher *matcher) { - mlx5hws_pool_destroy(matcher->match_ste.pool); + struct mlx5hws_context *ctx = matcher->tbl->ctx; + + mlx5hws_cmd_ste_destroy(ctx->mdev, matcher->match_ste.ste_1_base); + mlx5hws_cmd_ste_destroy(ctx->mdev, matcher->match_ste.ste_0_base); if (!(matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)) - mlx5hws_definer_mt_uninit(matcher->tbl->ctx, matcher->mt); + mlx5hws_definer_mt_uninit(ctx, matcher->mt); } static int @@ -717,6 +755,10 @@ hws_matcher_validate_insert_mode(struct mlx5hws_cmd_query_caps *caps, { struct mlx5hws_matcher_attr *attr = &matcher->attr; struct mlx5hws_context *ctx = matcher->tbl->ctx; + union mlx5hws_matcher_size *size_rx, *size_tx; + + size_rx = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_RX]; + size_tx = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_TX]; switch (attr->insert_mode) { case MLX5HWS_MATCHER_INSERT_BY_HASH: @@ -727,7 +769,7 @@ hws_matcher_validate_insert_mode(struct mlx5hws_cmd_query_caps *caps, break; case MLX5HWS_MATCHER_INSERT_BY_INDEX: - if (attr->table.sz_col_log) { + if (size_rx->table.sz_col_log || size_tx->table.sz_col_log) { mlx5hws_err(ctx, "Matcher with INSERT_BY_INDEX supports only Nx1 table size\n"); return -EOPNOTSUPP; } @@ -747,7 +789,10 @@ hws_matcher_validate_insert_mode(struct mlx5hws_cmd_query_caps *caps, return -EOPNOTSUPP; } - if (attr->table.sz_row_log > MLX5_IFC_RTC_LINEAR_LOOKUP_TBL_LOG_MAX) { + if (size_rx->table.sz_row_log > + MLX5_IFC_RTC_LINEAR_LOOKUP_TBL_LOG_MAX || + size_tx->table.sz_row_log > + MLX5_IFC_RTC_LINEAR_LOOKUP_TBL_LOG_MAX) { mlx5hws_err(ctx, "Matcher with linear distribute: rows exceed limit %d", MLX5_IFC_RTC_LINEAR_LOOKUP_TBL_LOG_MAX); return -EOPNOTSUPP; @@ -771,6 +816,10 @@ hws_matcher_process_attr(struct mlx5hws_cmd_query_caps *caps, struct mlx5hws_matcher *matcher) { struct mlx5hws_matcher_attr *attr = &matcher->attr; + union mlx5hws_matcher_size *size_rx, *size_tx; + + size_rx = &attr->size[MLX5HWS_MATCHER_SIZE_TYPE_RX]; + size_tx = &attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX]; if (hws_matcher_validate_insert_mode(caps, matcher)) return -EOPNOTSUPP; @@ -782,10 +831,16 @@ hws_matcher_process_attr(struct mlx5hws_cmd_query_caps *caps, /* Convert number of rules to the required depth */ if (attr->mode == MLX5HWS_MATCHER_RESOURCE_MODE_RULE && - attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) - attr->table.sz_col_log = hws_matcher_rules_to_tbl_depth(attr->rule.num_log); + attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) { + size_rx->table.sz_col_log = + hws_matcher_rules_to_tbl_depth(size_rx->rule.num_log); + size_tx->table.sz_col_log = + hws_matcher_rules_to_tbl_depth(size_tx->rule.num_log); + } matcher->flags |= attr->resizable ? MLX5HWS_MATCHER_FLAGS_RESIZABLE : 0; + matcher->flags |= attr->isolated_matcher_end_ft_id ? + MLX5HWS_MATCHER_FLAGS_ISOLATED : 0; return hws_matcher_check_attr_sz(caps, matcher); } @@ -807,10 +862,10 @@ static int hws_matcher_create_and_connect(struct mlx5hws_matcher *matcher) /* Create matcher end flow table anchor */ ret = hws_matcher_create_end_ft(matcher); if (ret) - goto unbind_at; + goto unbind_mt; /* Allocate the RTC for the new matcher */ - ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH, 0); + ret = hws_matcher_create_rtc(matcher); if (ret) goto destroy_end_ft; @@ -822,11 +877,9 @@ static int hws_matcher_create_and_connect(struct mlx5hws_matcher *matcher) return 0; destroy_rtc: - hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH, 0); + hws_matcher_destroy_rtc(matcher); destroy_end_ft: hws_matcher_destroy_end_ft(matcher); -unbind_at: - hws_matcher_unbind_at(matcher); unbind_mt: hws_matcher_unbind_mt(matcher); return ret; @@ -834,11 +887,9 @@ unbind_mt: static void hws_matcher_destroy_and_disconnect(struct mlx5hws_matcher *matcher) { - hws_matcher_resize_uninit(matcher); hws_matcher_disconnect(matcher); - hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH, 0); + hws_matcher_destroy_rtc(matcher); hws_matcher_destroy_end_ft(matcher); - hws_matcher_unbind_at(matcher); hws_matcher_unbind_mt(matcher); } @@ -846,22 +897,25 @@ static int hws_matcher_create_col_matcher(struct mlx5hws_matcher *matcher) { struct mlx5hws_context *ctx = matcher->tbl->ctx; + union mlx5hws_matcher_size *size_rx, *size_tx; struct mlx5hws_matcher *col_matcher; - int ret; + int i, ret; + + size_rx = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_RX]; + size_tx = &matcher->attr.size[MLX5HWS_MATCHER_SIZE_TYPE_TX]; if (matcher->attr.mode != MLX5HWS_MATCHER_RESOURCE_MODE_RULE || matcher->attr.insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) return 0; - if (!hws_matcher_requires_col_tbl(matcher->attr.rule.num_log)) + if (!hws_matcher_requires_col_tbl(size_rx->rule.num_log) && + !hws_matcher_requires_col_tbl(size_tx->rule.num_log)) return 0; col_matcher = kzalloc(sizeof(*matcher), GFP_KERNEL); if (!col_matcher) return -ENOMEM; - INIT_LIST_HEAD(&col_matcher->resize_data); - col_matcher->tbl = matcher->tbl; col_matcher->mt = matcher->mt; col_matcher->at = matcher->at; @@ -872,12 +926,20 @@ hws_matcher_create_col_matcher(struct mlx5hws_matcher *matcher) col_matcher->flags |= MLX5HWS_MATCHER_FLAGS_COLLISION; col_matcher->attr.mode = MLX5HWS_MATCHER_RESOURCE_MODE_HTABLE; col_matcher->attr.optimize_flow_src = matcher->attr.optimize_flow_src; - col_matcher->attr.table.sz_row_log = matcher->attr.rule.num_log; - col_matcher->attr.table.sz_col_log = MLX5HWS_MATCHER_ASSURED_COL_TBL_DEPTH; - if (col_matcher->attr.table.sz_row_log > MLX5HWS_MATCHER_ASSURED_ROW_RATIO) - col_matcher->attr.table.sz_row_log -= MLX5HWS_MATCHER_ASSURED_ROW_RATIO; + for (i = 0; i < 2; i++) { + union mlx5hws_matcher_size *dst = &col_matcher->attr.size[i]; + union mlx5hws_matcher_size *src = &matcher->attr.size[i]; + + dst->table.sz_row_log = src->rule.num_log; + dst->table.sz_col_log = MLX5HWS_MATCHER_ASSURED_COL_TBL_DEPTH; + if (dst->table.sz_row_log > MLX5HWS_MATCHER_ASSURED_ROW_RATIO) + dst->table.sz_row_log -= + MLX5HWS_MATCHER_ASSURED_ROW_RATIO; + } col_matcher->attr.max_num_of_at_attach = matcher->attr.max_num_of_at_attach; + col_matcher->attr.isolated_matcher_end_ft_id = + matcher->attr.isolated_matcher_end_ft_id; ret = hws_matcher_process_attr(ctx->caps, col_matcher); if (ret) @@ -915,8 +977,6 @@ static int hws_matcher_init(struct mlx5hws_matcher *matcher) struct mlx5hws_context *ctx = matcher->tbl->ctx; int ret; - INIT_LIST_HEAD(&matcher->resize_data); - mutex_lock(&ctx->ctrl_lock); /* Allocate matcher resource and connect to the packet pipe */ @@ -951,18 +1011,44 @@ static int hws_matcher_uninit(struct mlx5hws_matcher *matcher) return 0; } +static int hws_matcher_grow_at_array(struct mlx5hws_matcher *matcher) +{ + void *p; + + if (matcher->size_of_at_array >= MLX5HWS_MATCHER_MAX_AT) + return -ENOMEM; + + matcher->size_of_at_array *= 2; + p = krealloc(matcher->at, + matcher->size_of_at_array * sizeof(*matcher->at), + __GFP_ZERO | GFP_KERNEL); + if (!p) { + matcher->size_of_at_array /= 2; + return -ENOMEM; + } + + matcher->at = p; + + return 0; +} + int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher, struct mlx5hws_action_template *at) { bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt); - struct mlx5hws_context *ctx = matcher->tbl->ctx; u32 required_stes; int ret; - if (!matcher->attr.max_num_of_at_attach) { - mlx5hws_dbg(ctx, "Num of current at (%d) exceed allowed value\n", - matcher->num_of_at); - return -EOPNOTSUPP; + if (unlikely(matcher->num_of_at >= matcher->size_of_at_array)) { + ret = hws_matcher_grow_at_array(matcher); + if (ret) + return ret; + + if (matcher->col_matcher) { + ret = hws_matcher_grow_at_array(matcher->col_matcher); + if (ret) + return ret; + } } ret = hws_matcher_check_and_process_at(matcher, at); @@ -970,16 +1056,11 @@ int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher, return ret; required_stes = at->num_of_action_stes - (!is_jumbo || at->only_term); - if (matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].max_stes < required_stes) { - mlx5hws_dbg(ctx, "Required STEs [%d] exceeds initial action template STE [%d]\n", - required_stes, - matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].max_stes); - return -ENOMEM; - } + if (matcher->num_of_action_stes < required_stes) + matcher->num_of_action_stes = required_stes; matcher->at[matcher->num_of_at] = *at; matcher->num_of_at += 1; - matcher->attr.max_num_of_at_attach -= 1; if (matcher->col_matcher) matcher->col_matcher->num_of_at = matcher->num_of_at; @@ -1007,9 +1088,10 @@ hws_matcher_set_templates(struct mlx5hws_matcher *matcher, if (!matcher->mt) return -ENOMEM; - matcher->at = kcalloc(num_of_at + matcher->attr.max_num_of_at_attach, - sizeof(*matcher->at), - GFP_KERNEL); + matcher->size_of_at_array = + num_of_at + matcher->attr.max_num_of_at_attach; + matcher->at = kvcalloc(matcher->size_of_at_array, sizeof(*matcher->at), + GFP_KERNEL); if (!matcher->at) { mlx5hws_err(ctx, "Failed to allocate action template array\n"); ret = -ENOMEM; @@ -1035,7 +1117,7 @@ free_mt: static void hws_matcher_unset_templates(struct mlx5hws_matcher *matcher) { - kfree(matcher->at); + kvfree(matcher->at); kfree(matcher->mt); } @@ -1157,8 +1239,7 @@ static int hws_matcher_resize_precheck(struct mlx5hws_matcher *src_matcher, return -EINVAL; } - if (src_matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].max_stes > - dst_matcher->action_ste[0].max_stes) { + if (src_matcher->num_of_action_stes > dst_matcher->num_of_action_stes) { mlx5hws_err(ctx, "Src/dst matcher max STEs mismatch\n"); return -EINVAL; } @@ -1187,10 +1268,6 @@ int mlx5hws_matcher_resize_set_target(struct mlx5hws_matcher *src_matcher, src_matcher->resize_dst = dst_matcher; - ret = hws_matcher_resize_init(src_matcher); - if (ret) - src_matcher->resize_dst = NULL; - out: mutex_unlock(&src_matcher->tbl->ctx->ctrl_lock); return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h index 81ff487f57be..ae20bcebfdde 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h @@ -18,6 +18,14 @@ /* Required depth of the main large table */ #define MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH 2 +/* Action RTC size multiplier that is required in order + * to support rule update for rules with action STEs. + */ +#define MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT 1 + +/* Maximum number of action templates that can be attached to a matcher. */ +#define MLX5HWS_MATCHER_MAX_AT 128 + enum mlx5hws_matcher_offset { MLX5HWS_MATCHER_OFFSET_TAG_DW1 = 12, MLX5HWS_MATCHER_OFFSET_TAG_DW0 = 13, @@ -26,6 +34,7 @@ enum mlx5hws_matcher_offset { enum mlx5hws_matcher_flags { MLX5HWS_MATCHER_FLAGS_COLLISION = 1 << 2, MLX5HWS_MATCHER_FLAGS_RESIZABLE = 1 << 3, + MLX5HWS_MATCHER_FLAGS_ISOLATED = 1 << 4, }; struct mlx5hws_match_template { @@ -37,32 +46,16 @@ struct mlx5hws_match_template { }; struct mlx5hws_matcher_match_ste { - struct mlx5hws_pool_chunk ste; - u32 rtc_0_id; - u32 rtc_1_id; - struct mlx5hws_pool *pool; -}; - -struct mlx5hws_matcher_action_ste { - struct mlx5hws_pool_chunk ste; - struct mlx5hws_pool_chunk stc; - u32 rtc_0_id; - u32 rtc_1_id; - struct mlx5hws_pool *pool; - u8 max_stes; -}; - -struct mlx5hws_matcher_resize_data_node { - struct mlx5hws_pool_chunk stc; u32 rtc_0_id; u32 rtc_1_id; - struct mlx5hws_pool *pool; + u32 ste_0_base; + u32 ste_1_base; }; -struct mlx5hws_matcher_resize_data { - struct mlx5hws_matcher_resize_data_node action_ste[2]; - u8 max_stes; - struct list_head list_node; +enum { + MLX5HWS_MATCHER_IPV_UNSET = 0, + MLX5HWS_MATCHER_IPV_4 = 1, + MLX5HWS_MATCHER_IPV_6 = 2, }; struct mlx5hws_matcher { @@ -71,16 +64,22 @@ struct mlx5hws_matcher { struct mlx5hws_match_template *mt; struct mlx5hws_action_template *at; u8 num_of_at; + u8 size_of_at_array; u8 num_of_mt; + u8 num_of_action_stes; /* enum mlx5hws_matcher_flags */ u8 flags; + u8 matches_outer_ethertype:1; + u8 matches_outer_ip_version:1; + u8 matches_inner_ethertype:1; + u8 matches_inner_ip_version:1; + u8 outer_ip_version:2; + u8 inner_ip_version:2; u32 end_ft_id; struct mlx5hws_matcher *col_matcher; struct mlx5hws_matcher *resize_dst; struct mlx5hws_matcher_match_ste match_ste; - struct mlx5hws_matcher_action_ste action_ste[2]; struct list_head list_node; - struct list_head resize_data; }; static inline bool @@ -99,9 +98,17 @@ static inline bool mlx5hws_matcher_is_in_resize(struct mlx5hws_matcher *matcher) return !!matcher->resize_dst; } +static inline bool mlx5hws_matcher_is_isolated(struct mlx5hws_matcher *matcher) +{ + return !!(matcher->flags & MLX5HWS_MATCHER_FLAGS_ISOLATED); +} + static inline bool mlx5hws_matcher_is_insert_by_idx(struct mlx5hws_matcher *matcher) { return matcher->attr.insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX; } +int mlx5hws_matcher_update_end_ft_isolated(struct mlx5hws_table *tbl, + u32 miss_ft_id); + #endif /* HWS_MATCHER_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h index f39d636ff39a..2498ceff2060 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h @@ -70,12 +70,12 @@ enum mlx5hws_send_queue_actions { struct mlx5hws_context_attr { u16 queues; u16 queue_size; - bool bwc; /* add support for backward compatible API*/ }; struct mlx5hws_table_attr { enum mlx5hws_table_type type; u32 level; + u16 uid; }; enum mlx5hws_matcher_flow_src { @@ -94,6 +94,23 @@ enum mlx5hws_matcher_distribute_mode { MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR = 0x1, }; +enum mlx5hws_matcher_size_type { + MLX5HWS_MATCHER_SIZE_TYPE_RX, + MLX5HWS_MATCHER_SIZE_TYPE_TX, + MLX5HWS_MATCHER_SIZE_TYPE_MAX, +}; + +union mlx5hws_matcher_size { + struct { + u8 sz_row_log; + u8 sz_col_log; + } table; + + struct { + u8 num_log; + } rule; +}; + struct mlx5hws_matcher_attr { /* Processing priority inside table */ u32 priority; @@ -108,18 +125,11 @@ struct mlx5hws_matcher_attr { enum mlx5hws_matcher_distribute_mode distribute_mode; /* Define whether the created matcher supports resizing into a bigger matcher */ bool resizable; - union { - struct { - u8 sz_row_log; - u8 sz_col_log; - } table; - - struct { - u8 num_log; - } rule; - }; + union mlx5hws_matcher_size size[MLX5HWS_MATCHER_SIZE_TYPE_MAX]; /* Optional AT attach configuration - Max number of additional AT */ u8 max_num_of_at_attach; + /* Optional end FT (miss FT ID) for match RTC (for isolated matcher) */ + u32 isolated_matcher_end_ft_id; }; struct mlx5hws_rule_attr { @@ -212,6 +222,7 @@ struct mlx5hws_action_dest_attr { struct mlx5hws_action *dest; /* Optional reformat action */ struct mlx5hws_action *reformat; + bool is_wire_ft; }; /** @@ -503,6 +514,15 @@ enum mlx5hws_action_type mlx5hws_action_get_type(struct mlx5hws_action *action); /** + * mlx5hws_action_get_dev - Get mlx5 core device. + * + * @action: The action to get the device from. + * + * Return: mlx5 core device. + */ +struct mlx5_core_dev *mlx5hws_action_get_dev(struct mlx5hws_action *action); + +/** * mlx5hws_action_create_dest_drop - Create a direct rule drop action. * * @ctx: The context in which the new action will be created. @@ -716,18 +736,14 @@ mlx5hws_action_create_push_vlan(struct mlx5hws_context *ctx, u32 flags); * @dests: The destination array. Each contains a destination action and can * have additional actions. * @ignore_flow_level: Whether to turn on 'ignore_flow_level' for this dest. - * @flow_source: Source port of the traffic for this actions. * @flags: Action creation flags (enum mlx5hws_action_flags). * * Return: pointer to mlx5hws_action on success NULL otherwise. */ struct mlx5hws_action * -mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, - size_t num_dest, +mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, size_t num_dest, struct mlx5hws_action_dest_attr *dests, - bool ignore_flow_level, - u32 flow_source, - u32 flags); + bool ignore_flow_level, u32 flags); /** * mlx5hws_action_create_insert_header - Create insert header action. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c index 06db5e4726ae..51e4c551e0ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c @@ -153,8 +153,7 @@ mlx5hws_pat_get_existing_cached_pattern(struct mlx5hws_pattern_cache *cache, cached_pattern = mlx5hws_pat_find_cached_pattern(cache, num_of_actions, actions); if (cached_pattern) { /* LRU: move it to be first in the list */ - list_del_init(&cached_pattern->ptrn_list_node); - list_add(&cached_pattern->ptrn_list_node, &cache->ptrn_list); + list_move(&cached_pattern->ptrn_list_node, &cache->ptrn_list); cached_pattern->refcount++; } @@ -344,7 +343,7 @@ void mlx5hws_arg_write(struct mlx5hws_send_engine *queue, mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len); memset(wqe_ctrl, 0, wqe_len); mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_arg, &wqe_len); - memcpy(wqe_arg, arg_data, wqe_len); + memcpy(wqe_arg, arg_data, MLX5HWS_ARG_DATA_SIZE); send_attr.id = arg_idx++; mlx5hws_send_engine_post_end(&ctrl, &send_attr); @@ -491,8 +490,8 @@ hws_action_modify_get_target_fields(u8 action_type, __be64 *pattern, switch (action_type) { case MLX5_ACTION_TYPE_SET: case MLX5_ACTION_TYPE_ADD: - *src_field = MLX5_GET(set_action_in, pattern, field); - *dst_field = INVALID_FIELD; + *src_field = INVALID_FIELD; + *dst_field = MLX5_GET(set_action_in, pattern, field); break; case MLX5_ACTION_TYPE_COPY: *src_field = MLX5_GET(copy_action_in, pattern, src_field); @@ -523,57 +522,59 @@ bool mlx5hws_pat_verify_actions(struct mlx5hws_context *ctx, __be64 pattern[], s return true; } -void mlx5hws_pat_calc_nope(__be64 *pattern, size_t num_actions, - size_t max_actions, size_t *new_size, - u32 *nope_location, __be64 *new_pat) +int mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions, + size_t max_actions, size_t *new_size, + u32 *nop_locations, __be64 *new_pat) { - u16 prev_src_field = 0, prev_dst_field = 0; + u16 prev_src_field = INVALID_FIELD, prev_dst_field = INVALID_FIELD; u16 src_field, dst_field; u8 action_type; + bool dependent; size_t i, j; *new_size = num_actions; - *nope_location = 0; + *nop_locations = 0; if (num_actions == 1) - return; + return 0; for (i = 0, j = 0; i < num_actions; i++, j++) { - action_type = MLX5_GET(set_action_in, &pattern[i], action_type); + if (j >= max_actions) + return -EINVAL; + action_type = MLX5_GET(set_action_in, &pattern[i], action_type); hws_action_modify_get_target_fields(action_type, &pattern[i], &src_field, &dst_field); - if (i % 2) { - if (action_type == MLX5_ACTION_TYPE_COPY && - (prev_src_field == src_field || - prev_dst_field == dst_field)) { - /* need Nope */ - *new_size += 1; - *nope_location |= BIT(i); - memset(&new_pat[j], 0, MLX5HWS_MODIFY_ACTION_SIZE); - MLX5_SET(set_action_in, &new_pat[j], - action_type, - MLX5_MODIFICATION_TYPE_NOP); - j++; - } else if (prev_src_field == src_field) { - /* need Nope*/ - *new_size += 1; - *nope_location |= BIT(i); - MLX5_SET(set_action_in, &new_pat[j], - action_type, - MLX5_MODIFICATION_TYPE_NOP); - j++; - } - } - memcpy(&new_pat[j], &pattern[i], MLX5HWS_MODIFY_ACTION_SIZE); - /* check if no more space */ - if (j > max_actions) { - *new_size = num_actions; - *nope_location = 0; - return; + + /* For every action, look at it and the previous one. The two + * actions are dependent if: + */ + dependent = + (i > 0) && + /* At least one of the actions is a write and */ + (dst_field != INVALID_FIELD || + prev_dst_field != INVALID_FIELD) && + /* One reads from the other's source */ + (dst_field == prev_src_field || + src_field == prev_dst_field || + /* Or both write to the same destination */ + dst_field == prev_dst_field); + + if (dependent) { + *new_size += 1; + *nop_locations |= BIT(i); + memset(&new_pat[j], 0, MLX5HWS_MODIFY_ACTION_SIZE); + MLX5_SET(set_action_in, &new_pat[j], action_type, + MLX5_MODIFICATION_TYPE_NOP); + j++; + if (j >= max_actions) + return -EINVAL; } + memcpy(&new_pat[j], &pattern[i], MLX5HWS_MODIFY_ACTION_SIZE); prev_src_field = src_field; prev_dst_field = dst_field; } + + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h index 27ca93385b08..7fbd8dc7aa18 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h @@ -31,7 +31,7 @@ struct mlx5hws_pattern_cache_item { u8 *data; u16 num_of_actions; } mh_data; - u32 refcount; + u32 refcount; /* protected by pattern_cache lock */ struct list_head ptrn_list_node; }; @@ -96,6 +96,7 @@ int mlx5hws_arg_write_inline_arg_data(struct mlx5hws_context *ctx, u8 *arg_data, size_t data_size); -void mlx5hws_pat_calc_nope(__be64 *pattern, size_t num_actions, size_t max_actions, - size_t *new_size, u32 *nope_location, __be64 *new_pat); +int mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions, + size_t max_actions, size_t *new_size, + u32 *nop_locations, __be64 *new_pat); #endif /* MLX5HWS_PAT_ARG_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c index fed2d913f3b8..7e37d6e9eb83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c @@ -20,15 +20,14 @@ static void hws_pool_free_one_resource(struct mlx5hws_pool_resource *resource) kfree(resource); } -static void hws_pool_resource_free(struct mlx5hws_pool *pool, - int resource_idx) +static void hws_pool_resource_free(struct mlx5hws_pool *pool) { - hws_pool_free_one_resource(pool->resource[resource_idx]); - pool->resource[resource_idx] = NULL; + hws_pool_free_one_resource(pool->resource); + pool->resource = NULL; if (pool->tbl_type == MLX5HWS_TABLE_TYPE_FDB) { - hws_pool_free_one_resource(pool->mirror_resource[resource_idx]); - pool->mirror_resource[resource_idx] = NULL; + hws_pool_free_one_resource(pool->mirror_resource); + pool->mirror_resource = NULL; } } @@ -61,10 +60,8 @@ hws_pool_create_one_resource(struct mlx5hws_pool *pool, u32 log_range, ret = -EINVAL; } - if (ret) { - mlx5hws_err(pool->ctx, "Failed to allocate resource objects\n"); + if (ret) goto free_resource; - } resource->pool = pool; resource->range = 1 << log_range; @@ -77,199 +74,114 @@ free_resource: return NULL; } -static int -hws_pool_resource_alloc(struct mlx5hws_pool *pool, u32 log_range, int idx) +static int hws_pool_resource_alloc(struct mlx5hws_pool *pool) { struct mlx5hws_pool_resource *resource; u32 fw_ft_type, opt_log_range; fw_ft_type = mlx5hws_table_get_res_fw_ft_type(pool->tbl_type, false); - opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_ORIG ? 0 : log_range; + opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_MIRROR ? + 0 : pool->alloc_log_sz; resource = hws_pool_create_one_resource(pool, opt_log_range, fw_ft_type); if (!resource) { - mlx5hws_err(pool->ctx, "Failed allocating resource\n"); + mlx5hws_err(pool->ctx, "Failed to allocate resource\n"); return -EINVAL; } - pool->resource[idx] = resource; + pool->resource = resource; if (pool->tbl_type == MLX5HWS_TABLE_TYPE_FDB) { struct mlx5hws_pool_resource *mirror_resource; fw_ft_type = mlx5hws_table_get_res_fw_ft_type(pool->tbl_type, true); - opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_MIRROR ? 0 : log_range; + opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_ORIG ? + 0 : pool->alloc_log_sz; mirror_resource = hws_pool_create_one_resource(pool, opt_log_range, fw_ft_type); if (!mirror_resource) { - mlx5hws_err(pool->ctx, "Failed allocating mirrored resource\n"); + mlx5hws_err(pool->ctx, "Failed to allocate mirrored resource\n"); hws_pool_free_one_resource(resource); - pool->resource[idx] = NULL; + pool->resource = NULL; return -EINVAL; } - pool->mirror_resource[idx] = mirror_resource; + pool->mirror_resource = mirror_resource; } return 0; } -static unsigned long *hws_pool_create_and_init_bitmap(u32 log_range) -{ - unsigned long *cur_bmp; - - cur_bmp = bitmap_zalloc(1 << log_range, GFP_KERNEL); - if (!cur_bmp) - return NULL; - - bitmap_fill(cur_bmp, 1 << log_range); - - return cur_bmp; -} - -static void hws_pool_buddy_db_put_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static int hws_pool_buddy_init(struct mlx5hws_pool *pool) { struct mlx5hws_buddy_mem *buddy; - buddy = pool->db.buddy_manager->buddies[chunk->resource_idx]; + buddy = mlx5hws_buddy_create(pool->alloc_log_sz); if (!buddy) { - mlx5hws_err(pool->ctx, "No such buddy (%d)\n", chunk->resource_idx); - return; - } - - mlx5hws_buddy_free_mem(buddy, chunk->offset, chunk->order); -} - -static struct mlx5hws_buddy_mem * -hws_pool_buddy_get_next_buddy(struct mlx5hws_pool *pool, int idx, - u32 order, bool *is_new_buddy) -{ - static struct mlx5hws_buddy_mem *buddy; - u32 new_buddy_size; - - buddy = pool->db.buddy_manager->buddies[idx]; - if (buddy) - return buddy; - - new_buddy_size = max(pool->alloc_log_sz, order); - *is_new_buddy = true; - buddy = mlx5hws_buddy_create(new_buddy_size); - if (!buddy) { - mlx5hws_err(pool->ctx, "Failed to create buddy order: %d index: %d\n", - new_buddy_size, idx); - return NULL; + mlx5hws_err(pool->ctx, "Failed to create buddy order: %zu\n", + pool->alloc_log_sz); + return -ENOMEM; } - if (hws_pool_resource_alloc(pool, new_buddy_size, idx) != 0) { - mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n", - pool->type, new_buddy_size, idx); + if (hws_pool_resource_alloc(pool) != 0) { + mlx5hws_err(pool->ctx, "Failed to create resource type: %d size %zu\n", + pool->type, pool->alloc_log_sz); mlx5hws_buddy_cleanup(buddy); - return NULL; + return -ENOMEM; } - pool->db.buddy_manager->buddies[idx] = buddy; + pool->db.buddy = buddy; - return buddy; + return 0; } -static int hws_pool_buddy_get_mem_chunk(struct mlx5hws_pool *pool, - int order, - u32 *buddy_idx, - int *seg) +static int hws_pool_buddy_db_get_chunk(struct mlx5hws_pool *pool, + struct mlx5hws_pool_chunk *chunk) { - struct mlx5hws_buddy_mem *buddy; - bool new_mem = false; - int ret = 0; - int i; - - *seg = -1; - - /* Find the next free place from the buddy array */ - while (*seg == -1) { - for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) { - buddy = hws_pool_buddy_get_next_buddy(pool, i, - order, - &new_mem); - if (!buddy) { - ret = -ENOMEM; - goto out; - } - - *seg = mlx5hws_buddy_alloc_mem(buddy, order); - if (*seg != -1) - goto found; - - if (pool->flags & MLX5HWS_POOL_FLAGS_ONE_RESOURCE) { - mlx5hws_err(pool->ctx, - "Fail to allocate seg for one resource pool\n"); - ret = -ENOMEM; - goto out; - } - - if (new_mem) { - /* We have new memory pool, should be place for us */ - mlx5hws_err(pool->ctx, - "No memory for order: %d with buddy no: %d\n", - order, i); - ret = -ENOMEM; - goto out; - } - } + struct mlx5hws_buddy_mem *buddy = pool->db.buddy; + + if (!buddy) { + mlx5hws_err(pool->ctx, "Bad buddy state\n"); + return -EINVAL; } -found: - *buddy_idx = i; -out: - return ret; + chunk->offset = mlx5hws_buddy_alloc_mem(buddy, chunk->order); + if (chunk->offset >= 0) + return 0; + + return -ENOMEM; } -static int hws_pool_buddy_db_get_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static void hws_pool_buddy_db_put_chunk(struct mlx5hws_pool *pool, + struct mlx5hws_pool_chunk *chunk) { - int ret = 0; + struct mlx5hws_buddy_mem *buddy; - /* Go over the buddies and find next free slot */ - ret = hws_pool_buddy_get_mem_chunk(pool, chunk->order, - &chunk->resource_idx, - &chunk->offset); - if (ret) - mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n", - chunk->order); + buddy = pool->db.buddy; + if (!buddy) { + mlx5hws_err(pool->ctx, "Bad buddy state\n"); + return; + } - return ret; + mlx5hws_buddy_free_mem(buddy, chunk->offset, chunk->order); } static void hws_pool_buddy_db_uninit(struct mlx5hws_pool *pool) { struct mlx5hws_buddy_mem *buddy; - int i; - - for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) { - buddy = pool->db.buddy_manager->buddies[i]; - if (buddy) { - mlx5hws_buddy_cleanup(buddy); - kfree(buddy); - pool->db.buddy_manager->buddies[i] = NULL; - } - } - kfree(pool->db.buddy_manager); + buddy = pool->db.buddy; + if (buddy) { + mlx5hws_buddy_cleanup(buddy); + kfree(buddy); + pool->db.buddy = NULL; + } } -static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool, u32 log_range) +static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool) { - pool->db.buddy_manager = kzalloc(sizeof(*pool->db.buddy_manager), GFP_KERNEL); - if (!pool->db.buddy_manager) - return -ENOMEM; - - if (pool->flags & MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE) { - bool new_buddy; + int ret; - if (!hws_pool_buddy_get_next_buddy(pool, 0, log_range, &new_buddy)) { - mlx5hws_err(pool->ctx, - "Failed allocating memory on create log_sz: %d\n", log_range); - kfree(pool->db.buddy_manager); - return -ENOMEM; - } - } + ret = hws_pool_buddy_init(pool); + if (ret) + return ret; pool->p_db_uninit = &hws_pool_buddy_db_uninit; pool->p_get_chunk = &hws_pool_buddy_db_get_chunk; @@ -278,261 +190,105 @@ static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool, u32 log_range) return 0; } -static int hws_pool_create_resource_on_index(struct mlx5hws_pool *pool, - u32 alloc_size, int idx) -{ - int ret = hws_pool_resource_alloc(pool, alloc_size, idx); - - if (ret) { - mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n", - pool->type, alloc_size, idx); - return ret; - } - - return 0; -} - -static struct mlx5hws_pool_elements * -hws_pool_element_create_new_elem(struct mlx5hws_pool *pool, u32 order, int idx) +static unsigned long *hws_pool_create_and_init_bitmap(u32 log_range) { - struct mlx5hws_pool_elements *elem; - u32 alloc_size; - - alloc_size = pool->alloc_log_sz; + unsigned long *bitmap; - elem = kzalloc(sizeof(*elem), GFP_KERNEL); - if (!elem) + bitmap = bitmap_zalloc(1 << log_range, GFP_KERNEL); + if (!bitmap) return NULL; - /* Sharing the same resource, also means that all the elements are with size 1 */ - if ((pool->flags & MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS) && - !(pool->flags & MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK)) { - /* Currently all chunks in size 1 */ - elem->bitmap = hws_pool_create_and_init_bitmap(alloc_size - order); - if (!elem->bitmap) { - mlx5hws_err(pool->ctx, - "Failed to create bitmap type: %d: size %d index: %d\n", - pool->type, alloc_size, idx); - goto free_elem; - } - - elem->log_size = alloc_size - order; - } - - if (hws_pool_create_resource_on_index(pool, alloc_size, idx)) { - mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n", - pool->type, alloc_size, idx); - goto free_db; - } - - pool->db.element_manager->elements[idx] = elem; - - return elem; + bitmap_fill(bitmap, 1 << log_range); -free_db: - bitmap_free(elem->bitmap); -free_elem: - kfree(elem); - return NULL; + return bitmap; } -static int hws_pool_element_find_seg(struct mlx5hws_pool_elements *elem, int *seg) +static int hws_pool_bitmap_init(struct mlx5hws_pool *pool) { - unsigned int segment, size; - - size = 1 << elem->log_size; + unsigned long *bitmap; - segment = find_first_bit(elem->bitmap, size); - if (segment >= size) { - elem->is_full = true; + bitmap = hws_pool_create_and_init_bitmap(pool->alloc_log_sz); + if (!bitmap) { + mlx5hws_err(pool->ctx, "Failed to create bitmap order: %zu\n", + pool->alloc_log_sz); return -ENOMEM; } - bitmap_clear(elem->bitmap, segment, 1); - *seg = segment; - return 0; -} - -static int -hws_pool_onesize_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order, - u32 *idx, int *seg) -{ - struct mlx5hws_pool_elements *elem; - - elem = pool->db.element_manager->elements[0]; - if (!elem) - elem = hws_pool_element_create_new_elem(pool, order, 0); - if (!elem) - goto err_no_elem; - - if (hws_pool_element_find_seg(elem, seg) != 0) { - mlx5hws_err(pool->ctx, "No more resources (last request order: %d)\n", order); + if (hws_pool_resource_alloc(pool) != 0) { + mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %zu\n", + pool->type, pool->alloc_log_sz); + bitmap_free(bitmap); return -ENOMEM; } - *idx = 0; - elem->num_of_elements++; - return 0; + pool->db.bitmap = bitmap; -err_no_elem: - mlx5hws_err(pool->ctx, "Failed to allocate element for order: %d\n", order); - return -ENOMEM; -} - -static int -hws_pool_general_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order, - u32 *idx, int *seg) -{ - int ret, i; - - for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) { - if (!pool->resource[i]) { - ret = hws_pool_create_resource_on_index(pool, order, i); - if (ret) - goto err_no_res; - *idx = i; - *seg = 0; /* One memory slot in that element */ - return 0; - } - } - - mlx5hws_err(pool->ctx, "No more resources (last request order: %d)\n", order); - return -ENOMEM; - -err_no_res: - mlx5hws_err(pool->ctx, "Failed to allocate element for order: %d\n", order); - return -ENOMEM; + return 0; } -static int hws_pool_general_element_db_get_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static int hws_pool_bitmap_db_get_chunk(struct mlx5hws_pool *pool, + struct mlx5hws_pool_chunk *chunk) { - int ret; - - /* Go over all memory elements and find/allocate free slot */ - ret = hws_pool_general_element_get_mem_chunk(pool, chunk->order, - &chunk->resource_idx, - &chunk->offset); - if (ret) - mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n", - chunk->order); + unsigned long *bitmap, size; - return ret; -} + if (chunk->order != 0) { + mlx5hws_err(pool->ctx, "Pool only supports order 0 allocs\n"); + return -EINVAL; + } -static void hws_pool_general_element_db_put_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) -{ - if (unlikely(!pool->resource[chunk->resource_idx])) - pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx); + bitmap = pool->db.bitmap; + if (!bitmap) { + mlx5hws_err(pool->ctx, "Bad bitmap state\n"); + return -EINVAL; + } - if (pool->flags & MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE) - hws_pool_resource_free(pool, chunk->resource_idx); -} + size = 1 << pool->alloc_log_sz; -static void hws_pool_general_element_db_uninit(struct mlx5hws_pool *pool) -{ - (void)pool; -} + chunk->offset = find_first_bit(bitmap, size); + if (chunk->offset >= size) + return -ENOMEM; -/* This memory management works as the following: - * - At start doesn't allocate no mem at all. - * - When new request for chunk arrived: - * allocate resource and give it. - * - When free that chunk: - * the resource is freed. - */ -static int hws_pool_general_element_db_init(struct mlx5hws_pool *pool) -{ - pool->p_db_uninit = &hws_pool_general_element_db_uninit; - pool->p_get_chunk = &hws_pool_general_element_db_get_chunk; - pool->p_put_chunk = &hws_pool_general_element_db_put_chunk; + bitmap_clear(bitmap, chunk->offset, 1); return 0; } -static void hws_onesize_element_db_destroy_element(struct mlx5hws_pool *pool, - struct mlx5hws_pool_elements *elem, - struct mlx5hws_pool_chunk *chunk) -{ - if (unlikely(!pool->resource[chunk->resource_idx])) - pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx); - - hws_pool_resource_free(pool, chunk->resource_idx); - kfree(elem); - pool->db.element_manager->elements[chunk->resource_idx] = NULL; -} - -static void hws_onesize_element_db_put_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static void hws_pool_bitmap_db_put_chunk(struct mlx5hws_pool *pool, + struct mlx5hws_pool_chunk *chunk) { - struct mlx5hws_pool_elements *elem; + unsigned long *bitmap; - if (unlikely(chunk->resource_idx)) - pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx); - - elem = pool->db.element_manager->elements[chunk->resource_idx]; - if (!elem) { - mlx5hws_err(pool->ctx, "No such element (%d)\n", chunk->resource_idx); + bitmap = pool->db.bitmap; + if (!bitmap) { + mlx5hws_err(pool->ctx, "Bad bitmap state\n"); return; } - bitmap_set(elem->bitmap, chunk->offset, 1); - elem->is_full = false; - elem->num_of_elements--; - - if (pool->flags & MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE && - !elem->num_of_elements) - hws_onesize_element_db_destroy_element(pool, elem, chunk); + bitmap_set(bitmap, chunk->offset, 1); } -static int hws_onesize_element_db_get_chunk(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static void hws_pool_bitmap_db_uninit(struct mlx5hws_pool *pool) { - int ret = 0; + unsigned long *bitmap; - /* Go over all memory elements and find/allocate free slot */ - ret = hws_pool_onesize_element_get_mem_chunk(pool, chunk->order, - &chunk->resource_idx, - &chunk->offset); - if (ret) - mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n", - chunk->order); - - return ret; -} - -static void hws_onesize_element_db_uninit(struct mlx5hws_pool *pool) -{ - struct mlx5hws_pool_elements *elem; - int i; - - for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) { - elem = pool->db.element_manager->elements[i]; - if (elem) { - bitmap_free(elem->bitmap); - kfree(elem); - pool->db.element_manager->elements[i] = NULL; - } + bitmap = pool->db.bitmap; + if (bitmap) { + bitmap_free(bitmap); + pool->db.bitmap = NULL; } - kfree(pool->db.element_manager); } -/* This memory management works as the following: - * - At start doesn't allocate no mem at all. - * - When new request for chunk arrived: - * aloocate the first and only slot of memory/resource - * when it ended return error. - */ -static int hws_pool_onesize_element_db_init(struct mlx5hws_pool *pool) +static int hws_pool_bitmap_db_init(struct mlx5hws_pool *pool) { - pool->db.element_manager = kzalloc(sizeof(*pool->db.element_manager), GFP_KERNEL); - if (!pool->db.element_manager) - return -ENOMEM; + int ret; - pool->p_db_uninit = &hws_onesize_element_db_uninit; - pool->p_get_chunk = &hws_onesize_element_db_get_chunk; - pool->p_put_chunk = &hws_onesize_element_db_put_chunk; + ret = hws_pool_bitmap_init(pool); + if (ret) + return ret; + + pool->p_db_uninit = &hws_pool_bitmap_db_uninit; + pool->p_get_chunk = &hws_pool_bitmap_db_get_chunk; + pool->p_put_chunk = &hws_pool_bitmap_db_put_chunk; return 0; } @@ -542,15 +298,14 @@ static int hws_pool_db_init(struct mlx5hws_pool *pool, { int ret; - if (db_type == MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE) - ret = hws_pool_general_element_db_init(pool); - else if (db_type == MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE) - ret = hws_pool_onesize_element_db_init(pool); + if (db_type == MLX5HWS_POOL_DB_TYPE_BITMAP) + ret = hws_pool_bitmap_db_init(pool); else - ret = hws_pool_buddy_db_init(pool, pool->alloc_log_sz); + ret = hws_pool_buddy_db_init(pool); if (ret) { - mlx5hws_err(pool->ctx, "Failed to init general db : %d (ret: %d)\n", db_type, ret); + mlx5hws_err(pool->ctx, "Failed to init pool type: %d (ret: %d)\n", + db_type, ret); return ret; } @@ -569,6 +324,8 @@ int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool, mutex_lock(&pool->lock); ret = pool->p_get_chunk(pool, chunk); + if (ret == 0) + pool->available_elems -= 1 << chunk->order; mutex_unlock(&pool->lock); return ret; @@ -579,6 +336,7 @@ void mlx5hws_pool_chunk_free(struct mlx5hws_pool *pool, { mutex_lock(&pool->lock); pool->p_put_chunk(pool, chunk); + pool->available_elems += 1 << chunk->order; mutex_unlock(&pool->lock); } @@ -599,17 +357,13 @@ mlx5hws_pool_create(struct mlx5hws_context *ctx, struct mlx5hws_pool_attr *pool_ pool->tbl_type = pool_attr->table_type; pool->opt_type = pool_attr->opt_type; - /* Support general db */ - if (pool->flags == (MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE | - MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK)) - res_db_type = MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE; - else if (pool->flags == (MLX5HWS_POOL_FLAGS_ONE_RESOURCE | - MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS)) - res_db_type = MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE; - else + if (pool->flags & MLX5HWS_POOL_FLAG_BUDDY) res_db_type = MLX5HWS_POOL_DB_TYPE_BUDDY; + else + res_db_type = MLX5HWS_POOL_DB_TYPE_BITMAP; pool->alloc_log_sz = pool_attr->alloc_log_sz; + pool->available_elems = 1 << pool_attr->alloc_log_sz; if (hws_pool_db_init(pool, res_db_type)) goto free_pool; @@ -623,18 +377,17 @@ free_pool: return NULL; } -int mlx5hws_pool_destroy(struct mlx5hws_pool *pool) +void mlx5hws_pool_destroy(struct mlx5hws_pool *pool) { - int i; - mutex_destroy(&pool->lock); - for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) - if (pool->resource[i]) - hws_pool_resource_free(pool, i); + if (pool->available_elems != 1 << pool->alloc_log_sz) + mlx5hws_err(pool->ctx, "Attempting to destroy non-empty pool\n"); + + if (pool->resource) + hws_pool_resource_free(pool); hws_pool_db_unint(pool); kfree(pool); - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h index 621298b352b2..33e33d5f1fb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h @@ -6,16 +6,12 @@ #define MLX5HWS_POOL_STC_LOG_SZ 15 -#define MLX5HWS_POOL_RESOURCE_ARR_SZ 100 - enum mlx5hws_pool_type { MLX5HWS_POOL_TYPE_STE, MLX5HWS_POOL_TYPE_STC, }; struct mlx5hws_pool_chunk { - u32 resource_idx; - /* Internal offset, relative to base index */ int offset; int order; }; @@ -27,35 +23,17 @@ struct mlx5hws_pool_resource { }; enum mlx5hws_pool_flags { - /* Only a one resource in that pool */ - MLX5HWS_POOL_FLAGS_ONE_RESOURCE = 1 << 0, - MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE = 1 << 1, - /* No sharing resources between chunks */ - MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK = 1 << 2, - /* All objects are in the same size */ - MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS = 1 << 3, - /* Managed by buddy allocator */ - MLX5HWS_POOL_FLAGS_BUDDY_MANAGED = 1 << 4, - /* Allocate pool_type memory on pool creation */ - MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE = 1 << 5, - - /* These values should be used by the caller */ - MLX5HWS_POOL_FLAGS_FOR_STC_POOL = - MLX5HWS_POOL_FLAGS_ONE_RESOURCE | - MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS, - MLX5HWS_POOL_FLAGS_FOR_MATCHER_STE_POOL = - MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE | - MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK, - MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL = - MLX5HWS_POOL_FLAGS_ONE_RESOURCE | - MLX5HWS_POOL_FLAGS_BUDDY_MANAGED | - MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE, + /* Managed by a buddy allocator. If this is not set only allocations of + * order 0 are supported. + */ + MLX5HWS_POOL_FLAG_BUDDY = BIT(0), }; enum mlx5hws_pool_optimize { MLX5HWS_POOL_OPTIMIZE_NONE = 0x0, MLX5HWS_POOL_OPTIMIZE_ORIG = 0x1, MLX5HWS_POOL_OPTIMIZE_MIRROR = 0x2, + MLX5HWS_POOL_OPTIMIZE_MAX = 0x3, }; struct mlx5hws_pool_attr { @@ -68,34 +46,17 @@ struct mlx5hws_pool_attr { }; enum mlx5hws_db_type { - /* Uses for allocating chunk of big memory, each element has its own resource in the FW*/ - MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE, - /* One resource only, all the elements are with same one size */ - MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE, - /* Many resources, the memory allocated with buddy mechanism */ + /* Uses a bitmap, supports only allocations of order 0. */ + MLX5HWS_POOL_DB_TYPE_BITMAP, + /* Entries are managed using a buddy mechanism. */ MLX5HWS_POOL_DB_TYPE_BUDDY, }; -struct mlx5hws_buddy_manager { - struct mlx5hws_buddy_mem *buddies[MLX5HWS_POOL_RESOURCE_ARR_SZ]; -}; - -struct mlx5hws_pool_elements { - u32 num_of_elements; - unsigned long *bitmap; - u32 log_size; - bool is_full; -}; - -struct mlx5hws_element_manager { - struct mlx5hws_pool_elements *elements[MLX5HWS_POOL_RESOURCE_ARR_SZ]; -}; - struct mlx5hws_pool_db { enum mlx5hws_db_type type; union { - struct mlx5hws_element_manager *element_manager; - struct mlx5hws_buddy_manager *buddy_manager; + unsigned long *bitmap; + struct mlx5hws_buddy_mem *buddy; }; }; @@ -111,11 +72,11 @@ struct mlx5hws_pool { enum mlx5hws_pool_flags flags; struct mutex lock; /* protect the pool */ size_t alloc_log_sz; + size_t available_elems; enum mlx5hws_table_type tbl_type; enum mlx5hws_pool_optimize opt_type; - struct mlx5hws_pool_resource *resource[MLX5HWS_POOL_RESOURCE_ARR_SZ]; - struct mlx5hws_pool_resource *mirror_resource[MLX5HWS_POOL_RESOURCE_ARR_SZ]; - /* DB */ + struct mlx5hws_pool_resource *resource; + struct mlx5hws_pool_resource *mirror_resource; struct mlx5hws_pool_db db; /* Functions */ mlx5hws_pool_unint_db p_db_uninit; @@ -127,7 +88,7 @@ struct mlx5hws_pool * mlx5hws_pool_create(struct mlx5hws_context *ctx, struct mlx5hws_pool_attr *pool_attr); -int mlx5hws_pool_destroy(struct mlx5hws_pool *pool); +void mlx5hws_pool_destroy(struct mlx5hws_pool *pool); int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool, struct mlx5hws_pool_chunk *chunk); @@ -135,17 +96,37 @@ int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool, void mlx5hws_pool_chunk_free(struct mlx5hws_pool *pool, struct mlx5hws_pool_chunk *chunk); -static inline u32 -mlx5hws_pool_chunk_get_base_id(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static inline u32 mlx5hws_pool_get_base_id(struct mlx5hws_pool *pool) { - return pool->resource[chunk->resource_idx]->base_id; + return pool->resource->base_id; } -static inline u32 -mlx5hws_pool_chunk_get_base_mirror_id(struct mlx5hws_pool *pool, - struct mlx5hws_pool_chunk *chunk) +static inline u32 mlx5hws_pool_get_base_mirror_id(struct mlx5hws_pool *pool) { - return pool->mirror_resource[chunk->resource_idx]->base_id; + return pool->mirror_resource->base_id; +} + +static inline bool +mlx5hws_pool_empty(struct mlx5hws_pool *pool) +{ + bool ret; + + mutex_lock(&pool->lock); + ret = pool->available_elems == 0; + mutex_unlock(&pool->lock); + + return ret; +} + +static inline bool +mlx5hws_pool_full(struct mlx5hws_pool *pool) +{ + bool ret; + + mutex_lock(&pool->lock); + ret = pool->available_elems == (1 << pool->alloc_log_sz); + mutex_unlock(&pool->lock); + + return ret; } #endif /* MLX5HWS_POOL_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/prm.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/prm.h index de92cecbeb92..271490a51b96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/prm.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/prm.h @@ -390,11 +390,6 @@ struct mlx5_ifc_definer_bits { u8 match_mask[0x160]; }; -struct mlx5_ifc_arg_bits { - u8 rsvd0[0x88]; - u8 access_pd[0x18]; -}; - struct mlx5_ifc_header_modify_pattern_in_bits { u8 modify_field_select[0x40]; @@ -428,11 +423,6 @@ struct mlx5_ifc_create_definer_in_bits { struct mlx5_ifc_definer_bits definer; }; -struct mlx5_ifc_create_arg_in_bits { - struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; - struct mlx5_ifc_arg_bits arg; -}; - struct mlx5_ifc_create_header_modify_pattern_in_bits { struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; struct mlx5_ifc_header_modify_pattern_in_bits pattern; @@ -479,36 +469,4 @@ enum { MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION_GOTO_TBL = 1, }; -struct mlx5_ifc_alloc_packet_reformat_out_bits { - u8 status[0x8]; - u8 reserved_at_8[0x18]; - - u8 syndrome[0x20]; - - u8 packet_reformat_id[0x20]; - - u8 reserved_at_60[0x20]; -}; - -struct mlx5_ifc_dealloc_packet_reformat_in_bits { - u8 opcode[0x10]; - u8 reserved_at_10[0x10]; - - u8 reserved_at_20[0x10]; - u8 op_mod[0x10]; - - u8 packet_reformat_id[0x20]; - - u8 reserved_at_60[0x20]; -}; - -struct mlx5_ifc_dealloc_packet_reformat_out_bits { - u8 status[0x8]; - u8 reserved_at_8[0x18]; - - u8 syndrome[0x20]; - - u8 reserved_at_40[0x40]; -}; - #endif /* MLX5_PRM_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c index e20c67a04203..a94f094e72ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c @@ -3,10 +3,8 @@ #include "internal.h" -static void hws_rule_skip(struct mlx5hws_matcher *matcher, - struct mlx5hws_match_template *mt, - u32 flow_source, - bool *skip_rx, bool *skip_tx) +void mlx5hws_rule_skip(struct mlx5hws_matcher *matcher, u32 flow_source, + bool *skip_rx, bool *skip_tx) { /* By default FDB rules are added to both RX and TX */ *skip_rx = false; @@ -14,20 +12,21 @@ static void hws_rule_skip(struct mlx5hws_matcher *matcher, if (flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT) { *skip_rx = true; - } else if (flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK) { + return; + } + + if (flow_source == MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK) { *skip_tx = true; - } else { - /* If no flow source was set for current rule, - * check for flow source in matcher attributes. - */ - if (matcher->attr.optimize_flow_src) { - *skip_tx = - matcher->attr.optimize_flow_src == MLX5HWS_MATCHER_FLOW_SRC_WIRE; - *skip_rx = - matcher->attr.optimize_flow_src == MLX5HWS_MATCHER_FLOW_SRC_VPORT; - return; - } + return; } + + /* If no flow source was set for current rule, + * check for flow source in matcher attributes. + */ + *skip_tx = matcher->attr.optimize_flow_src == + MLX5HWS_MATCHER_FLOW_SRC_WIRE; + *skip_rx = matcher->attr.optimize_flow_src == + MLX5HWS_MATCHER_FLOW_SRC_VPORT; } static void @@ -66,7 +65,8 @@ static void hws_rule_init_dep_wqe(struct mlx5hws_send_ring_dep_wqe *dep_wqe, attr->rule_idx : 0; if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) { - hws_rule_skip(matcher, mt, attr->flow_source, &skip_rx, &skip_tx); + mlx5hws_rule_skip(matcher, attr->flow_source, + &skip_rx, &skip_tx); if (!skip_rx) { dep_wqe->rtc_0 = matcher->match_ste.rtc_0_id; @@ -129,27 +129,18 @@ static void hws_rule_gen_comp(struct mlx5hws_send_engine *queue, static void hws_rule_save_resize_info(struct mlx5hws_rule *rule, - struct mlx5hws_send_ste_attr *ste_attr, - bool is_update) + struct mlx5hws_send_ste_attr *ste_attr) { if (!mlx5hws_matcher_is_resizable(rule->matcher)) return; - if (likely(!is_update)) { + /* resize_info might already exist (if we're in update flow) */ + if (likely(!rule->resize_info)) { rule->resize_info = kzalloc(sizeof(*rule->resize_info), GFP_KERNEL); if (unlikely(!rule->resize_info)) { pr_warn("HWS: resize info isn't allocated for rule\n"); return; } - - rule->resize_info->max_stes = - rule->matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].max_stes; - rule->resize_info->action_ste_pool[0] = rule->matcher->action_ste[0].max_stes ? - rule->matcher->action_ste[0].pool : - NULL; - rule->resize_info->action_ste_pool[1] = rule->matcher->action_ste[1].max_stes ? - rule->matcher->action_ste[1].pool : - NULL; } memcpy(rule->resize_info->ctrl_seg, ste_attr->wqe_ctrl, @@ -204,84 +195,30 @@ hws_rule_load_delete_info(struct mlx5hws_rule *rule, } } -static int hws_rule_alloc_action_ste_idx(struct mlx5hws_rule *rule, - u8 action_ste_selector) +static int mlx5hws_rule_alloc_action_ste(struct mlx5hws_rule *rule, + u16 queue_id, bool skip_rx, + bool skip_tx) { struct mlx5hws_matcher *matcher = rule->matcher; - struct mlx5hws_matcher_action_ste *action_ste; - struct mlx5hws_pool_chunk ste = {0}; - int ret; - - action_ste = &matcher->action_ste[action_ste_selector]; - ste.order = ilog2(roundup_pow_of_two(action_ste->max_stes)); - ret = mlx5hws_pool_chunk_alloc(action_ste->pool, &ste); - if (unlikely(ret)) { - mlx5hws_err(matcher->tbl->ctx, - "Failed to allocate STE for rule actions"); - return ret; - } - rule->action_ste_idx = ste.offset; + struct mlx5hws_context *ctx = matcher->tbl->ctx; - return 0; + rule->action_ste.ste.order = + ilog2(roundup_pow_of_two(matcher->num_of_action_stes)); + return mlx5hws_action_ste_chunk_alloc(&ctx->action_ste_pool[queue_id], + skip_rx, skip_tx, + &rule->action_ste); } -static void hws_rule_free_action_ste_idx(struct mlx5hws_rule *rule, - u8 action_ste_selector) +void mlx5hws_rule_free_action_ste(struct mlx5hws_action_ste_chunk *action_ste) { - struct mlx5hws_matcher *matcher = rule->matcher; - struct mlx5hws_pool_chunk ste = {0}; - struct mlx5hws_pool *pool; - u8 max_stes; - - if (mlx5hws_matcher_is_resizable(matcher)) { - /* Free the original action pool if rule was resized */ - max_stes = rule->resize_info->max_stes; - pool = rule->resize_info->action_ste_pool[action_ste_selector]; - } else { - max_stes = matcher->action_ste[action_ste_selector].max_stes; - pool = matcher->action_ste[action_ste_selector].pool; - } - - /* This release is safe only when the rule match part was deleted */ - ste.order = ilog2(roundup_pow_of_two(max_stes)); - ste.offset = rule->action_ste_idx; - - mlx5hws_pool_chunk_free(pool, &ste); -} - -static int hws_rule_alloc_action_ste(struct mlx5hws_rule *rule, - struct mlx5hws_rule_attr *attr) -{ - int action_ste_idx; - int ret; - - ret = hws_rule_alloc_action_ste_idx(rule, 0); - if (unlikely(ret)) - return ret; - - action_ste_idx = rule->action_ste_idx; - - ret = hws_rule_alloc_action_ste_idx(rule, 1); - if (unlikely(ret)) { - hws_rule_free_action_ste_idx(rule, 0); - return ret; - } - - /* Both pools have to return the same index */ - if (unlikely(rule->action_ste_idx != action_ste_idx)) { - pr_warn("HWS: allocation of action STE failed - pool indexes mismatch\n"); - return -EINVAL; - } - - return 0; -} + if (!action_ste->action_tbl) + return; -void mlx5hws_rule_free_action_ste(struct mlx5hws_rule *rule) -{ - if (rule->action_ste_idx > -1) { - hws_rule_free_action_ste_idx(rule, 1); - hws_rule_free_action_ste_idx(rule, 0); - } + /* This release is safe only when the rule match STE was deleted + * (when the rule is being deleted) or replaced with the new STE that + * isn't pointing to old action STEs (when the rule is being updated). + */ + mlx5hws_action_ste_chunk_free(action_ste); } static void hws_rule_create_init(struct mlx5hws_rule *rule, @@ -298,14 +235,17 @@ static void hws_rule_create_init(struct mlx5hws_rule *rule, /* In update we use these rtc's */ rule->rtc_0 = 0; rule->rtc_1 = 0; - rule->action_ste_selector = 0; + + rule->status = MLX5HWS_RULE_STATUS_CREATING; } else { - rule->action_ste_selector = !rule->action_ste_selector; + rule->status = MLX5HWS_RULE_STATUS_UPDATING; + /* Save the old action STE info so we can free it after writing + * new action STEs and a corresponding match STE. + */ + rule->old_action_ste = rule->action_ste; } rule->pending_wqes = 0; - rule->action_ste_idx = -1; - rule->status = MLX5HWS_RULE_STATUS_CREATING; /* Init default send STE attributes */ ste_attr->gta_opcode = MLX5HWS_WQE_GTA_OP_ACTIVATE; @@ -315,8 +255,7 @@ static void hws_rule_create_init(struct mlx5hws_rule *rule, /* Init default action apply */ apply->tbl_type = tbl->type; - apply->common_res = &ctx->common_res[tbl->type]; - apply->jump_to_action_stc = matcher->action_ste[0].stc.offset; + apply->common_res = &ctx->common_res; apply->require_dep = 0; } @@ -332,8 +271,6 @@ static void hws_rule_move_init(struct mlx5hws_rule *rule, rule->rtc_1 = 0; rule->pending_wqes = 0; - rule->action_ste_idx = -1; - rule->action_ste_selector = 0; rule->status = MLX5HWS_RULE_STATUS_CREATING; rule->resize_info->state = MLX5HWS_RULE_RESIZE_STATE_WRITING; } @@ -394,21 +331,24 @@ static int hws_rule_create_hws(struct mlx5hws_rule *rule, if (action_stes) { /* Allocate action STEs for rules that need more than match STE */ - if (!is_update) { - ret = hws_rule_alloc_action_ste(rule, attr); - if (ret) { - mlx5hws_err(ctx, "Failed to allocate action memory %d", ret); - mlx5hws_send_abort_new_dep_wqe(queue); - return ret; - } + ret = mlx5hws_rule_alloc_action_ste(rule, attr->queue_id, + !!ste_attr.rtc_0, + !!ste_attr.rtc_1); + if (ret) { + mlx5hws_err(ctx, "Failed to allocate action memory %d", ret); + mlx5hws_send_abort_new_dep_wqe(queue); + return ret; } + apply.jump_to_action_stc = + rule->action_ste.action_tbl->stc.offset; /* Skip RX/TX based on the dep_wqe init */ ste_attr.rtc_0 = dep_wqe->rtc_0 ? - matcher->action_ste[rule->action_ste_selector].rtc_0_id : 0; + rule->action_ste.action_tbl->rtc_0_id : 0; ste_attr.rtc_1 = dep_wqe->rtc_1 ? - matcher->action_ste[rule->action_ste_selector].rtc_1_id : 0; + rule->action_ste.action_tbl->rtc_1_id : 0; /* Action STEs are written to a specific index last to first */ - ste_attr.direct_index = rule->action_ste_idx + action_stes; + ste_attr.direct_index = + rule->action_ste.ste.offset + action_stes; apply.next_direct_idx = ste_attr.direct_index; } else { apply.next_direct_idx = 0; @@ -459,7 +399,7 @@ static int hws_rule_create_hws(struct mlx5hws_rule *rule, if (!is_update) hws_rule_save_delete_info(rule, &ste_attr); - hws_rule_save_resize_info(rule, &ste_attr, is_update); + hws_rule_save_resize_info(rule, &ste_attr); mlx5hws_send_engine_inc_rule(queue); if (!attr->burst) @@ -480,7 +420,10 @@ static void hws_rule_destroy_failed_hws(struct mlx5hws_rule *rule, attr->user_data, MLX5HWS_RULE_STATUS_DELETED); /* Rule failed now we can safely release action STEs */ - mlx5hws_rule_free_action_ste(rule); + mlx5hws_rule_free_action_ste(&rule->action_ste); + + /* Perhaps the rule failed updating - release old action STEs as well */ + mlx5hws_rule_free_action_ste(&rule->old_action_ste); /* Clear complex tag */ hws_rule_clear_delete_info(rule); @@ -517,7 +460,8 @@ static int hws_rule_destroy_hws(struct mlx5hws_rule *rule, } /* Rule is not completed yet */ - if (rule->status == MLX5HWS_RULE_STATUS_CREATING) + if (rule->status == MLX5HWS_RULE_STATUS_CREATING || + rule->status == MLX5HWS_RULE_STATUS_UPDATING) return -EBUSY; /* Rule failed and doesn't require cleanup */ @@ -534,7 +478,7 @@ static int hws_rule_destroy_hws(struct mlx5hws_rule *rule, hws_rule_gen_comp(queue, rule, false, attr->user_data, MLX5HWS_RULE_STATUS_DELETED); - mlx5hws_rule_free_action_ste(rule); + mlx5hws_rule_free_action_ste(&rule->action_ste); mlx5hws_rule_clear_resize_info(rule); return 0; } @@ -711,6 +655,124 @@ int mlx5hws_rule_move_hws_add(struct mlx5hws_rule *rule, return 0; } +static u8 hws_rule_ethertype_to_matcher_ipv(u32 ethertype) +{ + switch (ethertype) { + case ETH_P_IP: + return MLX5HWS_MATCHER_IPV_4; + case ETH_P_IPV6: + return MLX5HWS_MATCHER_IPV_6; + default: + return MLX5HWS_MATCHER_IPV_UNSET; + } +} + +static u8 hws_rule_ip_version_to_matcher_ipv(u32 ip_version) +{ + switch (ip_version) { + case 4: + return MLX5HWS_MATCHER_IPV_4; + case 6: + return MLX5HWS_MATCHER_IPV_6; + default: + return MLX5HWS_MATCHER_IPV_UNSET; + } +} + +static int hws_rule_check_outer_ip_version(struct mlx5hws_matcher *matcher, + u32 *match_param) +{ + struct mlx5hws_context *ctx = matcher->tbl->ctx; + u8 outer_ipv_ether = MLX5HWS_MATCHER_IPV_UNSET; + u8 outer_ipv_ip = MLX5HWS_MATCHER_IPV_UNSET; + u8 outer_ipv, ver; + + if (matcher->matches_outer_ethertype) { + ver = MLX5_GET(fte_match_param, match_param, + outer_headers.ethertype); + outer_ipv_ether = hws_rule_ethertype_to_matcher_ipv(ver); + } + if (matcher->matches_outer_ip_version) { + ver = MLX5_GET(fte_match_param, match_param, + outer_headers.ip_version); + outer_ipv_ip = hws_rule_ip_version_to_matcher_ipv(ver); + } + + if (outer_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET && + outer_ipv_ip != MLX5HWS_MATCHER_IPV_UNSET && + outer_ipv_ether != outer_ipv_ip) { + mlx5hws_err(ctx, "Rule matches on inconsistent outer ethertype and ip version\n"); + return -EINVAL; + } + + outer_ipv = outer_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET ? + outer_ipv_ether : outer_ipv_ip; + if (outer_ipv != MLX5HWS_MATCHER_IPV_UNSET && + matcher->outer_ip_version != MLX5HWS_MATCHER_IPV_UNSET && + outer_ipv != matcher->outer_ip_version) { + mlx5hws_err(ctx, "Matcher and rule disagree on outer IP version\n"); + return -EINVAL; + } + matcher->outer_ip_version = outer_ipv; + + return 0; +} + +static int hws_rule_check_inner_ip_version(struct mlx5hws_matcher *matcher, + u32 *match_param) +{ + struct mlx5hws_context *ctx = matcher->tbl->ctx; + u8 inner_ipv_ether = MLX5HWS_MATCHER_IPV_UNSET; + u8 inner_ipv_ip = MLX5HWS_MATCHER_IPV_UNSET; + u8 inner_ipv, ver; + + if (matcher->matches_inner_ethertype) { + ver = MLX5_GET(fte_match_param, match_param, + inner_headers.ethertype); + inner_ipv_ether = hws_rule_ethertype_to_matcher_ipv(ver); + } + if (matcher->matches_inner_ip_version) { + ver = MLX5_GET(fte_match_param, match_param, + inner_headers.ip_version); + inner_ipv_ip = hws_rule_ip_version_to_matcher_ipv(ver); + } + + if (inner_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET && + inner_ipv_ip != MLX5HWS_MATCHER_IPV_UNSET && + inner_ipv_ether != inner_ipv_ip) { + mlx5hws_err(ctx, "Rule matches on inconsistent inner ethertype and ip version\n"); + return -EINVAL; + } + + inner_ipv = inner_ipv_ether != MLX5HWS_MATCHER_IPV_UNSET ? + inner_ipv_ether : inner_ipv_ip; + if (inner_ipv != MLX5HWS_MATCHER_IPV_UNSET && + matcher->inner_ip_version != MLX5HWS_MATCHER_IPV_UNSET && + inner_ipv != matcher->inner_ip_version) { + mlx5hws_err(ctx, "Matcher and rule disagree on inner IP version\n"); + return -EINVAL; + } + matcher->inner_ip_version = inner_ipv; + + return 0; +} + +static int hws_rule_check_ip_version(struct mlx5hws_matcher *matcher, + u32 *match_param) +{ + int ret; + + ret = hws_rule_check_outer_ip_version(matcher, match_param); + if (unlikely(ret)) + return ret; + + ret = hws_rule_check_inner_ip_version(matcher, match_param); + if (unlikely(ret)) + return ret; + + return 0; +} + int mlx5hws_rule_create(struct mlx5hws_matcher *matcher, u8 mt_idx, u32 *match_param, @@ -721,6 +783,10 @@ int mlx5hws_rule_create(struct mlx5hws_matcher *matcher, { int ret; + ret = hws_rule_check_ip_version(matcher, match_param); + if (unlikely(ret)) + return ret; + rule_handle->matcher = matcher; ret = hws_rule_enqueue_precheck_create(rule_handle, attr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h index 495cdd17e9f3..d0f082b8dbf5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h @@ -15,6 +15,8 @@ enum mlx5hws_rule_status { MLX5HWS_RULE_STATUS_UNKNOWN, MLX5HWS_RULE_STATUS_CREATING, MLX5HWS_RULE_STATUS_CREATED, + MLX5HWS_RULE_STATUS_UPDATING, + MLX5HWS_RULE_STATUS_UPDATED, MLX5HWS_RULE_STATUS_DELETING, MLX5HWS_RULE_STATUS_DELETED, MLX5HWS_RULE_STATUS_FAILING, @@ -42,12 +44,10 @@ struct mlx5hws_rule_match_tag { }; struct mlx5hws_rule_resize_info { - struct mlx5hws_pool *action_ste_pool[2]; u32 rtc_0; u32 rtc_1; u32 rule_idx; u8 state; - u8 max_stes; u8 ctrl_seg[MLX5HWS_WQE_SZ_GTA_CTRL]; /* Ctrl segment of STE: 48 bytes */ u8 data_seg[MLX5HWS_WQE_SZ_GTA_DATA]; /* Data segment of STE: 64 bytes */ }; @@ -58,18 +58,21 @@ struct mlx5hws_rule { struct mlx5hws_rule_match_tag tag; struct mlx5hws_rule_resize_info *resize_info; }; + struct mlx5hws_action_ste_chunk action_ste; + struct mlx5hws_action_ste_chunk old_action_ste; u32 rtc_0; /* The RTC into which the STE was inserted */ u32 rtc_1; /* The RTC into which the STE was inserted */ - int action_ste_idx; /* STE array index */ u8 status; /* enum mlx5hws_rule_status */ - u8 action_ste_selector; /* For rule update - which action STE is in use */ u8 pending_wqes; bool skip_delete; /* For complex rules - another rule with same tag * still exists, so don't actually delete this rule. */ }; -void mlx5hws_rule_free_action_ste(struct mlx5hws_rule *rule); +void mlx5hws_rule_skip(struct mlx5hws_matcher *matcher, u32 flow_source, + bool *skip_rx, bool *skip_tx); + +void mlx5hws_rule_free_action_ste(struct mlx5hws_action_ste_chunk *action_ste); int mlx5hws_rule_move_hws_remove(struct mlx5hws_rule *rule, void *queue, void *user_data); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c index 424797b6d802..b0595c9b09e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c @@ -344,18 +344,133 @@ hws_send_engine_update_rule_resize(struct mlx5hws_send_engine *queue, } } +static void hws_send_engine_dump_error_cqe(struct mlx5hws_send_engine *queue, + struct mlx5hws_send_ring_priv *priv, + struct mlx5_cqe64 *cqe) +{ + u8 wqe_opcode = cqe ? be32_to_cpu(cqe->sop_drop_qpn) >> 24 : 0; + struct mlx5hws_context *ctx = priv->rule->matcher->tbl->ctx; + u32 opcode = cqe ? get_cqe_opcode(cqe) : 0; + struct mlx5hws_rule *rule = priv->rule; + + /* If something bad happens and lots of rules are failing, we don't + * want to pollute dmesg. Print only the first bad cqe per engine, + * the one that started the avalanche. + */ + if (queue->error_cqe_printed) + return; + + queue->error_cqe_printed = true; + + if (mlx5hws_rule_move_in_progress(rule)) + mlx5hws_err(ctx, + "--- rule 0x%08llx: error completion moving rule: phase %s, wqes left %d\n", + HWS_PTR_TO_ID(rule), + rule->resize_info->state == + MLX5HWS_RULE_RESIZE_STATE_WRITING ? "WRITING" : + rule->resize_info->state == + MLX5HWS_RULE_RESIZE_STATE_DELETING ? "DELETING" : + "UNKNOWN", + rule->pending_wqes); + else + mlx5hws_err(ctx, + "--- rule 0x%08llx: error completion %s (%d), wqes left %d\n", + HWS_PTR_TO_ID(rule), + rule->status == + MLX5HWS_RULE_STATUS_CREATING ? "CREATING" : + rule->status == + MLX5HWS_RULE_STATUS_DELETING ? "DELETING" : + rule->status == + MLX5HWS_RULE_STATUS_FAILING ? "FAILING" : + rule->status == + MLX5HWS_RULE_STATUS_UPDATING ? "UPDATING" : "NA", + rule->status, + rule->pending_wqes); + + mlx5hws_err(ctx, " rule 0x%08llx: matcher 0x%llx %s\n", + HWS_PTR_TO_ID(rule), + HWS_PTR_TO_ID(rule->matcher), + (rule->matcher->flags & MLX5HWS_MATCHER_FLAGS_ISOLATED) ? + "(isolated)" : ""); + + if (!cqe) { + mlx5hws_err(ctx, " rule 0x%08llx: no CQE\n", + HWS_PTR_TO_ID(rule)); + return; + } + + mlx5hws_err(ctx, " rule 0x%08llx: cqe->opcode = %d %s\n", + HWS_PTR_TO_ID(rule), opcode, + opcode == MLX5_CQE_REQ ? "(MLX5_CQE_REQ)" : + opcode == MLX5_CQE_REQ_ERR ? "(MLX5_CQE_REQ_ERR)" : " "); + + if (opcode == MLX5_CQE_REQ_ERR) { + struct mlx5_err_cqe *err_cqe = (struct mlx5_err_cqe *)cqe; + + mlx5hws_err(ctx, + " rule 0x%08llx: |--- hw_error_syndrome = 0x%x\n", + HWS_PTR_TO_ID(rule), + err_cqe->rsvd1[16]); + mlx5hws_err(ctx, + " rule 0x%08llx: |--- hw_syndrome_type = 0x%x\n", + HWS_PTR_TO_ID(rule), + err_cqe->rsvd1[17] >> 4); + mlx5hws_err(ctx, + " rule 0x%08llx: |--- vendor_err_synd = 0x%x\n", + HWS_PTR_TO_ID(rule), + err_cqe->vendor_err_synd); + mlx5hws_err(ctx, + " rule 0x%08llx: |--- syndrome = 0x%x\n", + HWS_PTR_TO_ID(rule), + err_cqe->syndrome); + } + + mlx5hws_err(ctx, + " rule 0x%08llx: cqe->byte_cnt = 0x%08x\n", + HWS_PTR_TO_ID(rule), be32_to_cpu(cqe->byte_cnt)); + mlx5hws_err(ctx, + " rule 0x%08llx: |-- UPDATE STATUS = %s\n", + HWS_PTR_TO_ID(rule), + (be32_to_cpu(cqe->byte_cnt) & 0x80000000) ? + "FAILURE" : "SUCCESS"); + mlx5hws_err(ctx, + " rule 0x%08llx: |------- SYNDROME = %s\n", + HWS_PTR_TO_ID(rule), + ((be32_to_cpu(cqe->byte_cnt) & 0x00000003) == 1) ? + "SET_FLOW_FAIL" : + ((be32_to_cpu(cqe->byte_cnt) & 0x00000003) == 2) ? + "DISABLE_FLOW_FAIL" : "UNKNOWN"); + mlx5hws_err(ctx, + " rule 0x%08llx: cqe->sop_drop_qpn = 0x%08x\n", + HWS_PTR_TO_ID(rule), be32_to_cpu(cqe->sop_drop_qpn)); + mlx5hws_err(ctx, + " rule 0x%08llx: |-send wqe opcode = 0x%02x %s\n", + HWS_PTR_TO_ID(rule), wqe_opcode, + wqe_opcode == MLX5HWS_WQE_OPCODE_TBL_ACCESS ? + "(MLX5HWS_WQE_OPCODE_TBL_ACCESS)" : "(UNKNOWN)"); + mlx5hws_err(ctx, + " rule 0x%08llx: |------------ qpn = 0x%06x\n", + HWS_PTR_TO_ID(rule), + be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff); +} + static void hws_send_engine_update_rule(struct mlx5hws_send_engine *queue, struct mlx5hws_send_ring_priv *priv, u16 wqe_cnt, - enum mlx5hws_flow_op_status *status) + enum mlx5hws_flow_op_status *status, + struct mlx5_cqe64 *cqe) { priv->rule->pending_wqes--; - if (*status == MLX5HWS_FLOW_OP_ERROR) { + if (unlikely(*status == MLX5HWS_FLOW_OP_ERROR)) { if (priv->retry_id) { + /* If there is a retry_id, then it's not an error yet, + * retry to insert this rule in the collision RTC. + */ hws_send_engine_retry_post_send(queue, priv, wqe_cnt); return; } + hws_send_engine_dump_error_cqe(queue, priv, cqe); /* Some part of the rule failed */ priv->rule->status = MLX5HWS_RULE_STATUS_FAILING; *priv->used_id = 0; @@ -377,17 +492,25 @@ static void hws_send_engine_update_rule(struct mlx5hws_send_engine *queue, *status = MLX5HWS_FLOW_OP_ERROR; } else { - /* Increase the status, this only works on good flow as the enum - * is arrange it away creating -> created -> deleting -> deleted + /* Increase the status, this only works on good flow as + * the enum is arranged this way: + * - creating -> created + * - updating -> updated + * - deleting -> deleted */ priv->rule->status++; *status = MLX5HWS_FLOW_OP_SUCCESS; - /* Rule was deleted now we can safely release action STEs - * and clear resize info - */ if (priv->rule->status == MLX5HWS_RULE_STATUS_DELETED) { - mlx5hws_rule_free_action_ste(priv->rule); + /* Rule was deleted, now we can safely release + * action STEs and clear resize info + */ + mlx5hws_rule_free_action_ste(&priv->rule->action_ste); mlx5hws_rule_clear_resize_info(priv->rule); + } else if (priv->rule->status == MLX5HWS_RULE_STATUS_UPDATED) { + /* Rule was updated, free the old action STEs */ + mlx5hws_rule_free_action_ste(&priv->rule->old_action_ste); + /* Update completed - move the rule back to "created" */ + priv->rule->status = MLX5HWS_RULE_STATUS_CREATED; } } } @@ -412,7 +535,8 @@ static void hws_send_engine_update(struct mlx5hws_send_engine *queue, if (priv->user_data) { if (priv->rule) { - hws_send_engine_update_rule(queue, priv, wqe_cnt, &status); + hws_send_engine_update_rule(queue, priv, wqe_cnt, + &status, cqe); /* Completion is provided on the last rule WQE */ if (priv->rule->pending_wqes) return; @@ -633,6 +757,7 @@ static int hws_send_ring_create_sq(struct mlx5_core_dev *mdev, u32 pdn, MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, flush_in_error_en, 1); + MLX5_SET(sqc, sqc, non_wire, 1); ts_format = mlx5_is_real_time_sq(mdev) ? MLX5_TIMESTAMP_FORMAT_REAL_TIME : MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; @@ -839,7 +964,6 @@ static int hws_send_ring_open_cq(struct mlx5_core_dev *mdev, return -ENOMEM; MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index); - MLX5_SET(cqc, cqc_data, cqe_sz, queue->num_entries); MLX5_SET(cqc, cqc_data, log_cq_size, ilog2(queue->num_entries)); err = hws_send_ring_alloc_cq(mdev, numa_node, queue, cqc_data, cq); @@ -896,15 +1020,18 @@ close_cq: return err; } -void mlx5hws_send_queue_close(struct mlx5hws_send_engine *queue) +static void mlx5hws_send_queue_close(struct mlx5hws_send_engine *queue) { + if (!queue->num_entries) + return; /* this queue wasn't initialized */ + hws_send_ring_close(queue); kfree(queue->completed.entries); } -int mlx5hws_send_queue_open(struct mlx5hws_context *ctx, - struct mlx5hws_send_engine *queue, - u16 queue_size) +static int mlx5hws_send_queue_open(struct mlx5hws_context *ctx, + struct mlx5hws_send_engine *queue, + u16 queue_size) { int err; @@ -990,6 +1117,7 @@ static int hws_bwc_send_queues_init(struct mlx5hws_context *ctx) for (i = 0; i < bwc_queues; i++) { mutex_init(&ctx->bwc_send_queue_locks[i]); lockdep_register_key(ctx->bwc_lock_class_keys + i); + lockdep_set_class(ctx->bwc_send_queue_locks + i, ctx->bwc_lock_class_keys + i); } return 0; @@ -1004,7 +1132,7 @@ int mlx5hws_send_queues_open(struct mlx5hws_context *ctx, u16 queue_size) { int err = 0; - u32 i; + int i = 0; /* Open one extra queue for control path */ ctx->queues = queues + 1; @@ -1020,7 +1148,13 @@ int mlx5hws_send_queues_open(struct mlx5hws_context *ctx, goto free_bwc_locks; } - for (i = 0; i < ctx->queues; i++) { + /* If native API isn't supported, skip the unused native queues: + * initialize BWC queues and control queue only. + */ + if (!mlx5hws_context_native_supported(ctx)) + i = mlx5hws_bwc_get_queue_id(ctx, 0); + + for (; i < ctx->queues; i++) { err = mlx5hws_send_queue_open(ctx, &ctx->send_queue[i], queue_size); if (err) goto close_send_queues; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h index b50825d6dc53..3fb8e99309b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h @@ -140,6 +140,7 @@ struct mlx5hws_send_engine { u16 used_entries; u16 num_entries; bool err; + bool error_cqe_printed; struct mutex lock; /* Protects the send engine */ }; @@ -189,12 +190,6 @@ void mlx5hws_send_abort_new_dep_wqe(struct mlx5hws_send_engine *queue); void mlx5hws_send_all_dep_wqe(struct mlx5hws_send_engine *queue); -void mlx5hws_send_queue_close(struct mlx5hws_send_engine *queue); - -int mlx5hws_send_queue_open(struct mlx5hws_context *ctx, - struct mlx5hws_send_engine *queue, - u16 queue_size); - void mlx5hws_send_queues_close(struct mlx5hws_context *ctx); int mlx5hws_send_queues_open(struct mlx5hws_context *ctx, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c index 9576e02d00c3..6113383ae47b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c @@ -9,6 +9,7 @@ u32 mlx5hws_table_get_id(struct mlx5hws_table *tbl) } static void hws_table_init_next_ft_attr(struct mlx5hws_table *tbl, + u16 uid, struct mlx5hws_cmd_ft_create_attr *ft_attr) { ft_attr->type = tbl->fw_ft_type; @@ -16,7 +17,9 @@ static void hws_table_init_next_ft_attr(struct mlx5hws_table *tbl, ft_attr->level = tbl->ctx->caps->fdb_ft.max_level - 1; else ft_attr->level = tbl->ctx->caps->nic_ft.max_level - 1; + ft_attr->rtc_valid = true; + ft_attr->uid = uid; } static void hws_table_set_cap_attr(struct mlx5hws_table *tbl, @@ -37,6 +40,7 @@ static void hws_table_set_cap_attr(struct mlx5hws_table *tbl, } static int hws_table_up_default_fdb_miss_tbl(struct mlx5hws_table *tbl) +__must_hold(&tbl->ctx->ctrl_lock) { struct mlx5hws_cmd_ft_create_attr ft_attr = {0}; struct mlx5hws_cmd_set_fte_attr fte_attr = {0}; @@ -48,8 +52,8 @@ static int hws_table_up_default_fdb_miss_tbl(struct mlx5hws_table *tbl) if (tbl->type != MLX5HWS_TABLE_TYPE_FDB) return 0; - if (ctx->common_res[tbl_type].default_miss) { - ctx->common_res[tbl_type].default_miss->refcount++; + if (ctx->common_res.default_miss) { + ctx->common_res.default_miss->refcount++; return 0; } @@ -70,29 +74,28 @@ static int hws_table_up_default_fdb_miss_tbl(struct mlx5hws_table *tbl) return -EINVAL; } - /* ctx->ctrl_lock must be held here */ - ctx->common_res[tbl_type].default_miss = default_miss; - ctx->common_res[tbl_type].default_miss->refcount++; + ctx->common_res.default_miss = default_miss; + ctx->common_res.default_miss->refcount++; return 0; } /* Called under ctx->ctrl_lock */ static void hws_table_down_default_fdb_miss_tbl(struct mlx5hws_table *tbl) +__must_hold(&tbl->ctx->ctrl_lock) { struct mlx5hws_cmd_forward_tbl *default_miss; struct mlx5hws_context *ctx = tbl->ctx; - u8 tbl_type = tbl->type; if (tbl->type != MLX5HWS_TABLE_TYPE_FDB) return; - default_miss = ctx->common_res[tbl_type].default_miss; + default_miss = ctx->common_res.default_miss; if (--default_miss->refcount) return; mlx5hws_cmd_forward_tbl_destroy(ctx->mdev, default_miss); - ctx->common_res[tbl_type].default_miss = NULL; + ctx->common_res.default_miss = NULL; } static int hws_table_connect_to_default_miss_tbl(struct mlx5hws_table *tbl, u32 ft_id) @@ -119,12 +122,12 @@ static int hws_table_connect_to_default_miss_tbl(struct mlx5hws_table *tbl, u32 int mlx5hws_table_create_default_ft(struct mlx5_core_dev *mdev, struct mlx5hws_table *tbl, - u32 *ft_id) + u16 uid, u32 *ft_id) { struct mlx5hws_cmd_ft_create_attr ft_attr = {0}; int ret; - hws_table_init_next_ft_attr(tbl, &ft_attr); + hws_table_init_next_ft_attr(tbl, uid, &ft_attr); hws_table_set_cap_attr(tbl, &ft_attr); ret = mlx5hws_cmd_flow_table_create(mdev, &ft_attr, ft_id); @@ -189,7 +192,10 @@ static int hws_table_init(struct mlx5hws_table *tbl) } mutex_lock(&ctx->ctrl_lock); - ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, tbl, &tbl->ft_id); + ret = mlx5hws_table_create_default_ft(tbl->ctx->mdev, + tbl, + tbl->uid, + &tbl->ft_id); if (ret) { mlx5hws_err(tbl->ctx, "Failed to create flow table object\n"); mutex_unlock(&ctx->ctrl_lock); @@ -239,6 +245,7 @@ struct mlx5hws_table *mlx5hws_table_create(struct mlx5hws_context *ctx, tbl->ctx = ctx; tbl->type = attr->type; tbl->level = attr->level; + tbl->uid = attr->uid; ret = hws_table_init(tbl); if (ret) { @@ -342,10 +349,10 @@ int mlx5hws_table_ft_set_next_rtc(struct mlx5hws_context *ctx, return mlx5hws_cmd_flow_table_modify(ctx->mdev, &ft_attr, ft_id); } -static int hws_table_ft_set_next_ft(struct mlx5hws_context *ctx, - u32 ft_id, - u32 fw_ft_type, - u32 next_ft_id) +int mlx5hws_table_ft_set_next_ft(struct mlx5hws_context *ctx, + u32 ft_id, + u32 fw_ft_type, + u32 next_ft_id) { struct mlx5hws_cmd_ft_modify_attr ft_attr = {0}; @@ -389,10 +396,10 @@ int mlx5hws_table_connect_to_miss_table(struct mlx5hws_table *src_tbl, if (dst_tbl) { if (list_empty(&dst_tbl->matchers_list)) { /* Connect src_tbl last_ft to dst_tbl start anchor */ - ret = hws_table_ft_set_next_ft(src_tbl->ctx, - last_ft_id, - src_tbl->fw_ft_type, - dst_tbl->ft_id); + ret = mlx5hws_table_ft_set_next_ft(src_tbl->ctx, + last_ft_id, + src_tbl->fw_ft_type, + dst_tbl->ft_id); if (ret) return ret; @@ -478,15 +485,9 @@ int mlx5hws_table_set_default_miss(struct mlx5hws_table *tbl, if (old_miss_tbl) list_del_init(&tbl->default_miss.next); - old_miss_tbl = tbl->default_miss.miss_tbl; - if (old_miss_tbl) - list_del_init(&old_miss_tbl->default_miss.head); - if (miss_tbl) list_add(&tbl->default_miss.next, &miss_tbl->default_miss.head); - mutex_unlock(&ctx->ctrl_lock); - return 0; out: mutex_unlock(&ctx->ctrl_lock); return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h index dd50420eec9e..1246f9bd8422 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.h @@ -18,6 +18,7 @@ struct mlx5hws_table { enum mlx5hws_table_type type; u32 fw_ft_type; u32 level; + u16 uid; struct list_head matchers_list; struct list_head tbl_list_node; struct mlx5hws_default_miss default_miss; @@ -47,7 +48,7 @@ u32 mlx5hws_table_get_res_fw_ft_type(enum mlx5hws_table_type tbl_type, int mlx5hws_table_create_default_ft(struct mlx5_core_dev *mdev, struct mlx5hws_table *tbl, - u32 *ft_id); + u16 uid, u32 *ft_id); void mlx5hws_table_destroy_default_ft(struct mlx5hws_table *tbl, u32 ft_id); @@ -65,4 +66,9 @@ int mlx5hws_table_ft_set_next_rtc(struct mlx5hws_context *ctx, u32 rtc_0_id, u32 rtc_1_id); +int mlx5hws_table_ft_set_next_ft(struct mlx5hws_context *ctx, + u32 ft_id, + u32 fw_ft_type, + u32 next_ft_id); + #endif /* MLX5HWS_TABLE_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c index 3d74109f8230..65740bb68b09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c @@ -8,7 +8,7 @@ #define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \ ((dmn)->info.caps.dmn_type##_sw_owner || \ ((dmn)->info.caps.dmn_type##_sw_owner_v2 && \ - (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_7)) + (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_8)) bool mlx5dr_domain_is_support_ptrn_arg(struct mlx5dr_domain *dmn) { @@ -297,7 +297,9 @@ dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport) if (ret) { mlx5dr_dbg(dmn, "Couldn't insert new vport into xarray (%d)\n", ret); kvfree(vport_caps); - return ERR_PTR(ret); + if (ret == -EBUSY) + return ERR_PTR(-EBUSY); + return NULL; } return vport_caps; @@ -514,30 +516,6 @@ def_xa_destroy: return NULL; } -/* Assure synchronization of the device steering tables with updates made by SW - * insertion. - */ -int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags) -{ - int ret = 0; - - if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) { - mlx5dr_domain_lock(dmn); - ret = mlx5dr_send_ring_force_drain(dmn); - mlx5dr_domain_unlock(dmn); - if (ret) { - mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n", - flags, ret); - return ret; - } - } - - if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW) - ret = mlx5dr_cmd_sync_steering(dmn->mdev); - - return ret; -} - int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) { if (WARN_ON_ONCE(refcount_read(&dmn->refcount) > 1)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c index 6fa06ba2d346..4fd4e8483382 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c @@ -1067,7 +1067,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, int inlen, err, eqn; void *cqc, *in; __be64 *pas; - int vector; u32 i; cq = kzalloc(sizeof(*cq), GFP_KERNEL); @@ -1096,8 +1095,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, if (!in) goto err_cqwq; - vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev); - err = mlx5_comp_eqn_get(mdev, vector, &eqn); + err = mlx5_comp_eqn_get(mdev, 0, &eqn); if (err) { kvfree(in); goto err_cqwq; @@ -1333,36 +1331,3 @@ void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, kfree(send_ring->sync_buff); kfree(send_ring); } - -int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn) -{ - struct mlx5dr_send_ring *send_ring = dmn->send_ring; - struct postsend_info send_info = {}; - u8 data[DR_STE_SIZE]; - int num_of_sends_req; - int ret; - int i; - - /* Sending this amount of requests makes sure we will get drain */ - num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2; - - /* Send fake requests forcing the last to be signaled */ - send_info.write.addr = (uintptr_t)data; - send_info.write.length = DR_STE_SIZE; - send_info.write.lkey = 0; - /* Using the sync_mr in order to write/read */ - send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr; - send_info.rkey = send_ring->sync_mr->mkey; - - for (i = 0; i < num_of_sends_req; i++) { - ret = dr_postsend_icm_data(dmn, &send_info); - if (ret) - return ret; - } - - spin_lock(&send_ring->lock); - ret = dr_handle_pending_wc(dmn, send_ring); - spin_unlock(&send_ring->lock); - - return ret; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.c index e94fbb015efa..c8b8ff80c7c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.c @@ -555,7 +555,7 @@ void mlx5dr_ste_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes) { - ste_ctx->set_actions_tx(dmn, action_type_set, ste_ctx->actions_caps, + ste_ctx->set_actions_tx(ste_ctx, dmn, action_type_set, ste_ctx->actions_caps, hw_ste_arr, attr, added_stes); } @@ -566,7 +566,7 @@ void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes) { - ste_ctx->set_actions_rx(dmn, action_type_set, ste_ctx->actions_caps, + ste_ctx->set_actions_rx(ste_ctx, dmn, action_type_set, ste_ctx->actions_caps, hw_ste_arr, attr, added_stes); } @@ -1458,6 +1458,8 @@ struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx(u8 version) return mlx5dr_ste_get_ctx_v1(); else if (version == MLX5_STEERING_FORMAT_CONNECTX_7) return mlx5dr_ste_get_ctx_v2(); + else if (version == MLX5_STEERING_FORMAT_CONNECTX_8) + return mlx5dr_ste_get_ctx_v3(); return NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h index 54a6619c3ecb..3d5afc832fa5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h @@ -160,13 +160,15 @@ struct mlx5dr_ste_ctx { /* Actions */ u32 actions_caps; - void (*set_actions_rx)(struct mlx5dr_domain *dmn, + void (*set_actions_rx)(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, u8 *hw_ste_arr, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); - void (*set_actions_tx)(struct mlx5dr_domain *dmn, + void (*set_actions_tx)(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, u8 *hw_ste_arr, @@ -197,7 +199,21 @@ struct mlx5dr_ste_ctx { u16 *used_hw_action_num); int (*alloc_modify_hdr_chunk)(struct mlx5dr_action *action); void (*dealloc_modify_hdr_chunk)(struct mlx5dr_action *action); - + /* Actions bit set */ + void (*set_encap)(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, int size); + void (*set_push_vlan)(u8 *ste, u8 *d_action, + u32 vlan_hdr); + void (*set_pop_vlan)(u8 *hw_ste_p, u8 *s_action, + u8 vlans_num); + void (*set_rx_decap)(u8 *hw_ste_p, u8 *s_action); + void (*set_encap_l3)(u8 *hw_ste_p, u8 *frst_s_action, + u8 *scnd_d_action, u32 reformat_id, + int size); + void (*set_insert_hdr)(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, + u8 anchor, u8 offset, int size); + void (*set_remove_hdr)(u8 *hw_ste_p, u8 *s_action, u8 anchor, + u8 offset, int size); /* Send */ void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size); }; @@ -205,5 +221,6 @@ struct mlx5dr_ste_ctx { struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void); struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void); struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void); +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v3(void); #endif /* _DR_STE_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v0.c index e9f6c7ed7a7b..42536bee55e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v0.c @@ -406,7 +406,8 @@ static void dr_ste_v0_arr_init_next(u8 **last_ste, } static void -dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn, +dr_ste_v0_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, u8 *last_ste, @@ -476,7 +477,8 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn, } static void -dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn, +dr_ste_v0_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, u8 *last_ste, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c index 1d49704b9542..6447efbae00d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c @@ -5,136 +5,6 @@ #include "mlx5_ifc_dr_ste_v1.h" #include "dr_ste_v1.h" -#define DR_STE_CALC_DFNR_TYPE(lookup_type, inner) \ - ((inner) ? DR_STE_V1_LU_TYPE_##lookup_type##_I : \ - DR_STE_V1_LU_TYPE_##lookup_type##_O) - -enum dr_ste_v1_entry_format { - DR_STE_V1_TYPE_BWC_BYTE = 0x0, - DR_STE_V1_TYPE_BWC_DW = 0x1, - DR_STE_V1_TYPE_MATCH = 0x2, - DR_STE_V1_TYPE_MATCH_RANGES = 0x7, -}; - -/* Lookup type is built from 2B: [ Definer mode 1B ][ Definer index 1B ] */ -enum { - DR_STE_V1_LU_TYPE_NOP = 0x0000, - DR_STE_V1_LU_TYPE_ETHL2_TNL = 0x0002, - DR_STE_V1_LU_TYPE_IBL3_EXT = 0x0102, - DR_STE_V1_LU_TYPE_ETHL2_O = 0x0003, - DR_STE_V1_LU_TYPE_IBL4 = 0x0103, - DR_STE_V1_LU_TYPE_ETHL2_I = 0x0004, - DR_STE_V1_LU_TYPE_SRC_QP_GVMI = 0x0104, - DR_STE_V1_LU_TYPE_ETHL2_SRC_O = 0x0005, - DR_STE_V1_LU_TYPE_ETHL2_HEADERS_O = 0x0105, - DR_STE_V1_LU_TYPE_ETHL2_SRC_I = 0x0006, - DR_STE_V1_LU_TYPE_ETHL2_HEADERS_I = 0x0106, - DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x0007, - DR_STE_V1_LU_TYPE_IPV6_DES_O = 0x0107, - DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x0008, - DR_STE_V1_LU_TYPE_IPV6_DES_I = 0x0108, - DR_STE_V1_LU_TYPE_ETHL4_O = 0x0009, - DR_STE_V1_LU_TYPE_IPV6_SRC_O = 0x0109, - DR_STE_V1_LU_TYPE_ETHL4_I = 0x000a, - DR_STE_V1_LU_TYPE_IPV6_SRC_I = 0x010a, - DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_O = 0x000b, - DR_STE_V1_LU_TYPE_MPLS_O = 0x010b, - DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_I = 0x000c, - DR_STE_V1_LU_TYPE_MPLS_I = 0x010c, - DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_O = 0x000d, - DR_STE_V1_LU_TYPE_GRE = 0x010d, - DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x000e, - DR_STE_V1_LU_TYPE_GENERAL_PURPOSE = 0x010e, - DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_I = 0x000f, - DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0 = 0x010f, - DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1 = 0x0110, - DR_STE_V1_LU_TYPE_FLEX_PARSER_OK = 0x0011, - DR_STE_V1_LU_TYPE_FLEX_PARSER_0 = 0x0111, - DR_STE_V1_LU_TYPE_FLEX_PARSER_1 = 0x0112, - DR_STE_V1_LU_TYPE_ETHL4_MISC_O = 0x0113, - DR_STE_V1_LU_TYPE_ETHL4_MISC_I = 0x0114, - DR_STE_V1_LU_TYPE_INVALID = 0x00ff, - DR_STE_V1_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE, -}; - -enum dr_ste_v1_header_anchors { - DR_STE_HEADER_ANCHOR_START_OUTER = 0x00, - DR_STE_HEADER_ANCHOR_1ST_VLAN = 0x02, - DR_STE_HEADER_ANCHOR_IPV6_IPV4 = 0x07, - DR_STE_HEADER_ANCHOR_INNER_MAC = 0x13, - DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4 = 0x19, -}; - -enum dr_ste_v1_action_size { - DR_STE_ACTION_SINGLE_SZ = 4, - DR_STE_ACTION_DOUBLE_SZ = 8, - DR_STE_ACTION_TRIPLE_SZ = 12, -}; - -enum dr_ste_v1_action_insert_ptr_attr { - DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE = 0, /* Regular push header (e.g. push vlan) */ - DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP = 1, /* Encapsulation / Tunneling */ - DR_STE_V1_ACTION_INSERT_PTR_ATTR_ESP = 2, /* IPsec */ -}; - -enum dr_ste_v1_action_id { - DR_STE_V1_ACTION_ID_NOP = 0x00, - DR_STE_V1_ACTION_ID_COPY = 0x05, - DR_STE_V1_ACTION_ID_SET = 0x06, - DR_STE_V1_ACTION_ID_ADD = 0x07, - DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE = 0x08, - DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER = 0x09, - DR_STE_V1_ACTION_ID_INSERT_INLINE = 0x0a, - DR_STE_V1_ACTION_ID_INSERT_POINTER = 0x0b, - DR_STE_V1_ACTION_ID_FLOW_TAG = 0x0c, - DR_STE_V1_ACTION_ID_QUEUE_ID_SEL = 0x0d, - DR_STE_V1_ACTION_ID_ACCELERATED_LIST = 0x0e, - DR_STE_V1_ACTION_ID_MODIFY_LIST = 0x0f, - DR_STE_V1_ACTION_ID_ASO = 0x12, - DR_STE_V1_ACTION_ID_TRAILER = 0x13, - DR_STE_V1_ACTION_ID_COUNTER_ID = 0x14, - DR_STE_V1_ACTION_ID_MAX = 0x21, - /* use for special cases */ - DR_STE_V1_ACTION_ID_SPECIAL_ENCAP_L3 = 0x22, -}; - -enum { - DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_0 = 0x00, - DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1 = 0x01, - DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2 = 0x02, - DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08, - DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09, - DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e, - DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0 = 0x18, - DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_1 = 0x19, - DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40, - DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41, - DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44, - DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45, - DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46, - DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47, - DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c, - DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d, - DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e, - DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, - DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, - DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, - DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, - DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, - DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, - DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0 = 0x8c, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1 = 0x8d, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0 = 0x8e, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1 = 0x8f, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0 = 0x90, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1 = 0x91, -}; - -enum dr_ste_v1_aso_ctx_type { - DR_STE_V1_ASO_CTX_TYPE_POLICERS = 0x2, -}; - static const struct mlx5dr_ste_action_modify_field dr_ste_v1_action_modify_field_arr[] = { [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { .hw_field = DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31, @@ -379,13 +249,12 @@ static void dr_ste_v1_set_counter_id(u8 *hw_ste_p, u32 ctr_id) MLX5_SET(ste_match_bwc_v1, hw_ste_p, counter_id, ctr_id); } -static void dr_ste_v1_set_reparse(u8 *hw_ste_p) +void dr_ste_v1_set_reparse(u8 *hw_ste_p) { MLX5_SET(ste_match_bwc_v1, hw_ste_p, reparse, 1); } -static void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action, - u32 reformat_id, int size) +void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, int size) { MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); @@ -397,10 +266,10 @@ static void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action, dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, - u32 reformat_id, - u8 anchor, u8 offset, - int size) +void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, + u8 anchor, u8 offset, + int size) { MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); @@ -417,9 +286,9 @@ static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, - u8 anchor, u8 offset, - int size) +void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, + u8 anchor, u8 offset, + int size) { MLX5_SET(ste_single_action_remove_header_size_v1, s_action, action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); @@ -432,8 +301,7 @@ static void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_push_vlan(u8 *hw_ste_p, u8 *d_action, - u32 vlan_hdr) +void dr_ste_v1_set_push_vlan(u8 *hw_ste_p, u8 *d_action, u32 vlan_hdr) { MLX5_SET(ste_double_action_insert_with_inline_v1, d_action, action_id, DR_STE_V1_ACTION_ID_INSERT_INLINE); @@ -446,7 +314,7 @@ static void dr_ste_v1_set_push_vlan(u8 *hw_ste_p, u8 *d_action, dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num) +void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num) { MLX5_SET(ste_single_action_remove_header_size_v1, s_action, action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); @@ -459,11 +327,8 @@ static void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num) dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, - u8 *frst_s_action, - u8 *scnd_d_action, - u32 reformat_id, - int size) +void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, u8 *frst_s_action, u8 *scnd_d_action, + u32 reformat_id, int size) { /* Remove L2 headers */ MLX5_SET(ste_single_action_remove_header_v1, frst_s_action, action_id, @@ -483,7 +348,7 @@ static void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action) +void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action) { MLX5_SET(ste_single_action_remove_header_v1, s_action, action_id, DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); @@ -620,7 +485,8 @@ static void dr_ste_v1_arr_init_next_match_range(u8 **last_ste, dr_ste_v1_set_entry_type(*last_ste, DR_STE_V1_TYPE_MATCH_RANGES); } -void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, +void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, u8 *last_ste, @@ -640,7 +506,7 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count); + ste_ctx->set_pop_vlan(last_ste, action, attr->vlans.count); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; @@ -677,8 +543,8 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, action_sz = DR_STE_ACTION_TRIPLE_SZ; allow_encap = true; } - dr_ste_v1_set_push_vlan(last_ste, action, - attr->vlans.headers[i]); + ste_ctx->set_push_vlan(last_ste, action, + attr->vlans.headers[i]); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; } @@ -691,9 +557,9 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, action_sz = DR_STE_ACTION_TRIPLE_SZ; allow_encap = true; } - dr_ste_v1_set_encap(last_ste, action, - attr->reformat.id, - attr->reformat.size); + ste_ctx->set_encap(last_ste, action, + attr->reformat.id, + attr->reformat.size); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; } else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) { @@ -706,10 +572,10 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, } d_action = action + DR_STE_ACTION_SINGLE_SZ; - dr_ste_v1_set_encap_l3(last_ste, - action, d_action, - attr->reformat.id, - attr->reformat.size); + ste_ctx->set_encap_l3(last_ste, + action, d_action, + attr->reformat.id, + attr->reformat.size); action_sz -= DR_STE_ACTION_TRIPLE_SZ; action += DR_STE_ACTION_TRIPLE_SZ; } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) { @@ -718,11 +584,11 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_insert_hdr(last_ste, action, - attr->reformat.id, - attr->reformat.param_0, - attr->reformat.param_1, - attr->reformat.size); + ste_ctx->set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; } else if (action_type_set[DR_ACTION_TYP_REMOVE_HDR]) { @@ -731,10 +597,10 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_remove_hdr(last_ste, action, - attr->reformat.param_0, - attr->reformat.param_1, - attr->reformat.size); + ste_ctx->set_remove_hdr(last_ste, action, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; } @@ -776,7 +642,8 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); } -void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, +void dr_ste_v1_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, u8 *last_ste, @@ -799,7 +666,7 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, allow_modify_hdr = false; allow_ctr = false; } else if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2]) { - dr_ste_v1_set_rx_decap(last_ste, action); + ste_ctx->set_rx_decap(last_ste, action); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; allow_modify_hdr = false; @@ -827,7 +694,7 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count); + ste_ctx->set_pop_vlan(last_ste, action, attr->vlans.count); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; allow_ctr = false; @@ -868,8 +735,8 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_push_vlan(last_ste, action, - attr->vlans.headers[i]); + ste_ctx->set_push_vlan(last_ste, action, + attr->vlans.headers[i]); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; } @@ -895,9 +762,9 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_encap(last_ste, action, - attr->reformat.id, - attr->reformat.size); + ste_ctx->set_encap(last_ste, action, + attr->reformat.id, + attr->reformat.size); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; allow_modify_hdr = false; @@ -912,10 +779,10 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, d_action = action + DR_STE_ACTION_SINGLE_SZ; - dr_ste_v1_set_encap_l3(last_ste, - action, d_action, - attr->reformat.id, - attr->reformat.size); + ste_ctx->set_encap_l3(last_ste, + action, d_action, + attr->reformat.id, + attr->reformat.size); action_sz -= DR_STE_ACTION_TRIPLE_SZ; allow_modify_hdr = false; } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) { @@ -925,11 +792,11 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_insert_hdr(last_ste, action, - attr->reformat.id, - attr->reformat.param_0, - attr->reformat.param_1, - attr->reformat.size); + ste_ctx->set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; allow_modify_hdr = false; @@ -941,10 +808,10 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, allow_modify_hdr = true; allow_ctr = true; } - dr_ste_v1_set_remove_hdr(last_ste, action, - attr->reformat.param_0, - attr->reformat.param_1, - attr->reformat.size); + ste_ctx->set_remove_hdr(last_ste, action, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; } @@ -1027,9 +894,6 @@ void dr_ste_v1_set_action_copy(u8 *d_action, MLX5_SET(ste_double_action_copy_v1, d_action, source_right_shifter, src_shifter); } -#define DR_STE_DECAP_L3_ACTION_NUM 8 -#define DR_STE_L2_HDR_MAX_SZ 20 - int dr_ste_v1_set_action_decap_l3_list(void *data, u32 data_sz, u8 *hw_action, @@ -2330,7 +2194,14 @@ static struct mlx5dr_ste_ctx ste_ctx_v1 = { .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list, .alloc_modify_hdr_chunk = &dr_ste_v1_alloc_modify_hdr_ptrn_arg, .dealloc_modify_hdr_chunk = &dr_ste_v1_free_modify_hdr_ptrn_arg, - + /* Actions bit set */ + .set_encap = &dr_ste_v1_set_encap, + .set_push_vlan = &dr_ste_v1_set_push_vlan, + .set_pop_vlan = &dr_ste_v1_set_pop_vlan, + .set_rx_decap = &dr_ste_v1_set_rx_decap, + .set_encap_l3 = &dr_ste_v1_set_encap_l3, + .set_insert_hdr = &dr_ste_v1_set_insert_hdr, + .set_remove_hdr = &dr_ste_v1_set_remove_hdr, /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h index e2fc69867088..591c20c95a6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h @@ -7,6 +7,138 @@ #include "dr_types.h" #include "dr_ste.h" +#define DR_STE_DECAP_L3_ACTION_NUM 8 +#define DR_STE_L2_HDR_MAX_SZ 20 +#define DR_STE_CALC_DFNR_TYPE(lookup_type, inner) \ + ((inner) ? DR_STE_V1_LU_TYPE_##lookup_type##_I : \ + DR_STE_V1_LU_TYPE_##lookup_type##_O) + +enum dr_ste_v1_entry_format { + DR_STE_V1_TYPE_BWC_BYTE = 0x0, + DR_STE_V1_TYPE_BWC_DW = 0x1, + DR_STE_V1_TYPE_MATCH = 0x2, + DR_STE_V1_TYPE_MATCH_RANGES = 0x7, +}; + +/* Lookup type is built from 2B: [ Definer mode 1B ][ Definer index 1B ] */ +enum { + DR_STE_V1_LU_TYPE_NOP = 0x0000, + DR_STE_V1_LU_TYPE_ETHL2_TNL = 0x0002, + DR_STE_V1_LU_TYPE_IBL3_EXT = 0x0102, + DR_STE_V1_LU_TYPE_ETHL2_O = 0x0003, + DR_STE_V1_LU_TYPE_IBL4 = 0x0103, + DR_STE_V1_LU_TYPE_ETHL2_I = 0x0004, + DR_STE_V1_LU_TYPE_SRC_QP_GVMI = 0x0104, + DR_STE_V1_LU_TYPE_ETHL2_SRC_O = 0x0005, + DR_STE_V1_LU_TYPE_ETHL2_HEADERS_O = 0x0105, + DR_STE_V1_LU_TYPE_ETHL2_SRC_I = 0x0006, + DR_STE_V1_LU_TYPE_ETHL2_HEADERS_I = 0x0106, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x0007, + DR_STE_V1_LU_TYPE_IPV6_DES_O = 0x0107, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x0008, + DR_STE_V1_LU_TYPE_IPV6_DES_I = 0x0108, + DR_STE_V1_LU_TYPE_ETHL4_O = 0x0009, + DR_STE_V1_LU_TYPE_IPV6_SRC_O = 0x0109, + DR_STE_V1_LU_TYPE_ETHL4_I = 0x000a, + DR_STE_V1_LU_TYPE_IPV6_SRC_I = 0x010a, + DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_O = 0x000b, + DR_STE_V1_LU_TYPE_MPLS_O = 0x010b, + DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_I = 0x000c, + DR_STE_V1_LU_TYPE_MPLS_I = 0x010c, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_O = 0x000d, + DR_STE_V1_LU_TYPE_GRE = 0x010d, + DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x000e, + DR_STE_V1_LU_TYPE_GENERAL_PURPOSE = 0x010e, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_I = 0x000f, + DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0 = 0x010f, + DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1 = 0x0110, + DR_STE_V1_LU_TYPE_FLEX_PARSER_OK = 0x0011, + DR_STE_V1_LU_TYPE_FLEX_PARSER_0 = 0x0111, + DR_STE_V1_LU_TYPE_FLEX_PARSER_1 = 0x0112, + DR_STE_V1_LU_TYPE_ETHL4_MISC_O = 0x0113, + DR_STE_V1_LU_TYPE_ETHL4_MISC_I = 0x0114, + DR_STE_V1_LU_TYPE_INVALID = 0x00ff, + DR_STE_V1_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE, +}; + +enum dr_ste_v1_header_anchors { + DR_STE_HEADER_ANCHOR_START_OUTER = 0x00, + DR_STE_HEADER_ANCHOR_1ST_VLAN = 0x02, + DR_STE_HEADER_ANCHOR_IPV6_IPV4 = 0x07, + DR_STE_HEADER_ANCHOR_INNER_MAC = 0x13, + DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4 = 0x19, +}; + +enum dr_ste_v1_action_size { + DR_STE_ACTION_SINGLE_SZ = 4, + DR_STE_ACTION_DOUBLE_SZ = 8, + DR_STE_ACTION_TRIPLE_SZ = 12, +}; + +enum dr_ste_v1_action_insert_ptr_attr { + DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE = 0, /* Regular push header (e.g. push vlan) */ + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP = 1, /* Encapsulation / Tunneling */ + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ESP = 2, /* IPsec */ +}; + +enum dr_ste_v1_action_id { + DR_STE_V1_ACTION_ID_NOP = 0x00, + DR_STE_V1_ACTION_ID_COPY = 0x05, + DR_STE_V1_ACTION_ID_SET = 0x06, + DR_STE_V1_ACTION_ID_ADD = 0x07, + DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE = 0x08, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER = 0x09, + DR_STE_V1_ACTION_ID_INSERT_INLINE = 0x0a, + DR_STE_V1_ACTION_ID_INSERT_POINTER = 0x0b, + DR_STE_V1_ACTION_ID_FLOW_TAG = 0x0c, + DR_STE_V1_ACTION_ID_QUEUE_ID_SEL = 0x0d, + DR_STE_V1_ACTION_ID_ACCELERATED_LIST = 0x0e, + DR_STE_V1_ACTION_ID_MODIFY_LIST = 0x0f, + DR_STE_V1_ACTION_ID_ASO = 0x12, + DR_STE_V1_ACTION_ID_TRAILER = 0x13, + DR_STE_V1_ACTION_ID_COUNTER_ID = 0x14, + DR_STE_V1_ACTION_ID_MAX = 0x21, + /* use for special cases */ + DR_STE_V1_ACTION_ID_SPECIAL_ENCAP_L3 = 0x22, +}; + +enum { + DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_0 = 0x00, + DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1 = 0x01, + DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2 = 0x02, + DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08, + DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09, + DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e, + DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0 = 0x18, + DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_1 = 0x19, + DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40, + DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, + DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, + DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, + DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, + DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, + DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, + DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0 = 0x8c, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1 = 0x8d, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0 = 0x8e, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1 = 0x8f, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0 = 0x90, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1 = 0x91, +}; + +enum dr_ste_v1_aso_ctx_type { + DR_STE_V1_ASO_CTX_TYPE_POLICERS = 0x2, +}; + bool dr_ste_v1_is_miss_addr_set(u8 *hw_ste_p); void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr); u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p); @@ -17,11 +149,22 @@ u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p); void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size); void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi); void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size); -void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, u8 *action_type_set, - u32 actions_caps, u8 *last_ste, +void dr_ste_v1_set_reparse(u8 *hw_ste_p); +void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, int size); +void dr_ste_v1_set_push_vlan(u8 *hw_ste_p, u8 *d_action, u32 vlan_hdr); +void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num); +void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, u8 *frst_s_action, u8 *scnd_d_action, + u32 reformat_id, int size); +void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action); +void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, + u8 anchor, u8 offset, int size); +void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, u8 anchor, + u8 offset, int size); +void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_domain *dmn, + u8 *action_type_set, u32 actions_caps, u8 *last_ste, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); -void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, u8 *action_type_set, - u32 actions_caps, u8 *last_ste, +void dr_ste_v1_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_domain *dmn, + u8 *action_type_set, u32 actions_caps, u8 *last_ste, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); void dr_ste_v1_set_action_set(u8 *d_action, u8 hw_field, u8 shifter, u8 length, u32 data); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c index 808b013cf48c..d0ebaf820d42 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c @@ -2,167 +2,7 @@ /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ #include "dr_ste_v1.h" - -enum { - DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0 = 0x00, - DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1 = 0x01, - DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2 = 0x02, - DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08, - DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09, - DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e, - DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0 = 0x18, - DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1 = 0x19, - DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40, - DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, - DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, - DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, - DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, - DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, - DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, - DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, - DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0 = 0x90, - DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1 = 0x91, - DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0 = 0x92, - DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1 = 0x93, - DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0 = 0x94, - DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1 = 0x95, -}; - -static const struct mlx5dr_ste_action_modify_field dr_ste_v2_action_modify_field_arr[] = { - [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15, - }, - [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23, - }, - [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24, - .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, - }, - [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, - .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, - }, - [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, - .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, - }, - [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, - }, - [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, - .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, - }, - [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, - .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, - }, - [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, - }, - [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, - }, - [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15, - }, - [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15, - }, -}; +#include "dr_ste_v2.h" static struct mlx5dr_ste_ctx ste_ctx_v2 = { /* Builders */ @@ -223,7 +63,14 @@ static struct mlx5dr_ste_ctx ste_ctx_v2 = { .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list, .alloc_modify_hdr_chunk = &dr_ste_v1_alloc_modify_hdr_ptrn_arg, .dealloc_modify_hdr_chunk = &dr_ste_v1_free_modify_hdr_ptrn_arg, - + /* Actions bit set */ + .set_encap = &dr_ste_v1_set_encap, + .set_push_vlan = &dr_ste_v1_set_push_vlan, + .set_pop_vlan = &dr_ste_v1_set_pop_vlan, + .set_rx_decap = &dr_ste_v1_set_rx_decap, + .set_encap_l3 = &dr_ste_v1_set_encap_l3, + .set_insert_hdr = &dr_ste_v1_set_insert_hdr, + .set_remove_hdr = &dr_ste_v1_set_remove_hdr, /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.h new file mode 100644 index 000000000000..d853fde49cfc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.h @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef _DR_STE_V2_ +#define _DR_STE_V2_ + +enum { + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0 = 0x00, + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1 = 0x01, + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2 = 0x02, + DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08, + DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09, + DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e, + DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0 = 0x18, + DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1 = 0x19, + DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40, + DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, + DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, + DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, + DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, + DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, + DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, + DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0 = 0x90, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1 = 0x91, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0 = 0x92, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1 = 0x93, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0 = 0x94, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1 = 0x95, +}; + +static const struct mlx5dr_ste_action_modify_field dr_ste_v2_action_modify_field_arr[] = { + [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15, + }, +}; + +#endif /* _DR_STE_V2_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c new file mode 100644 index 000000000000..e468a9ae44e8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "dr_ste_v1.h" +#include "dr_ste_v2.h" + +static void dr_ste_v3_set_encap(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, int size) +{ + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_POINTER); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, attributes, + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP); + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v3_set_push_vlan(u8 *ste, u8 *d_action, + u32 vlan_hdr) +{ + MLX5_SET(ste_double_action_insert_with_inline_v3, d_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_INLINE); + /* The hardware expects here offset to vlan header in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_inline_v3, d_action, start_offset, + HDR_LEN_L2_MACS >> 1); + MLX5_SET(ste_double_action_insert_with_inline_v3, d_action, inline_data, vlan_hdr); + dr_ste_v1_set_reparse(ste); +} + +static void dr_ste_v3_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, + u8 vlans_num) +{ + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + start_anchor, DR_STE_HEADER_ANCHOR_1ST_VLAN); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + remove_size, (HDR_LEN_L2_VLAN >> 1) * vlans_num); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v3_set_encap_l3(u8 *hw_ste_p, + u8 *frst_s_action, + u8 *scnd_d_action, + u32 reformat_id, + int size) +{ + /* Remove L2 headers */ + MLX5_SET(ste_single_action_remove_header_v3, frst_s_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); + MLX5_SET(ste_single_action_remove_header_v3, frst_s_action, end_anchor, + DR_STE_HEADER_ANCHOR_IPV6_IPV4); + + /* Encapsulate with given reformat ID */ + MLX5_SET(ste_double_action_insert_with_ptr_v3, scnd_d_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_POINTER); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v3, scnd_d_action, size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v3, scnd_d_action, pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v3, scnd_d_action, attributes, + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v3_set_rx_decap(u8 *hw_ste_p, u8 *s_action) +{ + MLX5_SET(ste_single_action_remove_header_v3, s_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); + MLX5_SET(ste_single_action_remove_header_v3, s_action, decap, 1); + MLX5_SET(ste_single_action_remove_header_v3, s_action, vni_to_cqe, 1); + MLX5_SET(ste_single_action_remove_header_v3, s_action, end_anchor, + DR_STE_HEADER_ANCHOR_INNER_MAC); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v3_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, u8 anchor, + u8 offset, int size) +{ + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + start_anchor, anchor); + + /* The hardware expects here size and offset in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + start_offset, offset / 2); + + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, + attributes, DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v3_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, + u8 anchor, u8 offset, int size) +{ + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + start_anchor, anchor); + + /* The hardware expects here size and offset in words (2 byte) */ + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + remove_size, size / 2); + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + start_offset, offset / 2); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static int +dr_ste_v3_set_action_decap_l3_list(void *data, u32 data_sz, + u8 *hw_action, u32 hw_action_sz, + uint16_t *used_hw_action_num) +{ + u8 padded_data[DR_STE_L2_HDR_MAX_SZ] = {}; + void *data_ptr = padded_data; + u16 used_actions = 0; + u32 inline_data_sz; + u32 i; + + if (hw_action_sz / DR_STE_ACTION_DOUBLE_SZ < DR_STE_DECAP_L3_ACTION_NUM) + return -EINVAL; + + inline_data_sz = + MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v3, inline_data); + + /* Add an alignment padding */ + memcpy(padded_data + data_sz % inline_data_sz, data, data_sz); + + /* Remove L2L3 outer headers */ + MLX5_SET(ste_single_action_remove_header_v3, hw_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); + MLX5_SET(ste_single_action_remove_header_v3, hw_action, decap, 1); + MLX5_SET(ste_single_action_remove_header_v3, hw_action, vni_to_cqe, 1); + MLX5_SET(ste_single_action_remove_header_v3, hw_action, end_anchor, + DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4); + hw_action += DR_STE_ACTION_DOUBLE_SZ; + used_actions++; /* Remove and NOP are a single double action */ + + /* Point to the last dword of the header */ + data_ptr += (data_sz / inline_data_sz) * inline_data_sz; + + /* Add the new header using inline action 4Byte at a time, the header + * is added in reversed order to the beginning of the packet to avoid + * incorrect parsing by the HW. Since header is 14B or 18B an extra + * two bytes are padded and later removed. + */ + for (i = 0; i < data_sz / inline_data_sz + 1; i++) { + void *addr_inline; + + MLX5_SET(ste_double_action_insert_with_inline_v3, hw_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_INLINE); + /* The hardware expects here offset to words (2 bytes) */ + MLX5_SET(ste_double_action_insert_with_inline_v3, hw_action, start_offset, 0); + + /* Copy bytes one by one to avoid endianness problem */ + addr_inline = MLX5_ADDR_OF(ste_double_action_insert_with_inline_v3, + hw_action, inline_data); + memcpy(addr_inline, data_ptr - i * inline_data_sz, inline_data_sz); + hw_action += DR_STE_ACTION_DOUBLE_SZ; + used_actions++; + } + + /* Remove first 2 extra bytes */ + MLX5_SET(ste_single_action_remove_header_size_v3, hw_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v3, hw_action, start_offset, 0); + /* The hardware expects here size in words (2 bytes) */ + MLX5_SET(ste_single_action_remove_header_size_v3, hw_action, remove_size, 1); + used_actions++; + + *used_hw_action_num = used_actions; + + return 0; +} + +static struct mlx5dr_ste_ctx ste_ctx_v3 = { + /* Builders */ + .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init, + .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init, + .build_eth_l3_ipv6_dst_init = &dr_ste_v1_build_eth_l3_ipv6_dst_init, + .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_init, + .build_eth_l2_src_init = &dr_ste_v1_build_eth_l2_src_init, + .build_eth_l2_dst_init = &dr_ste_v1_build_eth_l2_dst_init, + .build_eth_l2_tnl_init = &dr_ste_v1_build_eth_l2_tnl_init, + .build_eth_l3_ipv4_misc_init = &dr_ste_v1_build_eth_l3_ipv4_misc_init, + .build_eth_ipv6_l3_l4_init = &dr_ste_v1_build_eth_ipv6_l3_l4_init, + .build_mpls_init = &dr_ste_v1_build_mpls_init, + .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init, + .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init, + .build_tnl_mpls_over_udp_init = &dr_ste_v1_build_tnl_mpls_over_udp_init, + .build_tnl_mpls_over_gre_init = &dr_ste_v1_build_tnl_mpls_over_gre_init, + .build_icmp_init = &dr_ste_v1_build_icmp_init, + .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init, + .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init, + .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init, + .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init, + .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init, + .build_tnl_geneve_tlv_opt_exist_init = + &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init, + .build_register_0_init = &dr_ste_v1_build_register_0_init, + .build_register_1_init = &dr_ste_v1_build_register_1_init, + .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init, + .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init, + .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init, + .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init, + .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init, + .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init, + .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init, + + /* Getters and Setters */ + .ste_init = &dr_ste_v1_init, + .set_next_lu_type = &dr_ste_v1_set_next_lu_type, + .get_next_lu_type = &dr_ste_v1_get_next_lu_type, + .is_miss_addr_set = &dr_ste_v1_is_miss_addr_set, + .set_miss_addr = &dr_ste_v1_set_miss_addr, + .get_miss_addr = &dr_ste_v1_get_miss_addr, + .set_hit_addr = &dr_ste_v1_set_hit_addr, + .set_byte_mask = &dr_ste_v1_set_byte_mask, + .get_byte_mask = &dr_ste_v1_get_byte_mask, + + /* Actions */ + .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP | + DR_STE_CTX_ACTION_CAP_RX_PUSH | + DR_STE_CTX_ACTION_CAP_RX_ENCAP, + .set_actions_rx = &dr_ste_v1_set_actions_rx, + .set_actions_tx = &dr_ste_v1_set_actions_tx, + .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v2_action_modify_field_arr), + .modify_field_arr = dr_ste_v2_action_modify_field_arr, + .set_action_set = &dr_ste_v1_set_action_set, + .set_action_add = &dr_ste_v1_set_action_add, + .set_action_copy = &dr_ste_v1_set_action_copy, + .set_action_decap_l3_list = &dr_ste_v3_set_action_decap_l3_list, + .alloc_modify_hdr_chunk = &dr_ste_v1_alloc_modify_hdr_ptrn_arg, + .dealloc_modify_hdr_chunk = &dr_ste_v1_free_modify_hdr_ptrn_arg, + /* Actions bit set */ + .set_encap = &dr_ste_v3_set_encap, + .set_push_vlan = &dr_ste_v3_set_push_vlan, + .set_pop_vlan = &dr_ste_v3_set_pop_vlan, + .set_rx_decap = &dr_ste_v3_set_rx_decap, + .set_encap_l3 = &dr_ste_v3_set_encap_l3, + .set_insert_hdr = &dr_ste_v3_set_insert_hdr, + .set_remove_hdr = &dr_ste_v3_set_remove_hdr, + /* Send */ + .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, +}; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v3(void) +{ + return &ste_ctx_v3; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h index 7618c6147f86..cc328292bf84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h @@ -1473,7 +1473,6 @@ struct mlx5dr_send_ring { int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn); void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, struct mlx5dr_send_ring *send_ring); -int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn); int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste, u8 *data, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c index 4b349d4005e4..f367997ab61e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c @@ -521,7 +521,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, goto free_actions; } - id = dst->dest_attr.counter_id; + id = mlx5_fc_id(dst->dest_attr.counter); tmp_action = mlx5dr_action_create_flow_counter(id); if (!tmp_action) { @@ -833,15 +833,21 @@ static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns, return steering_caps; } -int mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat) +int +mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *reformat_id) { + struct mlx5dr_action *dr_action; + switch (pkt_reformat->reformat_type) { case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: case MLX5_REFORMAT_TYPE_INSERT_HDR: - return mlx5dr_action_get_pkt_reformat_id(pkt_reformat->fs_dr_action.dr_action); + dr_action = pkt_reformat->fs_dr_action.dr_action; + *reformat_id = mlx5dr_action_get_pkt_reformat_id(dr_action); + return 0; } return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.h index 99a3b2eff6b8..f869f2daefbf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.h @@ -38,7 +38,9 @@ struct mlx5_fs_dr_table { bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev); -int mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat); +int +mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *reformat_id); const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void); @@ -49,9 +51,11 @@ static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) return NULL; } -static inline u32 mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat) +static inline int +mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat, + u32 *reformat_id) { - return 0; + return -EOPNOTSUPP; } static inline bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5_ifc_dr.h index fb078fa0f0cc..898c3618ff26 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5_ifc_dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5_ifc_dr.h @@ -600,4 +600,44 @@ struct mlx5_ifc_ste_double_action_aso_v1_bits { }; }; +struct mlx5_ifc_ste_single_action_remove_header_v3_bits { + u8 action_id[0x8]; + u8 start_anchor[0x7]; + u8 end_anchor[0x7]; + u8 reserved_at_16[0x1]; + u8 outer_l4_remove[0x1]; + u8 reserved_at_18[0x4]; + u8 decap[0x1]; + u8 vni_to_cqe[0x1]; + u8 qos_profile[0x2]; +}; + +struct mlx5_ifc_ste_single_action_remove_header_size_v3_bits { + u8 action_id[0x8]; + u8 start_anchor[0x7]; + u8 start_offset[0x8]; + u8 outer_l4_remove[0x1]; + u8 reserved_at_18[0x2]; + u8 remove_size[0x6]; +}; + +struct mlx5_ifc_ste_double_action_insert_with_inline_v3_bits { + u8 action_id[0x8]; + u8 start_anchor[0x7]; + u8 start_offset[0x8]; + u8 reserved_at_17[0x9]; + + u8 inline_data[0x20]; +}; + +struct mlx5_ifc_ste_double_action_insert_with_ptr_v3_bits { + u8 action_id[0x8]; + u8 start_anchor[0x7]; + u8 start_offset[0x8]; + u8 size[0x6]; + u8 attributes[0x3]; + + u8 pointer[0x20]; +}; + #endif /* MLX5_IFC_DR_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h index 3ac7dc67509f..fc8a2169d1a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h @@ -45,8 +45,6 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type); int mlx5dr_domain_destroy(struct mlx5dr_domain *domain); -int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags); - void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, struct mlx5dr_domain *peer_dmn, u16 peer_vhca_id); @@ -160,7 +158,7 @@ mlx5dr_is_supported(struct mlx5_core_dev *dev) (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) || (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) && (MLX5_CAP_GEN(dev, steering_format_version) <= - MLX5_STEERING_FORMAT_CONNECTX_7))); + MLX5_STEERING_FORMAT_CONNECTX_8))); } /* buddy functions & structure */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 0d5f750faa45..da5c24fc7b30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -465,19 +465,22 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid) { u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; - mlx5_query_nic_vport_context(mdev, 0, out); + err = mlx5_query_nic_vport_context(mdev, 0, out); + if (err) + goto out; *node_guid = MLX5_GET64(query_nic_vport_context_out, out, nic_vport_context.node_guid); - +out: kvfree(out); - return 0; + return err; } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid); @@ -519,19 +522,22 @@ int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev, { u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; out = kvzalloc(outlen, GFP_KERNEL); if (!out) return -ENOMEM; - mlx5_query_nic_vport_context(mdev, 0, out); + err = mlx5_query_nic_vport_context(mdev, 0, out); + if (err) + goto out; *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out, nic_vport_context.qkey_violation_counter); - +out: kvfree(out); - return 0; + return err; } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr); @@ -1199,6 +1205,31 @@ int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *ou } EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap); +int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id) +{ + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_ctx; + void *hca_caps; + int err; + + *vhca_id = 0; + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + err = mlx5_vport_get_other_func_general_cap(dev, vport, query_ctx); + if (err) + goto out_free; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + *vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id); + +out_free: + kfree(query_ctx); + return err; +} + int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 vport, u16 opmod) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wc.c b/drivers/net/ethernet/mellanox/mlx5/core/wc.c index 1bed75eca97d..2f0316616fa4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wc.c @@ -378,10 +378,14 @@ err_create_cq: mlx5_free_bfreg(mdev, &sq->bfreg); err_alloc_bfreg: kfree(sq); + + if (mdev->wc_state == MLX5_WC_STATE_UNSUPPORTED) + mlx5_core_warn(mdev, "Write combining is not supported\n"); } bool mlx5_wc_support_get(struct mlx5_core_dev *mdev) { + struct mutex *wc_state_lock = &mdev->wc_state_lock; struct mlx5_core_dev *parent = NULL; if (!MLX5_CAP_GEN(mdev, bf)) { @@ -400,32 +404,31 @@ bool mlx5_wc_support_get(struct mlx5_core_dev *mdev) */ goto out; - mutex_lock(&mdev->wc_state_lock); - - if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED) - goto unlock; - #ifdef CONFIG_MLX5_SF - if (mlx5_core_is_sf(mdev)) + if (mlx5_core_is_sf(mdev)) { parent = mdev->priv.parent_mdev; + wc_state_lock = &parent->wc_state_lock; + } #endif - if (parent) { - mutex_lock(&parent->wc_state_lock); + mutex_lock(wc_state_lock); + + if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED) + goto unlock; + if (parent) { mlx5_core_test_wc(parent); mlx5_core_dbg(mdev, "parent set wc_state=%d\n", parent->wc_state); mdev->wc_state = parent->wc_state; - mutex_unlock(&parent->wc_state_lock); + } else { + mlx5_core_test_wc(mdev); } - mlx5_core_test_wc(mdev); - unlock: - mutex_unlock(&mdev->wc_state_lock); + mutex_unlock(wc_state_lock); out: mlx5_core_dbg(mdev, "wc_state=%d\n", mdev->wc_state); diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index fb2e5b844c15..d1f8a72cae53 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -142,8 +142,10 @@ static int mlxbf_gige_open(struct net_device *netdev) mlxbf_gige_cache_stats(priv); err = mlxbf_gige_clean_port(priv); - if (err) + if (err) { + dev_err(priv->dev, "open: clean_port failed: %pe\n", ERR_PTR(err)); return err; + } /* Clear driver's valid_polarity to match hardware, * since the above call to clean_port() resets the @@ -154,19 +156,25 @@ static int mlxbf_gige_open(struct net_device *netdev) phy_start(phydev); err = mlxbf_gige_tx_init(priv); - if (err) + if (err) { + dev_err(priv->dev, "open: tx_init failed: %pe\n", ERR_PTR(err)); goto phy_deinit; + } err = mlxbf_gige_rx_init(priv); - if (err) + if (err) { + dev_err(priv->dev, "open: rx_init failed: %pe\n", ERR_PTR(err)); goto tx_deinit; + } netif_napi_add(netdev, &priv->napi, mlxbf_gige_poll); napi_enable(&priv->napi); netif_start_queue(netdev); err = mlxbf_gige_request_irqs(priv); - if (err) + if (err) { + dev_err(priv->dev, "open: request_irqs failed: %pe\n", ERR_PTR(err)); goto napi_deinit; + } mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_BCAST_MAC_FILTER_IDX); mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX); @@ -418,8 +426,10 @@ static int mlxbf_gige_probe(struct platform_device *pdev) /* Attach MDIO device */ err = mlxbf_gige_mdio_probe(pdev, priv); - if (err) + if (err) { + dev_err(priv->dev, "probe: mdio_probe failed: %pe\n", ERR_PTR(err)); return err; + } priv->base = base; priv->llu_base = llu_base; @@ -438,7 +448,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) { - dev_err(&pdev->dev, "DMA configuration failed: 0x%x\n", err); + dev_err(&pdev->dev, "DMA configuration failed: %pe\n", ERR_PTR(err)); goto out; } @@ -447,8 +457,10 @@ static int mlxbf_gige_probe(struct platform_device *pdev) priv->llu_plu_irq = platform_get_irq(pdev, MLXBF_GIGE_LLU_PLU_INTR_IDX); phy_irq = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(&pdev->dev), "phy", 0); - if (phy_irq < 0) { - dev_err(&pdev->dev, "Error getting PHY irq. Use polling instead"); + if (phy_irq == -EPROBE_DEFER) { + err = -EPROBE_DEFER; + goto out; + } else if (phy_irq < 0) { phy_irq = PHY_POLL; } @@ -466,7 +478,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) mlxbf_gige_link_cfgs[priv->hw_version].adjust_link, mlxbf_gige_link_cfgs[priv->hw_version].phy_mode); if (err) { - dev_err(&pdev->dev, "Could not attach to PHY\n"); + dev_err(&pdev->dev, "Could not attach to PHY: %pe\n", ERR_PTR(err)); goto out; } @@ -477,7 +489,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) err = register_netdev(netdev); if (err) { - dev_err(&pdev->dev, "Failed to register netdev\n"); + dev_err(&pdev->dev, "Failed to register netdev: %pe\n", ERR_PTR(err)); phy_disconnect(phydev); goto out; } diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c index 46245e0b2462..43c84900369a 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c @@ -14,7 +14,6 @@ #define MLXFW_FSM_STATE_WAIT_TIMEOUT_MS 30000 #define MLXFW_FSM_STATE_WAIT_ROUNDS \ (MLXFW_FSM_STATE_WAIT_TIMEOUT_MS / MLXFW_FSM_STATE_WAIT_CYCLE_MS) -#define MLXFW_FSM_MAX_COMPONENT_SIZE (10 * (1 << 20)) static const int mlxfw_fsm_state_errno[] = { [MLXFW_FSM_STATE_ERR_ERROR] = -EIO, @@ -229,7 +228,6 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, return err; } - comp_max_size = min_t(u32, comp_max_size, MLXFW_FSM_MAX_COMPONENT_SIZE); if (comp->data_size > comp_max_size) { MLXFW_ERR_MSG(mlxfw_dev, extack, "Component size is bigger than limit", -EINVAL); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 4a79c0d7e7ad..2bb2b77351bd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -35,6 +35,7 @@ #include "reg.h" #include "resources.h" #include "../mlxfw/mlxfw.h" +#include "txheader.h" static LIST_HEAD(mlxsw_core_driver_list); static DEFINE_SPINLOCK(mlxsw_core_driver_list_lock); @@ -677,7 +678,7 @@ struct mlxsw_reg_trans { struct list_head bulk_list; struct mlxsw_core *core; struct sk_buff *tx_skb; - struct mlxsw_tx_info tx_info; + struct mlxsw_txhdr_info txhdr_info; struct delayed_work timeout_dw; unsigned int retries; u64 tid; @@ -737,12 +738,11 @@ static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, if (!skb) return -ENOMEM; - trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), false, 0, - skb->data + mlxsw_core->driver->txhdr_len, - skb->len - mlxsw_core->driver->txhdr_len); + trace_devlink_hwmsg(priv_to_devlink(mlxsw_core), false, 0, skb->data, + skb->len); atomic_set(&trans->active, 1); - err = mlxsw_core_skb_transmit(mlxsw_core, skb, &trans->tx_info); + err = mlxsw_core_skb_transmit(mlxsw_core, skb, &trans->txhdr_info); if (err) { dev_kfree_skb(skb); return err; @@ -944,7 +944,7 @@ static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core, emad_len = (reg_len + sizeof(u32) + MLXSW_EMAD_ETH_HDR_LEN + (MLXSW_EMAD_OP_TLV_LEN + MLXSW_EMAD_END_TLV_LEN) * - sizeof(u32) + mlxsw_core->driver->txhdr_len); + sizeof(u32) + MLXSW_TXHDR_LEN); if (mlxsw_core->emad.enable_string_tlv) emad_len += MLXSW_EMAD_STRING_TLV_LEN * sizeof(u32); if (mlxsw_core->emad.enable_latency_tlv) @@ -984,8 +984,8 @@ static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core, list_add_tail(&trans->bulk_list, bulk_list); trans->core = mlxsw_core; trans->tx_skb = skb; - trans->tx_info.local_port = MLXSW_PORT_CPU_PORT; - trans->tx_info.is_emad = true; + trans->txhdr_info.tx_info.local_port = MLXSW_PORT_CPU_PORT; + trans->txhdr_info.tx_info.is_emad = true; INIT_DELAYED_WORK(&trans->timeout_dw, mlxsw_emad_trans_timeout_work); trans->tid = tid; init_completion(&trans->completion); @@ -995,7 +995,6 @@ static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core, trans->type = type; mlxsw_emad_construct(mlxsw_core, skb, reg, payload, type, trans->tid); - mlxsw_core->driver->txhdr_construct(skb, &trans->tx_info); spin_lock_bh(&mlxsw_core->emad.trans_list_lock); list_add_tail_rcu(&trans->list, &mlxsw_core->emad.trans_list); @@ -2330,10 +2329,10 @@ bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core, EXPORT_SYMBOL(mlxsw_core_skb_transmit_busy); int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) + const struct mlxsw_txhdr_info *txhdr_info) { return mlxsw_core->bus->skb_transmit(mlxsw_core->bus_priv, skb, - tx_info); + txhdr_info); } EXPORT_SYMBOL(mlxsw_core_skb_transmit); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 6d11225594dd..1a871397a6df 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -72,7 +72,14 @@ struct mlxsw_tx_info { bool is_emad; }; +struct mlxsw_txhdr_info { + struct mlxsw_tx_info tx_info; + bool data; + u16 max_fid; /* Used for PTP packets which are sent as data. */ +}; + struct mlxsw_rx_md_info { + struct napi_struct *napi; u32 cookie_index; u32 latency; u32 tx_congestion; @@ -94,7 +101,7 @@ struct mlxsw_rx_md_info { bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core, const struct mlxsw_tx_info *tx_info); int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info); + const struct mlxsw_txhdr_info *txhdr_info); void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, u16 local_port); @@ -425,8 +432,6 @@ struct mlxsw_driver { int (*trap_policer_counter_get)(struct mlxsw_core *mlxsw_core, const struct devlink_trap_policer *policer, u64 *p_drops); - void (*txhdr_construct)(struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info); int (*resources_register)(struct mlxsw_core *mlxsw_core); int (*kvd_sizes_get)(struct mlxsw_core *mlxsw_core, const struct mlxsw_config_profile *profile, @@ -439,7 +444,6 @@ struct mlxsw_driver { void (*ptp_transmitted)(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, u16 local_port); - u8 txhdr_len; const struct mlxsw_config_profile *profile; bool sdq_supports_cqe_v2; }; @@ -486,7 +490,7 @@ struct mlxsw_bus { bool (*skb_transmit_busy)(void *bus_priv, const struct mlxsw_tx_info *tx_info); int (*skb_transmit)(void *bus_priv, struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info); + const struct mlxsw_txhdr_info *txhdr_info); int (*cmd_exec)(void *bus_priv, u16 opcode, u8 opcode_mod, u32 in_mod, bool out_mbox_direct, char *in_mbox, size_t in_mbox_size, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index e746cd9c68ed..eac9a14a6058 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -205,11 +205,11 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, return 0; } -static struct thermal_zone_params mlxsw_thermal_params = { +static const struct thermal_zone_params mlxsw_thermal_params = { .no_hwmon = true, }; -static struct thermal_zone_device_ops mlxsw_thermal_ops = { +static const struct thermal_zone_device_ops mlxsw_thermal_ops = { .should_bind = mlxsw_thermal_should_bind, .get_temp = mlxsw_thermal_get_temp, }; @@ -252,7 +252,7 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, return 0; } -static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { +static const struct thermal_zone_device_ops mlxsw_thermal_module_ops = { .should_bind = mlxsw_thermal_module_should_bind, .get_temp = mlxsw_thermal_module_temp_get, }; @@ -280,7 +280,7 @@ static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev, return 0; } -static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = { +static const struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = { .should_bind = mlxsw_thermal_module_should_bind, .get_temp = mlxsw_thermal_gearbox_temp_get, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 1e150ce1c73a..f9f565c1036d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -516,7 +516,7 @@ static bool mlxsw_i2c_skb_transmit_busy(void *bus_priv, } static int mlxsw_i2c_skb_transmit(void *bus_priv, struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) + const struct mlxsw_txhdr_info *txhdr_info) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index d6f37456fb31..8769cba2c746 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -21,6 +21,7 @@ #include "cmd.h" #include "port.h" #include "resources.h" +#include "txheader.h" #define mlxsw_pci_write32(mlxsw_pci, reg, val) \ iowrite32be(val, (mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg)) @@ -155,7 +156,7 @@ static int mlxsw_pci_napi_devs_init(struct mlxsw_pci *mlxsw_pci) } strscpy(mlxsw_pci->napi_dev_rx->name, "mlxsw_rx", sizeof(mlxsw_pci->napi_dev_rx->name)); - dev_set_threaded(mlxsw_pci->napi_dev_rx, true); + netif_threaded_enable(mlxsw_pci->napi_dev_rx); return 0; @@ -737,6 +738,7 @@ static void mlxsw_pci_cqe_rdq_md_init(struct sk_buff *skb, const char *cqe) } static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, + struct napi_struct *napi, struct mlxsw_pci_queue *q, u16 consumer_counter_limit, enum mlxsw_pci_cqe_v cqe_v, char *cqe) @@ -807,6 +809,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, } mlxsw_pci_skb_cb_ts_set(mlxsw_pci, skb, cqe_v, cqe); + mlxsw_skb_cb(skb)->rx_md_info.napi = napi; mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); @@ -869,7 +872,7 @@ static int mlxsw_pci_napi_poll_cq_rx(struct napi_struct *napi, int budget) continue; } - mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, + mlxsw_pci_cqe_rdq_handle(mlxsw_pci, napi, rdq, wqe_counter, q->u.cq.v, cqe); if (++work_done == budget) @@ -2093,6 +2096,39 @@ static void mlxsw_pci_fini(void *bus_priv) mlxsw_pci_free_irq_vectors(mlxsw_pci); } +static int mlxsw_pci_txhdr_construct(struct sk_buff *skb, + const struct mlxsw_txhdr_info *txhdr_info) +{ + const struct mlxsw_tx_info tx_info = txhdr_info->tx_info; + char *txhdr; + + if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) + return -ENOMEM; + + txhdr = skb_push(skb, MLXSW_TXHDR_LEN); + memset(txhdr, 0, MLXSW_TXHDR_LEN); + + mlxsw_tx_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1); + mlxsw_tx_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH); + mlxsw_tx_hdr_swid_set(txhdr, 0); + + if (unlikely(txhdr_info->data)) { + u16 fid = txhdr_info->max_fid + tx_info.local_port - 1; + + mlxsw_tx_hdr_rx_is_router_set(txhdr, true); + mlxsw_tx_hdr_fid_valid_set(txhdr, true); + mlxsw_tx_hdr_fid_set(txhdr, fid); + mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_DATA); + } else { + mlxsw_tx_hdr_ctl_set(txhdr, MLXSW_TXHDR_ETH_CTL); + mlxsw_tx_hdr_control_tclass_set(txhdr, 1); + mlxsw_tx_hdr_port_mid_set(txhdr, tx_info.local_port); + mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL); + } + + return 0; +} + static struct mlxsw_pci_queue * mlxsw_pci_sdq_pick(struct mlxsw_pci *mlxsw_pci, const struct mlxsw_tx_info *tx_info) @@ -2120,7 +2156,7 @@ static bool mlxsw_pci_skb_transmit_busy(void *bus_priv, } static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) + const struct mlxsw_txhdr_info *txhdr_info) { struct mlxsw_pci *mlxsw_pci = bus_priv; struct mlxsw_pci_queue *q; @@ -2129,13 +2165,17 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, int i; int err; + err = mlxsw_pci_txhdr_construct(skb, txhdr_info); + if (err) + return err; + if (skb_shinfo(skb)->nr_frags > MLXSW_PCI_WQE_SG_ENTRIES - 1) { err = skb_linearize(skb); if (err) return err; } - q = mlxsw_pci_sdq_pick(mlxsw_pci, tx_info); + q = mlxsw_pci_sdq_pick(mlxsw_pci, &txhdr_info->tx_info); spin_lock_bh(&q->lock); elem_info = mlxsw_pci_queue_elem_info_producer_get(q); if (!elem_info) { @@ -2143,7 +2183,7 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, err = -EAGAIN; goto unlock; } - mlxsw_skb_cb(skb)->tx_info = *tx_info; + mlxsw_skb_cb(skb)->tx_info = txhdr_info->tx_info; elem_info->sdq.skb = skb; wqe = elem_info->elem; @@ -2174,6 +2214,8 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) mlxsw_pci_wqe_byte_count_set(wqe, i, 0); + mlxsw_pci_wqe_ipcs_set(wqe, skb->ip_summed == CHECKSUM_PARTIAL); + /* Everything is set up, ring producer doorbell to get HW going */ q->producer_counter++; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 6bed495dcf0f..7fa94e5828de 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -90,6 +90,11 @@ MLXSW_ITEM32(pci, wqe, lp, 0x00, 30, 1); */ MLXSW_ITEM32(pci, wqe, type, 0x00, 23, 4); +/* pci_wqe_ipcs + * Calculate IPv4 and TCP / UDP checksums. + */ +MLXSW_ITEM32(pci, wqe, ipcs, 0x00, 14, 1); + /* pci_wqe_byte_count * Size of i-th scatter/gather entry, 0 if entry is unused. */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3f5e5d99251b..9a2d64a0a858 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -107,74 +107,6 @@ static const unsigned char mlxsw_sp2_mac_mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xf0, 0x00 }; -/* tx_hdr_version - * Tx header version. - * Must be set to 1. - */ -MLXSW_ITEM32(tx, hdr, version, 0x00, 28, 4); - -/* tx_hdr_ctl - * Packet control type. - * 0 - Ethernet control (e.g. EMADs, LACP) - * 1 - Ethernet data - */ -MLXSW_ITEM32(tx, hdr, ctl, 0x00, 26, 2); - -/* tx_hdr_proto - * Packet protocol type. Must be set to 1 (Ethernet). - */ -MLXSW_ITEM32(tx, hdr, proto, 0x00, 21, 3); - -/* tx_hdr_rx_is_router - * Packet is sent from the router. Valid for data packets only. - */ -MLXSW_ITEM32(tx, hdr, rx_is_router, 0x00, 19, 1); - -/* tx_hdr_fid_valid - * Indicates if the 'fid' field is valid and should be used for - * forwarding lookup. Valid for data packets only. - */ -MLXSW_ITEM32(tx, hdr, fid_valid, 0x00, 16, 1); - -/* tx_hdr_swid - * Switch partition ID. Must be set to 0. - */ -MLXSW_ITEM32(tx, hdr, swid, 0x00, 12, 3); - -/* tx_hdr_control_tclass - * Indicates if the packet should use the control TClass and not one - * of the data TClasses. - */ -MLXSW_ITEM32(tx, hdr, control_tclass, 0x00, 6, 1); - -/* tx_hdr_etclass - * Egress TClass to be used on the egress device on the egress port. - */ -MLXSW_ITEM32(tx, hdr, etclass, 0x00, 0, 4); - -/* tx_hdr_port_mid - * Destination local port for unicast packets. - * Destination multicast ID for multicast packets. - * - * Control packets are directed to a specific egress port, while data - * packets are transmitted through the CPU port (0) into the switch partition, - * where forwarding rules are applied. - */ -MLXSW_ITEM32(tx, hdr, port_mid, 0x04, 16, 16); - -/* tx_hdr_fid - * Forwarding ID used for L2 forwarding lookup. Valid only if 'fid_valid' is - * set, otherwise calculated based on the packet's VID using VID to FID mapping. - * Valid for data packets only. - */ -MLXSW_ITEM32(tx, hdr, fid, 0x08, 16, 16); - -/* tx_hdr_type - * 0 - Data packets - * 6 - Control packets - */ -MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4); - int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index, bool clear, u64 *packets, u64 *bytes) @@ -233,61 +165,6 @@ void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp, counter_index); } -void mlxsw_sp_txhdr_construct(struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) -{ - char *txhdr = skb_push(skb, MLXSW_TXHDR_LEN); - - memset(txhdr, 0, MLXSW_TXHDR_LEN); - - mlxsw_tx_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1); - mlxsw_tx_hdr_ctl_set(txhdr, MLXSW_TXHDR_ETH_CTL); - mlxsw_tx_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH); - mlxsw_tx_hdr_swid_set(txhdr, 0); - mlxsw_tx_hdr_control_tclass_set(txhdr, 1); - mlxsw_tx_hdr_port_mid_set(txhdr, tx_info->local_port); - mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL); -} - -int -mlxsw_sp_txhdr_ptp_data_construct(struct mlxsw_core *mlxsw_core, - struct mlxsw_sp_port *mlxsw_sp_port, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) -{ - char *txhdr; - u16 max_fid; - int err; - - if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) { - err = -ENOMEM; - goto err_skb_cow_head; - } - - if (!MLXSW_CORE_RES_VALID(mlxsw_core, FID)) { - err = -EIO; - goto err_res_valid; - } - max_fid = MLXSW_CORE_RES_GET(mlxsw_core, FID); - - txhdr = skb_push(skb, MLXSW_TXHDR_LEN); - memset(txhdr, 0, MLXSW_TXHDR_LEN); - - mlxsw_tx_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1); - mlxsw_tx_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH); - mlxsw_tx_hdr_rx_is_router_set(txhdr, true); - mlxsw_tx_hdr_fid_valid_set(txhdr, true); - mlxsw_tx_hdr_fid_set(txhdr, max_fid + tx_info->local_port - 1); - mlxsw_tx_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_DATA); - return 0; - -err_res_valid: -err_skb_cow_head: - this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); - dev_kfree_skb_any(skb); - return err; -} - static bool mlxsw_sp_skb_requires_ts(struct sk_buff *skb) { unsigned int type; @@ -299,30 +176,49 @@ static bool mlxsw_sp_skb_requires_ts(struct sk_buff *skb) return !!ptp_parse_header(skb, type); } -static int mlxsw_sp_txhdr_handle(struct mlxsw_core *mlxsw_core, - struct mlxsw_sp_port *mlxsw_sp_port, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) +static void mlxsw_sp_txhdr_info_data_init(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, + struct mlxsw_txhdr_info *txhdr_info) { - struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + /* Resource validation was done as part of PTP init. */ + u16 max_fid = MLXSW_CORE_RES_GET(mlxsw_core, FID); + + txhdr_info->data = true; + txhdr_info->max_fid = max_fid; +} - /* In Spectrum-2 and Spectrum-3, PTP events that require a time stamp - * need special handling and cannot be transmitted as regular control - * packets. +static struct sk_buff * +mlxsw_sp_vlan_tag_push(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb) +{ + /* In some Spectrum ASICs, in order for PTP event packets to have their + * correction field correctly set on the egress port they must be + * transmitted as data packets. Such packets ingress the ASIC via the + * CPU port and must have a VLAN tag, as the CPU port is not configured + * with a PVID. Push the default VLAN (4095), which is configured as + * egress untagged on all the ports. */ - if (unlikely(mlxsw_sp_skb_requires_ts(skb))) - return mlxsw_sp->ptp_ops->txhdr_construct(mlxsw_core, - mlxsw_sp_port, skb, - tx_info); + if (skb_vlan_tagged(skb)) + return skb; - if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) { - this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); - dev_kfree_skb_any(skb); - return -ENOMEM; - } + return vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q), + MLXSW_SP_DEFAULT_VID); +} - mlxsw_sp_txhdr_construct(skb, tx_info); - return 0; +static struct sk_buff * +mlxsw_sp_txhdr_preparations(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + struct mlxsw_txhdr_info *txhdr_info) +{ + if (likely(!mlxsw_sp_skb_requires_ts(skb))) + return skb; + + if (!mlxsw_sp->ptp_ops->tx_as_data) + return skb; + + /* Special handling for PTP events that require a time stamp and cannot + * be transmitted as regular control packets. + */ + mlxsw_sp_txhdr_info_data_init(mlxsw_sp->core, skb, txhdr_info); + return mlxsw_sp_vlan_tag_push(mlxsw_sp, skb); } enum mlxsw_reg_spms_state mlxsw_sp_stp_spms_state(u8 state) @@ -721,16 +617,16 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_port_pcpu_stats *pcpu_stats; - const struct mlxsw_tx_info tx_info = { - .local_port = mlxsw_sp_port->local_port, - .is_emad = false, + struct mlxsw_txhdr_info txhdr_info = { + .tx_info.local_port = mlxsw_sp_port->local_port, + .tx_info.is_emad = false, }; u64 len; int err; memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb)); - if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info)) + if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &txhdr_info.tx_info)) return NETDEV_TX_BUSY; if (eth_skb_pad(skb)) { @@ -738,10 +634,11 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } - err = mlxsw_sp_txhdr_handle(mlxsw_sp->core, mlxsw_sp_port, skb, - &tx_info); - if (err) + skb = mlxsw_sp_txhdr_preparations(mlxsw_sp, skb, &txhdr_info); + if (!skb) { + this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); return NETDEV_TX_OK; + } /* TX header is consumed by HW on the way so we shouldn't count its * bytes as being sent. @@ -751,7 +648,7 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, /* Due to a race we might fail here because of a full queue. In that * unlikely case we simply drop the packet. */ - err = mlxsw_core_skb_transmit(mlxsw_sp->core, skb, &tx_info); + err = mlxsw_core_skb_transmit(mlxsw_sp->core, skb, &txhdr_info); if (!err) { pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats); @@ -1262,63 +1159,31 @@ static int mlxsw_sp_set_features(struct net_device *dev, return 0; } -static int mlxsw_sp_port_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct ifreq *ifr) +static int mlxsw_sp_port_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { - struct hwtstamp_config config; - int err; - - if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) - return -EFAULT; - - err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, - &config); - if (err) - return err; - - if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) - return -EFAULT; + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - return 0; + return mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, + config, extack); } -static int mlxsw_sp_port_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct ifreq *ifr) +static int mlxsw_sp_port_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *config) { - struct hwtstamp_config config; - int err; - - err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port, - &config); - if (err) - return err; - - if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) - return -EFAULT; + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - return 0; + return mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port, + config); } static inline void mlxsw_sp_port_ptp_clear(struct mlxsw_sp_port *mlxsw_sp_port) { - struct hwtstamp_config config = {0}; - - mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config); -} - -static int -mlxsw_sp_port_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct kernel_hwtstamp_config config = {}; - switch (cmd) { - case SIOCSHWTSTAMP: - return mlxsw_sp_port_hwtstamp_set(mlxsw_sp_port, ifr); - case SIOCGHWTSTAMP: - return mlxsw_sp_port_hwtstamp_get(mlxsw_sp_port, ifr); - default: - return -EOPNOTSUPP; - } + mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config, + NULL); } static const struct net_device_ops mlxsw_sp_port_netdev_ops = { @@ -1335,7 +1200,8 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, .ndo_set_features = mlxsw_sp_set_features, - .ndo_eth_ioctl = mlxsw_sp_port_ioctl, + .ndo_hwtstamp_get = mlxsw_sp_port_hwtstamp_get, + .ndo_hwtstamp_set = mlxsw_sp_port_hwtstamp_set, }; static int @@ -1677,10 +1543,12 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u16 local_port, netif_carrier_off(dev); dev->features |= NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_FILTER | - NETIF_F_HW_TC; - dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK; + NETIF_F_HW_TC | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + dev->hw_features |= NETIF_F_HW_TC | NETIF_F_LOOPBACK | + NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + dev->vlan_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; dev->lltx = true; - dev->netns_local = true; + dev->netns_immutable = true; dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = MLXSW_PORT_MAX_MTU - MLXSW_PORT_ETH_FRAME_HDR; @@ -2449,7 +2317,7 @@ void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, u64_stats_update_end(&pcpu_stats->syncp); skb->protocol = eth_type_trans(skb, skb->dev); - netif_receive_skb(skb); + napi_gro_receive(mlxsw_skb_cb(skb)->rx_md_info.napi, skb); } static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u16 local_port, @@ -2507,11 +2375,11 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { ROUTER_EXP, false), MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_DIP_LINK_LOCAL, FORWARD, ROUTER_EXP, false), + MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_LINK_LOCAL, FORWARD, + ROUTER_EXP, false), /* Multicast Router Traps */ MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), - /* NVE traps */ - MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, NEIGH_DISCOVERY, false), }; static const struct mlxsw_listener mlxsw_sp1_listener[] = { @@ -2792,7 +2660,6 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = { .get_stats_count = mlxsw_sp1_get_stats_count, .get_stats_strings = mlxsw_sp1_get_stats_strings, .get_stats = mlxsw_sp1_get_stats, - .txhdr_construct = mlxsw_sp_ptp_txhdr_construct, }; static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { @@ -2811,7 +2678,7 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { .get_stats_count = mlxsw_sp2_get_stats_count, .get_stats_strings = mlxsw_sp2_get_stats_strings, .get_stats = mlxsw_sp2_get_stats, - .txhdr_construct = mlxsw_sp2_ptp_txhdr_construct, + .tx_as_data = true, }; static const struct mlxsw_sp_ptp_ops mlxsw_sp4_ptp_ops = { @@ -2830,7 +2697,6 @@ static const struct mlxsw_sp_ptp_ops mlxsw_sp4_ptp_ops = { .get_stats_count = mlxsw_sp2_get_stats_count, .get_stats_strings = mlxsw_sp2_get_stats_strings, .get_stats = mlxsw_sp2_get_stats, - .txhdr_construct = mlxsw_sp_ptp_txhdr_construct, }; struct mlxsw_sp_sample_trigger_node { @@ -3992,11 +3858,9 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .trap_policer_fini = mlxsw_sp_trap_policer_fini, .trap_policer_set = mlxsw_sp_trap_policer_set, .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, - .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp1_resources_register, .kvd_sizes_get = mlxsw_sp_kvd_sizes_get, .ptp_transmitted = mlxsw_sp_ptp_transmitted, - .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp1_config_profile, .sdq_supports_cqe_v2 = false, }; @@ -4030,10 +3894,8 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .trap_policer_fini = mlxsw_sp_trap_policer_fini, .trap_policer_set = mlxsw_sp_trap_policer_set, .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, - .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, .ptp_transmitted = mlxsw_sp_ptp_transmitted, - .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, .sdq_supports_cqe_v2 = true, }; @@ -4067,10 +3929,8 @@ static struct mlxsw_driver mlxsw_sp3_driver = { .trap_policer_fini = mlxsw_sp_trap_policer_fini, .trap_policer_set = mlxsw_sp_trap_policer_set, .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, - .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, .ptp_transmitted = mlxsw_sp_ptp_transmitted, - .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, .sdq_supports_cqe_v2 = true, }; @@ -4102,10 +3962,8 @@ static struct mlxsw_driver mlxsw_sp4_driver = { .trap_policer_fini = mlxsw_sp_trap_policer_fini, .trap_policer_set = mlxsw_sp_trap_policer_set, .trap_policer_counter_get = mlxsw_sp_trap_policer_counter_get, - .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, .ptp_transmitted = mlxsw_sp_ptp_transmitted, - .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp4_config_profile, .sdq_supports_cqe_v2 = true, }; @@ -5343,25 +5201,13 @@ static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp, return 0; if (!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) return -EOPNOTSUPP; - if (cu_info->linking) { - if (!netif_running(dev)) - return 0; - /* When the bridge is VLAN-aware, the VNI of the VxLAN - * device needs to be mapped to a VLAN, but at this - * point no VLANs are configured on the VxLAN device - */ - if (br_vlan_enabled(upper_dev)) - return 0; + if (!netif_running(dev)) + return 0; + if (cu_info->linking) return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, dev, 0, extack); - } else { - /* VLANs were already flushed, which triggered the - * necessary cleanup - */ - if (br_vlan_enabled(upper_dev)) - return 0; + else mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, dev); - } break; case NETDEV_PRE_UP: upper_dev = netdev_master_upper_dev_get(dev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 8d3c61287696..b03ff9e044f9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -233,9 +233,10 @@ struct mlxsw_sp_ptp_ops { u16 local_port); int (*hwtstamp_get)(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config); int (*hwtstamp_set)(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void (*shaper_work)(struct work_struct *work); int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp, struct kernel_ethtool_ts_info *info); @@ -243,10 +244,7 @@ struct mlxsw_sp_ptp_ops { void (*get_stats_strings)(u8 **p); void (*get_stats)(struct mlxsw_sp_port *mlxsw_sp_port, u64 *data, int data_index); - int (*txhdr_construct)(struct mlxsw_core *mlxsw_core, - struct mlxsw_sp_port *mlxsw_sp_port, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info); + bool tx_as_data; }; struct mlxsw_sp_fid_core_ops { @@ -354,7 +352,7 @@ struct mlxsw_sp_port { struct mlxsw_sp_flow_block *eg_flow_block; struct { struct delayed_work shaper_dw; - struct hwtstamp_config hwtstamp_config; + struct kernel_hwtstamp_config hwtstamp_config; u16 ing_types; u16 egr_types; struct mlxsw_sp_ptp_port_stats stats; @@ -664,10 +662,10 @@ bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, const struct net_device *br_dev); int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, const struct net_device *br_dev, - const struct net_device *vxlan_dev, u16 vid, + struct net_device *vxlan_dev, u16 vid, struct netlink_ext_ack *extack); void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, - const struct net_device *vxlan_dev); + struct net_device *vxlan_dev); extern struct notifier_block mlxsw_sp_switchdev_notifier; /* spectrum.c */ @@ -711,12 +709,6 @@ int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int *p_counter_index); void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index); -void mlxsw_sp_txhdr_construct(struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info); -int mlxsw_sp_txhdr_ptp_data_construct(struct mlxsw_core *mlxsw_core, - struct mlxsw_sp_port *mlxsw_sp_port, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info); bool mlxsw_sp_port_dev_check(const struct net_device *dev); struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev); struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev); @@ -763,9 +755,6 @@ void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, struct net_device *dev); -bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev); -u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, enum mlxsw_sp_l3proto ul_proto, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c index a54eedb69a3f..067f0055a55a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c @@ -212,7 +212,22 @@ static const u8 mlxsw_sp4_acl_bf_crc6_tab[256] = { * This array defines key offsets for easy access when copying key blocks from * entry key to Bloom filter chunk. */ -static const u8 chunk_key_offsets[MLXSW_BLOOM_KEY_CHUNKS] = {2, 20, 38}; +static char * +mlxsw_sp_acl_bf_enc_key_get(struct mlxsw_sp_acl_atcam_entry *aentry, + u8 chunk_index) +{ + switch (chunk_index) { + case 0: + return &aentry->ht_key.enc_key[2]; + case 1: + return &aentry->ht_key.enc_key[20]; + case 2: + return &aentry->ht_key.enc_key[38]; + default: + WARN_ON_ONCE(1); + return &aentry->ht_key.enc_key[0]; + } +} static u16 mlxsw_sp2_acl_bf_crc16_byte(u16 crc, u8 c) { @@ -235,9 +250,10 @@ __mlxsw_sp_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion, u8 key_offset, u8 chunk_key_len, u8 chunk_len) { struct mlxsw_afk_key_info *key_info = aregion->region->key_info; - u8 chunk_index, chunk_count, block_count; + u8 chunk_index, chunk_count; char *chunk = output; __be16 erp_region_id; + u32 block_count; block_count = mlxsw_afk_key_info_blocks_count_get(key_info); chunk_count = 1 + ((block_count - 1) >> 2); @@ -245,12 +261,13 @@ __mlxsw_sp_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion, (aregion->region->id << 4)); for (chunk_index = max_chunks - chunk_count; chunk_index < max_chunks; chunk_index++) { + char *enc_key; + memset(chunk, 0, pad_bytes); memcpy(chunk + pad_bytes, &erp_region_id, sizeof(erp_region_id)); - memcpy(chunk + key_offset, - &aentry->ht_key.enc_key[chunk_key_offsets[chunk_index]], - chunk_key_len); + enc_key = mlxsw_sp_acl_bf_enc_key_get(aentry, chunk_index); + memcpy(chunk + key_offset, enc_key, chunk_key_len); chunk += chunk_len; } *len = chunk_count * chunk_len; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c index 6fe185ea6732..1850a975b380 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c @@ -324,6 +324,10 @@ static const struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5b[] = MLXSW_AFK_ELEMENT_INST_EXT_U32(SRC_SYS_PORT, 0x04, 0, 9, -1, true), /* RX_ACL_SYSTEM_PORT */ }; +static const struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_1b[] = { + MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x04, 4), +}; + static const struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_5b[] = { MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER, 0x04, 20, 12), }; @@ -341,7 +345,7 @@ static const struct mlxsw_afk_block mlxsw_sp4_afk_blocks[] = { MLXSW_AFK_BLOCK(0x14, mlxsw_sp_afk_element_info_mac_4), MLXSW_AFK_BLOCK_HIGH_ENTROPY(0x1A, mlxsw_sp_afk_element_info_mac_5b), MLXSW_AFK_BLOCK_HIGH_ENTROPY(0x38, mlxsw_sp_afk_element_info_ipv4_0), - MLXSW_AFK_BLOCK_HIGH_ENTROPY(0x39, mlxsw_sp_afk_element_info_ipv4_1), + MLXSW_AFK_BLOCK_HIGH_ENTROPY(0x3F, mlxsw_sp_afk_element_info_ipv4_1b), MLXSW_AFK_BLOCK(0x3A, mlxsw_sp_afk_element_info_ipv4_2), MLXSW_AFK_BLOCK(0x36, mlxsw_sp_afk_element_info_ipv4_5b), MLXSW_AFK_BLOCK(0x40, mlxsw_sp_afk_element_info_ipv6_0), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 2bed8c86b7cf..0a8fb9c842d3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -262,7 +262,7 @@ err_port_pause_configure: } struct mlxsw_sp_port_hw_stats { - char str[ETH_GSTRING_LEN]; + char str[ETH_GSTRING_LEN] __nonstring; u64 (*getter)(const char *payload); bool cells_bytes; }; @@ -768,7 +768,9 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, err = mlxsw_sp_get_hw_stats_by_group(&hw_stats, &len, grp); if (err) return; - mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); + err = mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); + if (err) + return; for (i = 0; i < len; i++) { data[data_index + i] = hw_stats[i].getter(ppcnt_pl); if (!hw_stats[i].cells_bytes) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index f07955b5439f..6a4a81c63451 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -192,6 +192,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return -EOPNOTSUPP; } + if (sample_act_count) { + NL_SET_ERR_MSG_MOD(extack, "Mirror action after sample action is not supported"); + return -EOPNOTSUPP; + } + err = mlxsw_sp_acl_rulei_act_mirror(mlxsw_sp, rulei, block, out_dev, extack); @@ -265,6 +270,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return -EOPNOTSUPP; } + if (mirror_act_count) { + NL_SET_ERR_MSG_MOD(extack, "Sample action after mirror action is not supported"); + return -EOPNOTSUPP; + } + err = mlxsw_sp_acl_rulei_act_sample(mlxsw_sp, rulei, block, act->sample.psample_group, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index 69cd689dbc83..5afe6b155ef0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -1003,10 +1003,10 @@ static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp, mr->mr_ops->route_stats(mlxsw_sp, mr_route->route_priv, &packets, &bytes); - if (mr_route->mfc->mfc_un.res.pkt != packets) - mr_route->mfc->mfc_un.res.lastuse = jiffies; - mr_route->mfc->mfc_un.res.pkt = packets; - mr_route->mfc->mfc_un.res.bytes = bytes; + if (atomic_long_read(&mr_route->mfc->mfc_un.res.pkt) != packets) + WRITE_ONCE(mr_route->mfc->mfc_un.res.lastuse, jiffies); + atomic_long_set(&mr_route->mfc->mfc_un.res.pkt, packets); + atomic_long_set(&mr_route->mfc->mfc_un.res.bytes, bytes); } static void mlxsw_sp_mr_stats_update(struct work_struct *work) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index d94081c7658e..5b9f0844b8f6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -46,7 +46,7 @@ struct mlxsw_sp2_ptp_state { refcount_t ptp_port_enabled_ref; /* Number of ports with time stamping * enabled. */ - struct hwtstamp_config config; + struct kernel_hwtstamp_config config; struct mutex lock; /* Protects 'config' and HW configuration. */ }; @@ -131,7 +131,7 @@ static u64 __mlxsw_sp1_ptp_read_frc(struct mlxsw_sp1_ptp_clock *clock, return (u64) frc_l | (u64) frc_h2 << 32; } -static u64 mlxsw_sp1_ptp_read_frc(const struct cyclecounter *cc) +static u64 mlxsw_sp1_ptp_read_frc(struct cyclecounter *cc) { struct mlxsw_sp1_ptp_clock *clock = container_of(cc, struct mlxsw_sp1_ptp_clock, cycles); @@ -1083,14 +1083,14 @@ void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state_common) } int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { *config = mlxsw_sp_port->ptp.hwtstamp_config; return 0; } static int -mlxsw_sp1_ptp_get_message_types(const struct hwtstamp_config *config, +mlxsw_sp1_ptp_get_message_types(const struct kernel_hwtstamp_config *config, u16 *p_ing_types, u16 *p_egr_types, enum hwtstamp_rx_filters *p_rx_filter) { @@ -1246,7 +1246,8 @@ void mlxsw_sp1_ptp_shaper_work(struct work_struct *work) } int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { enum hwtstamp_rx_filters rx_filter; u16 ing_types; @@ -1270,7 +1271,7 @@ int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, if (err) return err; - /* Notify the ioctl caller what we are actually timestamping. */ + /* Notify the caller what we are actually timestamping. */ config->rx_filter = rx_filter; return 0; @@ -1353,6 +1354,10 @@ struct mlxsw_sp_ptp_state *mlxsw_sp2_ptp_init(struct mlxsw_sp *mlxsw_sp) struct mlxsw_sp2_ptp_state *ptp_state; int err; + /* Max FID will be used in data path, check validity as part of init. */ + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, FID)) + return ERR_PTR(-EIO); + ptp_state = kzalloc(sizeof(*ptp_state), GFP_KERNEL); if (!ptp_state) return ERR_PTR(-ENOMEM); @@ -1447,7 +1452,7 @@ void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, } int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { struct mlxsw_sp2_ptp_state *ptp_state; @@ -1461,7 +1466,7 @@ int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, } static int -mlxsw_sp2_ptp_get_message_types(const struct hwtstamp_config *config, +mlxsw_sp2_ptp_get_message_types(const struct kernel_hwtstamp_config *config, u16 *p_ing_types, u16 *p_egr_types, enum hwtstamp_rx_filters *p_rx_filter) { @@ -1538,7 +1543,7 @@ static int mlxsw_sp2_ptp_mtpcpc_set(struct mlxsw_sp *mlxsw_sp, bool ptp_trap_en, static int mlxsw_sp2_ptp_enable(struct mlxsw_sp *mlxsw_sp, u16 ing_types, u16 egr_types, - struct hwtstamp_config new_config) + struct kernel_hwtstamp_config new_config) { struct mlxsw_sp2_ptp_state *ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp); int err; @@ -1552,7 +1557,7 @@ static int mlxsw_sp2_ptp_enable(struct mlxsw_sp *mlxsw_sp, u16 ing_types, } static int mlxsw_sp2_ptp_disable(struct mlxsw_sp *mlxsw_sp, - struct hwtstamp_config new_config) + struct kernel_hwtstamp_config new_config) { struct mlxsw_sp2_ptp_state *ptp_state = mlxsw_sp2_ptp_state(mlxsw_sp); int err; @@ -1567,7 +1572,7 @@ static int mlxsw_sp2_ptp_disable(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp2_ptp_configure_port(struct mlxsw_sp_port *mlxsw_sp_port, u16 ing_types, u16 egr_types, - struct hwtstamp_config new_config) + struct kernel_hwtstamp_config new_config) { struct mlxsw_sp2_ptp_state *ptp_state; int err; @@ -1588,7 +1593,7 @@ static int mlxsw_sp2_ptp_configure_port(struct mlxsw_sp_port *mlxsw_sp_port, } static int mlxsw_sp2_ptp_deconfigure_port(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config new_config) + struct kernel_hwtstamp_config new_config) { struct mlxsw_sp2_ptp_state *ptp_state; int err; @@ -1610,11 +1615,12 @@ err_ptp_disable: } int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { + struct kernel_hwtstamp_config new_config; struct mlxsw_sp2_ptp_state *ptp_state; enum hwtstamp_rx_filters rx_filter; - struct hwtstamp_config new_config; u16 new_ing_types, new_egr_types; bool ptp_enabled; int err; @@ -1648,7 +1654,7 @@ int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port->ptp.ing_types = new_ing_types; mlxsw_sp_port->ptp.egr_types = new_egr_types; - /* Notify the ioctl caller what we are actually timestamping. */ + /* Notify the caller what we are actually timestamping. */ config->rx_filter = rx_filter; mutex_unlock(&ptp_state->lock); @@ -1679,43 +1685,3 @@ int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, return 0; } - -int mlxsw_sp_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, - struct mlxsw_sp_port *mlxsw_sp_port, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) -{ - if (skb_cow_head(skb, MLXSW_TXHDR_LEN)) { - this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); - dev_kfree_skb_any(skb); - return -ENOMEM; - } - - mlxsw_sp_txhdr_construct(skb, tx_info); - return 0; -} - -int mlxsw_sp2_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, - struct mlxsw_sp_port *mlxsw_sp_port, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) -{ - /* In Spectrum-2 and Spectrum-3, in order for PTP event packets to have - * their correction field correctly set on the egress port they must be - * transmitted as data packets. Such packets ingress the ASIC via the - * CPU port and must have a VLAN tag, as the CPU port is not configured - * with a PVID. Push the default VLAN (4095), which is configured as - * egress untagged on all the ports. - */ - if (!skb_vlan_tagged(skb)) { - skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q), - MLXSW_SP_DEFAULT_VID); - if (!skb) { - this_cpu_inc(mlxsw_sp_port->pcpu_stats->tx_dropped); - return -ENOMEM; - } - } - - return mlxsw_sp_txhdr_ptp_data_construct(mlxsw_core, mlxsw_sp_port, skb, - tx_info); -} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h index c8aa1452fbb9..df37f1470830 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h @@ -34,10 +34,11 @@ void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, u64 timestamp); int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config); int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); void mlxsw_sp1_ptp_shaper_work(struct work_struct *work); @@ -49,11 +50,6 @@ void mlxsw_sp1_get_stats_strings(u8 **p); void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, u64 *data, int data_index); -int mlxsw_sp_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, - struct mlxsw_sp_port *mlxsw_sp_port, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info); - struct mlxsw_sp_ptp_clock * mlxsw_sp2_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev); @@ -70,19 +66,15 @@ void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, u16 local_port); int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config); int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config); + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack); int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, struct kernel_ethtool_ts_info *info); -int mlxsw_sp2_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, - struct mlxsw_sp_port *mlxsw_sp_port, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info); - #else static inline struct mlxsw_sp_ptp_clock * @@ -127,14 +119,15 @@ mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, static inline int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { return -EOPNOTSUPP; } static inline int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } @@ -157,15 +150,6 @@ static inline void mlxsw_sp1_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, { } -static inline int -mlxsw_sp_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, - struct mlxsw_sp_port *mlxsw_sp_port, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) -{ - return -EOPNOTSUPP; -} - static inline struct mlxsw_sp_ptp_clock * mlxsw_sp2_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) { @@ -200,23 +184,15 @@ static inline void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, static inline int mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) + struct kernel_hwtstamp_config *config) { return -EOPNOTSUPP; } static inline int mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, - struct hwtstamp_config *config) -{ - return -EOPNOTSUPP; -} - -static inline int -mlxsw_sp2_ptp_txhdr_construct(struct mlxsw_core *mlxsw_core, - struct mlxsw_sp_port *mlxsw_sp_port, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 7d6d859cef3f..a2033837182e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3014,6 +3014,9 @@ static int mlxsw_sp_neigh_rif_made_sync(struct mlxsw_sp *mlxsw_sp, .rif = rif, }; + if (!mlxsw_sp_dev_lower_is_port(mlxsw_sp_rif_dev(rif))) + return 0; + neigh_for_each(&arp_tbl, mlxsw_sp_neigh_rif_made_sync_each, &rms); if (rms.err) goto err_arp; @@ -8184,41 +8187,6 @@ mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, return NULL; } -bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev) -{ - struct mlxsw_sp_rif *rif; - - mutex_lock(&mlxsw_sp->router->lock); - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - mutex_unlock(&mlxsw_sp->router->lock); - - return rif; -} - -u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) -{ - struct mlxsw_sp_rif *rif; - u16 vid = 0; - - mutex_lock(&mlxsw_sp->router->lock); - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) - goto out; - - /* We only return the VID for VLAN RIFs. Otherwise we return an - * invalid value (0). - */ - if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN) - goto out; - - vid = mlxsw_sp_fid_8021q_vid(rif->fid); - -out: - mutex_unlock(&mlxsw_sp->router->lock); - return vid; -} - static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) { char ritr_pl[MLXSW_REG_RITR_LEN]; @@ -8417,19 +8385,6 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif) return lb_rif->common.rif_index; } -u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) -{ - struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common); - u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev); - struct mlxsw_sp_vr *ul_vr; - - ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL); - if (WARN_ON(IS_ERR(ul_vr))) - return 0; - - return ul_vr->id; -} - u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) { return lb_rif->ul_rif_id; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 0432c7cc6b07..313efab5c324 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -90,7 +90,6 @@ struct mlxsw_sp_ipip_entry; struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 rif_index); u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); -u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif); u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c index 4b5fd71c897d..32d2e61f2b82 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c @@ -423,8 +423,7 @@ mlxsw_sp_span_gretap4_route(const struct net_device *to_dev, parms = mlxsw_sp_ipip_netdev_parms4(to_dev); ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp, - 0, 0, dev_net(to_dev), parms.link, tun->fwmark, 0, - 0); + 0, 0, tun->net, parms.link, tun->fwmark, 0, 0); rt = ip_route_output_key(tun->net, &fl4); if (IS_ERR(rt)) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 6397ff0dc951..a48bf342084d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -2929,23 +2929,8 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port); } -int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, - const struct net_device *br_dev, - const struct net_device *vxlan_dev, u16 vid, - struct netlink_ext_ack *extack) -{ - struct mlxsw_sp_bridge_device *bridge_device; - - bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); - if (WARN_ON(!bridge_device)) - return -EINVAL; - - return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, vid, - extack); -} - -void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, - const struct net_device *vxlan_dev) +static void __mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, + const struct net_device *vxlan_dev) { struct vxlan_dev *vxlan = netdev_priv(vxlan_dev); struct mlxsw_sp_fid *fid; @@ -2963,6 +2948,47 @@ void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fid_put(fid); } +int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + struct net_device *vxlan_dev, u16 vid, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_bridge_device *bridge_device; + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (WARN_ON(!bridge_device)) + return -EINVAL; + + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(bridge_device->dev); + if (!mlxsw_sp_port) + return -EINVAL; + + err = bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, vid, + extack); + if (err) + return err; + + err = switchdev_bridge_port_offload(vxlan_dev, mlxsw_sp_port->dev, + NULL, NULL, NULL, false, extack); + if (err) + goto err_bridge_port_offload; + + return 0; + +err_bridge_port_offload: + __mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); + return err; +} + +void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, + struct net_device *vxlan_dev) +{ + switchdev_bridge_port_unoffload(vxlan_dev, NULL, NULL, NULL); + __mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); +} + static void mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr, enum mlxsw_sp_l3proto *proto, @@ -3867,7 +3893,7 @@ mlxsw_sp_switchdev_vxlan_vlan_add(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fid_put(fid); return -EINVAL; } - mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); + __mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); mlxsw_sp_fid_put(fid); return 0; } @@ -3883,7 +3909,7 @@ mlxsw_sp_switchdev_vxlan_vlan_add(struct mlxsw_sp *mlxsw_sp, /* Fourth case: Thew new VLAN is PVID, which means the VLAN currently * mapped to the VNI should be unmapped */ - mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); + __mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); mlxsw_sp_fid_put(fid); /* Fifth case: The new VLAN is also egress untagged, which means the @@ -3923,7 +3949,7 @@ mlxsw_sp_switchdev_vxlan_vlan_del(struct mlxsw_sp *mlxsw_sp, if (mlxsw_sp_fid_8021q_vid(fid) != vid) goto out; - mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); + __mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, vxlan_dev); out: mlxsw_sp_fid_put(fid); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 899c954e0e5f..b5c3f789c685 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -173,7 +173,7 @@ static void mlxsw_sp_rx_no_mark_listener(struct sk_buff *skb, u16 local_port, if (err) return; - netif_receive_skb(skb); + napi_gro_receive(mlxsw_skb_cb(skb)->rx_md_info.napi, skb); } static void mlxsw_sp_rx_mark_listener(struct sk_buff *skb, u16 local_port, @@ -959,18 +959,18 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = { }, { .trap = MLXSW_SP_TRAP_CONTROL(ARP_REQUEST, NEIGH_DISCOVERY, - MIRROR), + TRAP), .listeners_arr = { - MLXSW_SP_RXL_MARK(ROUTER_ARPBC, NEIGH_DISCOVERY, - TRAP_TO_CPU, false), + MLXSW_SP_RXL_NO_MARK(ARPBC, NEIGH_DISCOVERY, + TRAP_TO_CPU, false), }, }, { .trap = MLXSW_SP_TRAP_CONTROL(ARP_RESPONSE, NEIGH_DISCOVERY, - MIRROR), + TRAP), .listeners_arr = { - MLXSW_SP_RXL_MARK(ROUTER_ARPUC, NEIGH_DISCOVERY, - TRAP_TO_CPU, false), + MLXSW_SP_RXL_NO_MARK(ARPUC, NEIGH_DISCOVERY, + TRAP_TO_CPU, false), }, }, { diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 83477c8e6971..9962dc157901 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -29,6 +29,8 @@ enum { MLXSW_TRAP_ID_FDB_MISMATCH = 0x3B, MLXSW_TRAP_ID_FID_MISS = 0x3D, MLXSW_TRAP_ID_DECAP_ECN0 = 0x40, + MLXSW_TRAP_ID_ARPBC = 0x50, + MLXSW_TRAP_ID_ARPUC = 0x51, MLXSW_TRAP_ID_MTUERROR = 0x52, MLXSW_TRAP_ID_TTLERROR = 0x53, MLXSW_TRAP_ID_LBERROR = 0x54, @@ -66,13 +68,10 @@ enum { MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92, MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1, MLXSW_TRAP_ID_NVE_DECAP_ARP = 0xB8, - MLXSW_TRAP_ID_NVE_ENCAP_ARP = 0xBD, MLXSW_TRAP_ID_IPV4_BFD = 0xD0, MLXSW_TRAP_ID_IPV6_BFD = 0xD1, MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, - MLXSW_TRAP_ID_ROUTER_ARPBC = 0xE0, - MLXSW_TRAP_ID_ROUTER_ARPUC = 0xE1, MLXSW_TRAP_ID_DISCARD_NON_ROUTABLE = 0x11A, MLXSW_TRAP_ID_DISCARD_ROUTER2 = 0x130, MLXSW_TRAP_ID_DISCARD_ROUTER3 = 0x131, @@ -95,6 +94,7 @@ enum { MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_SIP_BC = 0x16A, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_DIP_LOCAL_NET = 0x16B, MLXSW_TRAP_ID_DISCARD_ING_ROUTER_DIP_LINK_LOCAL = 0x16C, + MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_LINK_LOCAL = 0x16D, MLXSW_TRAP_ID_DISCARD_ROUTER_IRIF_EN = 0x178, MLXSW_TRAP_ID_DISCARD_ROUTER_ERIF_EN = 0x179, MLXSW_TRAP_ID_DISCARD_ROUTER_LPM4 = 0x17B, diff --git a/drivers/net/ethernet/mellanox/mlxsw/txheader.h b/drivers/net/ethernet/mellanox/mlxsw/txheader.h index da51dd9d5e44..e78cba5821b6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/txheader.h +++ b/drivers/net/ethernet/mellanox/mlxsw/txheader.h @@ -4,6 +4,69 @@ #ifndef _MLXSW_TXHEADER_H #define _MLXSW_TXHEADER_H +/* tx_hdr_version + * Tx header version. + * Must be set to 1. + */ +MLXSW_ITEM32(tx, hdr, version, 0x00, 28, 4); + +/* tx_hdr_ctl + * Packet control type. + * 0 - Ethernet control (e.g. EMADs, LACP) + * 1 - Ethernet data + */ +MLXSW_ITEM32(tx, hdr, ctl, 0x00, 26, 2); + +/* tx_hdr_proto + * Packet protocol type. Must be set to 1 (Ethernet). + */ +MLXSW_ITEM32(tx, hdr, proto, 0x00, 21, 3); + +/* tx_hdr_rx_is_router + * Packet is sent from the router. Valid for data packets only. + */ +MLXSW_ITEM32(tx, hdr, rx_is_router, 0x00, 19, 1); + +/* tx_hdr_fid_valid + * Indicates if the 'fid' field is valid and should be used for + * forwarding lookup. Valid for data packets only. + */ +MLXSW_ITEM32(tx, hdr, fid_valid, 0x00, 16, 1); + +/* tx_hdr_swid + * Switch partition ID. Must be set to 0. + */ +MLXSW_ITEM32(tx, hdr, swid, 0x00, 12, 3); + +/* tx_hdr_control_tclass + * Indicates if the packet should use the control TClass and not one + * of the data TClasses. + */ +MLXSW_ITEM32(tx, hdr, control_tclass, 0x00, 6, 1); + +/* tx_hdr_port_mid + * Destination local port for unicast packets. + * Destination multicast ID for multicast packets. + * + * Control packets are directed to a specific egress port, while data + * packets are transmitted through the CPU port (0) into the switch partition, + * where forwarding rules are applied. + */ +MLXSW_ITEM32(tx, hdr, port_mid, 0x04, 16, 16); + +/* tx_hdr_fid + * Forwarding ID used for L2 forwarding lookup. Valid only if 'fid_valid' is + * set, otherwise calculated based on the packet's VID using VID to FID mapping. + * Valid for data packets only. + */ +MLXSW_ITEM32(tx, hdr, fid, 0x08, 16, 16); + +/* tx_hdr_type + * 0 - Data packets + * 6 - Control packets + */ +MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4); + #define MLXSW_TXHDR_LEN 0x10 #define MLXSW_TXHDR_VERSION_0 0 #define MLXSW_TXHDR_VERSION_1 1 |