From 84214ab4689f962b4bfc47fc9a5838d25ac4274d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Apr 2023 15:30:42 +0200 Subject: igc: Enable and fix RX hash usage by netstack When function igc_rx_hash() was introduced in v4.20 via commit 0507ef8a0372 ("igc: Add transmit and receive fastpath and interrupt handlers"), the hardware wasn't configured to provide RSS hash, thus it made sense to not enable net_device NETIF_F_RXHASH feature bit. The NIC hardware was configured to enable RSS hash info in v5.2 via commit 2121c2712f82 ("igc: Add multiple receive queues control supporting"), but that commit forgot to set the NETIF_F_RXHASH feature bit. The original implementation of igc_rx_hash() didn't extract the associated pkt_hash_type, but statically set PKT_HASH_TYPE_L3. The largest portions of this patch are about extracting the RSS Type from the hardware and mapping this to enum pkt_hash_types. This was based on Foxville i225 software user manual rev-1.3.1 and tested on Intel Ethernet Controller I225-LM (rev 03). For UDP it's worth noting that RSS (type) hashing has been disabled both for IPv4 and IPv6 (see IGC_MRQC_RSS_FIELD_IPV4_UDP + IGC_MRQC_RSS_FIELD_IPV6_UDP) because hardware RSS doesn't handle fragmented pkts well when enabled (can cause out-of-order). This results in PKT_HASH_TYPE_L3 for UDP packets, and the hash value doesn't include UDP port numbers. Not being PKT_HASH_TYPE_L4 has the effect that netstack will do a software based hash calc calling into flow_dissect, but only when code calls skb_get_hash(), which doesn't necessarily happen for local delivery.
For QA verification testing I wrote a small bpftrace prog: [0] https://github.com/xdp-project/xdp-project/blob/master/areas/hints/monitor_skb_hash_on_dev.bt Fixes: 2121c2712f82 ("igc: Add multiple receive queues control supporting") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: Song Yoong Siang Link: https://lore.kernel.org/bpf/168182464270.616355.11391652654430626584.stgit@firesoul --- drivers/net/ethernet/intel/igc/igc.h | 28 ++++++++++++++++++++++++++++ drivers/net/ethernet/intel/igc/igc_main.c | 31 +++++++++++++++++++++++++++---- 2 files changed, 55 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 34aebf00a512..f7f9e217e7b4 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -13,6 +13,7 @@ #include #include #include +#include #include "igc_hw.h" @@ -311,6 +312,33 @@ extern char igc_driver_name[]; #define IGC_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 #define IGC_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 +/* RX-desc Write-Back format RSS Type's */ +enum igc_rss_type_num { + IGC_RSS_TYPE_NO_HASH = 0, + IGC_RSS_TYPE_HASH_TCP_IPV4 = 1, + IGC_RSS_TYPE_HASH_IPV4 = 2, + IGC_RSS_TYPE_HASH_TCP_IPV6 = 3, + IGC_RSS_TYPE_HASH_IPV6_EX = 4, + IGC_RSS_TYPE_HASH_IPV6 = 5, + IGC_RSS_TYPE_HASH_TCP_IPV6_EX = 6, + IGC_RSS_TYPE_HASH_UDP_IPV4 = 7, + IGC_RSS_TYPE_HASH_UDP_IPV6 = 8, + IGC_RSS_TYPE_HASH_UDP_IPV6_EX = 9, + IGC_RSS_TYPE_MAX = 10, +}; +#define IGC_RSS_TYPE_MAX_TABLE 16 +#define IGC_RSS_TYPE_MASK GENMASK(3,0) /* 4-bits (3:0) = mask 0x0F */ + +/* igc_rss_type - Rx descriptor RSS type field */ +static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc) +{ + /* RSS Type 4-bits (3:0) number: 0-9 (above 9 is reserved) + * Accessing the same bits via u16 (wb.lower.lo_dword.hs_rss.pkt_info) + * is slightly slower than via u32 (wb.lower.lo_dword.data) + */ + return 
le32_get_bits(rx_desc->wb.lower.lo_dword.data, IGC_RSS_TYPE_MASK); +} + /* Interrupt defines */ #define IGC_START_ITR 648 /* ~6000 ints/sec */ #define IGC_4K_ITR 980 diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index ba49728be919..f6b03e0372e0 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1687,14 +1687,36 @@ static void igc_rx_checksum(struct igc_ring *ring, le32_to_cpu(rx_desc->wb.upper.status_error)); } +/* Mapping HW RSS Type to enum pkt_hash_types */ +static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = { + [IGC_RSS_TYPE_NO_HASH] = PKT_HASH_TYPE_L2, + [IGC_RSS_TYPE_HASH_TCP_IPV4] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_IPV4] = PKT_HASH_TYPE_L3, + [IGC_RSS_TYPE_HASH_TCP_IPV6] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_IPV6_EX] = PKT_HASH_TYPE_L3, + [IGC_RSS_TYPE_HASH_IPV6] = PKT_HASH_TYPE_L3, + [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_UDP_IPV4] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_UDP_IPV6] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4, + [10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ + [11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask */ + [12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisons */ + [13] = PKT_HASH_TYPE_NONE, + [14] = PKT_HASH_TYPE_NONE, + [15] = PKT_HASH_TYPE_NONE, +}; + static inline void igc_rx_hash(struct igc_ring *ring, union igc_adv_rx_desc *rx_desc, struct sk_buff *skb) { - if (ring->netdev->features & NETIF_F_RXHASH) - skb_set_hash(skb, - le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), - PKT_HASH_TYPE_L3); + if (ring->netdev->features & NETIF_F_RXHASH) { + u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); + u32 rss_type = igc_rss_type(rx_desc); + + skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]); + } } static void igc_rx_vlan(struct igc_ring *rx_ring, @@ -6551,6 +6573,7 @@ static int 
igc_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_TSO; netdev->features |= NETIF_F_TSO6; netdev->features |= NETIF_F_TSO_ECN; + netdev->features |= NETIF_F_RXHASH; netdev->features |= NETIF_F_RXCSUM; netdev->features |= NETIF_F_HW_CSUM; netdev->features |= NETIF_F_SCTP_CRC; -- cgit v1.2.3 From 73b7123de0cfa4f6609677e927ab02cb05b593c2 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Apr 2023 15:30:47 +0200 Subject: igc: Add igc_xdp_buff wrapper for xdp_buff in driver Driver specific metadata for XDP-hints kfuncs is propagated by tail-extending the struct xdp_buff with a locally scoped driver struct. Zero-Copy AF_XDP/XSK does similar tricks via struct xdp_buff_xsk. This xdp_buff_xsk struct contains a CB area (24 bytes) into which the locally scoped driver struct can be extended. The XSK_CHECK_PRIV_TYPE define catches size violations at build time. The changes needed for AF_XDP zero-copy in igc_clean_rx_irq_zc() are done in the next patch, because the member rx_desc isn't available at this point.
Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: Song Yoong Siang Link: https://lore.kernel.org/bpf/168182464779.616355.3761989884165609387.stgit@firesoul --- drivers/net/ethernet/intel/igc/igc.h | 5 +++++ drivers/net/ethernet/intel/igc/igc_main.c | 16 +++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index f7f9e217e7b4..76a5115aefc8 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -499,6 +499,11 @@ struct igc_rx_buffer { }; }; +/* context wrapper around xdp_buff to provide access to descriptor metadata */ +struct igc_xdp_buff { + struct xdp_buff xdp; +}; + struct igc_q_vector { struct igc_adapter *adapter; /* backlink */ void __iomem *itr_register; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index f6b03e0372e0..06bf8a7eec93 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2233,6 +2233,8 @@ static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count) if (!count) return ok; + XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff); + desc = IGC_RX_DESC(ring, i); bi = &ring->rx_buffer_info[i]; i -= ring->count; @@ -2517,8 +2519,8 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) union igc_adv_rx_desc *rx_desc; struct igc_rx_buffer *rx_buffer; unsigned int size, truesize; + struct igc_xdp_buff ctx; ktime_t timestamp = 0; - struct xdp_buff xdp; int pkt_offset = 0; void *pktbuf; @@ -2552,13 +2554,13 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) } if (!skb) { - xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq); - xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring), + xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq); + xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring), 
igc_rx_offset(rx_ring) + pkt_offset, size, true); - xdp_buff_clear_frags_flag(&xdp); + xdp_buff_clear_frags_flag(&ctx.xdp); - skb = igc_xdp_run_prog(adapter, &xdp); + skb = igc_xdp_run_prog(adapter, &ctx.xdp); } if (IS_ERR(skb)) { @@ -2580,9 +2582,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) } else if (skb) igc_add_rx_frag(rx_ring, rx_buffer, skb, size); else if (ring_uses_build_skb(rx_ring)) - skb = igc_build_skb(rx_ring, rx_buffer, &xdp); + skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp); else - skb = igc_construct_skb(rx_ring, rx_buffer, &xdp, + skb = igc_construct_skb(rx_ring, rx_buffer, &ctx.xdp, timestamp); /* exit if we failed to retrieve a buffer */ -- cgit v1.2.3 From 8416814fffa9cfa74c18da149f522dd9e1850987 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Apr 2023 15:30:52 +0200 Subject: igc: Add XDP hints kfuncs for RX hash This implements XDP hints kfunc for RX-hash (xmo_rx_hash). The HW rss hash type is handled via mapping table. This igc driver (default config) does L3 hashing for UDP packets (excludes UDP src/dest ports in hash calc). Meaning RSS hash type is L3 based. Tested that the igc_rss_type_num for UDP is either IGC_RSS_TYPE_HASH_IPV4 or IGC_RSS_TYPE_HASH_IPV6. This patch also updates AF_XDP zero-copy function igc_clean_rx_irq_zc() to use the xdp_buff wrapper struct igc_xdp_buff. 
Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: Song Yoong Siang Link: https://lore.kernel.org/bpf/168182465285.616355.2701740913376314790.stgit@firesoul --- drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_main.c | 53 +++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 76a5115aefc8..c609a2e648f8 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -502,6 +502,7 @@ struct igc_rx_buffer { /* context wrapper around xdp_buff to provide access to descriptor metadata */ struct igc_xdp_buff { struct xdp_buff xdp; + union igc_adv_rx_desc *rx_desc; }; struct igc_q_vector { diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 06bf8a7eec93..c18486f46085 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2559,6 +2559,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) igc_rx_offset(rx_ring) + pkt_offset, size, true); xdp_buff_clear_frags_flag(&ctx.xdp); + ctx.rx_desc = rx_desc; skb = igc_xdp_run_prog(adapter, &ctx.xdp); } @@ -2685,6 +2686,15 @@ static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, napi_gro_receive(&q_vector->napi, skb); } +static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp) +{ + /* xdp_buff pointer used by ZC code path is alloc as xdp_buff_xsk. 
The + * igc_xdp_buff shares its layout with xdp_buff_xsk and private + * igc_xdp_buff fields fall into xdp_buff_xsk->cb + */ + return (struct igc_xdp_buff *)xdp; +} + static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) { struct igc_adapter *adapter = q_vector->adapter; @@ -2703,6 +2713,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) while (likely(total_packets < budget)) { union igc_adv_rx_desc *desc; struct igc_rx_buffer *bi; + struct igc_xdp_buff *ctx; ktime_t timestamp = 0; unsigned int size; int res; @@ -2720,6 +2731,9 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) bi = &ring->rx_buffer_info[ntc]; + ctx = xsk_buff_to_igc_ctx(bi->xdp); + ctx->rx_desc = desc; + if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, bi->xdp->data); @@ -6475,6 +6489,44 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) return value; } +/* Mapping HW RSS Type to enum xdp_rss_hash_type */ +static enum xdp_rss_hash_type igc_xdp_rss_type[IGC_RSS_TYPE_MAX_TABLE] = { + [IGC_RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_L2, + [IGC_RSS_TYPE_HASH_TCP_IPV4] = XDP_RSS_TYPE_L4_IPV4_TCP, + [IGC_RSS_TYPE_HASH_IPV4] = XDP_RSS_TYPE_L3_IPV4, + [IGC_RSS_TYPE_HASH_TCP_IPV6] = XDP_RSS_TYPE_L4_IPV6_TCP, + [IGC_RSS_TYPE_HASH_IPV6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, + [IGC_RSS_TYPE_HASH_IPV6] = XDP_RSS_TYPE_L3_IPV6, + [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, + [IGC_RSS_TYPE_HASH_UDP_IPV4] = XDP_RSS_TYPE_L4_IPV4_UDP, + [IGC_RSS_TYPE_HASH_UDP_IPV6] = XDP_RSS_TYPE_L4_IPV6_UDP, + [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX, + [10] = XDP_RSS_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ + [11] = XDP_RSS_TYPE_NONE, /* keep array sized for SW bit-mask */ + [12] = XDP_RSS_TYPE_NONE, /* to handle future HW revisons */ + [13] = XDP_RSS_TYPE_NONE, + [14] = XDP_RSS_TYPE_NONE, + [15] = XDP_RSS_TYPE_NONE, +}; + +static int igc_xdp_rx_hash(const struct 
xdp_md *_ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type) +{ + const struct igc_xdp_buff *ctx = (void *)_ctx; + + if (!(ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)) + return -ENODATA; + + *hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss); + *rss_type = igc_xdp_rss_type[igc_rss_type(ctx->rx_desc)]; + + return 0; +} + +static const struct xdp_metadata_ops igc_xdp_metadata_ops = { + .xmo_rx_hash = igc_xdp_rx_hash, +}; + /** * igc_probe - Device Initialization Routine * @pdev: PCI device information struct @@ -6548,6 +6600,7 @@ static int igc_probe(struct pci_dev *pdev, hw->hw_addr = adapter->io_addr; netdev->netdev_ops = &igc_netdev_ops; + netdev->xdp_metadata_ops = &igc_xdp_metadata_ops; igc_ethtool_set_ops(netdev); netdev->watchdog_timeo = 5 * HZ; -- cgit v1.2.3 From d677266755c6e55c43b6755673a1eeae3d452e87 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Apr 2023 15:30:57 +0200 Subject: igc: Add XDP hints kfuncs for RX timestamp The NIC hardware RX timestamping mechanism adds an optional tailored header before the MAC header containing packet reception time. The header is optional, depending on the RX descriptor TSIP status bit (IGC_RXDADV_STAT_TSIP). When this bit is set, the driver adjusts the packet data start offset and extracts the timestamp. The timestamp needs to be extracted before invoking the XDP bpf_prog, because this area just before the packet is also accessible by XDP via the data_meta context pointer (and helper bpf_xdp_adjust_meta). Thus, an XDP bpf_prog can potentially overwrite this and corrupt data that we want to extract with the new kfunc for reading the timestamp.
Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: Song Yoong Siang Link: https://lore.kernel.org/bpf/168182465791.616355.2583922957423587914.stgit@firesoul --- drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_main.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index c609a2e648f8..18d4af934d8c 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -503,6 +503,7 @@ struct igc_rx_buffer { struct igc_xdp_buff { struct xdp_buff xdp; union igc_adv_rx_desc *rx_desc; + ktime_t rx_ts; /* data indication bit IGC_RXDADV_STAT_TSIP */ }; struct igc_q_vector { diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index c18486f46085..b78e0e6562c8 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2549,6 +2549,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) { timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, pktbuf); + ctx.rx_ts = timestamp; pkt_offset = IGC_TS_HDR_LEN; size -= IGC_TS_HDR_LEN; } @@ -2737,6 +2738,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, bi->xdp->data); + ctx->rx_ts = timestamp; bi->xdp->data += IGC_TS_HDR_LEN; @@ -6523,8 +6525,22 @@ static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, return 0; } +static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp) +{ + const struct igc_xdp_buff *ctx = (void *)_ctx; + + if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) { + *timestamp = ctx->rx_ts; + + return 0; + } + + return -ENODATA; +} + static const struct xdp_metadata_ops 
igc_xdp_metadata_ops = { .xmo_rx_hash = igc_xdp_rx_hash, + .xmo_rx_timestamp = igc_xdp_rx_timestamp, }; /** -- cgit v1.2.3 From 25c8c0d91ddb688f37e5f30a5342740b5a78e1cf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 May 2023 13:52:27 -1000 Subject: net: thunderx: Use alloc_ordered_workqueue() to create ordered workqueues BACKGROUND ========== When multiple work items are queued to a workqueue, their execution order is not guaranteed to match the queueing order. They may get executed in any order and simultaneously. When fully serialized execution - one by one in the queueing order - is needed, an ordered workqueue should be used which can be created with alloc_ordered_workqueue(). However, alloc_ordered_workqueue() was a later addition. Before it, an ordered workqueue could be obtained by creating an UNBOUND workqueue with @max_active==1. This originally was an implementation side-effect which was broken by 4c16bd327c74 ("workqueue: implement NUMA affinity for unbound workqueues"). Because there were users that depended on the ordered execution, 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") made the workqueue allocation path implicitly promote UNBOUND workqueues w/ @max_active==1 to ordered workqueues. While this has worked okay, overloading the UNBOUND allocation interface this way creates other issues. It's difficult to tell whether a given workqueue actually needs to be ordered and users that legitimately want a min concurrency level wq unexpectedly get an ordered one instead. With planned UNBOUND workqueue updates to improve execution locality and more prevalence of chiplet designs which can benefit from such improvements, this isn't a state we wanna be in forever. This patch series audits all callsites that create an UNBOUND workqueue w/ @max_active==1 and converts them to alloc_ordered_workqueue() as necessary. WHAT TO LOOK FOR ================ The conversions are from alloc_workqueue(WQ_UNBOUND | flags, 1, args...)
to alloc_ordered_workqueue(flags, args...) which don't cause any functional changes. If you know that fully ordered execution is not necessary, please let me know. I'll drop the conversion and instead add a comment noting the fact to reduce confusion while conversion is in progress. If you aren't fully sure, it's completely fine to let the conversion through. The behavior will stay exactly the same and we can always reconsider later. As there are follow-up workqueue core changes, I'd really appreciate it if the patch can be routed through the workqueue tree w/ your acks. Thanks. Signed-off-by: Tejun Heo Reviewed-by: Sunil Goutham Acked-by: Jakub Kicinski Cc: "David S. Miller" Cc: Eric Dumazet Cc: Paolo Abeni Cc: linux-arm-kernel@lists.infradead.org Cc: netdev@vger.kernel.org --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 7eb2ddbe9bad..a317feb8decb 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1126,8 +1126,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) } poll: - lmac->check_link = alloc_workqueue("check_link", WQ_UNBOUND | - WQ_MEM_RECLAIM, 1); + lmac->check_link = alloc_ordered_workqueue("check_link", WQ_MEM_RECLAIM); if (!lmac->check_link) return -ENOMEM; INIT_DELAYED_WORK(&lmac->dwork, bgx_poll_for_link); -- cgit v1.2.3 From 289f97467480266f9bd8cac7f1e05a478d523f79 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 May 2023 13:52:28 -1000 Subject: net: octeontx2: Use alloc_ordered_workqueue() to create ordered workqueues BACKGROUND ========== When multiple work items are queued to a workqueue, their execution order is not guaranteed to match the queueing order. They may get executed in any order and simultaneously.
When fully serialized execution - one by one in the queueing order - is needed, an ordered workqueue should be used which can be created with alloc_ordered_workqueue(). However, alloc_ordered_workqueue() was a later addition. Before it, an ordered workqueue could be obtained by creating an UNBOUND workqueue with @max_active==1. This originally was an implementation side-effect which was broken by 4c16bd327c74 ("workqueue: implement NUMA affinity for unbound workqueues"). Because there were users that depended on the ordered execution, 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered") made the workqueue allocation path implicitly promote UNBOUND workqueues w/ @max_active==1 to ordered workqueues. While this has worked okay, overloading the UNBOUND allocation interface this way creates other issues. It's difficult to tell whether a given workqueue actually needs to be ordered and users that legitimately want a min concurrency level wq unexpectedly get an ordered one instead. With planned UNBOUND workqueue updates to improve execution locality and more prevalence of chiplet designs which can benefit from such improvements, this isn't a state we wanna be in forever. This patch series audits all callsites that create an UNBOUND workqueue w/ @max_active==1 and converts them to alloc_ordered_workqueue() as necessary. WHAT TO LOOK FOR ================ The conversions are from alloc_workqueue(WQ_UNBOUND | flags, 1, args...) to alloc_ordered_workqueue(flags, args...) which don't cause any functional changes. If you know that fully ordered execution is not necessary, please let me know. I'll drop the conversion and instead add a comment noting the fact to reduce confusion while conversion is in progress. If you aren't fully sure, it's completely fine to let the conversion through. The behavior will stay exactly the same and we can always reconsider later.
As there are follow-up workqueue core changes, I'd really appreciate if the patch can be routed through the workqueue tree w/ your acks. Thanks. Signed-off-by: Tejun Heo Reviewed-by: Sunil Goutham Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Ratheesh Kannoth Cc: Srujana Challa Cc: Geetha sowjanya Cc: netdev@vger.kernel.org --- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 5 ++--- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 13 +++++-------- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 5 ++--- 3 files changed, 9 insertions(+), 14 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 9f673bda9dbd..0069e60afa3b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -3044,9 +3044,8 @@ static int rvu_flr_init(struct rvu *rvu) cfg | BIT_ULL(22)); } - rvu->flr_wq = alloc_workqueue("rvu_afpf_flr", - WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM, - 1); + rvu->flr_wq = alloc_ordered_workqueue("rvu_afpf_flr", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!rvu->flr_wq) return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 18284ad75157..74c49795dc82 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -271,8 +271,7 @@ static int otx2_pf_flr_init(struct otx2_nic *pf, int num_vfs) { int vf; - pf->flr_wq = alloc_workqueue("otx2_pf_flr_wq", - WQ_UNBOUND | WQ_HIGHPRI, 1); + pf->flr_wq = alloc_ordered_workqueue("otx2_pf_flr_wq", WQ_HIGHPRI); if (!pf->flr_wq) return -ENOMEM; @@ -593,9 +592,8 @@ static int otx2_pfvf_mbox_init(struct otx2_nic *pf, int numvfs) if (!pf->mbox_pfvf) return -ENOMEM; - pf->mbox_pfvf_wq = alloc_workqueue("otx2_pfvf_mailbox", - WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, 1); + 
pf->mbox_pfvf_wq = alloc_ordered_workqueue("otx2_pfvf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!pf->mbox_pfvf_wq) return -ENOMEM; @@ -1063,9 +1061,8 @@ static int otx2_pfaf_mbox_init(struct otx2_nic *pf) int err; mbox->pfvf = pf; - pf->mbox_wq = alloc_workqueue("otx2_pfaf_mailbox", - WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, 1); + pf->mbox_wq = alloc_ordered_workqueue("otx2_pfaf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!pf->mbox_wq) return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 53366dbfbf27..7baed6bb3b72 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -297,9 +297,8 @@ static int otx2vf_vfaf_mbox_init(struct otx2_nic *vf) int err; mbox->pfvf = vf; - vf->mbox_wq = alloc_workqueue("otx2_vfaf_mailbox", - WQ_UNBOUND | WQ_HIGHPRI | - WQ_MEM_RECLAIM, 1); + vf->mbox_wq = alloc_ordered_workqueue("otx2_vfaf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); if (!vf->mbox_wq) return -ENOMEM; -- cgit v1.2.3 From a731a43e8669a0b430c79a3e38890c27a5847a76 Mon Sep 17 00:00:00 2001 From: Yinjun Zhang Date: Tue, 9 May 2023 09:58:17 +0200 Subject: nfp: improve link modes reading process Avoid reading link modes from management firmware every time `ethtool_get_link_ksettings` is called; only communicate with management firmware when necessary, like we do for eth_table info. This change eases the situation where a large number of vlan sub-interfaces are created and their information is frequently requested by some monitoring process like PCP [1] through the ethtool ioctl.
[1] https://pcp.io Signed-off-by: Yinjun Zhang Acked-by: Simon Horman Signed-off-by: Louis Peens Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20230509075817.10566-1-louis.peens@corigine.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/netronome/nfp/nfp_net_ethtool.c | 32 +++++-------- .../net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h | 7 ++- .../ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c | 54 +++++++++++----------- 3 files changed, 45 insertions(+), 48 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index dfedb52b7e70..e75cbb287625 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -436,49 +436,41 @@ static void nfp_add_media_link_mode(struct nfp_port *port, struct nfp_eth_table_port *eth_port, struct ethtool_link_ksettings *cmd) { - u64 supported_modes[2], advertised_modes[2]; - struct nfp_eth_media_buf ethm = { - .eth_index = eth_port->eth_index, - }; - struct nfp_cpp *cpp = port->app->cpp; - - if (nfp_eth_read_media(cpp, &ethm)) { - bitmap_fill(port->speed_bitmap, NFP_SUP_SPEED_NUMBER); - return; - } - bitmap_zero(port->speed_bitmap, NFP_SUP_SPEED_NUMBER); - for (u32 i = 0; i < 2; i++) { - supported_modes[i] = le64_to_cpu(ethm.supported_modes[i]); - advertised_modes[i] = le64_to_cpu(ethm.advertised_modes[i]); - } - for (u32 i = 0; i < NFP_MEDIA_LINK_MODES_NUMBER; i++) { if (i < 64) { - if (supported_modes[0] & BIT_ULL(i)) { + if (eth_port->link_modes_supp[0] & BIT_ULL(i)) { __set_bit(nfp_eth_media_table[i].ethtool_link_mode, cmd->link_modes.supported); __set_bit(nfp_eth_media_table[i].speed, port->speed_bitmap); } - if (advertised_modes[0] & BIT_ULL(i)) + if (eth_port->link_modes_ad[0] & BIT_ULL(i)) __set_bit(nfp_eth_media_table[i].ethtool_link_mode, cmd->link_modes.advertising); } else { - if (supported_modes[1] & BIT_ULL(i - 64)) { + if
(eth_port->link_modes_supp[1] & BIT_ULL(i - 64)) { __set_bit(nfp_eth_media_table[i].ethtool_link_mode, cmd->link_modes.supported); __set_bit(nfp_eth_media_table[i].speed, port->speed_bitmap); } - if (advertised_modes[1] & BIT_ULL(i - 64)) + if (eth_port->link_modes_ad[1] & BIT_ULL(i - 64)) __set_bit(nfp_eth_media_table[i].ethtool_link_mode, cmd->link_modes.advertising); } } + + /* We take all speeds as supported when it fails to read + * link modes due to old management firmware that doesn't + * support link modes reading or error occurring, so that + * speed change of this port is allowed. + */ + if (bitmap_empty(port->speed_bitmap, NFP_SUP_SPEED_NUMBER)) + bitmap_fill(port->speed_bitmap, NFP_SUP_SPEED_NUMBER); } /** diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h index 781edc451bd4..6e044ac04917 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h @@ -196,6 +196,9 @@ enum nfp_ethtool_link_mode_list { * subports) * @ports.is_split: is interface part of a split port * @ports.fec_modes_supported: bitmap of FEC modes supported + * + * @ports.link_modes_supp: bitmap of link modes supported + * @ports.link_modes_ad: bitmap of link modes advertised */ struct nfp_eth_table { unsigned int count; @@ -235,6 +238,9 @@ struct nfp_eth_table { bool is_split; unsigned int fec_modes_supported; + + u64 link_modes_supp[2]; + u64 link_modes_ad[2]; } ports[]; }; @@ -313,7 +319,6 @@ struct nfp_eth_media_buf { }; int nfp_nsp_read_media(struct nfp_nsp *state, void *buf, unsigned int size); -int nfp_eth_read_media(struct nfp_cpp *cpp, struct nfp_eth_media_buf *ethm); #define NFP_NSP_VERSION_BUFSZ 1024 /* reasonable size, not in the ABI */ diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c index 570ac1bb2122..9d62085d772a 100644 --- 
a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c @@ -227,6 +227,30 @@ nfp_eth_calc_port_type(struct nfp_cpp *cpp, struct nfp_eth_table_port *entry) entry->port_type = PORT_DA; } +static void +nfp_eth_read_media(struct nfp_cpp *cpp, struct nfp_nsp *nsp, struct nfp_eth_table_port *entry) +{ + struct nfp_eth_media_buf ethm = { + .eth_index = entry->eth_index, + }; + unsigned int i; + int ret; + + if (!nfp_nsp_has_read_media(nsp)) + return; + + ret = nfp_nsp_read_media(nsp, &ethm, sizeof(ethm)); + if (ret) { + nfp_err(cpp, "Reading media link modes failed: %d\n", ret); + return; + } + + for (i = 0; i < 2; i++) { + entry->link_modes_supp[i] = le64_to_cpu(ethm.supported_modes[i]); + entry->link_modes_ad[i] = le64_to_cpu(ethm.advertised_modes[i]); + } +} + /** * nfp_eth_read_ports() - retrieve port information * @cpp: NFP CPP handle @@ -293,8 +317,10 @@ __nfp_eth_read_ports(struct nfp_cpp *cpp, struct nfp_nsp *nsp) &table->ports[j++]); nfp_eth_calc_port_geometry(cpp, table); - for (i = 0; i < table->count; i++) + for (i = 0; i < table->count; i++) { nfp_eth_calc_port_type(cpp, &table->ports[i]); + nfp_eth_read_media(cpp, nsp, &table->ports[i]); + } kfree(entries); @@ -647,29 +673,3 @@ int __nfp_eth_set_split(struct nfp_nsp *nsp, unsigned int lanes) return NFP_ETH_SET_BIT_CONFIG(nsp, NSP_ETH_RAW_PORT, NSP_ETH_PORT_LANES, lanes, NSP_ETH_CTRL_SET_LANES); } - -int nfp_eth_read_media(struct nfp_cpp *cpp, struct nfp_eth_media_buf *ethm) -{ - struct nfp_nsp *nsp; - int ret; - - nsp = nfp_nsp_open(cpp); - if (IS_ERR(nsp)) { - nfp_err(cpp, "Failed to access the NSP: %pe\n", nsp); - return PTR_ERR(nsp); - } - - if (!nfp_nsp_has_read_media(nsp)) { - nfp_warn(cpp, "Reading media link modes not supported.
Please update flash\n"); - ret = -EOPNOTSUPP; - goto exit_close_nsp; - } - - ret = nfp_nsp_read_media(nsp, ethm, sizeof(*ethm)); - if (ret) - nfp_err(cpp, "Reading media link modes failed: %pe\n", ERR_PTR(ret)); - -exit_close_nsp: - nfp_nsp_close(nsp); - return ret; -} -- cgit v1.2.3 From 3246627f11c56b4ec875b7225ba9b4fe0d53c271 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 8 May 2023 16:26:27 +0200 Subject: net: stmmac: Make stmmac_pltfr_remove() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function returns zero unconditionally. Change it to return void instead which simplifies some callers as error handling becomes unnecessary. The function is also used for some drivers as remove callback. Switch these to the .remove_new() callback. For some others no error can happen in the remove callback now, convert them to .remove_new(), too. Acked-by: Jernej Skrabec Reviewed-by: Simon Horman Reviewed-by: Martin Blumenstingl Reviewed-by: Michal Kubiak Signed-off-by: Uwe Kleine-König Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c | 9 +++------ drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c | 9 +++------ drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 5 ++--- drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 2 +- 
drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c | 4 +--- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 4 +--- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h | 2 +- 18 files changed, 23 insertions(+), 34 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c index 9354bf419112..58a7f08e8d78 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c @@ -141,7 +141,7 @@ MODULE_DEVICE_TABLE(of, anarion_dwmac_match); static struct platform_driver anarion_dwmac_driver = { .probe = anarion_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "anarion-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c index ef8f3a940938..ef1023930fd0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c @@ -87,7 +87,7 @@ MODULE_DEVICE_TABLE(of, dwmac_generic_match); static struct platform_driver dwmac_generic_driver = { .probe = dwmac_generic_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = STMMAC_RESOURCE_NAME, .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 7c228bd0d099..b9378a63f0e8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -376,7 +376,7 @@ MODULE_DEVICE_TABLE(of, imx_dwmac_match); static struct platform_driver imx_dwmac_driver = { .probe = imx_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "imx-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 378b4dd826bb..8063ba1c3ce8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -386,7 +386,7 @@ MODULE_DEVICE_TABLE(of, ingenic_mac_of_matches); static struct platform_driver ingenic_mac_driver = { .probe = ingenic_mac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "ingenic-mac", .pm = pm_ptr(&ingenic_mac_pm_ops), diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c index 06d287f104be..a5e639ab0b9e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c @@ -169,20 +169,17 @@ err_remove_config_dt: return ret; } -static int intel_eth_plat_remove(struct platform_device *pdev) +static void intel_eth_plat_remove(struct platform_device *pdev) { struct intel_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev); - int ret; - ret = stmmac_pltfr_remove(pdev); + stmmac_pltfr_remove(pdev); clk_disable_unprepare(dwmac->tx_clk); - - return ret; } static struct platform_driver intel_eth_plat_driver = { .probe = intel_eth_plat_probe, - .remove = intel_eth_plat_remove, + .remove_new = intel_eth_plat_remove, .driver = { .name = "intel-eth-plat", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index e888c8a9c830..e39406df8516 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -498,7 +498,7 @@ MODULE_DEVICE_TABLE(of, ipq806x_gmac_dwmac_match); static struct platform_driver ipq806x_gmac_dwmac_driver = { .probe = ipq806x_gmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "ipq806x-gmac-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git 
a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c index 9d77c647badd..18e84ba693a6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c @@ -83,7 +83,7 @@ MODULE_DEVICE_TABLE(of, lpc18xx_dwmac_match); static struct platform_driver lpc18xx_dwmac_driver = { .probe = lpc18xx_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "lpc18xx-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 9ae31e3dc821..73c1dfa7ecb1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -678,15 +678,12 @@ err_remove_config_dt: return ret; } -static int mediatek_dwmac_remove(struct platform_device *pdev) +static void mediatek_dwmac_remove(struct platform_device *pdev) { struct mediatek_dwmac_plat_data *priv_plat = get_stmmac_bsp_priv(&pdev->dev); - int ret; - ret = stmmac_pltfr_remove(pdev); + stmmac_pltfr_remove(pdev); mediatek_dwmac_clks_config(priv_plat, false); - - return ret; } static const struct of_device_id mediatek_dwmac_match[] = { @@ -701,7 +698,7 @@ MODULE_DEVICE_TABLE(of, mediatek_dwmac_match); static struct platform_driver mediatek_dwmac_driver = { .probe = mediatek_dwmac_probe, - .remove = mediatek_dwmac_remove, + .remove_new = mediatek_dwmac_remove, .driver = { .name = "dwmac-mediatek", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c index 16fb66a0ca72..7aa5e6bc04eb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c @@ -91,7 +91,7 @@ MODULE_DEVICE_TABLE(of, meson6_dwmac_match); static struct platform_driver meson6_dwmac_driver = { .probe = meson6_dwmac_probe, 
- .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "meson6-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index f6754e3643f3..92b16048f91c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -539,7 +539,7 @@ MODULE_DEVICE_TABLE(of, meson8b_dwmac_match); static struct platform_driver meson8b_dwmac_driver = { .probe = meson8b_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "meson8b-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c index 62a69a91ab22..42954020de2c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c @@ -231,7 +231,7 @@ MODULE_DEVICE_TABLE(of, oxnas_dwmac_match); static struct platform_driver oxnas_dwmac_driver = { .probe = oxnas_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "oxnas-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 16a8c361283b..494c22243259 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -668,16 +668,15 @@ err_mem: static int qcom_ethqos_remove(struct platform_device *pdev) { struct qcom_ethqos *ethqos; - int ret; ethqos = get_stmmac_bsp_priv(&pdev->dev); if (!ethqos) return -ENODEV; - ret = stmmac_pltfr_remove(pdev); + stmmac_pltfr_remove(pdev); ethqos_clks_config(ethqos, false); - return ret; + return 0; } static const struct of_device_id qcom_ethqos_match[] = { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 6b447d8f0bd8..6ee050300b31 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -524,7 +524,7 @@ MODULE_DEVICE_TABLE(of, socfpga_dwmac_match); static struct platform_driver socfpga_dwmac_driver = { .probe = socfpga_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "socfpga-dwmac", .pm = &socfpga_dwmac_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c index 4f51a7889642..d3a39d2fb3a9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c @@ -156,7 +156,7 @@ MODULE_DEVICE_TABLE(of, starfive_dwmac_match); static struct platform_driver starfive_dwmac_driver = { .probe = starfive_dwmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "starfive-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index fc3b0acc8f99..50963e91c347 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -179,7 +179,7 @@ MODULE_DEVICE_TABLE(of, sun7i_dwmac_match); static struct platform_driver sun7i_dwmac_driver = { .probe = sun7i_gmac_probe, - .remove = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "sun7i-dwmac", .pm = &stmmac_pltfr_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index c3f10a92b62b..d43da71eb1e1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -265,9 +265,7 @@ static int visconti_eth_dwmac_remove(struct platform_device *pdev) struct stmmac_priv 
*priv = netdev_priv(ndev); int err; - err = stmmac_pltfr_remove(pdev); - if (err < 0) - dev_err(&pdev->dev, "failed to remove platform: %d\n", err); + stmmac_pltfr_remove(pdev); err = visconti_eth_clock_remove(pdev); if (err < 0) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index eb0b2898daa3..3c6b55b60461 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -707,7 +707,7 @@ EXPORT_SYMBOL_GPL(stmmac_get_platform_resources); * Description: this function calls the main to free the net resources * and calls the platforms hook and release the resources (e.g. mem). */ -int stmmac_pltfr_remove(struct platform_device *pdev) +void stmmac_pltfr_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); @@ -719,8 +719,6 @@ int stmmac_pltfr_remove(struct platform_device *pdev) plat->exit(pdev, plat->bsp_priv); stmmac_remove_config_dt(pdev, plat); - - return 0; } EXPORT_SYMBOL_GPL(stmmac_pltfr_remove); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index 3fff3f59d73d..f7e457946681 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h @@ -19,7 +19,7 @@ void stmmac_remove_config_dt(struct platform_device *pdev, int stmmac_get_platform_resources(struct platform_device *pdev, struct stmmac_resources *stmmac_res); -int stmmac_pltfr_remove(struct platform_device *pdev); +void stmmac_pltfr_remove(struct platform_device *pdev); extern const struct dev_pm_ops stmmac_pltfr_pm_ops; static inline void *get_stmmac_bsp_priv(struct device *dev) -- cgit v1.2.3 From b9bc44fe068d6e44504f6f3eb03a325fd6843d60 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 8 May 2023 16:26:28 +0200 Subject: net: stmmac: 
dwmac-visconti: Make visconti_eth_clock_remove() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function returns zero unconditionally. Change it to return void instead which simplifies one caller as error handling becomes unnecessary. Signed-off-by: Uwe Kleine-König Reviewed-by: Simon Horman Acked-by: Nobuhiro Iwamatsu Reviewed-by: Michal Kubiak Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index d43da71eb1e1..56209af6243c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -198,7 +198,7 @@ static int visconti_eth_clock_probe(struct platform_device *pdev, return 0; } -static int visconti_eth_clock_remove(struct platform_device *pdev) +static void visconti_eth_clock_remove(struct platform_device *pdev) { struct visconti_eth *dwmac = get_stmmac_bsp_priv(&pdev->dev); struct net_device *ndev = platform_get_drvdata(pdev); @@ -206,8 +206,6 @@ static int visconti_eth_clock_remove(struct platform_device *pdev) clk_disable_unprepare(dwmac->phy_ref_clk); clk_disable_unprepare(priv->plat->stmmac_clk); - - return 0; } static int visconti_eth_dwmac_probe(struct platform_device *pdev) @@ -263,17 +261,14 @@ static int visconti_eth_dwmac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); - int err; stmmac_pltfr_remove(pdev); - err = visconti_eth_clock_remove(pdev); - if (err < 0) - dev_err(&pdev->dev, "failed to remove clock: %d\n", err); + visconti_eth_clock_remove(pdev); stmmac_remove_config_dt(pdev, priv->plat); - return err; + return 0; } static const struct of_device_id visconti_eth_dwmac_match[] = { 
-- cgit v1.2.3 From c5f3ffe35cc92cce6292b5304409f3edc9281d66 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 8 May 2023 16:26:29 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: Drop an if with an always false condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The remove callback is only ever called after .probe() returned successfully. After that get_stmmac_bsp_priv() always returns non-NULL. Side note: The early exit would also be a bug because the return value of qcom_ethqos_remove() is ignored by the device core and the device is unbound unconditionally. So exiting early resulted in a dangerous resource leak as all devm allocated resources (some memory and the register mappings) are freed but the network device stays around. Using the network device afterwards probably oopses. Reviewed-by: Simon Horman Signed-off-by: Uwe Kleine-König Reviewed-by: Michal Kubiak Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 494c22243259..bf17c6c8f2eb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -667,11 +667,7 @@ err_mem: static int qcom_ethqos_remove(struct platform_device *pdev) { - struct qcom_ethqos *ethqos; - - ethqos = get_stmmac_bsp_priv(&pdev->dev); - if (!ethqos) - return -ENODEV; + struct qcom_ethqos *ethqos = get_stmmac_bsp_priv(&pdev->dev); stmmac_pltfr_remove(pdev); ethqos_clks_config(ethqos, false); -- cgit v1.2.3 From f4d05c41976170e29aac1bc51df22fb6989d1427 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 8 May 2023 16:26:30 +0200 Subject: net: stmmac: dwmac-visconti: Convert to platform remove callback returning void MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Reviewed-by: Simon Horman Signed-off-by: Uwe Kleine-König Acked-by: Nobuhiro Iwamatsu Reviewed-by: Michal Kubiak Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index 56209af6243c..acbb284be174 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -257,7 +257,7 @@ remove_config: return ret; } -static int visconti_eth_dwmac_remove(struct platform_device *pdev) +static void visconti_eth_dwmac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); @@ -267,8 +267,6 @@ static int visconti_eth_dwmac_remove(struct platform_device *pdev) visconti_eth_clock_remove(pdev); stmmac_remove_config_dt(pdev, priv->plat); - - return 0; } static const struct of_device_id visconti_eth_dwmac_match[] = { @@ -279,7 +277,7 @@ MODULE_DEVICE_TABLE(of, visconti_eth_dwmac_match); static struct platform_driver visconti_eth_dwmac_driver = { .probe = visconti_eth_dwmac_probe, - .remove = visconti_eth_dwmac_remove, + .remove_new = visconti_eth_dwmac_remove, .driver = { .name = 
"visconti-eth-dwmac", .of_match_table = visconti_eth_dwmac_match, -- cgit v1.2.3 From 360cd89064b6e466b046680e79fd833325225a3f Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 8 May 2023 16:26:31 +0200 Subject: net: stmmac: dwmac-dwc-qos-eth: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Reviewed-by: Simon Horman Signed-off-by: Uwe Kleine-König Reviewed-by: Michal Kubiak Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 18acf7dd74e5..9f88530c5e8c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -464,7 +464,7 @@ remove_config: return ret; } -static int dwc_eth_dwmac_remove(struct platform_device *pdev) +static void dwc_eth_dwmac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); @@ -477,8 +477,6 @@ static int dwc_eth_dwmac_remove(struct platform_device *pdev) data->remove(pdev); stmmac_remove_config_dt(pdev, priv->plat); - - return 0; } static const struct of_device_id dwc_eth_dwmac_match[] 
= { @@ -490,7 +488,7 @@ MODULE_DEVICE_TABLE(of, dwc_eth_dwmac_match); static struct platform_driver dwc_eth_dwmac_driver = { .probe = dwc_eth_dwmac_probe, - .remove = dwc_eth_dwmac_remove, + .remove_new = dwc_eth_dwmac_remove, .driver = { .name = "dwc-eth-dwmac", .pm = &stmmac_pltfr_pm_ops, -- cgit v1.2.3 From 5580b559a80a3559a3b395a053f83e196aa801af Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 8 May 2023 16:26:32 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. 
Reviewed-by: Simon Horman Signed-off-by: Uwe Kleine-König Reviewed-by: Bhupesh Sharma Reviewed-by: Michal Kubiak Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index bf17c6c8f2eb..1db97a5209c4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -665,14 +665,12 @@ err_mem: return ret; } -static int qcom_ethqos_remove(struct platform_device *pdev) +static void qcom_ethqos_remove(struct platform_device *pdev) { struct qcom_ethqos *ethqos = get_stmmac_bsp_priv(&pdev->dev); stmmac_pltfr_remove(pdev); ethqos_clks_config(ethqos, false); - - return 0; } static const struct of_device_id qcom_ethqos_match[] = { @@ -685,7 +683,7 @@ MODULE_DEVICE_TABLE(of, qcom_ethqos_match); static struct platform_driver qcom_ethqos_driver = { .probe = qcom_ethqos_probe, - .remove = qcom_ethqos_remove, + .remove_new = qcom_ethqos_remove, .driver = { .name = "qcom-ethqos", .pm = &stmmac_pltfr_pm_ops, -- cgit v1.2.3 From 903cc461c901d854546096e01aa4bf32d2438c94 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 8 May 2023 16:26:33 +0200 Subject: net: stmmac: dwmac-rk: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. 
Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Reviewed-by: Simon Horman Signed-off-by: Uwe Kleine-König Reviewed-by: Michal Kubiak Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 4ea31ccf24d0..d81591b470a2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1863,15 +1863,13 @@ err_remove_config_dt: return ret; } -static int rk_gmac_remove(struct platform_device *pdev) +static void rk_gmac_remove(struct platform_device *pdev) { struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(&pdev->dev); stmmac_dvr_remove(&pdev->dev); rk_gmac_powerdown(bsp_priv); - - return 0; } #ifdef CONFIG_PM_SLEEP @@ -1925,7 +1923,7 @@ MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match); static struct platform_driver rk_gmac_dwmac_driver = { .probe = rk_gmac_probe, - .remove = rk_gmac_remove, + .remove_new = rk_gmac_remove, .driver = { .name = "rk_gmac-dwmac", .pm = &rk_gmac_pm_ops, -- cgit v1.2.3 From b394982a10d93d772c962fb18f9a5f8c24f1d351 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 8 May 2023 16:26:34 +0200 Subject: net: stmmac: dwmac-sti: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. 
Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Reviewed-by: Simon Horman Signed-off-by: Uwe Kleine-König Reviewed-by: Michal Kubiak Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index 465ce66ef9c1..dcbb17c4f07a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -317,15 +317,13 @@ err_remove_config_dt: return ret; } -static int sti_dwmac_remove(struct platform_device *pdev) +static void sti_dwmac_remove(struct platform_device *pdev) { struct sti_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev); stmmac_dvr_remove(&pdev->dev); clk_disable_unprepare(dwmac->clk); - - return 0; } #ifdef CONFIG_PM_SLEEP @@ -365,7 +363,7 @@ MODULE_DEVICE_TABLE(of, sti_dwmac_match); static struct platform_driver sti_dwmac_driver = { .probe = sti_dwmac_probe, - .remove = sti_dwmac_remove, + .remove_new = sti_dwmac_remove, .driver = { .name = "sti-dwmac", .pm = &sti_dwmac_pm_ops, -- cgit v1.2.3 From fec3f552140ec5d10e3fb8eecbf8fd7f80476d02 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 8 May 2023 16:26:35 +0200 Subject: net: stmmac: dwmac-stm32: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. 
Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Reviewed-by: Simon Horman Signed-off-by: Uwe Kleine-König Reviewed-by: Michal Kubiak Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 0616b3a04ff3..bdb4de59a672 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -417,7 +417,7 @@ err_remove_config_dt: return ret; } -static int stm32_dwmac_remove(struct platform_device *pdev) +static void stm32_dwmac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); @@ -431,8 +431,6 @@ static int stm32_dwmac_remove(struct platform_device *pdev) dev_pm_clear_wake_irq(&pdev->dev); device_init_wakeup(&pdev->dev, false); } - - return 0; } static int stm32mp1_suspend(struct stm32_dwmac *dwmac) @@ -528,7 +526,7 @@ MODULE_DEVICE_TABLE(of, stm32_dwmac_match); static struct platform_driver stm32_dwmac_driver = { .probe = stm32_dwmac_probe, - .remove = stm32_dwmac_remove, + .remove_new = stm32_dwmac_remove, .driver = { .name = "stm32-dwmac", .pm = &stm32_dwmac_pm_ops, -- cgit v1.2.3 From cc708d4ed7b3d8d93c0e7f64c14a56d2b545e37a Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 8 May 2023 16:26:36 +0200 Subject: net: stmmac: dwmac-sun8i: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. 
However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Acked-by: Jernej Skrabec Reviewed-by: Simon Horman Signed-off-by: Uwe Kleine-König Reviewed-by: Michal Kubiak Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index c2c592ba0eb8..1e714380d125 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1294,7 +1294,7 @@ dwmac_deconfig: return ret; } -static int sun8i_dwmac_remove(struct platform_device *pdev) +static void sun8i_dwmac_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct stmmac_priv *priv = netdev_priv(ndev); @@ -1309,8 +1309,6 @@ static int sun8i_dwmac_remove(struct platform_device *pdev) stmmac_pltfr_remove(pdev); sun8i_dwmac_unset_syscon(gmac); - - return 0; } static void sun8i_dwmac_shutdown(struct platform_device *pdev) @@ -1341,7 +1339,7 @@ MODULE_DEVICE_TABLE(of, sun8i_dwmac_match); static struct platform_driver sun8i_dwmac_driver = { .probe = sun8i_dwmac_probe, - .remove = sun8i_dwmac_remove, + .remove_new = sun8i_dwmac_remove, .shutdown = sun8i_dwmac_shutdown, .driver = { .name = "dwmac-sun8i", -- cgit v1.2.3 From a86f8601c8f067d38bdb972885df4c0f5ed82738 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 8 May 2023 16:26:37 +0200 Subject: net: stmmac: dwmac-tegra: Convert to platform remove callback returning void MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit The .remove() callback for a platform driver returns an int which makes many driver authors wrongly assume it's possible to do error handling by returning an error code. However the value returned is (mostly) ignored and this typically results in resource leaks. To improve here there is a quest to make the remove callback return void. In the first step of this quest all drivers are converted to .remove_new() which already returns void. Trivially convert this driver from always returning zero in the remove callback to the void returning variant. Reviewed-by: Simon Horman Acked-by: Thierry Reding Signed-off-by: Uwe Kleine-König Reviewed-by: Michal Kubiak Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c index bdf990cf2f31..f8367c5b490b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c @@ -353,15 +353,13 @@ disable_clks: return err; } -static int tegra_mgbe_remove(struct platform_device *pdev) +static void tegra_mgbe_remove(struct platform_device *pdev) { struct tegra_mgbe *mgbe = get_stmmac_bsp_priv(&pdev->dev); clk_bulk_disable_unprepare(ARRAY_SIZE(mgbe_clks), mgbe->clks); stmmac_pltfr_remove(pdev); - - return 0; } static const struct of_device_id tegra_mgbe_match[] = { @@ -374,7 +372,7 @@ static SIMPLE_DEV_PM_OPS(tegra_mgbe_pm_ops, tegra_mgbe_suspend, tegra_mgbe_resum static struct platform_driver tegra_mgbe_driver = { .probe = tegra_mgbe_probe, - .remove = tegra_mgbe_remove, + .remove_new = tegra_mgbe_remove, .driver = { .name = "tegra-mgbe", .pm = &tegra_mgbe_pm_ops, -- cgit v1.2.3 From af8eacf2b42e0a736a7a2a1379fb6c0b7fd66da4 Mon Sep 17 00:00:00 2001 From: Teoh Ji Sheng Date: Mon, 8 May 2023 22:43:40 +0800 
Subject: net: stmmac: xgmac: add ethtool per-queue irq statistic support Commit af9bf70154eb ("net: stmmac: add ethtool per-queue irq statistic support") introduced ethtool per-queue statistics support to display number of interrupts generated by DMA tx and DMA rx for DWMAC4 core. This patch extends the support to XGMAC core. Signed-off-by: Teoh Ji Sheng Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230508144339.3014402-1-ji.sheng.teoh@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index dfd53264e036..070bd912580b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -368,10 +368,12 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, if (likely(intr_status & XGMAC_RI)) { x->rx_normal_irq_n++; + x->rxq_stats[chan].rx_normal_irq_n++; ret |= handle_rx; } if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { x->tx_normal_irq_n++; + x->txq_stats[chan].tx_normal_irq_n++; ret |= handle_tx; } } -- cgit v1.2.3 From 011be872643446a9b7c4485cfc8b5f50b0f93a13 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 9 May 2023 09:26:43 +0200 Subject: net: lan966x: Add ES0 VCAP model Provide ES0 (egress stage 0) VCAP model for lan966x. This provides rewriting functionality in the egress path. Signed-off-by: Horatiu Vultur Signed-off-by: David S. 
Miller --- .../microchip/lan966x/lan966x_vcap_ag_api.c | 264 ++++++++++++++++++++- drivers/net/ethernet/microchip/vcap/vcap_ag_api.h | 67 +++--- 2 files changed, 301 insertions(+), 30 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c index 66400a082d02..fb6851b94528 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_ag_api.c @@ -2121,6 +2121,69 @@ static const struct vcap_field is2_smac_sip6_keyfield[] = { }, }; +static const struct vcap_field es0_vid_keyfield[] = { + [VCAP_KF_IF_EGR_PORT_NO] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 4, + }, + [VCAP_KF_IF_IGR_PORT] = { + .type = VCAP_FIELD_U32, + .offset = 4, + .width = 4, + }, + [VCAP_KF_ISDX_GT0_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 8, + .width = 1, + }, + [VCAP_KF_ISDX_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 9, + .width = 8, + }, + [VCAP_KF_L2_MC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 17, + .width = 1, + }, + [VCAP_KF_L2_BC_IS] = { + .type = VCAP_FIELD_BIT, + .offset = 18, + .width = 1, + }, + [VCAP_KF_8021Q_VID_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 19, + .width = 12, + }, + [VCAP_KF_8021Q_DEI_CLS] = { + .type = VCAP_FIELD_BIT, + .offset = 31, + .width = 1, + }, + [VCAP_KF_8021Q_PCP_CLS] = { + .type = VCAP_FIELD_U32, + .offset = 32, + .width = 3, + }, + [VCAP_KF_L3_DPL_CLS] = { + .type = VCAP_FIELD_BIT, + .offset = 35, + .width = 1, + }, + [VCAP_KF_RTP_ID] = { + .type = VCAP_FIELD_U32, + .offset = 36, + .width = 10, + }, + [VCAP_KF_PDU_TYPE] = { + .type = VCAP_FIELD_U32, + .offset = 46, + .width = 4, + }, +}; + /* keyfield_set */ static const struct vcap_set is1_keyfield_set[] = { [VCAP_KFS_NORMAL] = { @@ -2228,6 +2291,14 @@ static const struct vcap_set is2_keyfield_set[] = { }, }; +static const struct vcap_set es0_keyfield_set[] = { + [VCAP_KFS_VID] = { 
+ .type_id = -1, + .sw_per_item = 1, + .sw_cnt = 1, + }, +}; + /* keyfield_set map */ static const struct vcap_field *is1_keyfield_set_map[] = { [VCAP_KFS_NORMAL] = is1_normal_keyfield, @@ -2255,6 +2326,10 @@ static const struct vcap_field *is2_keyfield_set_map[] = { [VCAP_KFS_SMAC_SIP6] = is2_smac_sip6_keyfield, }; +static const struct vcap_field *es0_keyfield_set_map[] = { + [VCAP_KFS_VID] = es0_vid_keyfield, +}; + /* keyfield_set map sizes */ static int is1_keyfield_set_map_size[] = { [VCAP_KFS_NORMAL] = ARRAY_SIZE(is1_normal_keyfield), @@ -2282,6 +2357,10 @@ static int is2_keyfield_set_map_size[] = { [VCAP_KFS_SMAC_SIP6] = ARRAY_SIZE(is2_smac_sip6_keyfield), }; +static int es0_keyfield_set_map_size[] = { + [VCAP_KFS_VID] = ARRAY_SIZE(es0_vid_keyfield), +}; + /* actionfields */ static const struct vcap_field is1_s1_actionfield[] = { [VCAP_AF_TYPE] = { @@ -2522,6 +2601,94 @@ static const struct vcap_field is2_smac_sip_actionfield[] = { }, }; +static const struct vcap_field es0_vid_actionfield[] = { + [VCAP_AF_PUSH_OUTER_TAG] = { + .type = VCAP_FIELD_U32, + .offset = 0, + .width = 2, + }, + [VCAP_AF_PUSH_INNER_TAG] = { + .type = VCAP_FIELD_BIT, + .offset = 2, + .width = 1, + }, + [VCAP_AF_TAG_A_TPID_SEL] = { + .type = VCAP_FIELD_U32, + .offset = 3, + .width = 2, + }, + [VCAP_AF_TAG_A_VID_SEL] = { + .type = VCAP_FIELD_BIT, + .offset = 5, + .width = 1, + }, + [VCAP_AF_TAG_A_PCP_SEL] = { + .type = VCAP_FIELD_U32, + .offset = 6, + .width = 2, + }, + [VCAP_AF_TAG_A_DEI_SEL] = { + .type = VCAP_FIELD_U32, + .offset = 8, + .width = 2, + }, + [VCAP_AF_TAG_B_TPID_SEL] = { + .type = VCAP_FIELD_U32, + .offset = 10, + .width = 2, + }, + [VCAP_AF_TAG_B_VID_SEL] = { + .type = VCAP_FIELD_BIT, + .offset = 12, + .width = 1, + }, + [VCAP_AF_TAG_B_PCP_SEL] = { + .type = VCAP_FIELD_U32, + .offset = 13, + .width = 2, + }, + [VCAP_AF_TAG_B_DEI_SEL] = { + .type = VCAP_FIELD_U32, + .offset = 15, + .width = 2, + }, + [VCAP_AF_VID_A_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 17, + 
.width = 12, + }, + [VCAP_AF_PCP_A_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 29, + .width = 3, + }, + [VCAP_AF_DEI_A_VAL] = { + .type = VCAP_FIELD_BIT, + .offset = 32, + .width = 1, + }, + [VCAP_AF_VID_B_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 33, + .width = 12, + }, + [VCAP_AF_PCP_B_VAL] = { + .type = VCAP_FIELD_U32, + .offset = 45, + .width = 3, + }, + [VCAP_AF_DEI_B_VAL] = { + .type = VCAP_FIELD_BIT, + .offset = 48, + .width = 1, + }, + [VCAP_AF_ESDX] = { + .type = VCAP_FIELD_U32, + .offset = 49, + .width = 8, + }, +}; + /* actionfield_set */ static const struct vcap_set is1_actionfield_set[] = { [VCAP_AFS_S1] = { @@ -2544,6 +2711,14 @@ static const struct vcap_set is2_actionfield_set[] = { }, }; +static const struct vcap_set es0_actionfield_set[] = { + [VCAP_AFS_VID] = { + .type_id = -1, + .sw_per_item = 1, + .sw_cnt = 1, + }, +}; + /* actionfield_set map */ static const struct vcap_field *is1_actionfield_set_map[] = { [VCAP_AFS_S1] = is1_s1_actionfield, @@ -2554,6 +2729,10 @@ static const struct vcap_field *is2_actionfield_set_map[] = { [VCAP_AFS_SMAC_SIP] = is2_smac_sip_actionfield, }; +static const struct vcap_field *es0_actionfield_set_map[] = { + [VCAP_AFS_VID] = es0_vid_actionfield, +}; + /* actionfield_set map size */ static int is1_actionfield_set_map_size[] = { [VCAP_AFS_S1] = ARRAY_SIZE(is1_s1_actionfield), @@ -2564,6 +2743,10 @@ static int is2_actionfield_set_map_size[] = { [VCAP_AFS_SMAC_SIP] = ARRAY_SIZE(is2_smac_sip_actionfield), }; +static int es0_actionfield_set_map_size[] = { + [VCAP_AFS_VID] = ARRAY_SIZE(es0_vid_actionfield), +}; + /* Type Groups */ static const struct vcap_typegroup is1_x4_keyfield_set_typegroups[] = { { @@ -2659,6 +2842,10 @@ static const struct vcap_typegroup is2_x1_keyfield_set_typegroups[] = { {} }; +static const struct vcap_typegroup es0_x1_keyfield_set_typegroups[] = { + {} +}; + static const struct vcap_typegroup *is1_keyfield_set_typegroups[] = { [4] = is1_x4_keyfield_set_typegroups, [2] = 
is1_x2_keyfield_set_typegroups, @@ -2673,6 +2860,11 @@ static const struct vcap_typegroup *is2_keyfield_set_typegroups[] = { [5] = NULL, }; +static const struct vcap_typegroup *es0_keyfield_set_typegroups[] = { + [1] = es0_x1_keyfield_set_typegroups, + [2] = NULL, +}; + static const struct vcap_typegroup is1_x1_actionfield_set_typegroups[] = { {} }; @@ -2700,6 +2892,10 @@ static const struct vcap_typegroup is2_x1_actionfield_set_typegroups[] = { {} }; +static const struct vcap_typegroup es0_x1_actionfield_set_typegroups[] = { + {} +}; + static const struct vcap_typegroup *is1_actionfield_set_typegroups[] = { [1] = is1_x1_actionfield_set_typegroups, [5] = NULL, @@ -2711,6 +2907,11 @@ static const struct vcap_typegroup *is2_actionfield_set_typegroups[] = { [5] = NULL, }; +static const struct vcap_typegroup *es0_actionfield_set_typegroups[] = { + [1] = es0_x1_actionfield_set_typegroups, + [2] = NULL, +}; + /* Keyfieldset names */ static const char * const vcap_keyfield_set_names[] = { [VCAP_KFS_NO_VALUE] = "(None)", @@ -2743,6 +2944,7 @@ static const char * const vcap_keyfield_set_names[] = { [VCAP_KFS_RT] = "VCAP_KFS_RT", [VCAP_KFS_SMAC_SIP4] = "VCAP_KFS_SMAC_SIP4", [VCAP_KFS_SMAC_SIP6] = "VCAP_KFS_SMAC_SIP6", + [VCAP_KFS_VID] = "VCAP_KFS_VID", }; /* Actionfieldset names */ @@ -2751,9 +2953,11 @@ static const char * const vcap_actionfield_set_names[] = { [VCAP_AFS_BASE_TYPE] = "VCAP_AFS_BASE_TYPE", [VCAP_AFS_CLASSIFICATION] = "VCAP_AFS_CLASSIFICATION", [VCAP_AFS_CLASS_REDUCED] = "VCAP_AFS_CLASS_REDUCED", + [VCAP_AFS_ES0] = "VCAP_AFS_ES0", [VCAP_AFS_FULL] = "VCAP_AFS_FULL", [VCAP_AFS_S1] = "VCAP_AFS_S1", [VCAP_AFS_SMAC_SIP] = "VCAP_AFS_SMAC_SIP", + [VCAP_AFS_VID] = "VCAP_AFS_VID", }; /* Keyfield names */ @@ -2774,6 +2978,7 @@ static const char * const vcap_keyfield_names[] = { [VCAP_KF_8021Q_PCP1] = "8021Q_PCP1", [VCAP_KF_8021Q_PCP2] = "8021Q_PCP2", [VCAP_KF_8021Q_PCP_CLS] = "8021Q_PCP_CLS", + [VCAP_KF_8021Q_TPID] = "8021Q_TPID", [VCAP_KF_8021Q_TPID0] = "8021Q_TPID0", 
[VCAP_KF_8021Q_TPID1] = "8021Q_TPID1", [VCAP_KF_8021Q_TPID2] = "8021Q_TPID2", @@ -2799,6 +3004,7 @@ static const char * const vcap_keyfield_names[] = { [VCAP_KF_HOST_MATCH] = "HOST_MATCH", [VCAP_KF_IF_EGR_PORT_MASK] = "IF_EGR_PORT_MASK", [VCAP_KF_IF_EGR_PORT_MASK_RNG] = "IF_EGR_PORT_MASK_RNG", + [VCAP_KF_IF_EGR_PORT_NO] = "IF_EGR_PORT_NO", [VCAP_KF_IF_IGR_PORT] = "IF_IGR_PORT", [VCAP_KF_IF_IGR_PORT_MASK] = "IF_IGR_PORT_MASK", [VCAP_KF_IF_IGR_PORT_MASK_L3] = "IF_IGR_PORT_MASK_L3", @@ -2873,7 +3079,9 @@ static const char * const vcap_keyfield_names[] = { [VCAP_KF_OAM_OPCODE] = "OAM_OPCODE", [VCAP_KF_OAM_VER] = "OAM_VER", [VCAP_KF_OAM_Y1731_IS] = "OAM_Y1731_IS", + [VCAP_KF_PDU_TYPE] = "PDU_TYPE", [VCAP_KF_PROT_ACTIVE] = "PROT_ACTIVE", + [VCAP_KF_RTP_ID] = "RTP_ID", [VCAP_KF_RT_FRMID] = "RT_FRMID", [VCAP_KF_RT_TYPE] = "RT_TYPE", [VCAP_KF_RT_VLAN_IDX] = "RT_VLAN_IDX", @@ -2891,18 +3099,25 @@ static const char * const vcap_actionfield_names[] = { [VCAP_AF_COPY_PORT_NUM] = "COPY_PORT_NUM", [VCAP_AF_COPY_QUEUE_NUM] = "COPY_QUEUE_NUM", [VCAP_AF_CPU_COPY_ENA] = "CPU_COPY_ENA", + [VCAP_AF_CPU_QU] = "CPU_QU", [VCAP_AF_CPU_QUEUE_NUM] = "CPU_QUEUE_NUM", [VCAP_AF_CUSTOM_ACE_TYPE_ENA] = "CUSTOM_ACE_TYPE_ENA", + [VCAP_AF_DEI_A_VAL] = "DEI_A_VAL", + [VCAP_AF_DEI_B_VAL] = "DEI_B_VAL", + [VCAP_AF_DEI_C_VAL] = "DEI_C_VAL", [VCAP_AF_DEI_ENA] = "DEI_ENA", [VCAP_AF_DEI_VAL] = "DEI_VAL", [VCAP_AF_DLR_SEL] = "DLR_SEL", [VCAP_AF_DP_ENA] = "DP_ENA", [VCAP_AF_DP_VAL] = "DP_VAL", [VCAP_AF_DSCP_ENA] = "DSCP_ENA", + [VCAP_AF_DSCP_SEL] = "DSCP_SEL", [VCAP_AF_DSCP_VAL] = "DSCP_VAL", [VCAP_AF_ES2_REW_CMD] = "ES2_REW_CMD", + [VCAP_AF_ESDX] = "ESDX", [VCAP_AF_FWD_KILL_ENA] = "FWD_KILL_ENA", [VCAP_AF_FWD_MODE] = "FWD_MODE", + [VCAP_AF_FWD_SEL] = "FWD_SEL", [VCAP_AF_HIT_ME_ONCE] = "HIT_ME_ONCE", [VCAP_AF_HOST_MATCH] = "HOST_MATCH", [VCAP_AF_IGNORE_PIPELINE_CTRL] = "IGNORE_PIPELINE_CTRL", @@ -2912,6 +3127,7 @@ static const char * const vcap_actionfield_names[] = { [VCAP_AF_ISDX_ENA] = "ISDX_ENA", 
[VCAP_AF_ISDX_REPLACE_ENA] = "ISDX_REPLACE_ENA", [VCAP_AF_ISDX_VAL] = "ISDX_VAL", + [VCAP_AF_LOOP_ENA] = "LOOP_ENA", [VCAP_AF_LRN_DIS] = "LRN_DIS", [VCAP_AF_MAP_IDX] = "MAP_IDX", [VCAP_AF_MAP_KEY] = "MAP_KEY", @@ -2928,15 +3144,23 @@ static const char * const vcap_actionfield_names[] = { [VCAP_AF_OAM_SEL] = "OAM_SEL", [VCAP_AF_PAG_OVERRIDE_MASK] = "PAG_OVERRIDE_MASK", [VCAP_AF_PAG_VAL] = "PAG_VAL", + [VCAP_AF_PCP_A_VAL] = "PCP_A_VAL", + [VCAP_AF_PCP_B_VAL] = "PCP_B_VAL", + [VCAP_AF_PCP_C_VAL] = "PCP_C_VAL", [VCAP_AF_PCP_ENA] = "PCP_ENA", [VCAP_AF_PCP_VAL] = "PCP_VAL", + [VCAP_AF_PIPELINE_ACT] = "PIPELINE_ACT", [VCAP_AF_PIPELINE_FORCE_ENA] = "PIPELINE_FORCE_ENA", [VCAP_AF_PIPELINE_PT] = "PIPELINE_PT", [VCAP_AF_POLICE_ENA] = "POLICE_ENA", [VCAP_AF_POLICE_IDX] = "POLICE_IDX", [VCAP_AF_POLICE_REMARK] = "POLICE_REMARK", [VCAP_AF_POLICE_VCAP_ONLY] = "POLICE_VCAP_ONLY", + [VCAP_AF_POP_VAL] = "POP_VAL", [VCAP_AF_PORT_MASK] = "PORT_MASK", + [VCAP_AF_PUSH_CUSTOMER_TAG] = "PUSH_CUSTOMER_TAG", + [VCAP_AF_PUSH_INNER_TAG] = "PUSH_INNER_TAG", + [VCAP_AF_PUSH_OUTER_TAG] = "PUSH_OUTER_TAG", [VCAP_AF_QOS_ENA] = "QOS_ENA", [VCAP_AF_QOS_VAL] = "QOS_VAL", [VCAP_AF_REW_OP] = "REW_OP", @@ -2945,7 +3169,24 @@ static const char * const vcap_actionfield_names[] = { [VCAP_AF_SFID_VAL] = "SFID_VAL", [VCAP_AF_SGID_ENA] = "SGID_ENA", [VCAP_AF_SGID_VAL] = "SGID_VAL", + [VCAP_AF_SWAP_MACS_ENA] = "SWAP_MACS_ENA", + [VCAP_AF_TAG_A_DEI_SEL] = "TAG_A_DEI_SEL", + [VCAP_AF_TAG_A_PCP_SEL] = "TAG_A_PCP_SEL", + [VCAP_AF_TAG_A_TPID_SEL] = "TAG_A_TPID_SEL", + [VCAP_AF_TAG_A_VID_SEL] = "TAG_A_VID_SEL", + [VCAP_AF_TAG_B_DEI_SEL] = "TAG_B_DEI_SEL", + [VCAP_AF_TAG_B_PCP_SEL] = "TAG_B_PCP_SEL", + [VCAP_AF_TAG_B_TPID_SEL] = "TAG_B_TPID_SEL", + [VCAP_AF_TAG_B_VID_SEL] = "TAG_B_VID_SEL", + [VCAP_AF_TAG_C_DEI_SEL] = "TAG_C_DEI_SEL", + [VCAP_AF_TAG_C_PCP_SEL] = "TAG_C_PCP_SEL", + [VCAP_AF_TAG_C_TPID_SEL] = "TAG_C_TPID_SEL", + [VCAP_AF_TAG_C_VID_SEL] = "TAG_C_VID_SEL", [VCAP_AF_TYPE] = "TYPE", + 
[VCAP_AF_UNTAG_VID_ENA] = "UNTAG_VID_ENA", + [VCAP_AF_VID_A_VAL] = "VID_A_VAL", + [VCAP_AF_VID_B_VAL] = "VID_B_VAL", + [VCAP_AF_VID_C_VAL] = "VID_C_VAL", [VCAP_AF_VID_REPLACE_ENA] = "VID_REPLACE_ENA", [VCAP_AF_VID_VAL] = "VID_VAL", [VCAP_AF_VLAN_POP_CNT] = "VLAN_POP_CNT", @@ -2996,11 +3237,32 @@ const struct vcap_info lan966x_vcaps[] = { .keyfield_set_typegroups = is2_keyfield_set_typegroups, .actionfield_set_typegroups = is2_actionfield_set_typegroups, }, + [VCAP_TYPE_ES0] = { + .name = "es0", + .rows = 256, + .sw_count = 1, + .sw_width = 96, + .sticky_width = 1, + .act_width = 65, + .default_cnt = 8, + .require_cnt_dis = 0, + .version = 1, + .keyfield_set = es0_keyfield_set, + .keyfield_set_size = ARRAY_SIZE(es0_keyfield_set), + .actionfield_set = es0_actionfield_set, + .actionfield_set_size = ARRAY_SIZE(es0_actionfield_set), + .keyfield_set_map = es0_keyfield_set_map, + .keyfield_set_map_size = es0_keyfield_set_map_size, + .actionfield_set_map = es0_actionfield_set_map, + .actionfield_set_map_size = es0_actionfield_set_map_size, + .keyfield_set_typegroups = es0_keyfield_set_typegroups, + .actionfield_set_typegroups = es0_actionfield_set_typegroups, + }, }; const struct vcap_statistics lan966x_vcap_stats = { .name = "lan966x", - .count = 2, + .count = 3, .keyfield_set_names = vcap_keyfield_set_names, .actionfield_set_names = vcap_actionfield_set_names, .keyfield_names = vcap_keyfield_names, diff --git a/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h b/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h index a556c4419986..c3569a4c7b69 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h +++ b/drivers/net/ethernet/microchip/vcap/vcap_ag_api.h @@ -3,8 +3,8 @@ * Microchip VCAP API */ -/* This file is autogenerated by cml-utils 2023-02-16 11:41:14 +0100. - * Commit ID: be85f176b3a151fa748dcaf97c8824a5c2e065f3 +/* This file is autogenerated by cml-utils 2023-03-13 10:16:42 +0100. 
+ * Commit ID: 259f0efd6d6d91bfbf62858de153cc757b6bffa3 (dirty) */ #ifndef __VCAP_AG_API__ @@ -51,6 +51,7 @@ enum vcap_keyfield_set { VCAP_KFS_RT, /* lan966x is1 X1 */ VCAP_KFS_SMAC_SIP4, /* lan966x is2 X1 */ VCAP_KFS_SMAC_SIP6, /* lan966x is2 X2 */ + VCAP_KFS_VID, /* lan966x es0 X1 */ }; /* List of keyfields with description @@ -79,7 +80,7 @@ enum vcap_keyfield_set { * Second DEI in multiple vlan tags (inner tag) * VCAP_KF_8021Q_DEI2: W1, sparx5: is0 * Third DEI in multiple vlan tags (not always available) - * VCAP_KF_8021Q_DEI_CLS: W1, sparx5: is2/es2, lan966x: is2 + * VCAP_KF_8021Q_DEI_CLS: W1, sparx5: is2/es2, lan966x: is2/es0 * Classified DEI * VCAP_KF_8021Q_PCP0: W3, sparx5: is0, lan966x: is1 * First PCP in multiple vlan tags (outer tag or default port tag) @@ -87,7 +88,7 @@ enum vcap_keyfield_set { * Second PCP in multiple vlan tags (inner tag) * VCAP_KF_8021Q_PCP2: W3, sparx5: is0 * Third PCP in multiple vlan tags (not always available) - * VCAP_KF_8021Q_PCP_CLS: W3, sparx5: is2/es2, lan966x: is2 + * VCAP_KF_8021Q_PCP_CLS: W3, sparx5: is2/es2, lan966x: is2/es0 * Classified PCP * VCAP_KF_8021Q_TPID: W3, sparx5: es0 * TPID for outer tag: 0: Customer TPID 1: Service TPID (88A8 or programmable) @@ -104,7 +105,7 @@ enum vcap_keyfield_set { * VCAP_KF_8021Q_VID2: W12, sparx5: is0 * Third VID in multiple vlan tags (not always available) * VCAP_KF_8021Q_VID_CLS: sparx5 is2 W13, sparx5 es0 W13, sparx5 es2 W13, - * lan966x is2 W12 + * lan966x is2 W12, lan966x es0 W12 * Classified VID * VCAP_KF_8021Q_VLAN_DBL_TAGGED_IS: W1, lan966x: is1 * Set if frame has two or more Q-tags. 
Independent of port VLAN awareness @@ -146,10 +147,10 @@ enum vcap_keyfield_set { * VCAP_KF_IF_EGR_PORT_MASK_RNG: W3, sparx5: es2 * Select which 32 port group is available in IF_EGR_PORT (or virtual ports or * CPU queue) - * VCAP_KF_IF_EGR_PORT_NO: W7, sparx5: es0 + * VCAP_KF_IF_EGR_PORT_NO: sparx5 es0 W7, lan966x es0 W4 * Egress port number * VCAP_KF_IF_IGR_PORT: sparx5 is0 W7, sparx5 es2 W9, lan966x is1 W3, lan966x - * is2 W4 + * is2 W4, lan966x es0 W4 * Sparx5: Logical ingress port number retrieved from * ANA_CL::PORT_ID_CFG.LPORT_NUM or ERLEG, LAN966x: ingress port nunmber * VCAP_KF_IF_IGR_PORT_MASK: sparx5 is0 W65, sparx5 is2 W32, sparx5 is2 W65, @@ -178,11 +179,12 @@ enum vcap_keyfield_set { * Payload after IPv6 header * VCAP_KF_IP_SNAP_IS: W1, sparx5: is0, lan966x: is1 * Set if frame is IPv4, IPv6, or SNAP frame - * VCAP_KF_ISDX_CLS: W12, sparx5: is2/es0/es2 + * VCAP_KF_ISDX_CLS: sparx5 is2 W12, sparx5 es0 W12, sparx5 es2 W12, lan966x es0 + * W8 * Classified ISDX - * VCAP_KF_ISDX_GT0_IS: W1, sparx5: is2/es0/es2, lan966x: is2 + * VCAP_KF_ISDX_GT0_IS: W1, sparx5: is2/es0/es2, lan966x: is2/es0 * Set if classified ISDX > 0 - * VCAP_KF_L2_BC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2 + * VCAP_KF_L2_BC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2/es0 * Set if frame's destination MAC address is the broadcast address * (FF-FF-FF-FF-FF-FF). * VCAP_KF_L2_DMAC: W48, sparx5: is0/is2/es2, lan966x: is1/is2 @@ -195,7 +197,7 @@ enum vcap_keyfield_set { * LLC header and data after up to two VLAN tags and the type/length field * VCAP_KF_L2_MAC: W48, lan966x: is1 * MAC address (FIRST=1: SMAC, FIRST=0: DMAC) - * VCAP_KF_L2_MC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2 + * VCAP_KF_L2_MC_IS: W1, sparx5: is0/is2/es2, lan966x: is1/is2/es0 * Set if frame's destination MAC address is a multicast address (bit 40 = 1). 
* VCAP_KF_L2_PAYLOAD0: W16, lan966x: is2 * Payload bytes 0-1 after the frame's EtherType @@ -213,7 +215,7 @@ enum vcap_keyfield_set { * SNAP header after LLC header (AA-AA-03) * VCAP_KF_L3_DIP_EQ_SIP_IS: W1, sparx5: is2/es2, lan966x: is2 * Set if Src IP matches Dst IP address - * VCAP_KF_L3_DPL_CLS: W1, sparx5: es0/es2 + * VCAP_KF_L3_DPL_CLS: W1, sparx5: es0/es2, lan966x: es0 * The frames drop precedence level * VCAP_KF_L3_DSCP: W6, sparx5: is0, lan966x: is1 * Frame's DSCP value @@ -330,8 +332,12 @@ enum vcap_keyfield_set { * Frame's OAM version * VCAP_KF_OAM_Y1731_IS: W1, sparx5: is2/es2, lan966x: is2 * Set if frame's EtherType = 0x8902 + * VCAP_KF_PDU_TYPE: W4, lan966x: es0 + * PDU type value (none, OAM CCM, MRP, DLR, RTE, IPv4, IPv6, OAM non-CCM) * VCAP_KF_PROT_ACTIVE: W1, sparx5: es0/es2 * Protection is active + * VCAP_KF_RTP_ID: W10, lan966x: es0 + * Classified RTP_ID * VCAP_KF_RT_FRMID: W32, lan966x: is1 * Profinet or OPC-UA FrameId * VCAP_KF_RT_TYPE: W2, lan966x: is1 @@ -470,7 +476,9 @@ enum vcap_key_field { VCAP_KF_OAM_OPCODE, VCAP_KF_OAM_VER, VCAP_KF_OAM_Y1731_IS, + VCAP_KF_PDU_TYPE, VCAP_KF_PROT_ACTIVE, + VCAP_KF_RTP_ID, VCAP_KF_RT_FRMID, VCAP_KF_RT_TYPE, VCAP_KF_RT_VLAN_IDX, @@ -489,6 +497,7 @@ enum vcap_actionfield_set { VCAP_AFS_FULL, /* sparx5 is0 X3 */ VCAP_AFS_S1, /* lan966x is1 X1 */ VCAP_AFS_SMAC_SIP, /* lan966x is2 X1 */ + VCAP_AFS_VID, /* lan966x es0 X1 */ }; /* List of actionfields with description @@ -523,9 +532,9 @@ enum vcap_actionfield_set { * while bits 1:0 control first lookup. Encoding per lookup: 0: Disabled. 1: * Extract 40 bytes after position corresponding to the location of the IPv4 * header and use as key. 2: Extract 40 bytes after SMAC and use as key - * VCAP_AF_DEI_A_VAL: W1, sparx5: es0 + * VCAP_AF_DEI_A_VAL: W1, sparx5: es0, lan966x: es0 * DEI used in ES0 tag A. See TAG_A_DEI_SEL. - * VCAP_AF_DEI_B_VAL: W1, sparx5: es0 + * VCAP_AF_DEI_B_VAL: W1, sparx5: es0, lan966x: es0 * DEI used in ES0 tag B. See TAG_B_DEI_SEL. 
* VCAP_AF_DEI_C_VAL: W1, sparx5: es0 * DEI used in ES0 tag C. See TAG_C_DEI_SEL. @@ -556,7 +565,7 @@ enum vcap_actionfield_set { * VCAP_AF_ES2_REW_CMD: W3, sparx5: es2 * Command forwarded to REW: 0: No action. 1: SWAP MAC addresses. 2: Do L2CP * DMAC translation when entering or leaving a tunnel. - * VCAP_AF_ESDX: W13, sparx5: es0 + * VCAP_AF_ESDX: sparx5 es0 W13, lan966x es0 W8 * Egress counter index. Used to index egress counter set as defined in * REW::STAT_CFG. * VCAP_AF_FWD_KILL_ENA: W1, lan966x: is2 @@ -652,9 +661,9 @@ enum vcap_actionfield_set { * (input) AND ~PAG_OVERRIDE_MASK) OR (PAG_VAL AND PAG_OVERRIDE_MASK) * VCAP_AF_PAG_VAL: W8, sparx5: is0, lan966x: is1 * See PAG_OVERRIDE_MASK. - * VCAP_AF_PCP_A_VAL: W3, sparx5: es0 + * VCAP_AF_PCP_A_VAL: W3, sparx5: es0, lan966x: es0 * PCP used in ES0 tag A. See TAG_A_PCP_SEL. - * VCAP_AF_PCP_B_VAL: W3, sparx5: es0 + * VCAP_AF_PCP_B_VAL: W3, sparx5: es0, lan966x: es0 * PCP used in ES0 tag B. See TAG_B_PCP_SEL. * VCAP_AF_PCP_C_VAL: W3, sparx5: es0 * PCP used in ES0 tag C. See TAG_C_PCP_SEL. @@ -691,10 +700,10 @@ enum vcap_actionfield_set { * Selects tag C mode: 0: Do not push tag C. 1: Push tag C if * IFH.VSTAX.TAG.WAS_TAGGED = 1. 2: Push tag C if IFH.VSTAX.TAG.WAS_TAGGED = 0. * 3: Push tag C if UNTAG_VID_ENA = 0 or (C-TAG.VID ! = VID_C_VAL). - * VCAP_AF_PUSH_INNER_TAG: W1, sparx5: es0 + * VCAP_AF_PUSH_INNER_TAG: W1, sparx5: es0, lan966x: es0 * Controls inner tagging. 0: Do not push ES0 tag B as inner tag. 1: Push ES0 * tag B as inner tag. - * VCAP_AF_PUSH_OUTER_TAG: W2, sparx5: es0 + * VCAP_AF_PUSH_OUTER_TAG: W2, sparx5: es0, lan966x: es0 * Controls outer tagging. 0: No ES0 tag A: Port tag is allowed if enabled on * port. 1: ES0 tag A: Push ES0 tag A. No port tag. 2: Force port tag: Always * push port tag. No ES0 tag A. 
3: Force untag: Never push port tag or ES0 tag @@ -720,29 +729,29 @@ enum vcap_actionfield_set { * VCAP_AF_SWAP_MACS_ENA: W1, sparx5: es0 * This setting is only active when FWD_SEL = 1 or FWD_SEL = 2 and PIPELINE_ACT * = LBK_ASM. 0: No action. 1: Swap MACs and clear bit 40 in new SMAC. - * VCAP_AF_TAG_A_DEI_SEL: W3, sparx5: es0 + * VCAP_AF_TAG_A_DEI_SEL: sparx5 es0 W3, lan966x es0 W2 * Selects PCP for ES0 tag A. 0: Classified DEI. 1: DEI_A_VAL. 2: DP and QoS * mapped to PCP (per port table). 3: DP. - * VCAP_AF_TAG_A_PCP_SEL: W3, sparx5: es0 + * VCAP_AF_TAG_A_PCP_SEL: sparx5 es0 W3, lan966x es0 W2 * Selects PCP for ES0 tag A. 0: Classified PCP. 1: PCP_A_VAL. 2: DP and QoS * mapped to PCP (per port table). 3: QoS class. - * VCAP_AF_TAG_A_TPID_SEL: W3, sparx5: es0 + * VCAP_AF_TAG_A_TPID_SEL: sparx5 es0 W3, lan966x es0 W2 * Selects TPID for ES0 tag A: 0: 0x8100. 1: 0x88A8. 2: Custom * (REW:PORT:PORT_VLAN_CFG.PORT_TPID). 3: If IFH.TAG_TYPE = 0 then 0x8100 else * custom. - * VCAP_AF_TAG_A_VID_SEL: W2, sparx5: es0 + * VCAP_AF_TAG_A_VID_SEL: sparx5 es0 W2, lan966x es0 W1 * Selects VID for ES0 tag A. 0: Classified VID + VID_A_VAL. 1: VID_A_VAL. - * VCAP_AF_TAG_B_DEI_SEL: W3, sparx5: es0 + * VCAP_AF_TAG_B_DEI_SEL: sparx5 es0 W3, lan966x es0 W2 * Selects PCP for ES0 tag B. 0: Classified DEI. 1: DEI_B_VAL. 2: DP and QoS * mapped to PCP (per port table). 3: DP. - * VCAP_AF_TAG_B_PCP_SEL: W3, sparx5: es0 + * VCAP_AF_TAG_B_PCP_SEL: sparx5 es0 W3, lan966x es0 W2 * Selects PCP for ES0 tag B. 0: Classified PCP. 1: PCP_B_VAL. 2: DP and QoS * mapped to PCP (per port table). 3: QoS class. - * VCAP_AF_TAG_B_TPID_SEL: W3, sparx5: es0 + * VCAP_AF_TAG_B_TPID_SEL: sparx5 es0 W3, lan966x es0 W2 * Selects TPID for ES0 tag B. 0: 0x8100. 1: 0x88A8. 2: Custom * (REW:PORT:PORT_VLAN_CFG.PORT_TPID). 3: If IFH.TAG_TYPE = 0 then 0x8100 else * custom. - * VCAP_AF_TAG_B_VID_SEL: W2, sparx5: es0 + * VCAP_AF_TAG_B_VID_SEL: sparx5 es0 W2, lan966x es0 W1 * Selects VID for ES0 tag B. 
0: Classified VID + VID_B_VAL. 1: VID_B_VAL. * VCAP_AF_TAG_C_DEI_SEL: W3, sparx5: es0 * Selects DEI source for ES0 tag C. 0: Classified DEI. 1: DEI_C_VAL. 2: @@ -770,9 +779,9 @@ enum vcap_actionfield_set { * VCAP_AF_UNTAG_VID_ENA: W1, sparx5: es0 * Controls insertion of tag C. Untag or insert mode can be selected. See * PUSH_CUSTOMER_TAG. - * VCAP_AF_VID_A_VAL: W12, sparx5: es0 + * VCAP_AF_VID_A_VAL: W12, sparx5: es0, lan966x: es0 * VID used in ES0 tag A. See TAG_A_VID_SEL. - * VCAP_AF_VID_B_VAL: W12, sparx5: es0 + * VCAP_AF_VID_B_VAL: W12, sparx5: es0, lan966x: es0 * VID used in ES0 tag B. See TAG_B_VID_SEL. * VCAP_AF_VID_C_VAL: W12, sparx5: es0 * VID used in ES0 tag C. See TAG_C_VID_SEL. -- cgit v1.2.3 From 96b6c8a662a39a9ce3a0dac929e23c8a8d454f37 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 9 May 2023 09:26:44 +0200 Subject: net: lan966x: Add ES0 VCAP keyset configuration for lan966x Add ES0 VCAP port keyset configuration for lan966x and also update debugfs to show the keyset configuration. Signed-off-by: Horatiu Vultur Signed-off-by: David S. 
Miller --- .../net/ethernet/microchip/lan966x/lan966x_main.h | 3 + .../net/ethernet/microchip/lan966x/lan966x_regs.h | 15 ++++ .../microchip/lan966x/lan966x_vcap_debugfs.c | 23 ++++++ .../ethernet/microchip/lan966x/lan966x_vcap_impl.c | 82 ++++++++++++++++++++++ 4 files changed, 123 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index c977c70abc3d..882d5a08e7d5 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -101,6 +101,9 @@ #define LAN966X_VCAP_CID_IS2_L1 VCAP_CID_INGRESS_STAGE2_L1 /* IS2 lookup 1 */ #define LAN966X_VCAP_CID_IS2_MAX (VCAP_CID_INGRESS_STAGE2_L2 - 1) /* IS2 Max */ +#define LAN966X_VCAP_CID_ES0_L0 VCAP_CID_EGRESS_L0 /* ES0 lookup 0 */ +#define LAN966X_VCAP_CID_ES0_MAX (VCAP_CID_EGRESS_L1 - 1) /* ES0 Max */ + /* MAC table entry types. * ENTRYTYPE_NORMAL is subject to aging. * ENTRYTYPE_LOCKED is not subject to aging. 
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h index f99f88b5caa8..222039180276 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h @@ -1471,12 +1471,27 @@ enum lan966x_target { /* REW:PORT:PORT_CFG */ #define REW_PORT_CFG(g) __REG(TARGET_REW, 0, 1, 0, g, 10, 128, 8, 0, 1, 4) +#define REW_PORT_CFG_ES0_EN BIT(4) +#define REW_PORT_CFG_ES0_EN_SET(x)\ + FIELD_PREP(REW_PORT_CFG_ES0_EN, x) +#define REW_PORT_CFG_ES0_EN_GET(x)\ + FIELD_GET(REW_PORT_CFG_ES0_EN, x) + #define REW_PORT_CFG_NO_REWRITE BIT(0) #define REW_PORT_CFG_NO_REWRITE_SET(x)\ FIELD_PREP(REW_PORT_CFG_NO_REWRITE, x) #define REW_PORT_CFG_NO_REWRITE_GET(x)\ FIELD_GET(REW_PORT_CFG_NO_REWRITE, x) +/* REW:COMMON:STAT_CFG */ +#define REW_STAT_CFG __REG(TARGET_REW, 0, 1, 3072, 0, 1, 528, 520, 0, 1, 4) + +#define REW_STAT_CFG_STAT_MODE GENMASK(1, 0) +#define REW_STAT_CFG_STAT_MODE_SET(x)\ + FIELD_PREP(REW_STAT_CFG_STAT_MODE, x) +#define REW_STAT_CFG_STAT_MODE_GET(x)\ + FIELD_GET(REW_STAT_CFG_STAT_MODE, x) + /* SYS:SYSTEM:RESET_CFG */ #define SYS_RESET_CFG __REG(TARGET_SYS, 0, 1, 4128, 0, 1, 168, 0, 0, 1, 4) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c index d90c08cfcf14..ac525ff1503e 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_debugfs.c @@ -190,6 +190,26 @@ static void lan966x_vcap_is2_port_keys(struct lan966x_port *port, out->prf(out->dst, "\n"); } +static void lan966x_vcap_es0_port_keys(struct lan966x_port *port, + struct vcap_admin *admin, + struct vcap_output_print *out) +{ + struct lan966x *lan966x = port->lan966x; + u32 val; + + out->prf(out->dst, " port[%d] (%s): ", port->chip_port, + netdev_name(port->dev)); + + val = lan_rd(lan966x, REW_PORT_CFG(port->chip_port)); + 
out->prf(out->dst, "\n state: "); + if (REW_PORT_CFG_ES0_EN_GET(val)) + out->prf(out->dst, "on"); + else + out->prf(out->dst, "off"); + + out->prf(out->dst, "\n"); +} + int lan966x_vcap_port_info(struct net_device *dev, struct vcap_admin *admin, struct vcap_output_print *out) @@ -210,6 +230,9 @@ int lan966x_vcap_port_info(struct net_device *dev, case VCAP_TYPE_IS1: lan966x_vcap_is1_port_keys(port, admin, out); break; + case VCAP_TYPE_ES0: + lan966x_vcap_es0_port_keys(port, admin, out); + break; default: out->prf(out->dst, " no info\n"); break; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c index 7ea8e8633609..a4414f63c9b1 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c @@ -10,6 +10,12 @@ #define LAN966X_IS1_LOOKUPS 3 #define LAN966X_IS2_LOOKUPS 2 +#define LAN966X_ES0_LOOKUPS 1 + +#define LAN966X_STAT_ESDX_GRN_BYTES 0x300 +#define LAN966X_STAT_ESDX_GRN_PKTS 0x301 +#define LAN966X_STAT_ESDX_YEL_BYTES 0x302 +#define LAN966X_STAT_ESDX_YEL_PKTS 0x303 static struct lan966x_vcap_inst { enum vcap_type vtype; /* type of vcap */ @@ -20,6 +26,14 @@ static struct lan966x_vcap_inst { int count; /* number of available addresses */ bool ingress; /* is vcap in the ingress path */ } lan966x_vcap_inst_cfg[] = { + { + .vtype = VCAP_TYPE_ES0, + .tgt_inst = 0, + .lookups = LAN966X_ES0_LOOKUPS, + .first_cid = LAN966X_VCAP_CID_ES0_L0, + .last_cid = LAN966X_VCAP_CID_ES0_MAX, + .count = 64, + }, { .vtype = VCAP_TYPE_IS1, /* IS1-0 */ .tgt_inst = 1, @@ -279,6 +293,8 @@ lan966x_vcap_validate_keyset(struct net_device *dev, err = lan966x_vcap_is2_get_port_keysets(dev, lookup, &keysetlist, l3_proto); break; + case VCAP_TYPE_ES0: + return kslist->keysets[0]; default: pr_err("vcap type: %s not supported\n", lan966x_vcaps[admin->vtype].name); @@ -338,6 +354,14 @@ static void lan966x_vcap_is2_add_default_fields(struct 
lan966x_port *port, VCAP_BIT_0); } +static void lan966x_vcap_es0_add_default_fields(struct lan966x_port *port, + struct vcap_admin *admin, + struct vcap_rule *rule) +{ + vcap_rule_add_key_u32(rule, VCAP_KF_IF_EGR_PORT_NO, + port->chip_port, GENMASK(4, 0)); +} + static void lan966x_vcap_add_default_fields(struct net_device *dev, struct vcap_admin *admin, struct vcap_rule *rule) @@ -351,6 +375,9 @@ static void lan966x_vcap_add_default_fields(struct net_device *dev, case VCAP_TYPE_IS2: lan966x_vcap_is2_add_default_fields(port, admin, rule); break; + case VCAP_TYPE_ES0: + lan966x_vcap_es0_add_default_fields(port, admin, rule); + break; default: pr_err("vcap type: %s not supported\n", lan966x_vcaps[admin->vtype].name); @@ -366,6 +393,40 @@ static void lan966x_vcap_cache_erase(struct vcap_admin *admin) memset(&admin->cache.counter, 0, sizeof(admin->cache.counter)); } +/* The ESDX counter is only used/incremented if the frame has been classified + * with an ISDX > 0 (e.g by a rule in IS0). This is not mentioned in the + * datasheet. 
+ */ +static void lan966x_es0_read_esdx_counter(struct lan966x *lan966x, + struct vcap_admin *admin, u32 id) +{ + u32 counter; + + id = id & 0xff; /* counter limit */ + mutex_lock(&lan966x->stats_lock); + lan_wr(SYS_STAT_CFG_STAT_VIEW_SET(id), lan966x, SYS_STAT_CFG); + counter = lan_rd(lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_PKTS)) + + lan_rd(lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_PKTS)); + mutex_unlock(&lan966x->stats_lock); + if (counter) + admin->cache.counter = counter; +} + +static void lan966x_es0_write_esdx_counter(struct lan966x *lan966x, + struct vcap_admin *admin, u32 id) +{ + id = id & 0xff; /* counter limit */ + + mutex_lock(&lan966x->stats_lock); + lan_wr(SYS_STAT_CFG_STAT_VIEW_SET(id), lan966x, SYS_STAT_CFG); + lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_BYTES)); + lan_wr(admin->cache.counter, lan966x, + SYS_CNT(LAN966X_STAT_ESDX_GRN_PKTS)); + lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_BYTES)); + lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_PKTS)); + mutex_unlock(&lan966x->stats_lock); +} + static void lan966x_vcap_cache_write(struct net_device *dev, struct vcap_admin *admin, enum vcap_selection sel, @@ -398,6 +459,9 @@ static void lan966x_vcap_cache_write(struct net_device *dev, admin->cache.sticky = admin->cache.counter > 0; lan_wr(admin->cache.counter, lan966x, VCAP_CNT_DAT(admin->tgt_inst, 0)); + + if (admin->vtype == VCAP_TYPE_ES0) + lan966x_es0_write_esdx_counter(lan966x, admin, start); break; default: break; @@ -437,6 +501,9 @@ static void lan966x_vcap_cache_read(struct net_device *dev, admin->cache.counter = lan_rd(lan966x, VCAP_CNT_DAT(instance, 0)); admin->cache.sticky = admin->cache.counter > 0; + + if (admin->vtype == VCAP_TYPE_ES0) + lan966x_es0_read_esdx_counter(lan966x, admin, start); } } @@ -625,6 +692,12 @@ static void lan966x_vcap_port_key_deselection(struct lan966x *lan966x, lan_wr(0, lan966x, ANA_VCAP_S2_CFG(p)); break; + case VCAP_TYPE_ES0: + for (int p = 0; p < lan966x->num_phys_ports; ++p) + 
lan_rmw(REW_PORT_CFG_ES0_EN_SET(false), + REW_PORT_CFG_ES0_EN, lan966x, + REW_PORT_CFG(p)); + break; default: pr_err("vcap type: %s not supported\n", lan966x_vcaps[admin->vtype].name); @@ -674,9 +747,18 @@ int lan966x_vcap_init(struct lan966x *lan966x) lan_rmw(ANA_VCAP_CFG_S1_ENA_SET(true), ANA_VCAP_CFG_S1_ENA, lan966x, ANA_VCAP_CFG(lan966x->ports[p]->chip_port)); + + lan_rmw(REW_PORT_CFG_ES0_EN_SET(true), + REW_PORT_CFG_ES0_EN, lan966x, + REW_PORT_CFG(lan966x->ports[p]->chip_port)); } } + /* Statistics: Use ESDX from ES0 if hit, otherwise no counting */ + lan_rmw(REW_STAT_CFG_STAT_MODE_SET(1), + REW_STAT_CFG_STAT_MODE, lan966x, + REW_STAT_CFG); + lan966x->vcap_ctrl = ctrl; return 0; -- cgit v1.2.3 From 85f050002ba99150168078adab6d0f38c0463494 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 9 May 2023 09:26:45 +0200 Subject: net: lan966x: Add TC support for ES0 VCAP Enable the TC command to use the lan966x ES0 VCAP. Currently support only one action which is vlan pop, other will be added later. Signed-off-by: Horatiu Vultur Signed-off-by: David S. 
Miller --- .../ethernet/microchip/lan966x/lan966x_tc_flower.c | 61 ++++++++++++++++++++++ 1 file changed, 61 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c index 47b2f7579dd2..96b3def6c474 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c @@ -5,6 +5,8 @@ #include "vcap_api_client.h" #include "vcap_tc.h" +#define LAN966X_FORCE_UNTAGED 3 + static bool lan966x_tc_is_known_etype(struct vcap_tc_flower_parse_usage *st, u16 etype) { @@ -29,6 +31,8 @@ static bool lan966x_tc_is_known_etype(struct vcap_tc_flower_parse_usage *st, return true; } break; + case VCAP_TYPE_ES0: + return true; default: NL_SET_ERR_MSG_MOD(st->fco->common.extack, "VCAP type not supported"); @@ -318,6 +322,9 @@ static int lan966x_tc_set_actionset(struct vcap_admin *admin, case VCAP_TYPE_IS2: aset = VCAP_AFS_BASE_TYPE; break; + case VCAP_TYPE_ES0: + aset = VCAP_AFS_VID; + break; default: return -EINVAL; } @@ -353,6 +360,10 @@ static int lan966x_tc_add_rule_link_target(struct vcap_admin *admin, /* Add IS2 specific PAG key (for chaining rules from IS1) */ return vcap_rule_add_key_u32(vrule, VCAP_KF_LOOKUP_PAG, link_val, ~0); + case VCAP_TYPE_ES0: + /* Add ES0 specific ISDX key (for chaining rules from IS1) */ + return vcap_rule_add_key_u32(vrule, VCAP_KF_ISDX_CLS, + link_val, ~0); default: break; } @@ -389,6 +400,18 @@ static int lan966x_tc_add_rule_link(struct vcap_control *vctrl, 0xff); if (err) return err; + } else if (admin->vtype == VCAP_TYPE_IS1 && + to_admin->vtype == VCAP_TYPE_ES0) { + /* This works for IS1->ES0 */ + err = vcap_rule_add_action_u32(vrule, VCAP_AF_ISDX_ADD_VAL, + diff); + if (err) + return err; + + err = vcap_rule_add_action_bit(vrule, VCAP_AF_ISDX_REPLACE_ENA, + VCAP_BIT_1); + if (err) + return err; } else { NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported 
chain destination"); @@ -398,6 +421,23 @@ static int lan966x_tc_add_rule_link(struct vcap_control *vctrl, return err; } +static int lan966x_tc_add_rule_counter(struct vcap_admin *admin, + struct vcap_rule *vrule) +{ + int err = 0; + + switch (admin->vtype) { + case VCAP_TYPE_ES0: + err = vcap_rule_mod_action_u32(vrule, VCAP_AF_ESDX, + vrule->id); + break; + default: + break; + } + + return err; +} + static int lan966x_tc_flower_add(struct lan966x_port *port, struct flow_cls_offload *f, struct vcap_admin *admin, @@ -465,6 +505,21 @@ static int lan966x_tc_flower_add(struct lan966x_port *port, if (err) goto out; + break; + case FLOW_ACTION_VLAN_POP: + if (admin->vtype != VCAP_TYPE_ES0) { + NL_SET_ERR_MSG_MOD(f->common.extack, + "Cannot use vlan pop on non es0"); + err = -EOPNOTSUPP; + goto out; + } + + /* Force untag */ + err = vcap_rule_add_action_u32(vrule, VCAP_AF_PUSH_OUTER_TAG, + LAN966X_FORCE_UNTAGED); + if (err) + goto out; + break; default: NL_SET_ERR_MSG_MOD(f->common.extack, @@ -474,6 +529,12 @@ static int lan966x_tc_flower_add(struct lan966x_port *port, } } + err = lan966x_tc_add_rule_counter(admin, vrule); + if (err) { + vcap_set_tc_exterr(f, vrule); + goto out; + } + err = vcap_val_rule(vrule, l3_proto); if (err) { vcap_set_tc_exterr(f, vrule); -- cgit v1.2.3 From bd9424efc4825ecfc84cd81be777df71ba4404d1 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 10 May 2023 13:58:09 +0530 Subject: macsec: Use helper macsec_netdev_priv for offload drivers Now macsec on top of vlan can be offloaded to macsec offloading devices so that VLAN tag is sent in clear text on wire i.e, packet structure is DMAC|SMAC|VLAN|SECTAG. Offloading devices can simply enable NETIF_F_HW_MACSEC feature in netdev->vlan_features for this to work. But the logic in offloading drivers to retrieve the private structure from netdev needs to be changed to check whether the netdev received is real device or a vlan device and get private structure accordingly. 
This patch changes the offloading drivers to use helper macsec_netdev_priv instead of netdev_priv. Signed-off-by: Subbaraya Sundeep Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_macsec.c | 40 +++++++++++----------- .../ethernet/marvell/octeontx2/nic/cn10k_macsec.c | 38 ++++++++++---------- .../ethernet/mellanox/mlx5/core/en_accel/macsec.c | 9 ----- include/net/macsec.h | 10 ++++++ 4 files changed, 49 insertions(+), 48 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c index 7eb5851eb95d..6afff8af5e86 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c @@ -289,7 +289,7 @@ static int aq_get_txsc_stats(struct aq_hw_s *hw, const int sc_idx, static int aq_mdo_dev_open(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); int ret = 0; if (netif_carrier_ok(nic->ndev)) @@ -300,7 +300,7 @@ static int aq_mdo_dev_open(struct macsec_context *ctx) static int aq_mdo_dev_stop(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); int i; for (i = 0; i < AQ_MACSEC_MAX_SC; i++) { @@ -439,7 +439,7 @@ static enum aq_macsec_sc_sa sc_sa_from_num_an(const int num_an) static int aq_mdo_add_secy(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; const struct macsec_secy *secy = ctx->secy; enum aq_macsec_sc_sa sc_sa; @@ -474,7 +474,7 @@ static int aq_mdo_add_secy(struct macsec_context *ctx) static int aq_mdo_upd_secy(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); const struct macsec_secy *secy 
= ctx->secy; int txsc_idx; int ret = 0; @@ -528,7 +528,7 @@ static int aq_clear_txsc(struct aq_nic_s *nic, const int txsc_idx, static int aq_mdo_del_secy(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); int ret = 0; if (!nic->macsec_cfg) @@ -576,7 +576,7 @@ static int aq_update_txsa(struct aq_nic_s *nic, const unsigned int sc_idx, static int aq_mdo_add_txsa(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; const struct macsec_secy *secy = ctx->secy; struct aq_macsec_txsc *aq_txsc; @@ -603,7 +603,7 @@ static int aq_mdo_add_txsa(struct macsec_context *ctx) static int aq_mdo_upd_txsa(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; const struct macsec_secy *secy = ctx->secy; struct aq_macsec_txsc *aq_txsc; @@ -652,7 +652,7 @@ static int aq_clear_txsa(struct aq_nic_s *nic, struct aq_macsec_txsc *aq_txsc, static int aq_mdo_del_txsa(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; int txsc_idx; int ret = 0; @@ -744,7 +744,7 @@ static int aq_set_rxsc(struct aq_nic_s *nic, const u32 rxsc_idx) static int aq_mdo_add_rxsc(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; const u32 rxsc_idx_max = aq_sc_idx_max(cfg->sc_sa); u32 rxsc_idx; @@ -775,7 +775,7 @@ static int aq_mdo_add_rxsc(struct macsec_context *ctx) static int aq_mdo_upd_rxsc(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = 
macsec_netdev_priv(ctx->netdev); int rxsc_idx; int ret = 0; @@ -838,7 +838,7 @@ static int aq_clear_rxsc(struct aq_nic_s *nic, const int rxsc_idx, static int aq_mdo_del_rxsc(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); enum aq_clear_type clear_type = AQ_CLEAR_SW; int rxsc_idx; int ret = 0; @@ -906,8 +906,8 @@ static int aq_update_rxsa(struct aq_nic_s *nic, const unsigned int sc_idx, static int aq_mdo_add_rxsa(struct macsec_context *ctx) { + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); const struct macsec_rx_sc *rx_sc = ctx->sa.rx_sa->sc; - struct aq_nic_s *nic = netdev_priv(ctx->netdev); const struct macsec_secy *secy = ctx->secy; struct aq_macsec_rxsc *aq_rxsc; int rxsc_idx; @@ -933,8 +933,8 @@ static int aq_mdo_add_rxsa(struct macsec_context *ctx) static int aq_mdo_upd_rxsa(struct macsec_context *ctx) { + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); const struct macsec_rx_sc *rx_sc = ctx->sa.rx_sa->sc; - struct aq_nic_s *nic = netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; const struct macsec_secy *secy = ctx->secy; int rxsc_idx; @@ -982,8 +982,8 @@ static int aq_clear_rxsa(struct aq_nic_s *nic, struct aq_macsec_rxsc *aq_rxsc, static int aq_mdo_del_rxsa(struct macsec_context *ctx) { + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); const struct macsec_rx_sc *rx_sc = ctx->sa.rx_sa->sc; - struct aq_nic_s *nic = netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; int rxsc_idx; int ret = 0; @@ -1000,7 +1000,7 @@ static int aq_mdo_del_rxsa(struct macsec_context *ctx) static int aq_mdo_get_dev_stats(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_common_stats *stats = &nic->macsec_cfg->stats; struct aq_hw_s *hw = nic->aq_hw; @@ -1020,7 +1020,7 @@ static int aq_mdo_get_dev_stats(struct macsec_context 
*ctx) static int aq_mdo_get_tx_sc_stats(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_tx_sc_stats *stats; struct aq_hw_s *hw = nic->aq_hw; struct aq_macsec_txsc *aq_txsc; @@ -1044,7 +1044,7 @@ static int aq_mdo_get_tx_sc_stats(struct macsec_context *ctx) static int aq_mdo_get_tx_sa_stats(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; struct aq_macsec_tx_sa_stats *stats; struct aq_hw_s *hw = nic->aq_hw; @@ -1084,7 +1084,7 @@ static int aq_mdo_get_tx_sa_stats(struct macsec_context *ctx) static int aq_mdo_get_rx_sc_stats(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; struct aq_macsec_rx_sa_stats *stats; struct aq_hw_s *hw = nic->aq_hw; @@ -1129,7 +1129,7 @@ static int aq_mdo_get_rx_sc_stats(struct macsec_context *ctx) static int aq_mdo_get_rx_sa_stats(struct macsec_context *ctx) { - struct aq_nic_s *nic = netdev_priv(ctx->netdev); + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); struct aq_macsec_cfg *cfg = nic->macsec_cfg; struct aq_macsec_rx_sa_stats *stats; struct aq_hw_s *hw = nic->aq_hw; @@ -1399,7 +1399,7 @@ static void aq_check_txsa_expiration(struct aq_nic_s *nic) #define AQ_LOCKED_MDO_DEF(mdo) \ static int aq_locked_mdo_##mdo(struct macsec_context *ctx) \ { \ - struct aq_nic_s *nic = netdev_priv(ctx->netdev); \ + struct aq_nic_s *nic = macsec_netdev_priv(ctx->netdev); \ int ret; \ mutex_lock(&nic->macsec_mutex); \ ret = aq_mdo_##mdo(ctx); \ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index a487a98eac88..aea4c802bb9d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ 
b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -1053,7 +1053,7 @@ static void cn10k_mcs_sync_stats(struct otx2_nic *pfvf, struct macsec_secy *secy static int cn10k_mdo_open(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; struct macsec_tx_sa *sw_tx_sa; @@ -1077,7 +1077,7 @@ static int cn10k_mdo_open(struct macsec_context *ctx) static int cn10k_mdo_stop(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct cn10k_mcs_txsc *txsc; int err; @@ -1095,7 +1095,7 @@ static int cn10k_mdo_stop(struct macsec_context *ctx) static int cn10k_mdo_add_secy(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; struct cn10k_mcs_txsc *txsc; @@ -1129,7 +1129,7 @@ static int cn10k_mdo_add_secy(struct macsec_context *ctx) static int cn10k_mdo_upd_secy(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; struct macsec_tx_sa *sw_tx_sa; @@ -1164,7 +1164,7 @@ static int cn10k_mdo_upd_secy(struct macsec_context *ctx) static int cn10k_mdo_del_secy(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct cn10k_mcs_txsc *txsc; @@ -1183,7 +1183,7 @@ static int cn10k_mdo_del_secy(struct macsec_context *ctx) static int cn10k_mdo_add_txsa(struct macsec_context *ctx) { - struct otx2_nic *pfvf = 
netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct macsec_tx_sa *sw_tx_sa = ctx->sa.tx_sa; struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; @@ -1225,7 +1225,7 @@ static int cn10k_mdo_add_txsa(struct macsec_context *ctx) static int cn10k_mdo_upd_txsa(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct macsec_tx_sa *sw_tx_sa = ctx->sa.tx_sa; struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; @@ -1258,7 +1258,7 @@ static int cn10k_mdo_upd_txsa(struct macsec_context *ctx) static int cn10k_mdo_del_txsa(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; u8 sa_num = ctx->sa.assoc_num; struct cn10k_mcs_txsc *txsc; @@ -1278,7 +1278,7 @@ static int cn10k_mdo_del_txsa(struct macsec_context *ctx) static int cn10k_mdo_add_rxsc(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; struct cn10k_mcs_rxsc *rxsc; @@ -1312,7 +1312,7 @@ static int cn10k_mdo_add_rxsc(struct macsec_context *ctx) static int cn10k_mdo_upd_rxsc(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; bool enable = ctx->rx_sc->active; @@ -1331,7 +1331,7 @@ static int cn10k_mdo_upd_rxsc(struct macsec_context *ctx) static int cn10k_mdo_del_rxsc(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct 
cn10k_mcs_rxsc *rxsc; @@ -1349,8 +1349,8 @@ static int cn10k_mdo_del_rxsc(struct macsec_context *ctx) static int cn10k_mdo_add_rxsa(struct macsec_context *ctx) { + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct macsec_rx_sc *sw_rx_sc = ctx->sa.rx_sa->sc; - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_rx_sa *rx_sa = ctx->sa.rx_sa; u64 next_pn = rx_sa->next_pn_halves.lower; @@ -1389,8 +1389,8 @@ static int cn10k_mdo_add_rxsa(struct macsec_context *ctx) static int cn10k_mdo_upd_rxsa(struct macsec_context *ctx) { + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct macsec_rx_sc *sw_rx_sc = ctx->sa.rx_sa->sc; - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_rx_sa *rx_sa = ctx->sa.rx_sa; u64 next_pn = rx_sa->next_pn_halves.lower; @@ -1422,8 +1422,8 @@ static int cn10k_mdo_upd_rxsa(struct macsec_context *ctx) static int cn10k_mdo_del_rxsa(struct macsec_context *ctx) { + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct macsec_rx_sc *sw_rx_sc = ctx->sa.rx_sa->sc; - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; u8 sa_num = ctx->sa.assoc_num; struct cn10k_mcs_rxsc *rxsc; @@ -1445,8 +1445,8 @@ static int cn10k_mdo_del_rxsa(struct macsec_context *ctx) static int cn10k_mdo_get_dev_stats(struct macsec_context *ctx) { + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct mcs_secy_stats tx_rsp = { 0 }, rx_rsp = { 0 }; - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; struct cn10k_mcs_txsc *txsc; @@ -1481,7 +1481,7 @@ static int cn10k_mdo_get_dev_stats(struct macsec_context *ctx) static int cn10k_mdo_get_tx_sc_stats(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct 
cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct mcs_sc_stats rsp = { 0 }; struct cn10k_mcs_txsc *txsc; @@ -1502,7 +1502,7 @@ static int cn10k_mdo_get_tx_sc_stats(struct macsec_context *ctx) static int cn10k_mdo_get_tx_sa_stats(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct mcs_sa_stats rsp = { 0 }; u8 sa_num = ctx->sa.assoc_num; @@ -1525,7 +1525,7 @@ static int cn10k_mdo_get_tx_sa_stats(struct macsec_context *ctx) static int cn10k_mdo_get_rx_sc_stats(struct macsec_context *ctx) { - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_secy *secy = ctx->secy; struct mcs_sc_stats rsp = { 0 }; @@ -1567,8 +1567,8 @@ static int cn10k_mdo_get_rx_sc_stats(struct macsec_context *ctx) static int cn10k_mdo_get_rx_sa_stats(struct macsec_context *ctx) { + struct otx2_nic *pfvf = macsec_netdev_priv(ctx->netdev); struct macsec_rx_sc *sw_rx_sc = ctx->sa.rx_sa->sc; - struct otx2_nic *pfvf = netdev_priv(ctx->netdev); struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct mcs_sa_stats rsp = { 0 }; u8 sa_num = ctx->sa.assoc_num; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 6b7b563f844a..592b165530ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -349,15 +349,6 @@ static void mlx5e_macsec_cleanup_sa(struct mlx5e_macsec *macsec, sa->macsec_rule = NULL; } -static struct mlx5e_priv *macsec_netdev_priv(const struct net_device *dev) -{ -#if IS_ENABLED(CONFIG_VLAN_8021Q) - if (is_vlan_dev(dev)) - return netdev_priv(vlan_dev_priv(dev)->real_dev); -#endif - return netdev_priv(dev); -} - static int mlx5e_macsec_init_sa(struct macsec_context *ctx, struct 
mlx5e_macsec_sa *sa, bool encrypt, diff --git a/include/net/macsec.h b/include/net/macsec.h index 5b9c61c4d3a6..441ed8fd4b5f 100644 --- a/include/net/macsec.h +++ b/include/net/macsec.h @@ -8,6 +8,7 @@ #define _NET_MACSEC_H_ #include +#include #include #include @@ -312,4 +313,13 @@ static inline bool macsec_send_sci(const struct macsec_secy *secy) (secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb); } +static inline void *macsec_netdev_priv(const struct net_device *dev) +{ +#if IS_ENABLED(CONFIG_VLAN_8021Q) + if (is_vlan_dev(dev)) + return netdev_priv(vlan_dev_priv(dev)->real_dev); +#endif + return netdev_priv(dev); +} + #endif /* _NET_MACSEC_H_ */ -- cgit v1.2.3 From 6096bc0555726c1cdded8486d8800cd4d81eb764 Mon Sep 17 00:00:00 2001 From: wuych Date: Wed, 10 May 2023 14:06:49 +0800 Subject: net: liquidio: lio_vf_main: Remove unnecessary (void*) conversions Pointer variables of void * type do not require type cast. Signed-off-by: wuych Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cavium/liquidio/lio_vf_main.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index e2921aec3da0..62c2eadc33e3 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -72,8 +72,7 @@ static int liquidio_stop(struct net_device *netdev); static int lio_wait_for_oq_pkts(struct octeon_device *oct) { - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; int retry = MAX_IO_PENDING_PKT_COUNT; int pkt_cnt = 0, pending_pkts; int i; @@ -442,8 +441,7 @@ static void octeon_pci_flr(struct octeon_device *oct) */ static void octeon_destroy_resources(struct octeon_device *oct) { - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct msix_entry *msix_entries; int i; @@ -659,8 +657,7 @@ static int send_rx_ctrl_cmd(struct lio *lio, int start_stop) static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) { struct net_device *netdev = oct->props[ifidx].netdev; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct napi_struct *napi, *n; struct lio *lio; @@ -909,8 +906,7 @@ static int liquidio_open(struct net_device *netdev) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct napi_struct *napi, *n; int ret = 0; @@ -956,8 +952,7 @@ static int liquidio_stop(struct net_device *netdev) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; - struct octeon_device_priv 
*oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct napi_struct *napi, *n; int ret = 0; -- cgit v1.2.3 From fef99e840d465bad6549dd8775a5f967a711d171 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 10 May 2023 11:15:42 +0100 Subject: net: mvneta: fix transmit path dma-unmapping on error The transmit code assumes that the transmit descriptors that are used begin with the first descriptor in the ring, but this may not be the case. Fix this by providing a new function that dma-unmaps a range of numbered descriptor entries, and use that to do the unmapping. Signed-off-by: Russell King (Oracle) Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/mvneta.c | 53 ++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 20 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 2cad76d0a50e..62400ff61e34 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2714,14 +2714,40 @@ mvneta_tso_put_data(struct net_device *dev, struct mvneta_tx_queue *txq, return 0; } +static void mvneta_release_descs(struct mvneta_port *pp, + struct mvneta_tx_queue *txq, + int first, int num) +{ + int desc_idx, i; + + desc_idx = first + num; + if (desc_idx >= txq->size) + desc_idx -= txq->size; + + for (i = num; i >= 0; i--) { + struct mvneta_tx_desc *tx_desc = txq->descs + desc_idx; + + if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr)) + dma_unmap_single(pp->dev->dev.parent, + tx_desc->buf_phys_addr, + tx_desc->data_size, + DMA_TO_DEVICE); + + mvneta_txq_desc_put(txq); + + if (desc_idx == 0) + desc_idx = txq->size; + desc_idx -= 1; + } +} + static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev, struct mvneta_tx_queue *txq) { int hdr_len, total_len, data_left; - int desc_count = 0; + int first_desc, desc_count = 0; struct 
mvneta_port *pp = netdev_priv(dev); struct tso_t tso; - int i; /* Count needed descriptors */ if ((txq->count + tso_count_descs(skb)) >= txq->size) @@ -2732,6 +2758,8 @@ static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev, return 0; } + first_desc = txq->txq_put_index; + /* Initialize the TSO handler, and prepare the first payload */ hdr_len = tso_start(skb, &tso); @@ -2772,15 +2800,7 @@ err_release: /* Release all used data descriptors; header descriptors must not * be DMA-unmapped. */ - for (i = desc_count - 1; i >= 0; i--) { - struct mvneta_tx_desc *tx_desc = txq->descs + i; - if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr)) - dma_unmap_single(pp->dev->dev.parent, - tx_desc->buf_phys_addr, - tx_desc->data_size, - DMA_TO_DEVICE); - mvneta_txq_desc_put(txq); - } + mvneta_release_descs(pp, txq, first_desc, desc_count - 1); return 0; } @@ -2790,6 +2810,7 @@ static int mvneta_tx_frag_process(struct mvneta_port *pp, struct sk_buff *skb, { struct mvneta_tx_desc *tx_desc; int i, nr_frags = skb_shinfo(skb)->nr_frags; + int first_desc = txq->txq_put_index; for (i = 0; i < nr_frags; i++) { struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; @@ -2828,15 +2849,7 @@ error: /* Release all descriptors that were used to map fragments of * this packet, as well as the corresponding DMA mappings */ - for (i = i - 1; i >= 0; i--) { - tx_desc = txq->descs + i; - dma_unmap_single(pp->dev->dev.parent, - tx_desc->buf_phys_addr, - tx_desc->data_size, - DMA_TO_DEVICE); - mvneta_txq_desc_put(txq); - } - + mvneta_release_descs(pp, txq, first_desc, i - 1); return -ENOMEM; } -- cgit v1.2.3 From b0bd1b07c3add928e33282a52ad64a3f011d4fb7 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 10 May 2023 11:15:48 +0100 Subject: net: mvneta: mark mapped and tso buffers separately Mark dma-mapped skbs and TSO buffers separately, so we can use buf->type to identify their differences. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/mvneta.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 62400ff61e34..c05649f33d18 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -638,6 +638,7 @@ struct mvneta_rx_desc { #endif enum mvneta_tx_buf_type { + MVNETA_TYPE_TSO, MVNETA_TYPE_SKB, MVNETA_TYPE_XDP_TX, MVNETA_TYPE_XDP_NDO, @@ -1883,7 +1884,8 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp, dma_unmap_single(pp->dev->dev.parent, tx_desc->buf_phys_addr, tx_desc->data_size, DMA_TO_DEVICE); - if (buf->type == MVNETA_TYPE_SKB && buf->skb) { + if ((buf->type == MVNETA_TYPE_TSO || + buf->type == MVNETA_TYPE_SKB) && buf->skb) { bytes_compl += buf->skb->len; pkts_compl++; dev_kfree_skb_any(buf->skb); @@ -2674,7 +2676,7 @@ mvneta_tso_put_hdr(struct sk_buff *skb, struct mvneta_tx_queue *txq) tx_desc->command |= MVNETA_TXD_F_DESC; tx_desc->buf_phys_addr = txq->tso_hdrs_phys + txq->txq_put_index * TSO_HEADER_SIZE; - buf->type = MVNETA_TYPE_SKB; + buf->type = MVNETA_TYPE_TSO; buf->skb = NULL; mvneta_txq_inc_put(txq); -- cgit v1.2.3 From f00ba4f41acc050c959803f290a0f0c03dc0da5c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 10 May 2023 11:15:53 +0100 Subject: net: mvneta: use buf->type to determine whether to dma-unmap Now that we use a different buffer type for TSO headers, we can use buf->type to determine whether the original buffer was DMA-mapped or not. 
The rules are: MVNETA_TYPE_XDP_TX - from a DMA pool, no unmap is required MVNETA_TYPE_XDP_NDO - dma_map_single()'d MVNETA_TYPE_SKB - normal skbuff, dma_map_single()'d MVNETA_TYPE_TSO - from the TSO buffer area This means we only need to call dma_unmap_single() on the XDP_NDO and SKB types of buffer, and we no longer need the private IS_TSO_HEADER() which relies on the TSO region being contiguously allocated. Signed-off-by: Russell King (Oracle) Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/mvneta.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index c05649f33d18..c23d75af65ee 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -364,10 +364,6 @@ MVNETA_SKB_HEADROOM)) #define MVNETA_MAX_RX_BUF_SIZE (PAGE_SIZE - MVNETA_SKB_PAD) -#define IS_TSO_HEADER(txq, addr) \ - ((addr >= txq->tso_hdrs_phys) && \ - (addr < txq->tso_hdrs_phys + txq->size * TSO_HEADER_SIZE)) - #define MVNETA_RX_GET_BM_POOL_ID(rxd) \ (((rxd)->status & MVNETA_RXD_BM_POOL_MASK) >> MVNETA_RXD_BM_POOL_SHIFT) @@ -1879,8 +1875,8 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp, mvneta_txq_inc_get(txq); - if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr) && - buf->type != MVNETA_TYPE_XDP_TX) + if (buf->type == MVNETA_TYPE_XDP_NDO || + buf->type == MVNETA_TYPE_SKB) dma_unmap_single(pp->dev->dev.parent, tx_desc->buf_phys_addr, tx_desc->data_size, DMA_TO_DEVICE); @@ -2728,8 +2724,9 @@ static void mvneta_release_descs(struct mvneta_port *pp, for (i = num; i >= 0; i--) { struct mvneta_tx_desc *tx_desc = txq->descs + desc_idx; + struct mvneta_tx_buf *buf = &txq->buf[desc_idx]; - if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr)) + if (buf->type == MVNETA_TYPE_SKB) dma_unmap_single(pp->dev->dev.parent, tx_desc->buf_phys_addr, tx_desc->data_size, -- cgit v1.2.3 From 
d41eb5557668096b0a57646107e6fc4631ba9cf1 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 10 May 2023 11:15:58 +0100 Subject: net: mvneta: move tso_build_hdr() into mvneta_tso_put_hdr() Move tso_build_hdr() into mvneta_tso_put_hdr() so that all the TSO header building code is in one place. Signed-off-by: Russell King (Oracle) Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/mvneta.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index c23d75af65ee..bea84e86cf99 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2659,19 +2659,24 @@ err_drop_frame: return rx_done; } -static inline void -mvneta_tso_put_hdr(struct sk_buff *skb, struct mvneta_tx_queue *txq) +static void mvneta_tso_put_hdr(struct sk_buff *skb, struct mvneta_tx_queue *txq, + struct tso_t *tso, int size, bool is_last) { struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; - int hdr_len = skb_tcp_all_headers(skb); + int tso_offset, hdr_len = skb_tcp_all_headers(skb); struct mvneta_tx_desc *tx_desc; + char *hdr; + + tso_offset = txq->txq_put_index * TSO_HEADER_SIZE; + + hdr = txq->tso_hdrs + tso_offset; + tso_build_hdr(skb, hdr, tso, size, is_last); tx_desc = mvneta_txq_next_desc_get(txq); tx_desc->data_size = hdr_len; tx_desc->command = mvneta_skb_tx_csum(skb); tx_desc->command |= MVNETA_TXD_F_DESC; - tx_desc->buf_phys_addr = txq->tso_hdrs_phys + - txq->txq_put_index * TSO_HEADER_SIZE; + tx_desc->buf_phys_addr = txq->tso_hdrs_phys + tso_offset; buf->type = MVNETA_TYPE_TSO; buf->skb = NULL; @@ -2764,17 +2769,12 @@ static int mvneta_tx_tso(struct sk_buff *skb, struct net_device *dev, total_len = skb->len - hdr_len; while (total_len > 0) { - char *hdr; - data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len); total_len -= data_left; 
desc_count++; /* prepare packet headers: MAC + IP + TCP */ - hdr = txq->tso_hdrs + txq->txq_put_index * TSO_HEADER_SIZE; - tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0); - - mvneta_tso_put_hdr(skb, txq); + mvneta_tso_put_hdr(skb, txq, &tso, data_left, total_len == 0); while (data_left > 0) { int size; -- cgit v1.2.3 From 33f4cefb26e98c3cfe68ee7c88b766aa786b8733 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 10 May 2023 11:16:03 +0100 Subject: net: mvneta: allocate TSO header DMA memory in chunks Now that we no longer need to check whether the DMA address is within the TSO header DMA memory range for the queue, we can allocate the TSO header DMA memory in chunks rather than one contiguous order-6 chunk, which can stress the kernel's memory subsystems to allocate. Instead, use order-1 (8k) allocations, which will result in 32 order-1 pages containing 32 TSO headers. Signed-off-by: Russell King (Oracle) Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/mvneta.c | 88 ++++++++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 18 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index bea84e86cf99..6c6b66d3ea6e 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -344,6 +344,15 @@ #define MVNETA_MAX_SKB_DESCS (MVNETA_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS) +/* The size of a TSO header page */ +#define MVNETA_TSO_PAGE_SIZE (2 * PAGE_SIZE) + +/* Number of TSO headers per page. 
This should be a power of 2 */ +#define MVNETA_TSO_PER_PAGE (MVNETA_TSO_PAGE_SIZE / TSO_HEADER_SIZE) + +/* Maximum number of TSO header pages */ +#define MVNETA_MAX_TSO_PAGES (MVNETA_MAX_TXD / MVNETA_TSO_PER_PAGE) + /* descriptor aligned size */ #define MVNETA_DESC_ALIGNED_SIZE 32 @@ -687,10 +696,10 @@ struct mvneta_tx_queue { int next_desc_to_proc; /* DMA buffers for TSO headers */ - char *tso_hdrs; + char *tso_hdrs[MVNETA_MAX_TSO_PAGES]; /* DMA address of TSO headers */ - dma_addr_t tso_hdrs_phys; + dma_addr_t tso_hdrs_phys[MVNETA_MAX_TSO_PAGES]; /* Affinity mask for CPUs*/ cpumask_t affinity_mask; @@ -2659,24 +2668,71 @@ err_drop_frame: return rx_done; } +static void mvneta_free_tso_hdrs(struct mvneta_port *pp, + struct mvneta_tx_queue *txq) +{ + struct device *dev = pp->dev->dev.parent; + int i; + + for (i = 0; i < MVNETA_MAX_TSO_PAGES; i++) { + if (txq->tso_hdrs[i]) { + dma_free_coherent(dev, MVNETA_TSO_PAGE_SIZE, + txq->tso_hdrs[i], + txq->tso_hdrs_phys[i]); + txq->tso_hdrs[i] = NULL; + } + } +} + +static int mvneta_alloc_tso_hdrs(struct mvneta_port *pp, + struct mvneta_tx_queue *txq) +{ + struct device *dev = pp->dev->dev.parent; + int i, num; + + num = DIV_ROUND_UP(txq->size, MVNETA_TSO_PER_PAGE); + for (i = 0; i < num; i++) { + txq->tso_hdrs[i] = dma_alloc_coherent(dev, MVNETA_TSO_PAGE_SIZE, + &txq->tso_hdrs_phys[i], + GFP_KERNEL); + if (!txq->tso_hdrs[i]) { + mvneta_free_tso_hdrs(pp, txq); + return -ENOMEM; + } + } + + return 0; +} + +static char *mvneta_get_tso_hdr(struct mvneta_tx_queue *txq, dma_addr_t *dma) +{ + int index, offset; + + index = txq->txq_put_index / MVNETA_TSO_PER_PAGE; + offset = (txq->txq_put_index % MVNETA_TSO_PER_PAGE) * TSO_HEADER_SIZE; + + *dma = txq->tso_hdrs_phys[index] + offset; + + return txq->tso_hdrs[index] + offset; +} + static void mvneta_tso_put_hdr(struct sk_buff *skb, struct mvneta_tx_queue *txq, struct tso_t *tso, int size, bool is_last) { struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; - int tso_offset, 
hdr_len = skb_tcp_all_headers(skb); + int hdr_len = skb_tcp_all_headers(skb); struct mvneta_tx_desc *tx_desc; + dma_addr_t hdr_phys; char *hdr; - tso_offset = txq->txq_put_index * TSO_HEADER_SIZE; - - hdr = txq->tso_hdrs + tso_offset; + hdr = mvneta_get_tso_hdr(txq, &hdr_phys); tso_build_hdr(skb, hdr, tso, size, is_last); tx_desc = mvneta_txq_next_desc_get(txq); tx_desc->data_size = hdr_len; tx_desc->command = mvneta_skb_tx_csum(skb); tx_desc->command |= MVNETA_TXD_F_DESC; - tx_desc->buf_phys_addr = txq->tso_hdrs_phys + tso_offset; + tx_desc->buf_phys_addr = hdr_phys; buf->type = MVNETA_TYPE_TSO; buf->skb = NULL; @@ -3469,7 +3525,7 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp, static int mvneta_txq_sw_init(struct mvneta_port *pp, struct mvneta_tx_queue *txq) { - int cpu; + int cpu, err; txq->size = pp->tx_ring_size; @@ -3494,11 +3550,9 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp, return -ENOMEM; /* Allocate DMA buffers for TSO MAC/IP/TCP headers */ - txq->tso_hdrs = dma_alloc_coherent(pp->dev->dev.parent, - txq->size * TSO_HEADER_SIZE, - &txq->tso_hdrs_phys, GFP_KERNEL); - if (!txq->tso_hdrs) - return -ENOMEM; + err = mvneta_alloc_tso_hdrs(pp, txq); + if (err) + return err; /* Setup XPS mapping */ if (pp->neta_armada3700) @@ -3550,10 +3604,7 @@ static void mvneta_txq_sw_deinit(struct mvneta_port *pp, kfree(txq->buf); - if (txq->tso_hdrs) - dma_free_coherent(pp->dev->dev.parent, - txq->size * TSO_HEADER_SIZE, - txq->tso_hdrs, txq->tso_hdrs_phys); + mvneta_free_tso_hdrs(pp, txq); if (txq->descs) dma_free_coherent(pp->dev->dev.parent, txq->size * MVNETA_DESC_ALIGNED_SIZE, @@ -3562,7 +3613,6 @@ static void mvneta_txq_sw_deinit(struct mvneta_port *pp, netdev_tx_reset_queue(nq); txq->buf = NULL; - txq->tso_hdrs = NULL; txq->descs = NULL; txq->last_desc = 0; txq->next_desc_to_proc = 0; @@ -5833,6 +5883,8 @@ static int __init mvneta_driver_init(void) { int ret; + BUILD_BUG_ON_NOT_POWER_OF_2(MVNETA_TSO_PER_PAGE); + ret = 
cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/mvneta:online", mvneta_cpu_online, mvneta_cpu_down_prepare); -- cgit v1.2.3 From 995585ecdf42c14fbd4d9d12ca73bf54358581c6 Mon Sep 17 00:00:00 2001 From: Philipp Rosenberger Date: Tue, 9 May 2023 06:28:56 +0200 Subject: net: enc28j60: Use threaded interrupt instead of workqueue The Microchip ENC28J60 SPI Ethernet driver schedules a work item from the interrupt handler because accesses to the SPI bus may sleep. On PREEMPT_RT (which forces interrupt handling into threads) this old-fashioned approach unnecessarily increases latency because an interrupt results in first waking the interrupt thread, then scheduling the work item. In effect, this is a double indirection to handle an interrupt. Avoid it by converting the driver to modern threaded interrupt handling. Signed-off-by: Philipp Rosenberger Signed-off-by: Zhi Han [lukas: rewrite commit message, linewrap request_threaded_irq() call] Signed-off-by: Lukas Wunner Reviewed-by: Piotr Raczynski Link: https://lore.kernel.org/r/342380d989ce26bc49f0e5d45fbb0416a5f7809f.1683606193.git.lukas@wunner.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/enc28j60.c | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c index 176efbeae127..d6c9491537e4 100644 --- a/drivers/net/ethernet/microchip/enc28j60.c +++ b/drivers/net/ethernet/microchip/enc28j60.c @@ -58,7 +58,6 @@ struct enc28j60_net { struct mutex lock; struct sk_buff *tx_skb; struct work_struct tx_work; - struct work_struct irq_work; struct work_struct setrx_work; struct work_struct restart_work; u8 bank; /* current register bank selected */ @@ -1118,10 +1117,9 @@ static int enc28j60_rx_interrupt(struct net_device *ndev) return ret; } -static void enc28j60_irq_work_handler(struct work_struct *work) +static irqreturn_t enc28j60_irq(int irq, void *dev_id) { - struct
enc28j60_net *priv = - container_of(work, struct enc28j60_net, irq_work); + struct enc28j60_net *priv = dev_id; struct net_device *ndev = priv->netdev; int intflags, loop; @@ -1225,6 +1223,8 @@ static void enc28j60_irq_work_handler(struct work_struct *work) /* re-enable interrupts */ locked_reg_bfset(priv, EIE, EIE_INTIE); + + return IRQ_HANDLED; } /* @@ -1309,22 +1309,6 @@ static void enc28j60_tx_work_handler(struct work_struct *work) enc28j60_hw_tx(priv); } -static irqreturn_t enc28j60_irq(int irq, void *dev_id) -{ - struct enc28j60_net *priv = dev_id; - - /* - * Can't do anything in interrupt context because we need to - * block (spi_sync() is blocking) so fire of the interrupt - * handling workqueue. - * Remember that we access enc28j60 registers through SPI bus - * via spi_sync() call. - */ - schedule_work(&priv->irq_work); - - return IRQ_HANDLED; -} - static void enc28j60_tx_timeout(struct net_device *ndev, unsigned int txqueue) { struct enc28j60_net *priv = netdev_priv(ndev); @@ -1559,7 +1543,6 @@ static int enc28j60_probe(struct spi_device *spi) mutex_init(&priv->lock); INIT_WORK(&priv->tx_work, enc28j60_tx_work_handler); INIT_WORK(&priv->setrx_work, enc28j60_setrx_work_handler); - INIT_WORK(&priv->irq_work, enc28j60_irq_work_handler); INIT_WORK(&priv->restart_work, enc28j60_restart_work_handler); spi_set_drvdata(spi, priv); /* spi to priv reference */ SET_NETDEV_DEV(dev, &spi->dev); @@ -1578,7 +1561,8 @@ static int enc28j60_probe(struct spi_device *spi) /* Board setup must set the relevant edge trigger type; * level triggers won't currently work. 
*/ - ret = request_irq(spi->irq, enc28j60_irq, 0, DRV_NAME, priv); + ret = request_threaded_irq(spi->irq, NULL, enc28j60_irq, IRQF_ONESHOT, + DRV_NAME, priv); if (ret < 0) { if (netif_msg_probe(priv)) dev_err(&spi->dev, "request irq %d failed (ret = %d)\n", -- cgit v1.2.3 From 7f88efc8162cc6d516cacf1d82edc923b423483f Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Wed, 10 May 2023 22:02:47 +0200 Subject: net: samsung: sxgbe: Make sxgbe_drv_remove() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sxgbe_drv_remove() returned zero unconditionally, so it can be converted to return void without losing anything. The upside is that it becomes more obvious in its callers that there is no error to handle. Signed-off-by: Uwe Kleine-König Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h | 2 +- drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c | 4 +--- drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c | 5 +++-- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h index 0f45107db8dd..d14e0cfc3a6b 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h @@ -511,7 +511,7 @@ struct sxgbe_priv_data { struct sxgbe_priv_data *sxgbe_drv_probe(struct device *device, struct sxgbe_plat_data *plat_dat, void __iomem *addr); -int sxgbe_drv_remove(struct net_device *ndev); +void sxgbe_drv_remove(struct net_device *ndev); void sxgbe_set_ethtool_ops(struct net_device *netdev); int sxgbe_mdio_unregister(struct net_device *ndev); int sxgbe_mdio_register(struct net_device *ndev); diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 9664f029fa16..71439825ea4e 100644 --- 
a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -2203,7 +2203,7 @@ error_free_netdev: * Description: this function resets the TX/RX processes, disables the MAC RX/TX * changes the link status, releases the DMA descriptor rings. */ -int sxgbe_drv_remove(struct net_device *ndev) +void sxgbe_drv_remove(struct net_device *ndev) { struct sxgbe_priv_data *priv = netdev_priv(ndev); u8 queue_num; @@ -2231,8 +2231,6 @@ int sxgbe_drv_remove(struct net_device *ndev) kfree(priv->hw); free_netdev(ndev); - - return 0; } #ifdef CONFIG_PM diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c index 4e5526303f07..fb59ff94509a 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_platform.c @@ -172,9 +172,10 @@ err_out: static int sxgbe_platform_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); - int ret = sxgbe_drv_remove(ndev); - return ret; + sxgbe_drv_remove(ndev); + + return 0; } #ifdef CONFIG_PM -- cgit v1.2.3 From 48c0db05a1bf60067cdee062a6bfad6db5c1f602 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Thu, 11 May 2023 11:47:12 +0530 Subject: octeontx2-pf: mcs: Offload extended packet number(XPN) feature The macsec hardware block also supports XPN cipher suites, so add changes to offload the XPN feature. The changes include setting the SecY policy to the XPN cipher suite and configuring the Salt and SSCI values. A 64-bit packet number is passed instead of a 32-bit packet number. Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S.
Miller --- .../ethernet/marvell/octeontx2/nic/cn10k_macsec.c | 89 +++++++++++++++++----- .../ethernet/marvell/octeontx2/nic/otx2_common.h | 5 ++ 2 files changed, 75 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index aea4c802bb9d..8eaa50d0f668 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -6,7 +6,6 @@ #include #include -#include #include "otx2_common.h" #define MCS_TCAM0_MAC_DA_MASK GENMASK_ULL(47, 0) @@ -212,6 +211,7 @@ static int cn10k_mcs_write_rx_secy(struct otx2_nic *pfvf, struct mcs_secy_plcy_write_req *req; struct mbox *mbox = &pfvf->mbox; u64 policy; + u8 cipher; int ret; mutex_lock(&mbox->lock); @@ -227,7 +227,21 @@ static int cn10k_mcs_write_rx_secy(struct otx2_nic *pfvf, policy |= MCS_RX_SECY_PLCY_RP; policy |= MCS_RX_SECY_PLCY_AUTH_ENA; - policy |= FIELD_PREP(MCS_RX_SECY_PLCY_CIP, MCS_GCM_AES_128); + + switch (secy->key_len) { + case 16: + cipher = secy->xpn ? MCS_GCM_AES_XPN_128 : MCS_GCM_AES_128; + break; + case 32: + cipher = secy->xpn ? 
MCS_GCM_AES_XPN_256 : MCS_GCM_AES_256; + break; + default: + cipher = MCS_GCM_AES_128; + dev_warn(pfvf->dev, "Unsupported key length\n"); + break; + }; + + policy |= FIELD_PREP(MCS_RX_SECY_PLCY_CIP, cipher); policy |= FIELD_PREP(MCS_RX_SECY_PLCY_VAL, secy->validate_frames); policy |= MCS_RX_SECY_PLCY_ENA; @@ -323,9 +337,12 @@ static int cn10k_mcs_write_rx_sa_plcy(struct otx2_nic *pfvf, { unsigned char *src = rxsc->sa_key[assoc_num]; struct mcs_sa_plcy_write_req *plcy_req; + u8 *salt_p = rxsc->salt[assoc_num]; struct mcs_rx_sc_sa_map *map_req; struct mbox *mbox = &pfvf->mbox; + u64 ssci_salt_95_64 = 0; u8 reg, key_len; + u64 salt_63_0; int ret; mutex_lock(&mbox->lock); @@ -349,6 +366,15 @@ static int cn10k_mcs_write_rx_sa_plcy(struct otx2_nic *pfvf, reg++; } + if (secy->xpn) { + memcpy((u8 *)&salt_63_0, salt_p, 8); + memcpy((u8 *)&ssci_salt_95_64, salt_p + 8, 4); + ssci_salt_95_64 |= (__force u64)rxsc->ssci[assoc_num] << 32; + + plcy_req->plcy[0][6] = salt_63_0; + plcy_req->plcy[0][7] = ssci_salt_95_64; + } + plcy_req->sa_index[0] = rxsc->hw_sa_id[assoc_num]; plcy_req->sa_cnt = 1; plcy_req->dir = MCS_RX; @@ -404,6 +430,7 @@ static int cn10k_mcs_write_tx_secy(struct otx2_nic *pfvf, u8 tag_offset = 12; u8 sectag_tci = 0; u64 policy; + u8 cipher; int ret; sw_tx_sc = &secy->tx_sc; @@ -434,7 +461,21 @@ static int cn10k_mcs_write_tx_secy(struct otx2_nic *pfvf, policy |= FIELD_PREP(MCS_TX_SECY_PLCY_ST_OFFSET, tag_offset); policy |= MCS_TX_SECY_PLCY_INS_MODE; policy |= MCS_TX_SECY_PLCY_AUTH_ENA; - policy |= FIELD_PREP(MCS_TX_SECY_PLCY_CIP, MCS_GCM_AES_128); + + switch (secy->key_len) { + case 16: + cipher = secy->xpn ? MCS_GCM_AES_XPN_128 : MCS_GCM_AES_128; + break; + case 32: + cipher = secy->xpn ? 
MCS_GCM_AES_XPN_256 : MCS_GCM_AES_256; + break; + default: + cipher = MCS_GCM_AES_128; + dev_warn(pfvf->dev, "Unsupported key length\n"); + break; + }; + + policy |= FIELD_PREP(MCS_TX_SECY_PLCY_CIP, cipher); if (secy->protect_frames) policy |= MCS_TX_SECY_PLCY_PROTECT; @@ -544,8 +585,11 @@ static int cn10k_mcs_write_tx_sa_plcy(struct otx2_nic *pfvf, { unsigned char *src = txsc->sa_key[assoc_num]; struct mcs_sa_plcy_write_req *plcy_req; + u8 *salt_p = txsc->salt[assoc_num]; struct mbox *mbox = &pfvf->mbox; + u64 ssci_salt_95_64 = 0; u8 reg, key_len; + u64 salt_63_0; int ret; mutex_lock(&mbox->lock); @@ -561,6 +605,15 @@ static int cn10k_mcs_write_tx_sa_plcy(struct otx2_nic *pfvf, reg++; } + if (secy->xpn) { + memcpy((u8 *)&salt_63_0, salt_p, 8); + memcpy((u8 *)&ssci_salt_95_64, salt_p + 8, 4); + ssci_salt_95_64 |= (__force u64)txsc->ssci[assoc_num] << 32; + + plcy_req->plcy[0][6] = salt_63_0; + plcy_req->plcy[0][7] = ssci_salt_95_64; + } + plcy_req->plcy[0][8] = assoc_num; plcy_req->sa_index[0] = txsc->hw_sa_id[assoc_num]; plcy_req->sa_cnt = 1; @@ -922,8 +975,7 @@ static int cn10k_mcs_secy_tx_cfg(struct otx2_nic *pfvf, struct macsec_secy *secy { if (sw_tx_sa) { cn10k_mcs_write_tx_sa_plcy(pfvf, secy, txsc, sa_num); - cn10k_write_tx_sa_pn(pfvf, txsc, sa_num, - sw_tx_sa->next_pn_halves.lower); + cn10k_write_tx_sa_pn(pfvf, txsc, sa_num, sw_tx_sa->next_pn); cn10k_mcs_link_tx_sa2sc(pfvf, secy, txsc, sa_num, sw_tx_sa->active); } @@ -959,7 +1011,7 @@ static int cn10k_mcs_secy_rx_cfg(struct otx2_nic *pfvf, cn10k_mcs_write_rx_sa_plcy(pfvf, secy, mcs_rx_sc, sa_num, sw_rx_sa->active); cn10k_mcs_write_rx_sa_pn(pfvf, mcs_rx_sc, sa_num, - sw_rx_sa->next_pn_halves.lower); + sw_rx_sa->next_pn); } cn10k_mcs_write_rx_flowid(pfvf, mcs_rx_sc, hw_secy_id); @@ -1103,13 +1155,6 @@ static int cn10k_mdo_add_secy(struct macsec_context *ctx) if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN) return -EOPNOTSUPP; - /* Stick to 16 bytes key len until XPN support is added */ - if (secy->key_len != 16) - 
return -EOPNOTSUPP; - - if (secy->xpn) - return -EOPNOTSUPP; - txsc = cn10k_mcs_create_txsc(pfvf); if (IS_ERR(txsc)) return -ENOSPC; @@ -1202,6 +1247,9 @@ static int cn10k_mdo_add_txsa(struct macsec_context *ctx) return -ENOSPC; memcpy(&txsc->sa_key[sa_num], ctx->sa.key, secy->key_len); + memcpy(&txsc->salt[sa_num], sw_tx_sa->key.salt.bytes, MACSEC_SALT_LEN); + txsc->ssci[sa_num] = sw_tx_sa->ssci; + txsc->sa_bmap |= 1 << sa_num; if (netif_running(secy->netdev)) { @@ -1210,7 +1258,7 @@ static int cn10k_mdo_add_txsa(struct macsec_context *ctx) return err; err = cn10k_write_tx_sa_pn(pfvf, txsc, sa_num, - sw_tx_sa->next_pn_halves.lower); + sw_tx_sa->next_pn); if (err) return err; @@ -1243,7 +1291,7 @@ static int cn10k_mdo_upd_txsa(struct macsec_context *ctx) if (netif_running(secy->netdev)) { /* Keys cannot be changed after creation */ err = cn10k_write_tx_sa_pn(pfvf, txsc, sa_num, - sw_tx_sa->next_pn_halves.lower); + sw_tx_sa->next_pn); if (err) return err; @@ -1353,7 +1401,6 @@ static int cn10k_mdo_add_rxsa(struct macsec_context *ctx) struct macsec_rx_sc *sw_rx_sc = ctx->sa.rx_sa->sc; struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_rx_sa *rx_sa = ctx->sa.rx_sa; - u64 next_pn = rx_sa->next_pn_halves.lower; struct macsec_secy *secy = ctx->secy; bool sa_in_use = rx_sa->active; u8 sa_num = ctx->sa.assoc_num; @@ -1371,6 +1418,9 @@ static int cn10k_mdo_add_rxsa(struct macsec_context *ctx) return -ENOSPC; memcpy(&rxsc->sa_key[sa_num], ctx->sa.key, ctx->secy->key_len); + memcpy(&rxsc->salt[sa_num], rx_sa->key.salt.bytes, MACSEC_SALT_LEN); + rxsc->ssci[sa_num] = rx_sa->ssci; + rxsc->sa_bmap |= 1 << sa_num; if (netif_running(secy->netdev)) { @@ -1379,7 +1429,8 @@ static int cn10k_mdo_add_rxsa(struct macsec_context *ctx) if (err) return err; - err = cn10k_mcs_write_rx_sa_pn(pfvf, rxsc, sa_num, next_pn); + err = cn10k_mcs_write_rx_sa_pn(pfvf, rxsc, sa_num, + rx_sa->next_pn); if (err) return err; } @@ -1393,7 +1444,6 @@ static int cn10k_mdo_upd_rxsa(struct 
macsec_context *ctx) struct macsec_rx_sc *sw_rx_sc = ctx->sa.rx_sa->sc; struct cn10k_mcs_cfg *cfg = pfvf->macsec_cfg; struct macsec_rx_sa *rx_sa = ctx->sa.rx_sa; - u64 next_pn = rx_sa->next_pn_halves.lower; struct macsec_secy *secy = ctx->secy; bool sa_in_use = rx_sa->active; u8 sa_num = ctx->sa.assoc_num; @@ -1412,7 +1462,8 @@ static int cn10k_mdo_upd_rxsa(struct macsec_context *ctx) if (err) return err; - err = cn10k_mcs_write_rx_sa_pn(pfvf, rxsc, sa_num, next_pn); + err = cn10k_mcs_write_rx_sa_pn(pfvf, rxsc, sa_num, + rx_sa->next_pn); if (err) return err; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 0c8fc66ade82..d17274aab374 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -398,6 +399,8 @@ struct cn10k_mcs_txsc { u8 sa_bmap; u8 sa_key[CN10K_MCS_SA_PER_SC][MACSEC_MAX_KEY_LEN]; u8 encoding_sa; + u8 salt[CN10K_MCS_SA_PER_SC][MACSEC_SALT_LEN]; + ssci_t ssci[CN10K_MCS_SA_PER_SC]; }; struct cn10k_mcs_rxsc { @@ -410,6 +413,8 @@ struct cn10k_mcs_rxsc { u16 hw_sa_id[CN10K_MCS_SA_PER_SC]; u8 sa_bmap; u8 sa_key[CN10K_MCS_SA_PER_SC][MACSEC_MAX_KEY_LEN]; + u8 salt[CN10K_MCS_SA_PER_SC][MACSEC_SALT_LEN]; + ssci_t ssci[CN10K_MCS_SA_PER_SC]; }; struct cn10k_mcs_cfg { -- cgit v1.2.3 From d3616dc7793ffab17f3362fb439bfc1f887e6d8e Mon Sep 17 00:00:00 2001 From: wuych Date: Fri, 12 May 2023 10:44:03 +0800 Subject: net: liquidio: lio_main: Remove unnecessary (void*) conversions Pointer variables of void * type do not require type cast. Signed-off-by: wuych Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 9bd1d2d7027d..100daadbea2a 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -191,8 +191,7 @@ static void octeon_droq_bh(struct tasklet_struct *t) static int lio_wait_for_oq_pkts(struct octeon_device *oct) { - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; int retry = 100, pkt_cnt = 0, pending_pkts = 0; int i; @@ -950,8 +949,7 @@ static void octeon_destroy_resources(struct octeon_device *oct) { int i, refcount; struct msix_entry *msix_entries; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct handshake *hs; @@ -1211,8 +1209,7 @@ static int send_rx_ctrl_cmd(struct lio *lio, int start_stop) static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) { struct net_device *netdev = oct->props[ifidx].netdev; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct napi_struct *napi, *n; struct lio *lio; @@ -1774,8 +1771,7 @@ static int liquidio_open(struct net_device *netdev) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct napi_struct *napi, *n; int ret = 0; @@ -1855,8 +1851,7 @@ static int liquidio_stop(struct net_device *netdev) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv 
*)oct->priv; + struct octeon_device_priv *oct_priv = oct->priv; struct napi_struct *napi, *n; int ret = 0; @@ -4057,8 +4052,7 @@ static int octeon_device_init(struct octeon_device *octeon_dev) char bootcmd[] = "\n"; char *dbg_enb = NULL; enum lio_fw_state fw_state; - struct octeon_device_priv *oct_priv = - (struct octeon_device_priv *)octeon_dev->priv; + struct octeon_device_priv *oct_priv = octeon_dev->priv; atomic_set(&octeon_dev->status, OCT_DEV_BEGIN_STATE); /* Enable access to the octeon device and make its DMA capability -- cgit v1.2.3 From 28fa3ac487c6d30aaa10570481c27b6adfc492b3 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 11 May 2023 20:47:28 +0100 Subject: sfc: release encap match in efx_tc_flow_free() When force-freeing leftover entries from our match_action_ht, call efx_tc_delete_rule(), which releases all the rule's resources, rather than open-coding it. The open-coded version was missing a call to release the rule's encap match (if any). It probably doesn't matter as everything's being torn down anyway, but it's cleaner this way and prevents further error messages potentially being logged by efx_tc_encap_match_free() later on. Move efx_tc_flow_free() further down the file to avoid introducing a forward declaration of efx_tc_delete_rule(). Fixes: 17654d84b47c ("sfc: add offloading of 'foreign' TC (decap) rules") Signed-off-by: Edward Cree Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/tc.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 0327639a628a..236b44a4215e 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -132,23 +132,6 @@ static void efx_tc_free_action_set_list(struct efx_nic *efx, /* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */ } -static void efx_tc_flow_free(void *ptr, void *arg) -{ - struct efx_tc_flow_rule *rule = ptr; - struct efx_nic *efx = arg; - - netif_err(efx, drv, efx->net_dev, - "tc rule %lx still present at teardown, removing\n", - rule->cookie); - - efx_mae_delete_rule(efx, rule->fw_id); - - /* Release entries in subsidiary tables */ - efx_tc_free_action_set_list(efx, &rule->acts, true); - - kfree(rule); -} - /* Boilerplate for the simple 'copy a field' cases */ #define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field) \ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) { \ @@ -1454,6 +1437,21 @@ static void efx_tc_encap_match_free(void *ptr, void *__unused) kfree(encap); } +static void efx_tc_flow_free(void *ptr, void *arg) +{ + struct efx_tc_flow_rule *rule = ptr; + struct efx_nic *efx = arg; + + netif_err(efx, drv, efx->net_dev, + "tc rule %lx still present at teardown, removing\n", + rule->cookie); + + /* Also releases entries in subsidiary tables */ + efx_tc_delete_rule(efx, rule); + + kfree(rule); +} + int efx_init_struct_tc(struct efx_nic *efx) { int rc; -- cgit v1.2.3 From 56beb35d85e290b71372d7ee1093621f6abb6e96 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 11 May 2023 20:47:29 +0100 Subject: sfc: populate enc_ip_tos matches in MAE outer rules Currently tc.c will block them before they get here, but the following patch will change that.
Use the extack message from efx_mae_check_encap_match_caps() instead of writing a new one, since there's now more being fed in than just an IP version. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mae.c | 16 +++++++++++++++- drivers/net/ethernet/sfc/mae.h | 1 + drivers/net/ethernet/sfc/tc.c | 9 +++------ drivers/net/ethernet/sfc/tc.h | 1 + 4 files changed, 20 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c index 49706a7b94bf..8f4bb5d36ad8 100644 --- a/drivers/net/ethernet/sfc/mae.c +++ b/drivers/net/ethernet/sfc/mae.c @@ -482,12 +482,14 @@ int efx_mae_match_check_caps(struct efx_nic *efx, rc; \ }) /* Checks that the fields needed for encap-rule matches are supported by the - * MAE. All the fields are exact-match. + * MAE. All the fields are exact-match, except possibly ENC_IP_TOS. */ int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6, + u8 ip_tos_mask, struct netlink_ext_ack *extack) { u8 *supported_fields = efx->tc->caps->outer_rule_fields; + enum mask_type typ; int rc; if (CHECK(ENC_ETHER_TYPE)) @@ -504,6 +506,14 @@ int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6, if (CHECK(ENC_L4_DPORT) || CHECK(ENC_IP_PROTO)) return rc; + typ = classify_mask(&ip_tos_mask, sizeof(ip_tos_mask)); + rc = efx_mae_match_check_cap_typ(supported_fields[MAE_FIELD_ENC_IP_TOS], + typ); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, "No support for %s mask in field %s", + mask_type_name(typ), "enc_ip_tos"); + return rc; + } return 0; } #undef CHECK @@ -1003,6 +1013,10 @@ int efx_mae_register_encap_match(struct efx_nic *efx, ~(__be16)0); MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_PROTO, IPPROTO_UDP); MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_PROTO_MASK, ~0); + MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_TOS, + encap->ip_tos); + MCDI_STRUCT_SET_BYTE(match_crit, 
MAE_ENC_FIELD_PAIRS_ENC_IP_TOS_MASK, + encap->ip_tos_mask); rc = efx_mcdi_rpc(efx, MC_CMD_MAE_OUTER_RULE_INSERT, inbuf, sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); if (rc) diff --git a/drivers/net/ethernet/sfc/mae.h b/drivers/net/ethernet/sfc/mae.h index 9226219491a0..cec61bfde4d4 100644 --- a/drivers/net/ethernet/sfc/mae.h +++ b/drivers/net/ethernet/sfc/mae.h @@ -82,6 +82,7 @@ int efx_mae_match_check_caps(struct efx_nic *efx, const struct efx_tc_match_fields *mask, struct netlink_ext_ack *extack); int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6, + u8 ip_tos_mask, struct netlink_ext_ack *extack); int efx_mae_check_encap_type_supported(struct efx_nic *efx, enum efx_encap_type typ); diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 236b44a4215e..c2dda3ae5492 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -410,12 +410,9 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, return -EOPNOTSUPP; } - rc = efx_mae_check_encap_match_caps(efx, ipv6, extack); - if (rc) { - NL_SET_ERR_MSG_FMT_MOD(extack, "MAE hw reports no support for IPv%d encap matches", - ipv6 ? 
6 : 4); - return -EOPNOTSUPP; - } + rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos, extack); + if (rc) + return rc; encap = kzalloc(sizeof(*encap), GFP_USER); if (!encap) diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h index 04cced6a2d39..8d2abca26c23 100644 --- a/drivers/net/ethernet/sfc/tc.h +++ b/drivers/net/ethernet/sfc/tc.h @@ -78,6 +78,7 @@ struct efx_tc_encap_match { __be32 src_ip, dst_ip; struct in6_addr src_ip6, dst_ip6; __be16 udp_dport; + u8 ip_tos, ip_tos_mask; struct rhash_head linkage; enum efx_encap_type tun_type; refcount_t ref; -- cgit v1.2.3 From 3c9561c0a5b988be3dfd24ea1de2301b95efc640 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 11 May 2023 20:47:30 +0100 Subject: sfc: support TC decap rules matching on enc_ip_tos Allow efx_tc_encap_match entries to include an ip_tos and ip_tos_mask. To avoid partially-overlapping Outer Rules (which can lead to undefined behaviour in the hardware), store extra "pseudo" entries in our encap_match hashtable, which are used to enforce that all Outer Rule entries within a given <src_ip,dst_ip,udp_dport> tuple (or IPv6 equivalent) have the same ip_tos_mask. The "direct" encap_match entry takes a reference on the "pseudo", allowing it to be destroyed when all "direct" entries using it are removed. efx_tc_em_pseudo_type is an enum rather than just a bool because in future an additional pseudo-type will be added to support Conntrack offload. Signed-off-by: Edward Cree Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/tc.c | 145 +++++++++++++++++++++++++++++++----------- drivers/net/ethernet/sfc/tc.h | 24 +++++++ 2 files changed, 133 insertions(+), 36 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index c2dda3ae5492..8e1769d2c4ee 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -202,6 +202,7 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx, BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_IP) | BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | BIT(FLOW_DISSECTOR_KEY_TCP) | @@ -346,20 +347,47 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx, return 0; } +static void efx_tc_flower_release_encap_match(struct efx_nic *efx, + struct efx_tc_encap_match *encap) +{ + int rc; + + if (!refcount_dec_and_test(&encap->ref)) + return; /* still in use */ + + if (encap->type == EFX_TC_EM_DIRECT) { + rc = efx_mae_unregister_encap_match(efx, encap); + if (rc) + /* Display message but carry on and remove entry from our + * SW tables, because there's not much we can do about it. 
+ */ + netif_err(efx, drv, efx->net_dev, + "Failed to release encap match %#x, rc %d\n", + encap->fw_id, rc); + } + rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage, + efx_tc_encap_match_ht_params); + if (encap->pseudo) + efx_tc_flower_release_encap_match(efx, encap->pseudo); + kfree(encap); +} + static int efx_tc_flower_record_encap_match(struct efx_nic *efx, struct efx_tc_match *match, enum efx_encap_type type, + enum efx_tc_em_pseudo_type em_type, + u8 child_ip_tos_mask, struct netlink_ext_ack *extack) { - struct efx_tc_encap_match *encap, *old; + struct efx_tc_encap_match *encap, *old, *pseudo = NULL; bool ipv6 = false; int rc; /* We require that the socket-defining fields (IP addrs and UDP dest - * port) are present and exact-match. Other fields are currently not - * allowed. This meets what OVS will ask for, and means that we don't - * need to handle difficult checks for overlapping matches as could - * come up if we allowed masks or varying sets of match fields. + * port) are present and exact-match. Other fields may only be used + * if the field-set (and any masks) are the same for all encap + * matches on the same tuple; this is enforced by + * pseudo encap matches. 
*/ if (match->mask.enc_dst_ip | match->mask.enc_src_ip) { if (!IS_ALL_ONES(match->mask.enc_dst_ip)) { @@ -402,21 +430,37 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, return -EOPNOTSUPP; } if (match->mask.enc_ip_tos) { - NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP ToS not supported"); - return -EOPNOTSUPP; + struct efx_tc_match pmatch = *match; + + if (em_type == EFX_TC_EM_PSEUDO_MASK) { /* can't happen */ + NL_SET_ERR_MSG_MOD(extack, "Bad recursion in egress encap match handler"); + return -EOPNOTSUPP; + } + pmatch.value.enc_ip_tos = 0; + pmatch.mask.enc_ip_tos = 0; + rc = efx_tc_flower_record_encap_match(efx, &pmatch, type, + EFX_TC_EM_PSEUDO_MASK, + match->mask.enc_ip_tos, + extack); + if (rc) + return rc; + pseudo = pmatch.encap; } if (match->mask.enc_ip_ttl) { NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported"); - return -EOPNOTSUPP; + rc = -EOPNOTSUPP; + goto fail_pseudo; } rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos, extack); if (rc) - return rc; + goto fail_pseudo; encap = kzalloc(sizeof(*encap), GFP_USER); - if (!encap) - return -ENOMEM; + if (!encap) { + rc = -ENOMEM; + goto fail_pseudo; + } encap->src_ip = match->value.enc_src_ip; encap->dst_ip = match->value.enc_dst_ip; #ifdef CONFIG_IPV6 @@ -425,12 +469,56 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, #endif encap->udp_dport = match->value.enc_dport; encap->tun_type = type; + encap->ip_tos = match->value.enc_ip_tos; + encap->ip_tos_mask = match->mask.enc_ip_tos; + encap->child_ip_tos_mask = child_ip_tos_mask; + encap->type = em_type; + encap->pseudo = pseudo; old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht, &encap->linkage, efx_tc_encap_match_ht_params); if (old) { /* don't need our new entry */ kfree(encap); + if (pseudo) /* don't need our new pseudo either */ + efx_tc_flower_release_encap_match(efx, pseudo); + /* check old and new em_types are compatible */ + switch (old->type) { + 
case EFX_TC_EM_DIRECT: + /* old EM is in hardware, so mustn't overlap with a + * pseudo, but may be shared with another direct EM + */ + if (em_type == EFX_TC_EM_DIRECT) + break; + NL_SET_ERR_MSG_MOD(extack, "Pseudo encap match conflicts with existing direct entry"); + return -EEXIST; + case EFX_TC_EM_PSEUDO_MASK: + /* old EM is protecting a ToS-qualified filter, so may + * only be shared with another pseudo for the same + * ToS mask. + */ + if (em_type != EFX_TC_EM_PSEUDO_MASK) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "%s encap match conflicts with existing pseudo(MASK) entry", + encap->type ? "Pseudo" : "Direct"); + return -EEXIST; + } + if (child_ip_tos_mask != old->child_ip_tos_mask) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Pseudo encap match for TOS mask %#04x conflicts with existing pseudo(MASK) entry for TOS mask %#04x", + child_ip_tos_mask, + old->child_ip_tos_mask); + return -EEXIST; + } + break; + default: /* Unrecognised pseudo-type. Just say no */ + NL_SET_ERR_MSG_FMT_MOD(extack, + "%s encap match conflicts with existing pseudo(%d) entry", + encap->type ? 
"Pseudo" : "Direct", + old->type); + return -EEXIST; + } + /* check old and new tun_types are compatible */ if (old->tun_type != type) { NL_SET_ERR_MSG_FMT_MOD(extack, "Egress encap match with conflicting tun_type %u != %u", @@ -442,10 +530,12 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, /* existing entry found */ encap = old; } else { - rc = efx_mae_register_encap_match(efx, encap); - if (rc) { - NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW"); - goto fail; + if (em_type == EFX_TC_EM_DIRECT) { + rc = efx_mae_register_encap_match(efx, encap); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW"); + goto fail; + } } refcount_set(&encap->ref, 1); } @@ -455,30 +545,12 @@ fail: rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage, efx_tc_encap_match_ht_params); kfree(encap); +fail_pseudo: + if (pseudo) + efx_tc_flower_release_encap_match(efx, pseudo); return rc; } -static void efx_tc_flower_release_encap_match(struct efx_nic *efx, - struct efx_tc_encap_match *encap) -{ - int rc; - - if (!refcount_dec_and_test(&encap->ref)) - return; /* still in use */ - - rc = efx_mae_unregister_encap_match(efx, encap); - if (rc) - /* Display message but carry on and remove entry from our - * SW tables, because there's not much we can do about it. 
- */ - netif_err(efx, drv, efx->net_dev, - "Failed to release encap match %#x, rc %d\n", - encap->fw_id, rc); - rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage, - efx_tc_encap_match_ht_params); - kfree(encap); -} - static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule) { efx_mae_delete_rule(efx, rule->fw_id); @@ -632,6 +704,7 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, } rc = efx_tc_flower_record_encap_match(efx, &match, type, + EFX_TC_EM_DIRECT, 0, extack); if (rc) goto release; diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h index 8d2abca26c23..0f14481d2d9e 100644 --- a/drivers/net/ethernet/sfc/tc.h +++ b/drivers/net/ethernet/sfc/tc.h @@ -74,6 +74,27 @@ static inline bool efx_tc_match_is_encap(const struct efx_tc_match_fields *mask) mask->enc_ip_ttl || mask->enc_sport || mask->enc_dport; } +/** + * enum efx_tc_em_pseudo_type - &struct efx_tc_encap_match pseudo type + * + * These are used to classify "pseudo" encap matches, which don't refer + * to an entry in hardware but rather indicate that a section of the + * match space is in use by another Outer Rule. + * + * @EFX_TC_EM_DIRECT: real HW entry in Outer Rule table; not a pseudo. + * Hardware index in &struct efx_tc_encap_match.fw_id is valid. + * @EFX_TC_EM_PSEUDO_MASK: registered by an encap match which includes a + * match on an optional field (currently only ip_tos), to prevent an + * overlapping encap match _without_ optional fields. + * The pseudo encap match may be referenced again by an encap match + * with a different ip_tos value, but all ip_tos_mask must match the + * first (stored in our child_ip_tos_mask). 
+ */ +enum efx_tc_em_pseudo_type { + EFX_TC_EM_DIRECT, + EFX_TC_EM_PSEUDO_MASK, +}; + struct efx_tc_encap_match { __be32 src_ip, dst_ip; struct in6_addr src_ip6, dst_ip6; @@ -81,8 +102,11 @@ struct efx_tc_encap_match { u8 ip_tos, ip_tos_mask; struct rhash_head linkage; enum efx_encap_type tun_type; + u8 child_ip_tos_mask; refcount_t ref; + enum efx_tc_em_pseudo_type type; u32 fw_id; /* index of this entry in firmware encap match table */ + struct efx_tc_encap_match *pseudo; /* Referenced pseudo EM if needed */ }; struct efx_tc_match { -- cgit v1.2.3 From b6583d5e9e94adce1be61ec59fef4e129f0bc68a Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 11 May 2023 20:47:31 +0100 Subject: sfc: support TC decap rules matching on enc_src_port Allow efx_tc_encap_match entries to include a udp_sport and a udp_sport_mask. As with enc_ip_tos, use pseudos to enforce that all encap matches within a given <src_ip,dst_ip,udp_dport> tuple have the same udp_sport_mask. Note that since we use a single layer of pseudos for both fields, two matches that differ in (say) udp_sport value aren't permitted to have different ip_tos_mask, even though this would technically be safe. Current userland TC does not support setting enc_src_port; this patch was tested with an iproute2 patched to support it. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mae.c | 14 +++++++++++++- drivers/net/ethernet/sfc/mae.h | 2 +- drivers/net/ethernet/sfc/tc.c | 31 +++++++++++++++++++++---------- drivers/net/ethernet/sfc/tc.h | 10 ++++++---- 4 files changed, 41 insertions(+), 16 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c index 8f4bb5d36ad8..37a4c6925ad4 100644 --- a/drivers/net/ethernet/sfc/mae.c +++ b/drivers/net/ethernet/sfc/mae.c @@ -485,7 +485,7 @@ int efx_mae_match_check_caps(struct efx_nic *efx, * MAE. All the fields are exact-match, except possibly ENC_IP_TOS. 
*/ int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6, - u8 ip_tos_mask, + u8 ip_tos_mask, __be16 udp_sport_mask, struct netlink_ext_ack *extack) { u8 *supported_fields = efx->tc->caps->outer_rule_fields; @@ -506,6 +506,14 @@ int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6, if (CHECK(ENC_L4_DPORT) || CHECK(ENC_IP_PROTO)) return rc; + typ = classify_mask((const u8 *)&udp_sport_mask, sizeof(udp_sport_mask)); + rc = efx_mae_match_check_cap_typ(supported_fields[MAE_FIELD_ENC_L4_SPORT], + typ); + if (rc) { + NL_SET_ERR_MSG_FMT_MOD(extack, "No support for %s mask in field %s", + mask_type_name(typ), "enc_src_port"); + return rc; + } typ = classify_mask(&ip_tos_mask, sizeof(ip_tos_mask)); rc = efx_mae_match_check_cap_typ(supported_fields[MAE_FIELD_ENC_IP_TOS], typ); @@ -1011,6 +1019,10 @@ int efx_mae_register_encap_match(struct efx_nic *efx, encap->udp_dport); MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_DPORT_BE_MASK, ~(__be16)0); + MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_DPORT_BE, + encap->udp_sport); + MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_DPORT_BE_MASK, + encap->udp_sport_mask); MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_PROTO, IPPROTO_UDP); MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_PROTO_MASK, ~0); MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_TOS, diff --git a/drivers/net/ethernet/sfc/mae.h b/drivers/net/ethernet/sfc/mae.h index cec61bfde4d4..1cf8dfeb0c28 100644 --- a/drivers/net/ethernet/sfc/mae.h +++ b/drivers/net/ethernet/sfc/mae.h @@ -82,7 +82,7 @@ int efx_mae_match_check_caps(struct efx_nic *efx, const struct efx_tc_match_fields *mask, struct netlink_ext_ack *extack); int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6, - u8 ip_tos_mask, + u8 ip_tos_mask, __be16 udp_sport_mask, struct netlink_ext_ack *extack); int efx_mae_check_encap_type_supported(struct efx_nic *efx, enum efx_encap_type typ); diff 
--git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 8e1769d2c4ee..da684b4b7211 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -377,6 +377,7 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, enum efx_encap_type type, enum efx_tc_em_pseudo_type em_type, u8 child_ip_tos_mask, + __be16 child_udp_sport_mask, struct netlink_ext_ack *extack) { struct efx_tc_encap_match *encap, *old, *pseudo = NULL; @@ -425,11 +426,7 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port"); return -EOPNOTSUPP; } - if (match->mask.enc_sport) { - NL_SET_ERR_MSG_MOD(extack, "Egress encap match on src UDP port not supported"); - return -EOPNOTSUPP; - } - if (match->mask.enc_ip_tos) { + if (match->mask.enc_sport || match->mask.enc_ip_tos) { struct efx_tc_match pmatch = *match; if (em_type == EFX_TC_EM_PSEUDO_MASK) { /* can't happen */ @@ -438,9 +435,12 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, } pmatch.value.enc_ip_tos = 0; pmatch.mask.enc_ip_tos = 0; + pmatch.value.enc_sport = 0; + pmatch.mask.enc_sport = 0; rc = efx_tc_flower_record_encap_match(efx, &pmatch, type, EFX_TC_EM_PSEUDO_MASK, match->mask.enc_ip_tos, + match->mask.enc_sport, extack); if (rc) return rc; @@ -452,7 +452,8 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, goto fail_pseudo; } - rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos, extack); + rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos, + match->mask.enc_sport, extack); if (rc) goto fail_pseudo; @@ -472,6 +473,9 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, encap->ip_tos = match->value.enc_ip_tos; encap->ip_tos_mask = match->mask.enc_ip_tos; encap->child_ip_tos_mask = child_ip_tos_mask; + encap->udp_sport = match->value.enc_sport; + encap->udp_sport_mask = match->mask.enc_sport; + 
encap->child_udp_sport_mask = child_udp_sport_mask; encap->type = em_type; encap->pseudo = pseudo; old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht, @@ -493,9 +497,9 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, NL_SET_ERR_MSG_MOD(extack, "Pseudo encap match conflicts with existing direct entry"); return -EEXIST; case EFX_TC_EM_PSEUDO_MASK: - /* old EM is protecting a ToS-qualified filter, so may - * only be shared with another pseudo for the same - * ToS mask. + /* old EM is protecting a ToS- or src port-qualified + * filter, so may only be shared with another pseudo + * for the same ToS and src port masks. */ if (em_type != EFX_TC_EM_PSEUDO_MASK) { NL_SET_ERR_MSG_FMT_MOD(extack, @@ -510,6 +514,13 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, old->child_ip_tos_mask); return -EEXIST; } + if (child_udp_sport_mask != old->child_udp_sport_mask) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Pseudo encap match for UDP src port mask %#x conflicts with existing pseudo(MASK) entry for mask %#x", + child_udp_sport_mask, + old->child_udp_sport_mask); + return -EEXIST; + } break; default: /* Unrecognised pseudo-type. Just say no */ NL_SET_ERR_MSG_FMT_MOD(extack, @@ -704,7 +715,7 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, } rc = efx_tc_flower_record_encap_match(efx, &match, type, - EFX_TC_EM_DIRECT, 0, + EFX_TC_EM_DIRECT, 0, 0, extack); if (rc) goto release; diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h index 0f14481d2d9e..24e9640c74e9 100644 --- a/drivers/net/ethernet/sfc/tc.h +++ b/drivers/net/ethernet/sfc/tc.h @@ -84,11 +84,11 @@ static inline bool efx_tc_match_is_encap(const struct efx_tc_match_fields *mask) * @EFX_TC_EM_DIRECT: real HW entry in Outer Rule table; not a pseudo. * Hardware index in &struct efx_tc_encap_match.fw_id is valid. 
* @EFX_TC_EM_PSEUDO_MASK: registered by an encap match which includes a - * match on an optional field (currently only ip_tos), to prevent an - * overlapping encap match _without_ optional fields. + * match on an optional field (currently ip_tos and/or udp_sport), + * to prevent an overlapping encap match _without_ optional fields. * The pseudo encap match may be referenced again by an encap match - * with a different ip_tos value, but all ip_tos_mask must match the - * first (stored in our child_ip_tos_mask). + * with different values for these fields, but all masks must match the + * first (stored in our child_* fields). */ enum efx_tc_em_pseudo_type { EFX_TC_EM_DIRECT, @@ -99,10 +99,12 @@ struct efx_tc_encap_match { __be32 src_ip, dst_ip; struct in6_addr src_ip6, dst_ip6; __be16 udp_dport; + __be16 udp_sport, udp_sport_mask; u8 ip_tos, ip_tos_mask; struct rhash_head linkage; enum efx_encap_type tun_type; u8 child_ip_tos_mask; + __be16 child_udp_sport_mask; refcount_t ref; enum efx_tc_em_pseudo_type type; u32 fw_id; /* index of this entry in firmware encap match table */ -- cgit v1.2.3 From 7e400ff35cbe3b25fc1da1586b6cd9bc426dfb1c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 11 May 2023 10:21:10 -0700 Subject: net: bcmgenet: Add support for PHY-based Wake-on-LAN If available, interrogate the PHY to find out whether we can use it for Wake-on-LAN. This can be a more power efficient way of implementing that feature, especially when the MAC is powered off in low power states. Reviewed-by: Simon Horman Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 3a4b6cb7b7b9..7a41cad5788f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -42,6 +42,12 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; + if (dev->phydev) { + phy_ethtool_get_wol(dev->phydev, wol); + if (wol->supported) + return; + } + if (!device_can_wakeup(kdev)) { wol->supported = 0; wol->wolopts = 0; @@ -63,6 +69,14 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; + int ret; + + /* Try Wake-on-LAN from the PHY first */ + if (dev->phydev) { + ret = phy_ethtool_set_wol(dev->phydev, wol); + if (ret != -EOPNOTSUPP) + return ret; + } if (!device_can_wakeup(kdev)) return -ENOTSUPP; -- cgit v1.2.3 From b51f4113ebb02011f0ca86abc3134b28d2071b6a Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Thu, 11 May 2023 09:12:12 +0800 Subject: net: introduce and use skb_frag_fill_page_desc() Most users use __skb_frag_set_page()/skb_frag_off_set()/ skb_frag_size_set() to fill the page desc for a skb frag. Introduce skb_frag_fill_page_desc() to do that. net/bpf/test_run.c does not call skb_frag_off_set() to set the offset, "copy_from_user(page_address(page), ...)" and 'shinfo' being part of the 'data' kzalloced in bpf_test_init() suggest that it is assuming offset to be initialized as zero, so call skb_frag_fill_page_desc() with offset being zero for this case. Also, skb_frag_set_page() is not used anymore, so remove it. 
Signed-off-by: Yunsheng Lin Reviewed-by: Leon Romanovsky Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 6 ++--- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++-- drivers/net/ethernet/chelsio/cxgb3/sge.c | 5 ++-- drivers/net/ethernet/emulex/benet/be_main.c | 32 +++++++++++++----------- drivers/net/ethernet/freescale/enetc/enetc.c | 5 ++-- drivers/net/ethernet/fungible/funeth/funeth_rx.c | 5 ++-- drivers/net/ethernet/marvell/mvneta.c | 5 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 +-- drivers/net/ethernet/sun/cassini.c | 8 ++---- drivers/net/virtio_net.c | 4 +-- drivers/net/vmxnet3/vmxnet3_drv.c | 4 +-- drivers/net/xen-netback/netback.c | 4 +-- include/linux/skbuff.h | 27 ++++++++------------ net/bpf/test_run.c | 3 +-- net/core/gro.c | 4 +-- net/core/pktgen.c | 13 ++++++---- net/core/skbuff.c | 7 +++--- net/tls/tls_device.c | 10 +++----- net/xfrm/xfrm_ipcomp.c | 5 +--- 19 files changed, 64 insertions(+), 92 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 7f933175cbda..4de22eed099a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -532,10 +532,10 @@ static bool aq_add_rx_fragment(struct device *dev, buff_->rxdata.pg_off, buff_->len, DMA_FROM_DEVICE); - skb_frag_off_set(frag, buff_->rxdata.pg_off); - skb_frag_size_set(frag, buff_->len); sinfo->xdp_frags_size += buff_->len; - __skb_frag_set_page(frag, buff_->rxdata.page); + skb_frag_fill_page_desc(frag, buff_->rxdata.page, + buff_->rxdata.pg_off, + buff_->len); buff_->is_cleaned = 1; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index dcd9367f05af..efaff5018af8 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1085,9 +1085,8 @@ static u32 
__bnxt_rx_agg_pages(struct bnxt *bp, RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT; cons_rx_buf = &rxr->rx_agg_ring[cons]; - skb_frag_off_set(frag, cons_rx_buf->offset); - skb_frag_size_set(frag, frag_len); - __skb_frag_set_page(frag, cons_rx_buf->page); + skb_frag_fill_page_desc(frag, cons_rx_buf->page, + cons_rx_buf->offset, frag_len); shinfo->nr_frags = i + 1; __clear_bit(cons, rxr->rx_agg_bmap); diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index efa7f401529e..2e9a74fe0970 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -2184,9 +2184,8 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, len -= offset; rx_frag += nr_frags; - __skb_frag_set_page(rx_frag, sd->pg_chunk.page); - skb_frag_off_set(rx_frag, sd->pg_chunk.offset + offset); - skb_frag_size_set(rx_frag, len); + skb_frag_fill_page_desc(rx_frag, sd->pg_chunk.page, + sd->pg_chunk.offset + offset, len); skb->len += len; skb->data_len += len; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 7e408bcc88de..3164ed205cf7 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2343,11 +2343,10 @@ static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb, hdr_len = ETH_HLEN; memcpy(skb->data, start, hdr_len); skb_shinfo(skb)->nr_frags = 1; - skb_frag_set_page(skb, 0, page_info->page); - skb_frag_off_set(&skb_shinfo(skb)->frags[0], - page_info->page_offset + hdr_len); - skb_frag_size_set(&skb_shinfo(skb)->frags[0], - curr_frag_len - hdr_len); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[0], + page_info->page, + page_info->page_offset + hdr_len, + curr_frag_len - hdr_len); skb->data_len = curr_frag_len - hdr_len; skb->truesize += rx_frag_size; skb->tail += hdr_len; @@ -2369,16 +2368,17 @@ static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb, if 
(page_info->page_offset == 0) { /* Fresh page */ j++; - skb_frag_set_page(skb, j, page_info->page); - skb_frag_off_set(&skb_shinfo(skb)->frags[j], - page_info->page_offset); - skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j], + page_info->page, + page_info->page_offset, + curr_frag_len); skb_shinfo(skb)->nr_frags++; } else { put_page(page_info->page); + skb_frag_size_add(&skb_shinfo(skb)->frags[j], + curr_frag_len); } - skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len); skb->len += curr_frag_len; skb->data_len += curr_frag_len; skb->truesize += rx_frag_size; @@ -2451,14 +2451,16 @@ static void be_rx_compl_process_gro(struct be_rx_obj *rxo, if (i == 0 || page_info->page_offset == 0) { /* First frag or Fresh page */ j++; - skb_frag_set_page(skb, j, page_info->page); - skb_frag_off_set(&skb_shinfo(skb)->frags[j], - page_info->page_offset); - skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j], + page_info->page, + page_info->page_offset, + curr_frag_len); } else { put_page(page_info->page); + skb_frag_size_add(&skb_shinfo(skb)->frags[j], + curr_frag_len); } - skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len); + skb->truesize += rx_frag_size; remaining -= curr_frag_len; memset(page_info, 0, sizeof(*page_info)); diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 3c4fa26f0f9b..63854294ac33 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1445,9 +1445,8 @@ static void enetc_add_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i, xdp_buff_set_frag_pfmemalloc(xdp_buff); frag = &shinfo->frags[shinfo->nr_frags]; - skb_frag_off_set(frag, rx_swbd->page_offset); - skb_frag_size_set(frag, size); - __skb_frag_set_page(frag, rx_swbd->page); + skb_frag_fill_page_desc(frag, rx_swbd->page, rx_swbd->page_offset, + size); shinfo->nr_frags++; } 
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_rx.c b/drivers/net/ethernet/fungible/funeth/funeth_rx.c index 29a6c2ede43a..7e2584895de3 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_rx.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_rx.c @@ -323,9 +323,8 @@ static int fun_gather_pkt(struct funeth_rxq *q, unsigned int tot_len, if (ref_ok) ref_ok |= buf->node; - __skb_frag_set_page(frags, buf->page); - skb_frag_off_set(frags, q->buf_offset); - skb_frag_size_set(frags++, frag_len); + skb_frag_fill_page_desc(frags++, buf->page, q->buf_offset, + frag_len); tot_len -= frag_len; if (!tot_len) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 6c6b66d3ea6e..e2abc00d0472 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2376,9 +2376,8 @@ mvneta_swbm_add_rx_fragment(struct mvneta_port *pp, if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) { skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++]; - skb_frag_off_set(frag, pp->rx_offset_correction); - skb_frag_size_set(frag, data_len); - __skb_frag_set_page(frag, page); + skb_frag_fill_page_desc(frag, page, + pp->rx_offset_correction, data_len); if (!xdp_buff_has_frags(xdp)) { sinfo->xdp_frags_size = *size; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 69634829558e..704b022cd1f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -491,9 +491,7 @@ mlx5e_add_skb_shared_info_frag(struct mlx5e_rq *rq, struct skb_shared_info *sinf } frag = &sinfo->frags[sinfo->nr_frags++]; - __skb_frag_set_page(frag, frag_page->page); - skb_frag_off_set(frag, frag_offset); - skb_frag_size_set(frag, len); + skb_frag_fill_page_desc(frag, frag_page->page, frag_offset, len); if (page_is_pfmemalloc(frag_page->page)) xdp_buff_set_frag_pfmemalloc(xdp); diff --git a/drivers/net/ethernet/sun/cassini.c 
b/drivers/net/ethernet/sun/cassini.c index 4ef05bad4613..2d52f54ebb45 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -1998,10 +1998,8 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, skb->truesize += hlen - swivel; skb->len += hlen - swivel; - __skb_frag_set_page(frag, page->buffer); + skb_frag_fill_page_desc(frag, page->buffer, off, hlen - swivel); __skb_frag_ref(frag); - skb_frag_off_set(frag, off); - skb_frag_size_set(frag, hlen - swivel); /* any more data? */ if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) { @@ -2024,10 +2022,8 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, skb->len += hlen; frag++; - __skb_frag_set_page(frag, page->buffer); + skb_frag_fill_page_desc(frag, page->buffer, 0, hlen); __skb_frag_ref(frag); - skb_frag_off_set(frag, 0); - skb_frag_size_set(frag, hlen); RX_USED_ADD(page, hlen + cp->crc_size); } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 97241006d64a..29eccc8ff41f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1272,9 +1272,7 @@ static int virtnet_build_xdp_buff_mrg(struct net_device *dev, } frag = &shinfo->frags[shinfo->nr_frags++]; - __skb_frag_set_page(frag, page); - skb_frag_off_set(frag, offset); - skb_frag_size_set(frag, len); + skb_frag_fill_page_desc(frag, page, offset, len); if (page_is_pfmemalloc(page)) xdp_buff_set_frag_pfmemalloc(xdp); diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index f2b76ee866a4..7fa74b8b2100 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -686,9 +686,7 @@ vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd, BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS); - __skb_frag_set_page(frag, rbi->page); - skb_frag_off_set(frag, 0); - skb_frag_size_set(frag, rcd->len); + skb_frag_fill_page_desc(frag, rbi->page, 0, rcd->len); skb->data_len += rcd->len; 
skb->truesize += PAGE_SIZE; skb_shinfo(skb)->nr_frags++; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index c1501f41e2d8..3d79b35eb577 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1128,9 +1128,7 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s BUG(); offset += len; - __skb_frag_set_page(&frags[i], page); - skb_frag_off_set(&frags[i], 0); - skb_frag_size_set(&frags[i], len); + skb_frag_fill_page_desc(&frags[i], page, 0, len); } /* Release all the original (foreign) frags. */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 738776ab8838..30be21c7d05f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2411,6 +2411,15 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) return skb_headlen(skb) + __skb_pagelen(skb); } +static inline void skb_frag_fill_page_desc(skb_frag_t *frag, + struct page *page, + int off, int size) +{ + frag->bv_page = page; + frag->bv_offset = off; + skb_frag_size_set(frag, size); +} + static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, int i, struct page *page, int off, int size) @@ -2422,9 +2431,7 @@ static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, * that not all callers have unique ownership of the page but rely * on page_is_pfmemalloc doing the right thing(tm). */ - frag->bv_page = page; - frag->bv_offset = off; - skb_frag_size_set(frag, size); + skb_frag_fill_page_desc(frag, page, off, size); } /** @@ -3496,20 +3503,6 @@ static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page) frag->bv_page = page; } -/** - * skb_frag_set_page - sets the page contained in a paged fragment of an skb - * @skb: the buffer - * @f: the fragment offset - * @page: the page to set - * - * Sets the @f'th fragment of @skb to contain @page. 
- */ -static inline void skb_frag_set_page(struct sk_buff *skb, int f, - struct page *page) -{ - __skb_frag_set_page(&skb_shinfo(skb)->frags[f], page); -} - bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); /** diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index e79e3a415ca9..98143b86a9dd 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -1415,11 +1415,10 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, } frag = &sinfo->frags[sinfo->nr_frags++]; - __skb_frag_set_page(frag, page); data_len = min_t(u32, kattr->test.data_size_in - size, PAGE_SIZE); - skb_frag_size_set(frag, data_len); + skb_frag_fill_page_desc(frag, page, 0, data_len); if (copy_from_user(page_address(page), data_in + size, data_len)) { diff --git a/net/core/gro.c b/net/core/gro.c index 2d84165cb4f1..6783a47a6136 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -239,9 +239,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags; - __skb_frag_set_page(frag, page); - skb_frag_off_set(frag, first_offset); - skb_frag_size_set(frag, first_size); + skb_frag_fill_page_desc(frag, page, first_offset, first_size); memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); /* We dont need to clear skbinfo->nr_frags here */ diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 760238196db1..f56b8d697014 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2785,14 +2785,17 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, break; } get_page(pkt_dev->page); - skb_frag_set_page(skb, i, pkt_dev->page); - skb_frag_off_set(&skb_shinfo(skb)->frags[i], 0); + /*last fragment, fill rest of data*/ if (i == (frags - 1)) - skb_frag_size_set(&skb_shinfo(skb)->frags[i], - (datalen < PAGE_SIZE ? datalen : PAGE_SIZE)); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i], + pkt_dev->page, 0, + (datalen < PAGE_SIZE ? 
+ datalen : PAGE_SIZE)); else - skb_frag_size_set(&skb_shinfo(skb)->frags[i], frag_len); + skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i], + pkt_dev->page, 0, frag_len); + datalen -= skb_frag_size(&skb_shinfo(skb)->frags[i]); skb->len += skb_frag_size(&skb_shinfo(skb)->frags[i]); skb->data_len += skb_frag_size(&skb_shinfo(skb)->frags[i]); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 01b48e68aca0..6724a84ebb09 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4234,10 +4234,9 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb) struct page *page; page = virt_to_head_page(frag_skb->head); - __skb_frag_set_page(&head_frag, page); - skb_frag_off_set(&head_frag, frag_skb->data - - (unsigned char *)page_address(page)); - skb_frag_size_set(&head_frag, skb_headlen(frag_skb)); + skb_frag_fill_page_desc(&head_frag, page, frag_skb->data - + (unsigned char *)page_address(page), + skb_headlen(frag_skb)); return head_frag; } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index a7cc4f9faac2..daeff54bdbfa 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -268,9 +268,8 @@ static void tls_append_frag(struct tls_record_info *record, skb_frag_size_add(frag, size); } else { ++frag; - __skb_frag_set_page(frag, pfrag->page); - skb_frag_off_set(frag, pfrag->offset); - skb_frag_size_set(frag, size); + skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset, + size); ++record->num_frags; get_page(pfrag->page); } @@ -357,9 +356,8 @@ static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx, return -ENOMEM; frag = &record->frags[0]; - __skb_frag_set_page(frag, pfrag->page); - skb_frag_off_set(frag, pfrag->offset); - skb_frag_size_set(frag, prepend_size); + skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset, + prepend_size); get_page(pfrag->page); pfrag->offset += prepend_size; diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 80143360bf09..9c0fa0e1786a 100644 --- 
a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -74,14 +74,11 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) if (!page) return -ENOMEM; - __skb_frag_set_page(frag, page); - len = PAGE_SIZE; if (dlen < len) len = dlen; - skb_frag_off_set(frag, 0); - skb_frag_size_set(frag, len); + skb_frag_fill_page_desc(frag, page, 0, len); memcpy(skb_frag_address(frag), scratch, len); skb->truesize += len; -- cgit v1.2.3 From 278fda0d52f67244044384abd7dd5b3a5b3a5604 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Thu, 11 May 2023 09:12:13 +0800 Subject: net: remove __skb_frag_set_page() The remaining users calling __skb_frag_set_page() with page being NULL seems to be doing defensive programming, as shinfo->nr_frags is already decremented, so remove them. Signed-off-by: Yunsheng Lin Reviewed-by: Leon Romanovsky Reviewed-by: Michael Chan Reviewed-by: Jesse Brandeburg Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 1 - drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +---- include/linux/skbuff.h | 12 ------------ 3 files changed, 1 insertion(+), 17 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 466e1d62bcf6..0d917a9699c5 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -2955,7 +2955,6 @@ bnx2_reuse_rx_skb_pages(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, shinfo = skb_shinfo(skb); shinfo->nr_frags--; page = skb_frag_page(&shinfo->frags[shinfo->nr_frags]); - __skb_frag_set_page(&shinfo->frags[shinfo->nr_frags], NULL); cons_rx_pg->page = page; dev_kfree_skb(skb); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index efaff5018af8..f42e51bd3e42 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1102,10 +1102,7 @@ static u32 __bnxt_rx_agg_pages(struct 
bnxt *bp, xdp_buff_set_frag_pfmemalloc(xdp); if (bnxt_alloc_rx_page(bp, rxr, prod, GFP_ATOMIC) != 0) { - unsigned int nr_frags; - - nr_frags = --shinfo->nr_frags; - __skb_frag_set_page(&shinfo->frags[nr_frags], NULL); + --shinfo->nr_frags; cons_rx_buf->page = page; /* Update prod since possibly some pages have been diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 30be21c7d05f..00e8c435fa1a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3491,18 +3491,6 @@ static inline void skb_frag_page_copy(skb_frag_t *fragto, fragto->bv_page = fragfrom->bv_page; } -/** - * __skb_frag_set_page - sets the page contained in a paged fragment - * @frag: the paged fragment - * @page: the page to set - * - * Sets the fragment @frag to contain @page. - */ -static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page) -{ - frag->bv_page = page; -} - bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); /** -- cgit v1.2.3 From 314cf958de2a784dd3984190311e15aa1fbf2717 Mon Sep 17 00:00:00 2001 From: Daire McNamara Date: Fri, 12 May 2023 13:20:32 +0100 Subject: net: macb: Shorten max_tx_len to 4KiB - 56 on mpfs On mpfs, with SRAM configured for 4 queues, setting max_tx_len to GEM_TX_MAX_LEN=0x3f0 results in multiple AMBA errors. Setting max_tx_len to (4KiB - 56) removes those errors. The details are described in erratum 1686 by Cadence. The max jumbo frame size is also reduced for mpfs to (4KiB - 56). Signed-off-by: Daire McNamara Reviewed-by: Conor Dooley Reviewed-by: Simon Horman Reviewed-by: Claudiu Beznea Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cadence/macb.h | 1 + drivers/net/ethernet/cadence/macb_main.c | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index cfbdd0022764..b6d5bf8deb79 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -1181,6 +1181,7 @@ struct macb_config { struct clk **hclk, struct clk **tx_clk, struct clk **rx_clk, struct clk **tsu_clk); int (*init)(struct platform_device *pdev); + unsigned int max_tx_length; int jumbo_max_len; const struct macb_usrio_config *usrio; }; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 29a1199dad14..50a4b04315e9 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4117,14 +4117,12 @@ static int macb_init(struct platform_device *pdev) /* setup appropriated routines according to adapter type */ if (macb_is_gem(bp)) { - bp->max_tx_length = GEM_MAX_TX_LEN; bp->macbgem_ops.mog_alloc_rx_buffers = gem_alloc_rx_buffers; bp->macbgem_ops.mog_free_rx_buffers = gem_free_rx_buffers; bp->macbgem_ops.mog_init_rings = gem_init_rings; bp->macbgem_ops.mog_rx = gem_rx; dev->ethtool_ops = &gem_ethtool_ops; } else { - bp->max_tx_length = MACB_MAX_TX_LEN; bp->macbgem_ops.mog_alloc_rx_buffers = macb_alloc_rx_buffers; bp->macbgem_ops.mog_free_rx_buffers = macb_free_rx_buffers; bp->macbgem_ops.mog_init_rings = macb_init_rings; @@ -4861,7 +4859,8 @@ static const struct macb_config mpfs_config = { .clk_init = macb_clk_init, .init = init_reset_optional, .usrio = &macb_default_usrio, - .jumbo_max_len = 10240, + .max_tx_length = 4040, /* Cadence Erratum 1686 */ + .jumbo_max_len = 4040, }; static const struct macb_config sama7g5_gem_config = { @@ -5012,6 +5011,13 @@ static int macb_probe(struct platform_device *pdev) if (macb_config) bp->jumbo_max_len = 
macb_config->jumbo_max_len; + if (!hw_is_gem(bp->regs, bp->native_io)) + bp->max_tx_length = MACB_MAX_TX_LEN; + else if (macb_config->max_tx_length) + bp->max_tx_length = macb_config->max_tx_length; + else + bp->max_tx_length = GEM_MAX_TX_LEN; + bp->wol = 0; if (of_property_read_bool(np, "magic-packet")) bp->wol |= MACB_WOL_HAS_MAGIC_PACKET; -- cgit v1.2.3 From 144470c88c5d9a4cab81da22a26c129e6702c6cf Mon Sep 17 00:00:00 2001 From: Shenwei Wang Date: Fri, 12 May 2023 08:20:10 -0500 Subject: net: fec: using the standard return codes when xdp xmit errors This patch standardizes the inconsistent return values for unsuccessful XDP transmits by using standardized error codes (-EBUSY or -ENOMEM). Signed-off-by: Shenwei Wang Reviewed-by: Simon Horman Reviewed-by: Horatiu Vultur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 42ec6ca3bf03..cd215ab20ff9 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3799,7 +3799,7 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, if (entries_free < MAX_SKB_FRAGS + 1) { netdev_err(fep->netdev, "NOT enough BD for SG!\n"); xdp_return_frame(frame); - return NETDEV_TX_BUSY; + return -EBUSY; } /* Fill in a Tx ring entry */ @@ -3813,7 +3813,7 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, dma_addr = dma_map_single(&fep->pdev->dev, frame->data, frame->len, DMA_TO_DEVICE); if (dma_mapping_error(&fep->pdev->dev, dma_addr)) - return FEC_ENET_XDP_CONSUMED; + return -ENOMEM; status |= (BD_ENET_TX_INTR | BD_ENET_TX_LAST); if (fep->bufdesc_ex) @@ -3869,7 +3869,7 @@ static int fec_enet_xdp_xmit(struct net_device *dev, __netif_tx_lock(nq, cpu); for (i = 0; i < num_frames; i++) { - if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) != 0) 
+ if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) < 0) break; sent_frames++; } -- cgit v1.2.3 From befcc1fce564bdb20ee55be981a355b0a7d0eac5 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 12 May 2023 16:35:58 +0100 Subject: sfc: fix use-after-free in efx_tc_flower_record_encap_match() When writing error messages to extack for pseudo collisions, we can't use encap->type as encap has already been freed. Fortunately the same value is stored in local variable em_type, so use that instead. Fixes: 3c9561c0a5b9 ("sfc: support TC decap rules matching on enc_ip_tos") Reported-by: Simon Horman Signed-off-by: Edward Cree Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/tc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index da684b4b7211..6dfbdb39f2fe 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -504,7 +504,7 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, if (em_type != EFX_TC_EM_PSEUDO_MASK) { NL_SET_ERR_MSG_FMT_MOD(extack, "%s encap match conflicts with existing pseudo(MASK) entry", - encap->type ? "Pseudo" : "Direct"); + em_type ? "Pseudo" : "Direct"); return -EEXIST; } if (child_ip_tos_mask != old->child_ip_tos_mask) { @@ -525,7 +525,7 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, default: /* Unrecognised pseudo-type. Just say no */ NL_SET_ERR_MSG_FMT_MOD(extack, "%s encap match conflicts with existing pseudo(%d) entry", - encap->type ? "Pseudo" : "Direct", + em_type ? 
"Pseudo" : "Direct", old->type); return -EEXIST; } -- cgit v1.2.3 From 12e7789ad5b476e945aba8edb1161c633cb7db31 Mon Sep 17 00:00:00 2001 From: Naveen Mamindlapalli Date: Sat, 13 May 2023 14:21:36 +0530 Subject: sch_htb: Allow HTB priority parameter in offload mode The current implementation of HTB offload returns the EINVAL error for unsupported parameters like prio and quantum. This patch removes the error returning checks for 'prio' parameter and populates its value to tc_htb_qopt_offload structure such that driver can use the same. Add prio parameter check in mlx5 driver, as mlx5 devices are not capable of supporting the prio parameter when htb offload is used. Report error if prio parameter is set to a non-default value. Signed-off-by: Naveen Mamindlapalli Co-developed-by: Rahul Rameshbabu Signed-off-by: Rahul Rameshbabu Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en/qos.c | 7 ++++++- include/net/pkt_cls.h | 1 + net/sched/sch_htb.c | 7 +++---- 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index 2842195ee548..1874c2f0587f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -379,6 +379,12 @@ int mlx5e_htb_setup_tc(struct mlx5e_priv *priv, struct tc_htb_qopt_offload *htb_ if (!htb && htb_qopt->command != TC_HTB_CREATE) return -EINVAL; + if (htb_qopt->prio) { + NL_SET_ERR_MSG_MOD(htb_qopt->extack, + "prio parameter is not supported by device with HTB offload enabled."); + return -EOPNOTSUPP; + } + switch (htb_qopt->command) { case TC_HTB_CREATE: if (!mlx5_qos_is_supported(priv->mdev)) { @@ -515,4 +521,3 @@ int mlx5e_mqprio_rl_get_node_hw_id(struct mlx5e_mqprio_rl *rl, int tc, u32 *hw_i *hw_id = 
rl->leaves_id[tc]; return 0; } - diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index b3b5b0b62f16..a2ea45c7b53e 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -868,6 +868,7 @@ struct tc_htb_qopt_offload { u16 qid; u64 rate; u64 ceil; + u8 prio; }; #define TC_HTB_CLASSID_ROOT U32_MAX diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 8aef7dd9fb88..325c29041c7d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1814,10 +1814,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter"); goto failure; } - if (hopt->prio) { - NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter"); - goto failure; - } } /* Keeping backward compatible with rate_table based iproute2 tc */ @@ -1913,6 +1909,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, TC_HTB_CLASSID_ROOT, .rate = max_t(u64, hopt->rate.rate, rate64), .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .prio = hopt->prio, .extack = extack, }; err = htb_offload(dev, &offload_opt); @@ -1933,6 +1930,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, TC_H_MIN(parent->common.classid), .rate = max_t(u64, hopt->rate.rate, rate64), .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .prio = hopt->prio, .extack = extack, }; err = htb_offload(dev, &offload_opt); @@ -2018,6 +2016,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, .classid = cl->common.classid, .rate = max_t(u64, hopt->rate.rate, rate64), .ceil = max_t(u64, hopt->ceil.rate, ceil64), + .prio = hopt->prio, .extack = extack, }; err = htb_offload(dev, &offload_opt); -- cgit v1.2.3 From 508c58f76ca510956625c945f9b8eb104f2c8208 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Sat, 13 May 2023 14:21:37 +0530 Subject: octeontx2-pf: Rename tot_tx_queues to non_qos_queues current implementation is such that tot_tx_queues contains both xdp queues and normal tx queues. 
which will be allocated in interface open calls and deallocated on interface down calls respectively. With addition of QOS, where send queues are allocated/deallocated upon user request, Qos send queues won't be part of tot_tx_queues. So this patch renames tot_tx_queues to non_qos_queues. Signed-off-by: Hariprasad Kelam Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 12 ++++++------ drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h | 2 +- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 14 +++++++------- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 8a41ad8ca04f..43bc56fb3c33 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -762,7 +762,7 @@ void otx2_sqb_flush(struct otx2_nic *pfvf) int timeout = 1000; ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); - for (qidx = 0; qidx < pfvf->hw.tot_tx_queues; qidx++) { + for (qidx = 0; qidx < pfvf->hw.non_qos_queues; qidx++) { incr = (u64)qidx << 32; while (timeout) { val = otx2_atomic64_add(incr, ptr); @@ -1048,7 +1048,7 @@ int otx2_config_nix_queues(struct otx2_nic *pfvf) } /* Initialize TX queues */ - for (qidx = 0; qidx < pfvf->hw.tot_tx_queues; qidx++) { + for (qidx = 0; qidx < pfvf->hw.non_qos_queues; qidx++) { u16 sqb_aura = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); err = otx2_sq_init(pfvf, qidx, sqb_aura); @@ -1095,7 +1095,7 @@ int otx2_config_nix(struct otx2_nic *pfvf) /* Set RQ/SQ/CQ counts */ nixlf->rq_cnt = pfvf->hw.rx_queues; - nixlf->sq_cnt = pfvf->hw.tot_tx_queues; + nixlf->sq_cnt = pfvf->hw.non_qos_queues; nixlf->cq_cnt = pfvf->qset.cq_cnt; nixlf->rss_sz = 
MAX_RSS_INDIR_TBL_SIZE; nixlf->rss_grps = MAX_RSS_GROUPS; @@ -1133,7 +1133,7 @@ void otx2_sq_free_sqbs(struct otx2_nic *pfvf) int sqb, qidx; u64 iova, pa; - for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) { + for (qidx = 0; qidx < hw->non_qos_queues; qidx++) { sq = &qset->sq[qidx]; if (!sq->sqb_ptrs) continue; @@ -1349,7 +1349,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) stack_pages = (num_sqbs + hw->stack_pg_ptrs - 1) / hw->stack_pg_ptrs; - for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) { + for (qidx = 0; qidx < hw->non_qos_queues; qidx++) { pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); /* Initialize aura context */ err = otx2_aura_init(pfvf, pool_id, pool_id, num_sqbs); @@ -1369,7 +1369,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) goto fail; /* Allocate pointers and free them to aura/pool */ - for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) { + for (qidx = 0; qidx < hw->non_qos_queues; qidx++) { pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); pool = &pfvf->qset.pool[pool_id]; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index d17274aab374..76c50dd8e5ef 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -190,7 +190,7 @@ struct otx2_hw { u16 rx_queues; u16 tx_queues; u16 xdp_queues; - u16 tot_tx_queues; + u16 non_qos_queues; /* tx queues plus xdp queues */ u16 max_queues; u16 pool_cnt; u16 rqpool_cnt; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 18284ad75157..cc4a94fd9afc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1257,7 +1257,7 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) } /* SQ */ - for (qidx = 0; qidx < pf->hw.tot_tx_queues; qidx++) { + for (qidx = 0; qidx < 
pf->hw.non_qos_queues; qidx++) { u64 sq_op_err_dbg, mnq_err_dbg, snd_err_dbg; u8 sq_op_err_code, mnq_err_code, snd_err_code; @@ -1383,7 +1383,7 @@ static void otx2_free_sq_res(struct otx2_nic *pf) otx2_ctx_disable(&pf->mbox, NIX_AQ_CTYPE_SQ, false); /* Free SQB pointers */ otx2_sq_free_sqbs(pf); - for (qidx = 0; qidx < pf->hw.tot_tx_queues; qidx++) { + for (qidx = 0; qidx < pf->hw.non_qos_queues; qidx++) { sq = &qset->sq[qidx]; qmem_free(pf->dev, sq->sqe); qmem_free(pf->dev, sq->tso_hdrs); @@ -1433,7 +1433,7 @@ static int otx2_init_hw_resources(struct otx2_nic *pf) * so, aura count = pool count. */ hw->rqpool_cnt = hw->rx_queues; - hw->sqpool_cnt = hw->tot_tx_queues; + hw->sqpool_cnt = hw->non_qos_queues; hw->pool_cnt = hw->rqpool_cnt + hw->sqpool_cnt; /* Maximum hardware supported transmit length */ @@ -1688,7 +1688,7 @@ int otx2_open(struct net_device *netdev) netif_carrier_off(netdev); - pf->qset.cq_cnt = pf->hw.rx_queues + pf->hw.tot_tx_queues; + pf->qset.cq_cnt = pf->hw.rx_queues + pf->hw.non_qos_queues; /* RQ and SQs are mapped to different CQs, * so find out max CQ IRQs (i.e CINTs) needed. 
*/ @@ -1708,7 +1708,7 @@ int otx2_open(struct net_device *netdev) if (!qset->cq) goto err_free_mem; - qset->sq = kcalloc(pf->hw.tot_tx_queues, + qset->sq = kcalloc(pf->hw.non_qos_queues, sizeof(struct otx2_snd_queue), GFP_KERNEL); if (!qset->sq) goto err_free_mem; @@ -2529,7 +2529,7 @@ static int otx2_xdp_setup(struct otx2_nic *pf, struct bpf_prog *prog) xdp_features_clear_redirect_target(dev); } - pf->hw.tot_tx_queues += pf->hw.xdp_queues; + pf->hw.non_qos_queues += pf->hw.xdp_queues; if (if_up) otx2_open(pf->netdev); @@ -2760,7 +2760,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) hw->pdev = pdev; hw->rx_queues = qcount; hw->tx_queues = qcount; - hw->tot_tx_queues = qcount; + hw->non_qos_queues = qcount; hw->max_queues = qcount; hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN; /* Use CQE of 128 byte descriptor size by default */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 53366dbfbf27..64be99ace04e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -570,7 +570,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) hw->rx_queues = qcount; hw->tx_queues = qcount; hw->max_queues = qcount; - hw->tot_tx_queues = qcount; + hw->non_qos_queues = qcount; hw->rbuf_len = OTX2_DEFAULT_RBUF_LEN; /* Use CQE of 128 byte descriptor size by default */ hw->xqe_size = 128; -- cgit v1.2.3 From ab6dddd2a669a0ecc2ce07485c7a15fadbb5a0aa Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Sat, 13 May 2023 14:21:38 +0530 Subject: octeontx2-pf: qos send queues management Current implementation is such that the number of Send queues (SQs) are decided on the device probe which is equal to the number of online cpus. These SQs are allocated and deallocated in interface open and close calls respectively. 
This patch defines new APIs for initializing and deinitializing Send queues dynamically and allocates more number of transmit queues for QOS feature. Signed-off-by: Subbaraya Sundeep Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/af/rvu_debugfs.c | 5 + .../net/ethernet/marvell/octeontx2/nic/Makefile | 2 +- .../ethernet/marvell/octeontx2/nic/otx2_common.c | 43 ++-- .../ethernet/marvell/octeontx2/nic/otx2_common.h | 39 ++- .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 44 +++- .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 24 +- .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.h | 3 +- .../net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 7 +- drivers/net/ethernet/marvell/octeontx2/nic/qos.h | 19 ++ .../net/ethernet/marvell/octeontx2/nic/qos_sq.c | 282 +++++++++++++++++++++ 10 files changed, 426 insertions(+), 42 deletions(-) create mode 100644 drivers/net/ethernet/marvell/octeontx2/nic/qos.h create mode 100644 drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 9533b1d92960..3b26893efdf8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -1222,6 +1222,11 @@ static int rvu_dbg_npa_ctx_display(struct seq_file *m, void *unused, int ctype) for (aura = id; aura < max_id; aura++) { aq_req.aura_id = aura; + + /* Skip if queue is uninitialized */ + if (ctype == NPA_AQ_CTYPE_POOL && !test_bit(aura, pfvf->pool_bmap)) + continue; + seq_printf(m, "======%s : %d=======\n", (ctype == NPA_AQ_CTYPE_AURA) ? 
"AURA" : "POOL", aq_req.aura_id); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index 73fdb8798614..3d31ddf7c652 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o otx2_ptp.o rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \ - otx2_devlink.o + otx2_devlink.o qos_sq.o rvu_nicvf-y := otx2_vf.o otx2_devlink.o rvu_nicpf-$(CONFIG_DCB) += otx2_dcbnl.o diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 43bc56fb3c33..adbcc087d2a8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -513,8 +513,8 @@ void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx) (pfvf->hw.cq_ecount_wait - 1)); } -int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, - dma_addr_t *dma) +static int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, + dma_addr_t *dma) { u8 *buf; @@ -532,8 +532,8 @@ int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, return 0; } -static int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, - dma_addr_t *dma) +int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, + dma_addr_t *dma) { int ret; @@ -758,11 +758,16 @@ int otx2_txschq_stop(struct otx2_nic *pfvf) void otx2_sqb_flush(struct otx2_nic *pfvf) { int qidx, sqe_tail, sqe_head; + struct otx2_snd_queue *sq; u64 incr, *ptr, val; int timeout = 1000; ptr = (u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); - for (qidx = 0; qidx < pfvf->hw.non_qos_queues; qidx++) { + for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) { + sq = &pfvf->qset.sq[qidx]; + if (!sq->sqb_ptrs) + continue; + incr = (u64)qidx << 
32; while (timeout) { val = otx2_atomic64_add(incr, ptr); @@ -862,7 +867,7 @@ int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) return otx2_sync_mbox_msg(&pfvf->mbox); } -static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) +int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) { struct otx2_qset *qset = &pfvf->qset; struct otx2_snd_queue *sq; @@ -935,9 +940,17 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) cq->cint_idx = qidx - pfvf->hw.rx_queues; cq->cqe_cnt = qset->sqe_cnt; } else { - cq->cq_type = CQ_XDP; - cq->cint_idx = qidx - non_xdp_queues; - cq->cqe_cnt = qset->sqe_cnt; + if (pfvf->hw.xdp_queues && + qidx < non_xdp_queues + pfvf->hw.xdp_queues) { + cq->cq_type = CQ_XDP; + cq->cint_idx = qidx - non_xdp_queues; + cq->cqe_cnt = qset->sqe_cnt; + } else { + cq->cq_type = CQ_QOS; + cq->cint_idx = qidx - non_xdp_queues - + pfvf->hw.xdp_queues; + cq->cqe_cnt = qset->sqe_cnt; + } } cq->cqe_size = pfvf->qset.xqe_size; @@ -1095,7 +1108,7 @@ int otx2_config_nix(struct otx2_nic *pfvf) /* Set RQ/SQ/CQ counts */ nixlf->rq_cnt = pfvf->hw.rx_queues; - nixlf->sq_cnt = pfvf->hw.non_qos_queues; + nixlf->sq_cnt = otx2_get_total_tx_queues(pfvf); nixlf->cq_cnt = pfvf->qset.cq_cnt; nixlf->rss_sz = MAX_RSS_INDIR_TBL_SIZE; nixlf->rss_grps = MAX_RSS_GROUPS; @@ -1133,7 +1146,7 @@ void otx2_sq_free_sqbs(struct otx2_nic *pfvf) int sqb, qidx; u64 iova, pa; - for (qidx = 0; qidx < hw->non_qos_queues; qidx++) { + for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) { sq = &qset->sq[qidx]; if (!sq->sqb_ptrs) continue; @@ -1201,8 +1214,8 @@ void otx2_aura_pool_free(struct otx2_nic *pfvf) pfvf->qset.pool = NULL; } -static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, - int pool_id, int numptrs) +int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, + int pool_id, int numptrs) { struct npa_aq_enq_req *aq; struct otx2_pool *pool; @@ -1278,8 +1291,8 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, return 0; } 
-static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, - int stack_pages, int numptrs, int buf_size) +int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, + int stack_pages, int numptrs, int buf_size) { struct npa_aq_enq_req *aq; struct otx2_pool *pool; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 76c50dd8e5ef..eb5009152c92 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -28,6 +28,7 @@ #include "otx2_txrx.h" #include "otx2_devlink.h" #include +#include "qos.h" /* IPv4 flag more fragment bit */ #define IPV4_FLAG_MORE 0x20 @@ -190,6 +191,7 @@ struct otx2_hw { u16 rx_queues; u16 tx_queues; u16 xdp_queues; + u16 tc_tx_queues; u16 non_qos_queues; /* tx queues plus xdp queues */ u16 max_queues; u16 pool_cnt; @@ -506,6 +508,8 @@ struct otx2_nic { u16 pfc_schq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC]; bool pfc_alloc_status[NIX_PF_PFC_PRIO_MAX]; #endif + /* qos */ + struct otx2_qos qos; /* napi event count. It is needed for adaptive irq coalescing. 
*/ u32 napi_events; @@ -750,8 +754,7 @@ static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf) /* Alloc pointer from pool/aura */ static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura) { - u64 *ptr = (u64 *)otx2_get_regaddr(pfvf, - NPA_LF_AURA_OP_ALLOCX(0)); + u64 *ptr = (__force u64 *)otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_ALLOCX(0)); u64 incr = (u64)aura | BIT_ULL(63); return otx2_atomic64_add(incr, ptr); @@ -893,12 +896,23 @@ static inline void otx2_dma_unmap_page(struct otx2_nic *pfvf, static inline u16 otx2_get_smq_idx(struct otx2_nic *pfvf, u16 qidx) { + u16 smq; #ifdef CONFIG_DCB if (qidx < NIX_PF_PFC_PRIO_MAX && pfvf->pfc_alloc_status[qidx]) return pfvf->pfc_schq_list[NIX_TXSCH_LVL_SMQ][qidx]; #endif + /* check if qidx falls under QOS queues */ + if (qidx >= pfvf->hw.non_qos_queues) + smq = pfvf->qos.qid_to_sqmap[qidx - pfvf->hw.non_qos_queues]; + else + smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; - return pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; + return smq; +} + +static inline u16 otx2_get_total_tx_queues(struct otx2_nic *pfvf) +{ + return pfvf->hw.non_qos_queues + pfvf->hw.tc_tx_queues; } /* MSI-X APIs */ @@ -927,17 +941,22 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool pfc_en); int otx2_txsch_alloc(struct otx2_nic *pfvf); int otx2_txschq_stop(struct otx2_nic *pfvf); void otx2_sqb_flush(struct otx2_nic *pfvf); -int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, - dma_addr_t *dma); +int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, + dma_addr_t *dma); int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable); void otx2_ctx_disable(struct mbox *mbox, int type, bool npa); int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable); void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); +int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura); int 
otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, dma_addr_t *dma); +int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, + int stack_pages, int numptrs, int buf_size); +int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, + int pool_id, int numptrs); /* RSS configuration APIs*/ int otx2_rss_init(struct otx2_nic *pfvf); @@ -1045,4 +1064,14 @@ static inline void cn10k_handle_mcs_event(struct otx2_nic *pfvf, {} #endif /* CONFIG_MACSEC */ +/* qos support */ +static inline void otx2_qos_init(struct otx2_nic *pfvf, int qos_txqs) +{ + struct otx2_hw *hw = &pfvf->hw; + + hw->tc_tx_queues = qos_txqs; +} + +u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb, + struct net_device *sb_dev); #endif /* OTX2_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index cc4a94fd9afc..918114dc4688 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -23,6 +23,7 @@ #include "otx2_struct.h" #include "otx2_ptp.h" #include "cn10k.h" +#include "qos.h" #include #define DRV_NAME "rvu_nicpf" @@ -1228,6 +1229,7 @@ static char *nix_snd_status_e_str[NIX_SND_STATUS_MAX] = { static irqreturn_t otx2_q_intr_handler(int irq, void *data) { struct otx2_nic *pf = data; + struct otx2_snd_queue *sq; u64 val, *ptr; u64 qidx = 0; @@ -1257,10 +1259,14 @@ static irqreturn_t otx2_q_intr_handler(int irq, void *data) } /* SQ */ - for (qidx = 0; qidx < pf->hw.non_qos_queues; qidx++) { + for (qidx = 0; qidx < otx2_get_total_tx_queues(pf); qidx++) { u64 sq_op_err_dbg, mnq_err_dbg, snd_err_dbg; u8 sq_op_err_code, mnq_err_code, snd_err_code; + sq = &pf->qset.sq[qidx]; + if (!sq->sqb_ptrs) + continue; + /* Below debug registers captures first errors corresponding to * those registers. 
We don't have to check against SQ qid as * these are fatal errors. @@ -1383,7 +1389,7 @@ static void otx2_free_sq_res(struct otx2_nic *pf) otx2_ctx_disable(&pf->mbox, NIX_AQ_CTYPE_SQ, false); /* Free SQB pointers */ otx2_sq_free_sqbs(pf); - for (qidx = 0; qidx < pf->hw.non_qos_queues; qidx++) { + for (qidx = 0; qidx < otx2_get_total_tx_queues(pf); qidx++) { sq = &qset->sq[qidx]; qmem_free(pf->dev, sq->sqe); qmem_free(pf->dev, sq->tso_hdrs); @@ -1433,7 +1439,7 @@ static int otx2_init_hw_resources(struct otx2_nic *pf) * so, aura count = pool count. */ hw->rqpool_cnt = hw->rx_queues; - hw->sqpool_cnt = hw->non_qos_queues; + hw->sqpool_cnt = otx2_get_total_tx_queues(pf); hw->pool_cnt = hw->rqpool_cnt + hw->sqpool_cnt; /* Maximum hardware supported transmit length */ @@ -1688,11 +1694,14 @@ int otx2_open(struct net_device *netdev) netif_carrier_off(netdev); - pf->qset.cq_cnt = pf->hw.rx_queues + pf->hw.non_qos_queues; /* RQ and SQs are mapped to different CQs, * so find out max CQ IRQs (i.e CINTs) needed. */ - pf->hw.cint_cnt = max(pf->hw.rx_queues, pf->hw.tx_queues); + pf->hw.cint_cnt = max3(pf->hw.rx_queues, pf->hw.tx_queues, + pf->hw.tc_tx_queues); + + pf->qset.cq_cnt = pf->hw.rx_queues + otx2_get_total_tx_queues(pf); + qset->napi = kcalloc(pf->hw.cint_cnt, sizeof(*cq_poll), GFP_KERNEL); if (!qset->napi) return -ENOMEM; @@ -1743,6 +1752,11 @@ int otx2_open(struct net_device *netdev) else cq_poll->cq_ids[CQ_XDP] = CINT_INVALID_CQ; + cq_poll->cq_ids[CQ_QOS] = (qidx < pf->hw.tc_tx_queues) ? 
+ (qidx + pf->hw.rx_queues + + pf->hw.non_qos_queues) : + CINT_INVALID_CQ; + cq_poll->dev = (void *)pf; cq_poll->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; INIT_WORK(&cq_poll->dim.work, otx2_dim_work); @@ -1947,6 +1961,12 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) int qidx = skb_get_queue_mapping(skb); struct otx2_snd_queue *sq; struct netdev_queue *txq; + int sq_idx; + + /* XDP SQs are not mapped with TXQs + * advance qid to derive correct sq mapped with QOS + */ + sq_idx = (qidx >= pf->hw.tx_queues) ? (qidx + pf->hw.xdp_queues) : qidx; /* Check for minimum and maximum packet length */ if (skb->len <= ETH_HLEN || @@ -1955,7 +1975,7 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } - sq = &pf->qset.sq[qidx]; + sq = &pf->qset.sq[sq_idx]; txq = netdev_get_tx_queue(netdev, qidx); if (!otx2_sq_append_skb(netdev, sq, skb, qidx)) { @@ -1973,8 +1993,8 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } -static u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb, - struct net_device *sb_dev) +u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb, + struct net_device *sb_dev) { #ifdef CONFIG_DCB struct otx2_nic *pf = netdev_priv(netdev); @@ -1996,6 +2016,7 @@ pick_tx: #endif return netdev_pick_tx(netdev, skb, NULL); } +EXPORT_SYMBOL(otx2_select_queue); static netdev_features_t otx2_fix_features(struct net_device *dev, netdev_features_t features) @@ -2712,10 +2733,10 @@ static void otx2_sriov_vfcfg_cleanup(struct otx2_nic *pf) static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct device *dev = &pdev->dev; + int err, qcount, qos_txqs; struct net_device *netdev; struct otx2_nic *pf; struct otx2_hw *hw; - int err, qcount; int num_vec; err = pcim_enable_device(pdev); @@ -2740,8 +2761,9 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Set 
number of queues */ qcount = min_t(int, num_online_cpus(), OTX2_MAX_CQ_CNT); + qos_txqs = min_t(int, qcount, OTX2_QOS_MAX_LEAF_NODES); - netdev = alloc_etherdev_mqs(sizeof(*pf), qcount, qcount); + netdev = alloc_etherdev_mqs(sizeof(*pf), qcount + qos_txqs, qcount); if (!netdev) { err = -ENOMEM; goto err_release_regions; @@ -2929,6 +2951,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_pf_sriov_init; #endif + otx2_qos_init(pf, qos_txqs); + return 0; err_pf_sriov_init: diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 7045fedfd73a..e288f46b23a8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -464,12 +464,13 @@ process_cqe: break; } - if (cq->cq_type == CQ_XDP) { + qidx = cq->cq_idx - pfvf->hw.rx_queues; + + if (cq->cq_type == CQ_XDP) otx2_xdp_snd_pkt_handler(pfvf, sq, cqe); - } else { - otx2_snd_pkt_handler(pfvf, cq, sq, cqe, budget, - &tx_pkts, &tx_bytes); - } + else + otx2_snd_pkt_handler(pfvf, cq, &pfvf->qset.sq[qidx], + cqe, budget, &tx_pkts, &tx_bytes); cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID; processed_cqe++; @@ -486,7 +487,11 @@ process_cqe: if (likely(tx_pkts)) { struct netdev_queue *txq; - txq = netdev_get_tx_queue(pfvf->netdev, cq->cint_idx); + qidx = cq->cq_idx - pfvf->hw.rx_queues; + + if (qidx >= pfvf->hw.tx_queues) + qidx -= pfvf->hw.xdp_queues; + txq = netdev_get_tx_queue(pfvf->netdev, qidx); netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); /* Check if queue was stopped earlier due to ring full */ smp_mb(); @@ -736,7 +741,8 @@ static void otx2_sqe_add_hdr(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, sqe_hdr->aura = sq->aura_id; /* Post a CQE Tx after pkt transmission */ sqe_hdr->pnc = 1; - sqe_hdr->sq = qidx; + sqe_hdr->sq = (qidx >= pfvf->hw.tx_queues) ? 
+ qidx + pfvf->hw.xdp_queues : qidx; } sqe_hdr->total = skb->len; /* Set SQE identifier which will be used later for freeing SKB */ @@ -1221,8 +1227,10 @@ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) struct nix_cqe_tx_s *cqe; int processed_cqe = 0; struct sg_list *sg; + int qidx; - sq = &pfvf->qset.sq[cq->cint_idx]; + qidx = cq->cq_idx - pfvf->hw.rx_queues; + sq = &pfvf->qset.sq[qidx]; if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe) return; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h index 93cac2c2664c..7ab6db9a986f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -102,7 +102,8 @@ enum cq_type { CQ_RX, CQ_TX, CQ_XDP, - CQS_PER_CINT = 3, /* RQ + SQ + XDP */ + CQ_QOS, + CQS_PER_CINT = 4, /* RQ + SQ + XDP + QOS_SQ */ }; struct otx2_cq_poll { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 64be99ace04e..cd92b9535cdb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -479,6 +479,7 @@ static const struct net_device_ops otx2vf_netdev_ops = { .ndo_open = otx2vf_open, .ndo_stop = otx2vf_stop, .ndo_start_xmit = otx2vf_xmit, + .ndo_select_queue = otx2_select_queue, .ndo_set_rx_mode = otx2vf_set_rx_mode, .ndo_set_mac_address = otx2_set_mac_address, .ndo_change_mtu = otx2vf_change_mtu, @@ -524,10 +525,10 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int num_vec = pci_msix_vec_count(pdev); struct device *dev = &pdev->dev; + int err, qcount, qos_txqs; struct net_device *netdev; struct otx2_nic *vf; struct otx2_hw *hw; - int err, qcount; err = pcim_enable_device(pdev); if (err) { @@ -550,7 +551,8 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) 
pci_set_master(pdev); qcount = num_online_cpus(); - netdev = alloc_etherdev_mqs(sizeof(*vf), qcount, qcount); + qos_txqs = min_t(int, qcount, OTX2_QOS_MAX_LEAF_NODES); + netdev = alloc_etherdev_mqs(sizeof(*vf), qcount + qos_txqs, qcount); if (!netdev) { err = -ENOMEM; goto err_release_regions; @@ -699,6 +701,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_shutdown_tc; #endif + otx2_qos_init(vf, qos_txqs); return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.h b/drivers/net/ethernet/marvell/octeontx2/nic/qos.h new file mode 100644 index 000000000000..73a62d092e99 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell RVU Ethernet driver + * + * Copyright (C) 2023 Marvell. + * + */ +#ifndef OTX2_QOS_H +#define OTX2_QOS_H + +#define OTX2_QOS_MAX_LEAF_NODES 16 + +int otx2_qos_enable_sq(struct otx2_nic *pfvf, int qidx, u16 smq); +void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx, u16 mdq); + +struct otx2_qos { + u16 qid_to_sqmap[OTX2_QOS_MAX_LEAF_NODES]; + }; + +#endif diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c new file mode 100644 index 000000000000..e142d43f5a62 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c @@ -0,0 +1,282 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU Physical Function ethernet driver + * + * Copyright (C) 2023 Marvell. 
+ * + */ + +#include +#include + +#include "cn10k.h" +#include "otx2_reg.h" +#include "otx2_common.h" +#include "otx2_txrx.h" +#include "otx2_struct.h" + +#define OTX2_QOS_MAX_LEAF_NODES 16 + +static void otx2_qos_aura_pool_free(struct otx2_nic *pfvf, int pool_id) +{ + struct otx2_pool *pool; + + if (!pfvf->qset.pool) + return; + + pool = &pfvf->qset.pool[pool_id]; + qmem_free(pfvf->dev, pool->stack); + qmem_free(pfvf->dev, pool->fc_addr); + pool->stack = NULL; + pool->fc_addr = NULL; +} + +static int otx2_qos_sq_aura_pool_init(struct otx2_nic *pfvf, int qidx) +{ + struct otx2_qset *qset = &pfvf->qset; + int pool_id, stack_pages, num_sqbs; + struct otx2_hw *hw = &pfvf->hw; + struct otx2_snd_queue *sq; + struct otx2_pool *pool; + dma_addr_t bufptr; + int err, ptr; + u64 iova, pa; + + /* Calculate number of SQBs needed. + * + * For a 128byte SQE, and 4K size SQB, 31 SQEs will fit in one SQB. + * Last SQE is used for pointing to next SQB. + */ + num_sqbs = (hw->sqb_size / 128) - 1; + num_sqbs = (qset->sqe_cnt + num_sqbs) / num_sqbs; + + /* Get no of stack pages needed */ + stack_pages = + (num_sqbs + hw->stack_pg_ptrs - 1) / hw->stack_pg_ptrs; + + pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); + pool = &pfvf->qset.pool[pool_id]; + + /* Initialize aura context */ + err = otx2_aura_init(pfvf, pool_id, pool_id, num_sqbs); + if (err) + return err; + + /* Initialize pool context */ + err = otx2_pool_init(pfvf, pool_id, stack_pages, + num_sqbs, hw->sqb_size); + if (err) + goto aura_free; + + /* Flush accumulated messages */ + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) + goto pool_free; + + /* Allocate pointers and free them to aura/pool */ + sq = &qset->sq[qidx]; + sq->sqb_count = 0; + sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(*sq->sqb_ptrs), GFP_KERNEL); + if (!sq->sqb_ptrs) { + err = -ENOMEM; + goto pool_free; + } + + for (ptr = 0; ptr < num_sqbs; ptr++) { + err = otx2_alloc_rbuf(pfvf, pool, &bufptr); + if (err) + goto sqb_free; + 
pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr); + sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr; + } + + return 0; + +sqb_free: + while (ptr--) { + if (!sq->sqb_ptrs[ptr]) + continue; + iova = sq->sqb_ptrs[ptr]; + pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); + dma_unmap_page_attrs(pfvf->dev, iova, hw->sqb_size, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + put_page(virt_to_page(phys_to_virt(pa))); + otx2_aura_allocptr(pfvf, pool_id); + } + sq->sqb_count = 0; + kfree(sq->sqb_ptrs); +pool_free: + qmem_free(pfvf->dev, pool->stack); +aura_free: + qmem_free(pfvf->dev, pool->fc_addr); + otx2_mbox_reset(&pfvf->mbox.mbox, 0); + return err; +} + +static void otx2_qos_sq_free_sqbs(struct otx2_nic *pfvf, int qidx) +{ + struct otx2_qset *qset = &pfvf->qset; + struct otx2_hw *hw = &pfvf->hw; + struct otx2_snd_queue *sq; + u64 iova, pa; + int sqb; + + sq = &qset->sq[qidx]; + if (!sq->sqb_ptrs) + return; + for (sqb = 0; sqb < sq->sqb_count; sqb++) { + if (!sq->sqb_ptrs[sqb]) + continue; + iova = sq->sqb_ptrs[sqb]; + pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); + dma_unmap_page_attrs(pfvf->dev, iova, hw->sqb_size, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + put_page(virt_to_page(phys_to_virt(pa))); + } + + sq->sqb_count = 0; + + sq = &qset->sq[qidx]; + qmem_free(pfvf->dev, sq->sqe); + qmem_free(pfvf->dev, sq->tso_hdrs); + kfree(sq->sg); + kfree(sq->sqb_ptrs); + qmem_free(pfvf->dev, sq->timestamps); + + memset((void *)sq, 0, sizeof(*sq)); +} + +/* send queue id */ +static void otx2_qos_sqb_flush(struct otx2_nic *pfvf, int qidx) +{ + int sqe_tail, sqe_head; + u64 incr, *ptr, val; + + ptr = (__force u64 *)otx2_get_regaddr(pfvf, NIX_LF_SQ_OP_STATUS); + incr = (u64)qidx << 32; + val = otx2_atomic64_add(incr, ptr); + sqe_head = (val >> 20) & 0x3F; + sqe_tail = (val >> 28) & 0x3F; + if (sqe_head != sqe_tail) + usleep_range(50, 60); +} + +static int otx2_qos_ctx_disable(struct otx2_nic *pfvf, u16 qidx, int aura_id) +{ + struct nix_cn10k_aq_enq_req *cn10k_sq_aq; + struct 
npa_aq_enq_req *aura_aq; + struct npa_aq_enq_req *pool_aq; + struct nix_aq_enq_req *sq_aq; + + if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) { + cn10k_sq_aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox); + if (!cn10k_sq_aq) + return -ENOMEM; + cn10k_sq_aq->qidx = qidx; + cn10k_sq_aq->sq.ena = 0; + cn10k_sq_aq->sq_mask.ena = 1; + cn10k_sq_aq->ctype = NIX_AQ_CTYPE_SQ; + cn10k_sq_aq->op = NIX_AQ_INSTOP_WRITE; + } else { + sq_aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox); + if (!sq_aq) + return -ENOMEM; + sq_aq->qidx = qidx; + sq_aq->sq.ena = 0; + sq_aq->sq_mask.ena = 1; + sq_aq->ctype = NIX_AQ_CTYPE_SQ; + sq_aq->op = NIX_AQ_INSTOP_WRITE; + } + + aura_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); + if (!aura_aq) { + otx2_mbox_reset(&pfvf->mbox.mbox, 0); + return -ENOMEM; + } + + aura_aq->aura_id = aura_id; + aura_aq->aura.ena = 0; + aura_aq->aura_mask.ena = 1; + aura_aq->ctype = NPA_AQ_CTYPE_AURA; + aura_aq->op = NPA_AQ_INSTOP_WRITE; + + pool_aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); + if (!pool_aq) { + otx2_mbox_reset(&pfvf->mbox.mbox, 0); + return -ENOMEM; + } + + pool_aq->aura_id = aura_id; + pool_aq->pool.ena = 0; + pool_aq->pool_mask.ena = 1; + + pool_aq->ctype = NPA_AQ_CTYPE_POOL; + pool_aq->op = NPA_AQ_INSTOP_WRITE; + + return otx2_sync_mbox_msg(&pfvf->mbox); +} + +int otx2_qos_enable_sq(struct otx2_nic *pfvf, int qidx, u16 smq) +{ + struct otx2_hw *hw = &pfvf->hw; + int pool_id, sq_idx, err; + + if (pfvf->flags & OTX2_FLAG_INTF_DOWN) + return -EPERM; + + sq_idx = hw->non_qos_queues + qidx; + + mutex_lock(&pfvf->mbox.lock); + err = otx2_qos_sq_aura_pool_init(pfvf, sq_idx); + if (err) + goto out; + + pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, sq_idx); + pfvf->qos.qid_to_sqmap[qidx] = smq; + err = otx2_sq_init(pfvf, sq_idx, pool_id); + if (err) + goto out; +out: + mutex_unlock(&pfvf->mbox.lock); + return err; +} + +void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx, u16 mdq) +{ + struct otx2_qset *qset = &pfvf->qset; + struct 
otx2_hw *hw = &pfvf->hw; + struct otx2_snd_queue *sq; + struct otx2_cq_queue *cq; + int pool_id, sq_idx; + + sq_idx = hw->non_qos_queues + qidx; + + /* If the DOWN flag is set SQs are already freed */ + if (pfvf->flags & OTX2_FLAG_INTF_DOWN) + return; + + sq = &pfvf->qset.sq[sq_idx]; + if (!sq->sqb_ptrs) + return; + + if (sq_idx < hw->non_qos_queues || + sq_idx >= otx2_get_total_tx_queues(pfvf)) { + netdev_err(pfvf->netdev, "Send Queue is not a QoS queue\n"); + return; + } + + cq = &qset->cq[pfvf->hw.rx_queues + sq_idx]; + pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, sq_idx); + + otx2_qos_sqb_flush(pfvf, sq_idx); + otx2_smq_flush(pfvf, otx2_get_smq_idx(pfvf, sq_idx)); + otx2_cleanup_tx_cqes(pfvf, cq); + + mutex_lock(&pfvf->mbox.lock); + otx2_qos_ctx_disable(pfvf, sq_idx, pool_id); + mutex_unlock(&pfvf->mbox.lock); + + otx2_qos_sq_free_sqbs(pfvf, sq_idx); + otx2_qos_aura_pool_free(pfvf, pool_id); +} -- cgit v1.2.3 From 6b4b2ded9c4282deea421eef144ab0ced954721c Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Sat, 13 May 2023 14:21:39 +0530 Subject: octeontx2-pf: Refactor schedular queue alloc/free calls 1. Upon txschq free request, the transmit scheduler config in hardware is not getting reset. This patch adds necessary changes to do the same. 2. Current implementation calls txschq alloc during interface initialization and updates the default txschq array in the response handler. This creates a problem for htb offload where txsch alloc will be called for every tc class. This patch addresses the issue by reading txschq response in mbox caller function instead of in the response handler. 3. Current otx2_txschq_stop routine tries to free all txschq nodes allocated to the interface. This creates a problem for htb offload. This patch introduces the otx2_txschq_free_one to free txschq in a given level. Signed-off-by: Hariprasad Kelam Signed-off-by: Naveen Mamindlapalli Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S.
Miller --- .../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 45 +++++++++++++++ .../ethernet/marvell/octeontx2/nic/otx2_common.c | 67 ++++++++++++++-------- .../ethernet/marvell/octeontx2/nic/otx2_common.h | 3 +- .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 13 +---- .../net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 4 -- 5 files changed, 94 insertions(+), 38 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 4ad707e758b9..79ed7af0b0a4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -1691,6 +1691,42 @@ exit: return true; } +static void nix_reset_tx_schedule(struct rvu *rvu, int blkaddr, + int lvl, int schq) +{ + u64 tlx_parent = 0, tlx_schedule = 0; + + switch (lvl) { + case NIX_TXSCH_LVL_TL2: + tlx_parent = NIX_AF_TL2X_PARENT(schq); + tlx_schedule = NIX_AF_TL2X_SCHEDULE(schq); + break; + case NIX_TXSCH_LVL_TL3: + tlx_parent = NIX_AF_TL3X_PARENT(schq); + tlx_schedule = NIX_AF_TL3X_SCHEDULE(schq); + break; + case NIX_TXSCH_LVL_TL4: + tlx_parent = NIX_AF_TL4X_PARENT(schq); + tlx_schedule = NIX_AF_TL4X_SCHEDULE(schq); + break; + case NIX_TXSCH_LVL_MDQ: + /* no need to reset SMQ_CFG as HW clears this CSR + * on SMQ flush + */ + tlx_parent = NIX_AF_MDQX_PARENT(schq); + tlx_schedule = NIX_AF_MDQX_SCHEDULE(schq); + break; + default: + return; + } + + if (tlx_parent) + rvu_write64(rvu, blkaddr, tlx_parent, 0x0); + + if (tlx_schedule) + rvu_write64(rvu, blkaddr, tlx_schedule, 0x0); +} + /* Disable shaping of pkts by a scheduler queue * at a given scheduler level. 
*/ @@ -2039,6 +2075,7 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu, pfvf_map[schq] = TXSCH_MAP(pcifunc, 0); nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq); nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq); + nix_reset_tx_schedule(rvu, blkaddr, lvl, schq); } for (idx = 0; idx < req->schq[lvl]; idx++) { @@ -2048,6 +2085,7 @@ int rvu_mbox_handler_nix_txsch_alloc(struct rvu *rvu, pfvf_map[schq] = TXSCH_MAP(pcifunc, 0); nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq); nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq); + nix_reset_tx_schedule(rvu, blkaddr, lvl, schq); } } @@ -2143,6 +2181,7 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc) continue; nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq); nix_clear_tx_xoff(rvu, blkaddr, lvl, schq); + nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq); } } nix_clear_tx_xoff(rvu, blkaddr, NIX_TXSCH_LVL_TL1, @@ -2181,6 +2220,7 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc) for (schq = 0; schq < txsch->schq.max; schq++) { if (TXSCH_MAP_FUNC(txsch->pfvf_map[schq]) != pcifunc) continue; + nix_reset_tx_schedule(rvu, blkaddr, lvl, schq); rvu_free_rsrc(&txsch->schq, schq); txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE); } @@ -2240,6 +2280,9 @@ static int nix_txschq_free_one(struct rvu *rvu, */ nix_clear_tx_xoff(rvu, blkaddr, lvl, schq); + nix_reset_tx_linkcfg(rvu, blkaddr, lvl, schq); + nix_reset_tx_shaping(rvu, blkaddr, nixlf, lvl, schq); + /* Flush if it is a SMQ. 
Onus of disabling * TL2/3 queue links before SMQ flush is on user */ @@ -2249,6 +2292,8 @@ static int nix_txschq_free_one(struct rvu *rvu, goto err; } + nix_reset_tx_schedule(rvu, blkaddr, lvl, schq); + /* Free the resource */ rvu_free_rsrc(&txsch->schq, schq); txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index adbcc087d2a8..6df6f6380b55 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -716,7 +716,8 @@ EXPORT_SYMBOL(otx2_smq_flush); int otx2_txsch_alloc(struct otx2_nic *pfvf) { struct nix_txsch_alloc_req *req; - int lvl; + struct nix_txsch_alloc_rsp *rsp; + int lvl, schq, rc; /* Get memory to put this msg */ req = otx2_mbox_alloc_msg_nix_txsch_alloc(&pfvf->mbox); @@ -726,33 +727,68 @@ int otx2_txsch_alloc(struct otx2_nic *pfvf) /* Request one schq per level */ for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) req->schq[lvl] = 1; + rc = otx2_sync_mbox_msg(&pfvf->mbox); + if (rc) + return rc; - return otx2_sync_mbox_msg(&pfvf->mbox); + rsp = (struct nix_txsch_alloc_rsp *) + otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr); + if (IS_ERR(rsp)) + return PTR_ERR(rsp); + + /* Setup transmit scheduler list */ + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) + for (schq = 0; schq < rsp->schq[lvl]; schq++) + pfvf->hw.txschq_list[lvl][schq] = + rsp->schq_list[lvl][schq]; + + pfvf->hw.txschq_link_cfg_lvl = rsp->link_cfg_lvl; + + return 0; } -int otx2_txschq_stop(struct otx2_nic *pfvf) +void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq) { struct nix_txsch_free_req *free_req; - int lvl, schq, err; + int err; mutex_lock(&pfvf->mbox.lock); - /* Free the transmit schedulers */ + free_req = otx2_mbox_alloc_msg_nix_txsch_free(&pfvf->mbox); if (!free_req) { mutex_unlock(&pfvf->mbox.lock); - return -ENOMEM; + netdev_err(pfvf->netdev, + "Failed alloc 
txschq free req\n"); + return; } - free_req->flags = TXSCHQ_FREE_ALL; + free_req->schq_lvl = lvl; + free_req->schq = schq; + err = otx2_sync_mbox_msg(&pfvf->mbox); + if (err) { + netdev_err(pfvf->netdev, + "Failed stop txschq %d at level %d\n", schq, lvl); + } + mutex_unlock(&pfvf->mbox.lock); +} + +void otx2_txschq_stop(struct otx2_nic *pfvf) +{ + int lvl, schq; + + /* free non QOS TLx nodes */ + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) + otx2_txschq_free_one(pfvf, lvl, + pfvf->hw.txschq_list[lvl][0]); /* Clear the txschq list */ for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { for (schq = 0; schq < MAX_TXSCHQ_PER_FUNC; schq++) pfvf->hw.txschq_list[lvl][schq] = 0; } - return err; + } void otx2_sqb_flush(struct otx2_nic *pfvf) @@ -1642,21 +1678,6 @@ void mbox_handler_cgx_fec_stats(struct otx2_nic *pfvf, pfvf->hw.cgx_fec_uncorr_blks += rsp->fec_uncorr_blks; } -void mbox_handler_nix_txsch_alloc(struct otx2_nic *pf, - struct nix_txsch_alloc_rsp *rsp) -{ - int lvl, schq; - - /* Setup transmit scheduler list */ - for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) - for (schq = 0; schq < rsp->schq[lvl]; schq++) - pf->hw.txschq_list[lvl][schq] = - rsp->schq_list[lvl][schq]; - - pf->hw.txschq_link_cfg_lvl = rsp->link_cfg_lvl; -} -EXPORT_SYMBOL(mbox_handler_nix_txsch_alloc); - void mbox_handler_npa_lf_alloc(struct otx2_nic *pfvf, struct npa_lf_alloc_rsp *rsp) { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index eb5009152c92..0b870a88f9fd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -939,7 +939,8 @@ int otx2_config_nix(struct otx2_nic *pfvf); int otx2_config_nix_queues(struct otx2_nic *pfvf); int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool pfc_en); int otx2_txsch_alloc(struct otx2_nic *pfvf); -int otx2_txschq_stop(struct otx2_nic *pfvf); +void otx2_txschq_stop(struct otx2_nic 
*pfvf); +void otx2_txschq_free_one(struct otx2_nic *pfvf, u16 lvl, u16 schq); void otx2_sqb_flush(struct otx2_nic *pfvf); int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, dma_addr_t *dma); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 918114dc4688..0ca99482558c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -792,10 +792,6 @@ static void otx2_process_pfaf_mbox_msg(struct otx2_nic *pf, case MBOX_MSG_NIX_LF_ALLOC: mbox_handler_nix_lf_alloc(pf, (struct nix_lf_alloc_rsp *)msg); break; - case MBOX_MSG_NIX_TXSCH_ALLOC: - mbox_handler_nix_txsch_alloc(pf, - (struct nix_txsch_alloc_rsp *)msg); - break; case MBOX_MSG_NIX_BP_ENABLE: mbox_handler_nix_bp_enable(pf, (struct nix_bp_cfg_rsp *)msg); break; @@ -1522,8 +1518,7 @@ err_free_nix_queues: otx2_free_cq_res(pf); otx2_ctx_disable(mbox, NIX_AQ_CTYPE_RQ, false); err_free_txsch: - if (otx2_txschq_stop(pf)) - dev_err(pf->dev, "%s failed to stop TX schedulers\n", __func__); + otx2_txschq_stop(pf); err_free_sq_ptrs: otx2_sq_free_sqbs(pf); err_free_rq_ptrs: @@ -1558,15 +1553,13 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) struct mbox *mbox = &pf->mbox; struct otx2_cq_queue *cq; struct msg_req *req; - int qidx, err; + int qidx; /* Ensure all SQE are processed */ otx2_sqb_flush(pf); /* Stop transmission */ - err = otx2_txschq_stop(pf); - if (err) - dev_err(pf->dev, "RVUPF: Failed to stop/free TX schedulers\n"); + otx2_txschq_stop(pf); #ifdef CONFIG_DCB if (pf->pfc_en) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index cd92b9535cdb..404855bccb4b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -70,10 +70,6 @@ static void otx2vf_process_vfaf_mbox_msg(struct otx2_nic *vf, case 
MBOX_MSG_NIX_LF_ALLOC: mbox_handler_nix_lf_alloc(vf, (struct nix_lf_alloc_rsp *)msg); break; - case MBOX_MSG_NIX_TXSCH_ALLOC: - mbox_handler_nix_txsch_alloc(vf, - (struct nix_txsch_alloc_rsp *)msg); - break; case MBOX_MSG_NIX_BP_ENABLE: mbox_handler_nix_bp_enable(vf, (struct nix_bp_cfg_rsp *)msg); break; -- cgit v1.2.3 From cb748a7ebad79b35a4cb652c2148fbebdcd860f3 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Sat, 13 May 2023 14:21:40 +0530 Subject: octeontx2-pf: Prepare for QOS offload This patch moves rate limiting definitions to a common header file and adds csr definitions required for QOS code. Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/nic/otx2_common.h | 28 ++++++++++++++++++++++ .../net/ethernet/marvell/octeontx2/nic/otx2_reg.h | 13 ++++++++++ .../net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 22 ++--------------- 3 files changed, 43 insertions(+), 20 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 0b870a88f9fd..4ce2009d5247 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -185,6 +185,21 @@ struct mbox { int up_num_msgs; /* mbox_up number of messages */ }; +/* Egress rate limiting definitions */ +#define MAX_BURST_EXPONENT 0x0FULL +#define MAX_BURST_MANTISSA 0xFFULL +#define MAX_BURST_SIZE 130816ULL +#define MAX_RATE_DIVIDER_EXPONENT 12ULL +#define MAX_RATE_EXPONENT 0x0FULL +#define MAX_RATE_MANTISSA 0xFFULL + +/* Bitfields in NIX_TLX_PIR register */ +#define TLX_RATE_MANTISSA GENMASK_ULL(8, 1) +#define TLX_RATE_EXPONENT GENMASK_ULL(12, 9) +#define TLX_RATE_DIVIDER_EXPONENT GENMASK_ULL(16, 13) +#define TLX_BURST_MANTISSA GENMASK_ULL(36, 29) +#define TLX_BURST_EXPONENT GENMASK_ULL(40, 37) + struct otx2_hw { struct pci_dev *pdev; struct 
otx2_rss_info rss_info; @@ -253,6 +268,7 @@ struct otx2_hw { #define CN10K_RPM 3 #define CN10K_PTP_ONESTEP 4 #define CN10K_HW_MACSEC 5 +#define QOS_CIR_PIR_SUPPORT 6 unsigned long cap_flag; #define LMT_LINE_SIZE 128 @@ -591,6 +607,7 @@ static inline void otx2_setup_dev_hw_settings(struct otx2_nic *pfvf) __set_bit(CN10K_LMTST, &hw->cap_flag); __set_bit(CN10K_RPM, &hw->cap_flag); __set_bit(CN10K_PTP_ONESTEP, &hw->cap_flag); + __set_bit(QOS_CIR_PIR_SUPPORT, &hw->cap_flag); } if (is_dev_cn10kb(pfvf->pdev)) @@ -915,6 +932,17 @@ static inline u16 otx2_get_total_tx_queues(struct otx2_nic *pfvf) return pfvf->hw.non_qos_queues + pfvf->hw.tc_tx_queues; } +static inline u64 otx2_convert_rate(u64 rate) +{ + u64 converted_rate; + + /* Convert bytes per second to Mbps */ + converted_rate = rate * 8; + converted_rate = max_t(u64, converted_rate / 1000000, 1); + + return converted_rate; +} + /* MSI-X APIs */ void otx2_free_cints(struct otx2_nic *pfvf, int n); void otx2_set_cints_affinity(struct otx2_nic *pfvf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h index 1b967eaf948b..45a32e4b49d1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h @@ -145,12 +145,25 @@ #define NIX_AF_TL1X_TOPOLOGY(a) (0xC80 | (a) << 16) #define NIX_AF_TL2X_PARENT(a) (0xE88 | (a) << 16) #define NIX_AF_TL2X_SCHEDULE(a) (0xE00 | (a) << 16) +#define NIX_AF_TL2X_TOPOLOGY(a) (0xE80 | (a) << 16) +#define NIX_AF_TL2X_CIR(a) (0xE20 | (a) << 16) +#define NIX_AF_TL2X_PIR(a) (0xE30 | (a) << 16) #define NIX_AF_TL3X_PARENT(a) (0x1088 | (a) << 16) #define NIX_AF_TL3X_SCHEDULE(a) (0x1000 | (a) << 16) +#define NIX_AF_TL3X_SHAPE(a) (0x1010 | (a) << 16) +#define NIX_AF_TL3X_CIR(a) (0x1020 | (a) << 16) +#define NIX_AF_TL3X_PIR(a) (0x1030 | (a) << 16) +#define NIX_AF_TL3X_TOPOLOGY(a) (0x1080 | (a) << 16) #define NIX_AF_TL4X_PARENT(a) (0x1288 | (a) << 16) #define 
NIX_AF_TL4X_SCHEDULE(a) (0x1200 | (a) << 16) +#define NIX_AF_TL4X_SHAPE(a) (0x1210 | (a) << 16) +#define NIX_AF_TL4X_CIR(a) (0x1220 | (a) << 16) #define NIX_AF_TL4X_PIR(a) (0x1230 | (a) << 16) +#define NIX_AF_TL4X_TOPOLOGY(a) (0x1280 | (a) << 16) #define NIX_AF_MDQX_SCHEDULE(a) (0x1400 | (a) << 16) +#define NIX_AF_MDQX_SHAPE(a) (0x1410 | (a) << 16) +#define NIX_AF_MDQX_CIR(a) (0x1420 | (a) << 16) +#define NIX_AF_MDQX_PIR(a) (0x1430 | (a) << 16) #define NIX_AF_MDQX_PARENT(a) (0x1480 | (a) << 16) #define NIX_AF_TL3_TL2X_LINKX_CFG(a, b) (0x1700 | (a) << 16 | (b) << 3) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 8392f63e433f..2735cfff8fc1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -20,24 +20,9 @@ #include "cn10k.h" #include "otx2_common.h" -/* Egress rate limiting definitions */ -#define MAX_BURST_EXPONENT 0x0FULL -#define MAX_BURST_MANTISSA 0xFFULL -#define MAX_BURST_SIZE 130816ULL -#define MAX_RATE_DIVIDER_EXPONENT 12ULL -#define MAX_RATE_EXPONENT 0x0FULL -#define MAX_RATE_MANTISSA 0xFFULL - #define CN10K_MAX_BURST_MANTISSA 0x7FFFULL #define CN10K_MAX_BURST_SIZE 8453888ULL -/* Bitfields in NIX_TLX_PIR register */ -#define TLX_RATE_MANTISSA GENMASK_ULL(8, 1) -#define TLX_RATE_EXPONENT GENMASK_ULL(12, 9) -#define TLX_RATE_DIVIDER_EXPONENT GENMASK_ULL(16, 13) -#define TLX_BURST_MANTISSA GENMASK_ULL(36, 29) -#define TLX_BURST_EXPONENT GENMASK_ULL(40, 37) - #define CN10K_TLX_BURST_MANTISSA GENMASK_ULL(43, 29) #define CN10K_TLX_BURST_EXPONENT GENMASK_ULL(47, 44) @@ -264,7 +249,6 @@ static int otx2_tc_egress_matchall_install(struct otx2_nic *nic, struct netlink_ext_ack *extack = cls->common.extack; struct flow_action *actions = &cls->rule->action; struct flow_action_entry *entry; - u64 rate; int err; err = otx2_tc_validate_flow(nic, actions, extack); @@ -288,10 +272,8 @@ static int 
otx2_tc_egress_matchall_install(struct otx2_nic *nic, NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second"); return -EOPNOTSUPP; } - /* Convert bytes per second to Mbps */ - rate = entry->police.rate_bytes_ps * 8; - rate = max_t(u64, rate / 1000000, 1); - err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate); + err = otx2_set_matchall_egress_rate(nic, entry->police.burst, + otx2_convert_rate(entry->police.rate_bytes_ps)); if (err) return err; nic->flags |= OTX2_FLAG_TC_MATCHALL_EGRESS_ENABLED; -- cgit v1.2.3 From 5e6808b4c68d7882971514ab3279926eb07c8b2d Mon Sep 17 00:00:00 2001 From: Naveen Mamindlapalli Date: Sat, 13 May 2023 14:21:41 +0530 Subject: octeontx2-pf: Add support for HTB offload This patch registers callbacks to support HTB offload. Below are features supported, - supports traffic shaping on the given class by honoring rate and ceil configuration. - supports traffic scheduling, which prioritizes different types of traffic based on strict priority values. - supports the creation of leaf to inner classes such that parent node rate limits apply to all child nodes. Signed-off-by: Naveen Mamindlapalli Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/af/common.h | 2 +- .../net/ethernet/marvell/octeontx2/nic/Makefile | 2 +- .../ethernet/marvell/octeontx2/nic/otx2_common.c | 5 + .../ethernet/marvell/octeontx2/nic/otx2_common.h | 10 + .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 53 +- .../net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 7 +- .../net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 1 + drivers/net/ethernet/marvell/octeontx2/nic/qos.c | 1363 ++++++++++++++++++++ drivers/net/ethernet/marvell/octeontx2/nic/qos.h | 58 +- .../net/ethernet/marvell/octeontx2/nic/qos_sq.c | 20 +- 10 files changed, 1507 insertions(+), 14 deletions(-) create mode 100644 drivers/net/ethernet/marvell/octeontx2/nic/qos.c (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h index 8931864ee110..f5bf719a6ccf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h @@ -142,7 +142,7 @@ enum nix_scheduler { #define TXSCH_RR_QTM_MAX ((1 << 24) - 1) #define TXSCH_TL1_DFLT_RR_QTM TXSCH_RR_QTM_MAX -#define TXSCH_TL1_DFLT_RR_PRIO (0x1ull) +#define TXSCH_TL1_DFLT_RR_PRIO (0x7ull) #define CN10K_MAX_DWRR_WEIGHT 16384 /* Weight is 14bit on CN10K */ /* Min/Max packet sizes, excluding FCS */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index 3d31ddf7c652..5664f768cb0c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o otx2_ptp.o rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ otx2_flows.o otx2_tc.o cn10k.o otx2_dmac_flt.o \ - otx2_devlink.o qos_sq.o + otx2_devlink.o qos_sq.o qos.o rvu_nicvf-y := otx2_vf.o otx2_devlink.o rvu_nicpf-$(CONFIG_DCB) += otx2_dcbnl.o diff --git 
a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 6df6f6380b55..f9286648e45c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -89,6 +89,11 @@ int otx2_update_sq_stats(struct otx2_nic *pfvf, int qidx) if (!pfvf->qset.sq) return 0; + if (qidx >= pfvf->hw.non_qos_queues) { + if (!test_bit(qidx - pfvf->hw.non_qos_queues, pfvf->qos.qos_sq_bmap)) + return 0; + } + otx2_nix_sq_op_stats(&sq->stats, pfvf, qidx); return 1; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 4ce2009d5247..0f2b2a901225 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -1099,8 +1099,18 @@ static inline void otx2_qos_init(struct otx2_nic *pfvf, int qos_txqs) struct otx2_hw *hw = &pfvf->hw; hw->tc_tx_queues = qos_txqs; + INIT_LIST_HEAD(&pfvf->qos.qos_tree); + mutex_init(&pfvf->qos.qos_lock); +} + +static inline void otx2_shutdown_qos(struct otx2_nic *pfvf) +{ + mutex_destroy(&pfvf->qos.qos_lock); } u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb, struct net_device *sb_dev); +int otx2_get_txq_by_classid(struct otx2_nic *pfvf, u16 classid); +void otx2_qos_config_txschq(struct otx2_nic *pfvf); +void otx2_clean_qos_queues(struct otx2_nic *pfvf); #endif /* OTX2_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 0ca99482558c..e1883c3edda3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1387,6 +1387,9 @@ static void otx2_free_sq_res(struct otx2_nic *pf) otx2_sq_free_sqbs(pf); for (qidx = 0; qidx < otx2_get_total_tx_queues(pf); qidx++) { sq = &qset->sq[qidx]; + /* Skip freeing Qos queues if 
they are not initialized */ + if (!sq->sqe) + continue; qmem_free(pf->dev, sq->sqe); qmem_free(pf->dev, sq->tso_hdrs); kfree(sq->sg); @@ -1566,6 +1569,8 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) otx2_pfc_txschq_stop(pf); #endif + otx2_clean_qos_queues(pf); + mutex_lock(&mbox->lock); /* Disable backpressure */ if (!(pf->pcifunc & RVU_PFVF_FUNC_MASK)) @@ -1710,7 +1715,7 @@ int otx2_open(struct net_device *netdev) if (!qset->cq) goto err_free_mem; - qset->sq = kcalloc(pf->hw.non_qos_queues, + qset->sq = kcalloc(otx2_get_total_tx_queues(pf), sizeof(struct otx2_snd_queue), GFP_KERNEL); if (!qset->sq) goto err_free_mem; @@ -1833,6 +1838,9 @@ int otx2_open(struct net_device *netdev) /* 'intf_down' may be checked on any cpu */ smp_wmb(); + /* Enable QoS configuration before starting tx queues */ + otx2_qos_config_txschq(pf); + /* we have already received link status notification */ if (pf->linfo.link_up && !(pf->pcifunc & RVU_PFVF_FUNC_MASK)) otx2_handle_link_event(pf); @@ -1986,14 +1994,48 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } +static int otx2_qos_select_htb_queue(struct otx2_nic *pf, struct sk_buff *skb, + u16 htb_maj_id) +{ + u16 classid; + + if ((TC_H_MAJ(skb->priority) >> 16) == htb_maj_id) + classid = TC_H_MIN(skb->priority); + else + classid = READ_ONCE(pf->qos.defcls); + + if (!classid) + return 0; + + return otx2_get_txq_by_classid(pf, classid); +} + u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb, struct net_device *sb_dev) { -#ifdef CONFIG_DCB struct otx2_nic *pf = netdev_priv(netdev); + bool qos_enabled; +#ifdef CONFIG_DCB u8 vlan_prio; #endif + int txq; + + qos_enabled = (netdev->real_num_tx_queues > pf->hw.tx_queues) ? 
true : false; + if (unlikely(qos_enabled)) { + /* This smp_load_acquire() pairs with smp_store_release() in + * otx2_qos_root_add() called from htb offload root creation + */ + u16 htb_maj_id = smp_load_acquire(&pf->qos.maj_id); + if (unlikely(htb_maj_id)) { + txq = otx2_qos_select_htb_queue(pf, skb, htb_maj_id); + if (txq > 0) + return txq; + goto process_pfc; + } + } + +process_pfc: #ifdef CONFIG_DCB if (!skb_vlan_tag_present(skb)) goto pick_tx; @@ -2007,7 +2049,11 @@ u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb, pick_tx: #endif - return netdev_pick_tx(netdev, skb, NULL); + txq = netdev_pick_tx(netdev, skb, NULL); + if (unlikely(qos_enabled)) + return txq % pf->hw.tx_queues; + + return txq; } EXPORT_SYMBOL(otx2_select_queue); @@ -3121,6 +3167,7 @@ static void otx2_remove(struct pci_dev *pdev) otx2_ptp_destroy(pf); otx2_mcam_flow_del(pf); otx2_shutdown_tc(pf); + otx2_shutdown_qos(pf); otx2_detach_resources(&pf->mbox); if (pf->hw.lmt_info) free_percpu(pf->hw.lmt_info); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 2735cfff8fc1..231c3f0efb60 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -19,6 +19,7 @@ #include "cn10k.h" #include "otx2_common.h" +#include "qos.h" #define CN10K_MAX_BURST_MANTISSA 0x7FFFULL #define CN10K_MAX_BURST_SIZE 8453888ULL @@ -132,8 +133,8 @@ static void otx2_get_egress_rate_cfg(u64 maxrate, u32 *exp, } } -static u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic, - u64 maxrate, u32 burst) +u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic, + u64 maxrate, u32 burst) { u32 burst_exp, burst_mantissa; u32 exp, mantissa, div_exp; @@ -1109,6 +1110,8 @@ int otx2_setup_tc(struct net_device *netdev, enum tc_setup_type type, switch (type) { case TC_SETUP_BLOCK: return otx2_setup_tc_block(netdev, type_data); + case TC_SETUP_QDISC_HTB: + return 
otx2_setup_tc_htb(netdev, type_data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 404855bccb4b..3734c799e416 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -760,6 +760,7 @@ static void otx2vf_remove(struct pci_dev *pdev) otx2_ptp_destroy(vf); otx2_mcam_flow_del(vf); otx2_shutdown_tc(vf); + otx2_shutdown_qos(vf); otx2vf_disable_mbox_intr(vf); otx2_detach_resources(&vf->mbox); free_percpu(vf->hw.lmt_info); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c new file mode 100644 index 000000000000..d3a76c5ccda8 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.c @@ -0,0 +1,1363 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell RVU Ethernet driver + * + * Copyright (C) 2023 Marvell. + * + */ +#include +#include +#include +#include + +#include "otx2_common.h" +#include "cn10k.h" +#include "qos.h" + +#define OTX2_QOS_QID_INNER 0xFFFFU +#define OTX2_QOS_QID_NONE 0xFFFEU +#define OTX2_QOS_ROOT_CLASSID 0xFFFFFFFF +#define OTX2_QOS_CLASS_NONE 0 +#define OTX2_QOS_DEFAULT_PRIO 0xF +#define OTX2_QOS_INVALID_SQ 0xFFFF + +static void otx2_qos_update_tx_netdev_queues(struct otx2_nic *pfvf) +{ + struct otx2_hw *hw = &pfvf->hw; + int tx_queues, qos_txqs, err; + + qos_txqs = bitmap_weight(pfvf->qos.qos_sq_bmap, + OTX2_QOS_MAX_LEAF_NODES); + + tx_queues = hw->tx_queues + qos_txqs; + + err = netif_set_real_num_tx_queues(pfvf->netdev, tx_queues); + if (err) { + netdev_err(pfvf->netdev, + "Failed to set no of Tx queues: %d\n", tx_queues); + return; + } +} + +static void otx2_qos_get_regaddr(struct otx2_qos_node *node, + struct nix_txschq_config *cfg, + int index) +{ + if (node->level == NIX_TXSCH_LVL_SMQ) { + cfg->reg[index++] = NIX_AF_MDQX_PARENT(node->schq); + cfg->reg[index++] = NIX_AF_MDQX_SCHEDULE(node->schq); + 
cfg->reg[index++] = NIX_AF_MDQX_PIR(node->schq); + cfg->reg[index] = NIX_AF_MDQX_CIR(node->schq); + } else if (node->level == NIX_TXSCH_LVL_TL4) { + cfg->reg[index++] = NIX_AF_TL4X_PARENT(node->schq); + cfg->reg[index++] = NIX_AF_TL4X_SCHEDULE(node->schq); + cfg->reg[index++] = NIX_AF_TL4X_PIR(node->schq); + cfg->reg[index] = NIX_AF_TL4X_CIR(node->schq); + } else if (node->level == NIX_TXSCH_LVL_TL3) { + cfg->reg[index++] = NIX_AF_TL3X_PARENT(node->schq); + cfg->reg[index++] = NIX_AF_TL3X_SCHEDULE(node->schq); + cfg->reg[index++] = NIX_AF_TL3X_PIR(node->schq); + cfg->reg[index] = NIX_AF_TL3X_CIR(node->schq); + } else if (node->level == NIX_TXSCH_LVL_TL2) { + cfg->reg[index++] = NIX_AF_TL2X_PARENT(node->schq); + cfg->reg[index++] = NIX_AF_TL2X_SCHEDULE(node->schq); + cfg->reg[index++] = NIX_AF_TL2X_PIR(node->schq); + cfg->reg[index] = NIX_AF_TL2X_CIR(node->schq); + } +} + +static void otx2_config_sched_shaping(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct nix_txschq_config *cfg, + int *num_regs) +{ + u64 maxrate; + + otx2_qos_get_regaddr(node, cfg, *num_regs); + + /* configure parent txschq */ + cfg->regval[*num_regs] = node->parent->schq << 16; + (*num_regs)++; + + /* configure prio/quantum */ + if (node->qid == OTX2_QOS_QID_NONE) { + cfg->regval[*num_regs] = node->prio << 24 | + mtu_to_dwrr_weight(pfvf, pfvf->tx_max_pktlen); + (*num_regs)++; + return; + } + + /* configure priority */ + cfg->regval[*num_regs] = (node->schq - node->parent->prio_anchor) << 24; + (*num_regs)++; + + /* configure PIR */ + maxrate = (node->rate > node->ceil) ? 
node->rate : node->ceil; + + cfg->regval[*num_regs] = + otx2_get_txschq_rate_regval(pfvf, maxrate, 65536); + (*num_regs)++; + + /* Don't configure CIR when both CIR+PIR not supported + * On 96xx, CIR + PIR + RED_ALGO=STALL causes deadlock + */ + if (!test_bit(QOS_CIR_PIR_SUPPORT, &pfvf->hw.cap_flag)) + return; + + cfg->regval[*num_regs] = + otx2_get_txschq_rate_regval(pfvf, node->rate, 65536); + (*num_regs)++; +} + +static void __otx2_qos_txschq_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct nix_txschq_config *cfg) +{ + struct otx2_hw *hw = &pfvf->hw; + int num_regs = 0; + u8 level; + + level = node->level; + + /* program txschq registers */ + if (level == NIX_TXSCH_LVL_SMQ) { + cfg->reg[num_regs] = NIX_AF_SMQX_CFG(node->schq); + cfg->regval[num_regs] = ((u64)pfvf->tx_max_pktlen << 8) | + OTX2_MIN_MTU; + cfg->regval[num_regs] |= (0x20ULL << 51) | (0x80ULL << 39) | + (0x2ULL << 36); + num_regs++; + + otx2_config_sched_shaping(pfvf, node, cfg, &num_regs); + + } else if (level == NIX_TXSCH_LVL_TL4) { + otx2_config_sched_shaping(pfvf, node, cfg, &num_regs); + } else if (level == NIX_TXSCH_LVL_TL3) { + /* configure link cfg */ + if (level == pfvf->qos.link_cfg_lvl) { + cfg->reg[num_regs] = NIX_AF_TL3_TL2X_LINKX_CFG(node->schq, hw->tx_link); + cfg->regval[num_regs] = BIT_ULL(13) | BIT_ULL(12); + num_regs++; + } + + otx2_config_sched_shaping(pfvf, node, cfg, &num_regs); + } else if (level == NIX_TXSCH_LVL_TL2) { + /* configure link cfg */ + if (level == pfvf->qos.link_cfg_lvl) { + cfg->reg[num_regs] = NIX_AF_TL3_TL2X_LINKX_CFG(node->schq, hw->tx_link); + cfg->regval[num_regs] = BIT_ULL(13) | BIT_ULL(12); + num_regs++; + } + + /* check if node is root */ + if (node->qid == OTX2_QOS_QID_INNER && !node->parent) { + cfg->reg[num_regs] = NIX_AF_TL2X_SCHEDULE(node->schq); + cfg->regval[num_regs] = TXSCH_TL1_DFLT_RR_PRIO << 24 | + mtu_to_dwrr_weight(pfvf, + pfvf->tx_max_pktlen); + num_regs++; + goto txschq_cfg_out; + } + + otx2_config_sched_shaping(pfvf, node, 
cfg, &num_regs); + } + +txschq_cfg_out: + cfg->num_regs = num_regs; +} + +static int otx2_qos_txschq_set_parent_topology(struct otx2_nic *pfvf, + struct otx2_qos_node *parent) +{ + struct mbox *mbox = &pfvf->mbox; + struct nix_txschq_config *cfg; + int rc; + + if (parent->level == NIX_TXSCH_LVL_MDQ) + return 0; + + mutex_lock(&mbox->lock); + + cfg = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox); + if (!cfg) { + mutex_unlock(&mbox->lock); + return -ENOMEM; + } + + cfg->lvl = parent->level; + + if (parent->level == NIX_TXSCH_LVL_TL4) + cfg->reg[0] = NIX_AF_TL4X_TOPOLOGY(parent->schq); + else if (parent->level == NIX_TXSCH_LVL_TL3) + cfg->reg[0] = NIX_AF_TL3X_TOPOLOGY(parent->schq); + else if (parent->level == NIX_TXSCH_LVL_TL2) + cfg->reg[0] = NIX_AF_TL2X_TOPOLOGY(parent->schq); + else if (parent->level == NIX_TXSCH_LVL_TL1) + cfg->reg[0] = NIX_AF_TL1X_TOPOLOGY(parent->schq); + + cfg->regval[0] = (u64)parent->prio_anchor << 32; + if (parent->level == NIX_TXSCH_LVL_TL1) + cfg->regval[0] |= (u64)TXSCH_TL1_DFLT_RR_PRIO << 1; + + cfg->num_regs++; + + rc = otx2_sync_mbox_msg(&pfvf->mbox); + + mutex_unlock(&mbox->lock); + + return rc; +} + +static void otx2_qos_free_hw_node_schq(struct otx2_nic *pfvf, + struct otx2_qos_node *parent) +{ + struct otx2_qos_node *node; + + list_for_each_entry_reverse(node, &parent->child_schq_list, list) + otx2_txschq_free_one(pfvf, node->level, node->schq); +} + +static void otx2_qos_free_hw_node(struct otx2_nic *pfvf, + struct otx2_qos_node *parent) +{ + struct otx2_qos_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &parent->child_list, list) { + otx2_qos_free_hw_node(pfvf, node); + otx2_qos_free_hw_node_schq(pfvf, node); + otx2_txschq_free_one(pfvf, node->level, node->schq); + } +} + +static void otx2_qos_free_hw_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *node) +{ + mutex_lock(&pfvf->qos.qos_lock); + + /* free child node hw mappings */ + otx2_qos_free_hw_node(pfvf, node); + otx2_qos_free_hw_node_schq(pfvf, node); + + /* 
free node hw mappings */ + otx2_txschq_free_one(pfvf, node->level, node->schq); + + mutex_unlock(&pfvf->qos.qos_lock); +} + +static void otx2_qos_sw_node_delete(struct otx2_nic *pfvf, + struct otx2_qos_node *node) +{ + hash_del_rcu(&node->hlist); + + if (node->qid != OTX2_QOS_QID_INNER && node->qid != OTX2_QOS_QID_NONE) { + __clear_bit(node->qid, pfvf->qos.qos_sq_bmap); + otx2_qos_update_tx_netdev_queues(pfvf); + } + + list_del(&node->list); + kfree(node); +} + +static void otx2_qos_free_sw_node_schq(struct otx2_nic *pfvf, + struct otx2_qos_node *parent) +{ + struct otx2_qos_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &parent->child_schq_list, list) { + list_del(&node->list); + kfree(node); + } +} + +static void __otx2_qos_free_sw_node(struct otx2_nic *pfvf, + struct otx2_qos_node *parent) +{ + struct otx2_qos_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &parent->child_list, list) { + __otx2_qos_free_sw_node(pfvf, node); + otx2_qos_free_sw_node_schq(pfvf, node); + otx2_qos_sw_node_delete(pfvf, node); + } +} + +static void otx2_qos_free_sw_node(struct otx2_nic *pfvf, + struct otx2_qos_node *node) +{ + mutex_lock(&pfvf->qos.qos_lock); + + __otx2_qos_free_sw_node(pfvf, node); + otx2_qos_free_sw_node_schq(pfvf, node); + otx2_qos_sw_node_delete(pfvf, node); + + mutex_unlock(&pfvf->qos.qos_lock); +} + +static void otx2_qos_destroy_node(struct otx2_nic *pfvf, + struct otx2_qos_node *node) +{ + otx2_qos_free_hw_cfg(pfvf, node); + otx2_qos_free_sw_node(pfvf, node); +} + +static void otx2_qos_fill_cfg_schq(struct otx2_qos_node *parent, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *node; + + list_for_each_entry(node, &parent->child_schq_list, list) + cfg->schq[node->level]++; +} + +static void otx2_qos_fill_cfg_tl(struct otx2_qos_node *parent, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *node; + + list_for_each_entry(node, &parent->child_list, list) { + otx2_qos_fill_cfg_tl(node, cfg); + cfg->schq_contig[node->level]++; + 
otx2_qos_fill_cfg_schq(node, cfg); + } +} + +static void otx2_qos_prepare_txschq_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *parent, + struct otx2_qos_cfg *cfg) +{ + mutex_lock(&pfvf->qos.qos_lock); + otx2_qos_fill_cfg_tl(parent, cfg); + mutex_unlock(&pfvf->qos.qos_lock); +} + +static void otx2_qos_read_txschq_cfg_schq(struct otx2_qos_node *parent, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *node; + int cnt; + + list_for_each_entry(node, &parent->child_schq_list, list) { + cnt = cfg->dwrr_node_pos[node->level]; + cfg->schq_list[node->level][cnt] = node->schq; + cfg->schq[node->level]++; + cfg->dwrr_node_pos[node->level]++; + } +} + +static void otx2_qos_read_txschq_cfg_tl(struct otx2_qos_node *parent, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *node; + int cnt; + + list_for_each_entry(node, &parent->child_list, list) { + otx2_qos_read_txschq_cfg_tl(node, cfg); + cnt = cfg->static_node_pos[node->level]; + cfg->schq_contig_list[node->level][cnt] = node->schq; + cfg->schq_contig[node->level]++; + cfg->static_node_pos[node->level]++; + otx2_qos_read_txschq_cfg_schq(node, cfg); + } +} + +static void otx2_qos_read_txschq_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + mutex_lock(&pfvf->qos.qos_lock); + otx2_qos_read_txschq_cfg_tl(node, cfg); + mutex_unlock(&pfvf->qos.qos_lock); +} + +static struct otx2_qos_node * +otx2_qos_alloc_root(struct otx2_nic *pfvf) +{ + struct otx2_qos_node *node; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->parent = NULL; + if (!is_otx2_vf(pfvf->pcifunc)) + node->level = NIX_TXSCH_LVL_TL1; + else + node->level = NIX_TXSCH_LVL_TL2; + + WRITE_ONCE(node->qid, OTX2_QOS_QID_INNER); + node->classid = OTX2_QOS_ROOT_CLASSID; + + hash_add_rcu(pfvf->qos.qos_hlist, &node->hlist, node->classid); + list_add_tail(&node->list, &pfvf->qos.qos_tree); + INIT_LIST_HEAD(&node->child_list); + INIT_LIST_HEAD(&node->child_schq_list); + + return 
node; +} + +static int otx2_qos_add_child_node(struct otx2_qos_node *parent, + struct otx2_qos_node *node) +{ + struct list_head *head = &parent->child_list; + struct otx2_qos_node *tmp_node; + struct list_head *tmp; + + for (tmp = head->next; tmp != head; tmp = tmp->next) { + tmp_node = list_entry(tmp, struct otx2_qos_node, list); + if (tmp_node->prio == node->prio) + return -EEXIST; + if (tmp_node->prio > node->prio) { + list_add_tail(&node->list, tmp); + return 0; + } + } + + list_add_tail(&node->list, head); + return 0; +} + +static int otx2_qos_alloc_txschq_node(struct otx2_nic *pfvf, + struct otx2_qos_node *node) +{ + struct otx2_qos_node *txschq_node, *parent, *tmp; + int lvl; + + parent = node; + for (lvl = node->level - 1; lvl >= NIX_TXSCH_LVL_MDQ; lvl--) { + txschq_node = kzalloc(sizeof(*txschq_node), GFP_KERNEL); + if (!txschq_node) + goto err_out; + + txschq_node->parent = parent; + txschq_node->level = lvl; + txschq_node->classid = OTX2_QOS_CLASS_NONE; + WRITE_ONCE(txschq_node->qid, OTX2_QOS_QID_NONE); + txschq_node->rate = 0; + txschq_node->ceil = 0; + txschq_node->prio = 0; + + mutex_lock(&pfvf->qos.qos_lock); + list_add_tail(&txschq_node->list, &node->child_schq_list); + mutex_unlock(&pfvf->qos.qos_lock); + + INIT_LIST_HEAD(&txschq_node->child_list); + INIT_LIST_HEAD(&txschq_node->child_schq_list); + parent = txschq_node; + } + + return 0; + +err_out: + list_for_each_entry_safe(txschq_node, tmp, &node->child_schq_list, + list) { + list_del(&txschq_node->list); + kfree(txschq_node); + } + return -ENOMEM; +} + +static struct otx2_qos_node * +otx2_qos_sw_create_leaf_node(struct otx2_nic *pfvf, + struct otx2_qos_node *parent, + u16 classid, u32 prio, u64 rate, u64 ceil, + u16 qid) +{ + struct otx2_qos_node *node; + int err; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return ERR_PTR(-ENOMEM); + + node->parent = parent; + node->level = parent->level - 1; + node->classid = classid; + WRITE_ONCE(node->qid, qid); + + node->rate = 
otx2_convert_rate(rate); + node->ceil = otx2_convert_rate(ceil); + node->prio = prio; + + __set_bit(qid, pfvf->qos.qos_sq_bmap); + + hash_add_rcu(pfvf->qos.qos_hlist, &node->hlist, classid); + + mutex_lock(&pfvf->qos.qos_lock); + err = otx2_qos_add_child_node(parent, node); + if (err) { + mutex_unlock(&pfvf->qos.qos_lock); + return ERR_PTR(err); + } + mutex_unlock(&pfvf->qos.qos_lock); + + INIT_LIST_HEAD(&node->child_list); + INIT_LIST_HEAD(&node->child_schq_list); + + err = otx2_qos_alloc_txschq_node(pfvf, node); + if (err) { + otx2_qos_sw_node_delete(pfvf, node); + return ERR_PTR(-ENOMEM); + } + + return node; +} + +static struct otx2_qos_node * +otx2_sw_node_find(struct otx2_nic *pfvf, u32 classid) +{ + struct otx2_qos_node *node = NULL; + + hash_for_each_possible(pfvf->qos.qos_hlist, node, hlist, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +static struct otx2_qos_node * +otx2_sw_node_find_rcu(struct otx2_nic *pfvf, u32 classid) +{ + struct otx2_qos_node *node = NULL; + + hash_for_each_possible_rcu(pfvf->qos.qos_hlist, node, hlist, classid) { + if (node->classid == classid) + break; + } + + return node; +} + +int otx2_get_txq_by_classid(struct otx2_nic *pfvf, u16 classid) +{ + struct otx2_qos_node *node; + u16 qid; + int res; + + node = otx2_sw_node_find_rcu(pfvf, classid); + if (!node) { + res = -ENOENT; + goto out; + } + qid = READ_ONCE(node->qid); + if (qid == OTX2_QOS_QID_INNER) { + res = -EINVAL; + goto out; + } + res = pfvf->hw.tx_queues + qid; +out: + return res; +} + +static int +otx2_qos_txschq_config(struct otx2_nic *pfvf, struct otx2_qos_node *node) +{ + struct mbox *mbox = &pfvf->mbox; + struct nix_txschq_config *req; + int rc; + + mutex_lock(&mbox->lock); + + req = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox); + if (!req) { + mutex_unlock(&mbox->lock); + return -ENOMEM; + } + + req->lvl = node->level; + __otx2_qos_txschq_cfg(pfvf, node, req); + + rc = otx2_sync_mbox_msg(&pfvf->mbox); + + 
mutex_unlock(&mbox->lock); + + return rc; +} + +static int otx2_qos_txschq_alloc(struct otx2_nic *pfvf, + struct otx2_qos_cfg *cfg) +{ + struct nix_txsch_alloc_req *req; + struct nix_txsch_alloc_rsp *rsp; + struct mbox *mbox = &pfvf->mbox; + int lvl, rc, schq; + + mutex_lock(&mbox->lock); + req = otx2_mbox_alloc_msg_nix_txsch_alloc(&pfvf->mbox); + if (!req) { + mutex_unlock(&mbox->lock); + return -ENOMEM; + } + + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + req->schq[lvl] = cfg->schq[lvl]; + req->schq_contig[lvl] = cfg->schq_contig[lvl]; + } + + rc = otx2_sync_mbox_msg(&pfvf->mbox); + if (rc) { + mutex_unlock(&mbox->lock); + return rc; + } + + rsp = (struct nix_txsch_alloc_rsp *) + otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr); + + if (IS_ERR(rsp)) { + rc = PTR_ERR(rsp); + goto out; + } + + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + for (schq = 0; schq < rsp->schq_contig[lvl]; schq++) { + cfg->schq_contig_list[lvl][schq] = + rsp->schq_contig_list[lvl][schq]; + } + } + + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + for (schq = 0; schq < rsp->schq[lvl]; schq++) { + cfg->schq_list[lvl][schq] = + rsp->schq_list[lvl][schq]; + } + } + + pfvf->qos.link_cfg_lvl = rsp->link_cfg_lvl; + +out: + mutex_unlock(&mbox->lock); + return rc; +} + +static void otx2_qos_txschq_fill_cfg_schq(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *tmp; + int cnt; + + list_for_each_entry(tmp, &node->child_schq_list, list) { + cnt = cfg->dwrr_node_pos[tmp->level]; + tmp->schq = cfg->schq_list[tmp->level][cnt]; + cfg->dwrr_node_pos[tmp->level]++; + } +} + +static void otx2_qos_txschq_fill_cfg_tl(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *tmp; + int cnt; + + list_for_each_entry(tmp, &node->child_list, list) { + otx2_qos_txschq_fill_cfg_tl(pfvf, tmp, cfg); + cnt = cfg->static_node_pos[tmp->level]; + tmp->schq = cfg->schq_contig_list[tmp->level][cnt]; + if 
(cnt == 0) + node->prio_anchor = tmp->schq; + cfg->static_node_pos[tmp->level]++; + otx2_qos_txschq_fill_cfg_schq(pfvf, tmp, cfg); + } +} + +static void otx2_qos_txschq_fill_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + mutex_lock(&pfvf->qos.qos_lock); + otx2_qos_txschq_fill_cfg_tl(pfvf, node, cfg); + otx2_qos_txschq_fill_cfg_schq(pfvf, node, cfg); + mutex_unlock(&pfvf->qos.qos_lock); +} + +static int otx2_qos_txschq_push_cfg_schq(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *tmp; + int ret; + + list_for_each_entry(tmp, &node->child_schq_list, list) { + ret = otx2_qos_txschq_config(pfvf, tmp); + if (ret) + return -EIO; + ret = otx2_qos_txschq_set_parent_topology(pfvf, tmp->parent); + if (ret) + return -EIO; + } + + return 0; +} + +static int otx2_qos_txschq_push_cfg_tl(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + struct otx2_qos_node *tmp; + int ret; + + list_for_each_entry(tmp, &node->child_list, list) { + ret = otx2_qos_txschq_push_cfg_tl(pfvf, tmp, cfg); + if (ret) + return -EIO; + ret = otx2_qos_txschq_config(pfvf, tmp); + if (ret) + return -EIO; + ret = otx2_qos_txschq_push_cfg_schq(pfvf, tmp, cfg); + if (ret) + return -EIO; + } + + ret = otx2_qos_txschq_set_parent_topology(pfvf, node); + if (ret) + return -EIO; + + return 0; +} + +static int otx2_qos_txschq_push_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + int ret; + + mutex_lock(&pfvf->qos.qos_lock); + ret = otx2_qos_txschq_push_cfg_tl(pfvf, node, cfg); + if (ret) + goto out; + ret = otx2_qos_txschq_push_cfg_schq(pfvf, node, cfg); +out: + mutex_unlock(&pfvf->qos.qos_lock); + return ret; +} + +static int otx2_qos_txschq_update_config(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + otx2_qos_txschq_fill_cfg(pfvf, node, cfg); + + return otx2_qos_txschq_push_cfg(pfvf, node, cfg); +} + 
+static int otx2_qos_txschq_update_root_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *root, + struct otx2_qos_cfg *cfg) +{ + root->schq = cfg->schq_list[root->level][0]; + return otx2_qos_txschq_config(pfvf, root); +} + +static void otx2_qos_free_cfg(struct otx2_nic *pfvf, struct otx2_qos_cfg *cfg) +{ + int lvl, idx, schq; + + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + for (idx = 0; idx < cfg->schq[lvl]; idx++) { + schq = cfg->schq_list[lvl][idx]; + otx2_txschq_free_one(pfvf, lvl, schq); + } + } + + for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) { + for (idx = 0; idx < cfg->schq_contig[lvl]; idx++) { + schq = cfg->schq_contig_list[lvl][idx]; + otx2_txschq_free_one(pfvf, lvl, schq); + } + } +} + +static void otx2_qos_enadis_sq(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + u16 qid) +{ + if (pfvf->qos.qid_to_sqmap[qid] != OTX2_QOS_INVALID_SQ) + otx2_qos_disable_sq(pfvf, qid); + + pfvf->qos.qid_to_sqmap[qid] = node->schq; + otx2_qos_enable_sq(pfvf, qid); +} + +static void otx2_qos_update_smq_schq(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + bool action) +{ + struct otx2_qos_node *tmp; + + if (node->qid == OTX2_QOS_QID_INNER) + return; + + list_for_each_entry(tmp, &node->child_schq_list, list) { + if (tmp->level == NIX_TXSCH_LVL_MDQ) { + if (action == QOS_SMQ_FLUSH) + otx2_smq_flush(pfvf, tmp->schq); + else + otx2_qos_enadis_sq(pfvf, tmp, node->qid); + } + } +} + +static void __otx2_qos_update_smq(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + bool action) +{ + struct otx2_qos_node *tmp; + + list_for_each_entry(tmp, &node->child_list, list) { + __otx2_qos_update_smq(pfvf, tmp, action); + if (tmp->qid == OTX2_QOS_QID_INNER) + continue; + if (tmp->level == NIX_TXSCH_LVL_MDQ) { + if (action == QOS_SMQ_FLUSH) + otx2_smq_flush(pfvf, tmp->schq); + else + otx2_qos_enadis_sq(pfvf, tmp, tmp->qid); + } else { + otx2_qos_update_smq_schq(pfvf, tmp, action); + } + } +} + +static void otx2_qos_update_smq(struct otx2_nic *pfvf, + struct 
otx2_qos_node *node, + bool action) +{ + mutex_lock(&pfvf->qos.qos_lock); + __otx2_qos_update_smq(pfvf, node, action); + otx2_qos_update_smq_schq(pfvf, node, action); + mutex_unlock(&pfvf->qos.qos_lock); +} + +static int otx2_qos_push_txschq_cfg(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + int ret; + + ret = otx2_qos_txschq_alloc(pfvf, cfg); + if (ret) + return -ENOSPC; + + if (!(pfvf->netdev->flags & IFF_UP)) { + otx2_qos_txschq_fill_cfg(pfvf, node, cfg); + return 0; + } + + ret = otx2_qos_txschq_update_config(pfvf, node, cfg); + if (ret) { + otx2_qos_free_cfg(pfvf, cfg); + return -EIO; + } + + otx2_qos_update_smq(pfvf, node, QOS_CFG_SQ); + + return 0; +} + +static int otx2_qos_update_tree(struct otx2_nic *pfvf, + struct otx2_qos_node *node, + struct otx2_qos_cfg *cfg) +{ + otx2_qos_prepare_txschq_cfg(pfvf, node->parent, cfg); + return otx2_qos_push_txschq_cfg(pfvf, node->parent, cfg); +} + +static int otx2_qos_root_add(struct otx2_nic *pfvf, u16 htb_maj_id, u16 htb_defcls, + struct netlink_ext_ack *extack) +{ + struct otx2_qos_cfg *new_cfg; + struct otx2_qos_node *root; + int err; + + netdev_dbg(pfvf->netdev, + "TC_HTB_CREATE: handle=0x%x defcls=0x%x\n", + htb_maj_id, htb_defcls); + + root = otx2_qos_alloc_root(pfvf); + if (IS_ERR(root)) { + err = PTR_ERR(root); + return err; + } + + /* allocate txschq queue */ + new_cfg = kzalloc(sizeof(*new_cfg), GFP_KERNEL); + if (!new_cfg) { + NL_SET_ERR_MSG_MOD(extack, "Memory allocation error"); + err = -ENOMEM; + goto free_root_node; + } + /* allocate htb root node */ + new_cfg->schq[root->level] = 1; + err = otx2_qos_txschq_alloc(pfvf, new_cfg); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error allocating txschq"); + goto free_root_node; + } + + if (!(pfvf->netdev->flags & IFF_UP) || + root->level == NIX_TXSCH_LVL_TL1) { + root->schq = new_cfg->schq_list[root->level][0]; + goto out; + } + + /* update the txschq configuration in hw */ + err = otx2_qos_txschq_update_root_cfg(pfvf, 
root, new_cfg); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Error updating txschq configuration"); + goto txschq_free; + } + +out: + WRITE_ONCE(pfvf->qos.defcls, htb_defcls); + /* Pairs with smp_load_acquire() in ndo_select_queue */ + smp_store_release(&pfvf->qos.maj_id, htb_maj_id); + kfree(new_cfg); + return 0; + +txschq_free: + otx2_qos_free_cfg(pfvf, new_cfg); +free_root_node: + kfree(new_cfg); + otx2_qos_sw_node_delete(pfvf, root); + return err; +} + +static int otx2_qos_root_destroy(struct otx2_nic *pfvf) +{ + struct otx2_qos_node *root; + + netdev_dbg(pfvf->netdev, "TC_HTB_DESTROY\n"); + + /* find root node */ + root = otx2_sw_node_find(pfvf, OTX2_QOS_ROOT_CLASSID); + if (!root) + return -ENOENT; + + /* free the hw mappings */ + otx2_qos_destroy_node(pfvf, root); + + return 0; +} + +static int otx2_qos_validate_configuration(struct otx2_qos_node *parent, + struct netlink_ext_ack *extack, + struct otx2_nic *pfvf, + u64 prio) +{ + if (test_bit(prio, parent->prio_bmap)) { + NL_SET_ERR_MSG_MOD(extack, + "Static priority child with same priority exists"); + return -EEXIST; + } + + if (prio == TXSCH_TL1_DFLT_RR_PRIO) { + NL_SET_ERR_MSG_MOD(extack, + "Priority is reserved for Round Robin"); + return -EINVAL; + } + + return 0; +} + +static int otx2_qos_leaf_alloc_queue(struct otx2_nic *pfvf, u16 classid, + u32 parent_classid, u64 rate, u64 ceil, + u64 prio, struct netlink_ext_ack *extack) +{ + struct otx2_qos_cfg *old_cfg, *new_cfg; + struct otx2_qos_node *node, *parent; + int qid, ret, err; + + netdev_dbg(pfvf->netdev, + "TC_HTB_LEAF_ALLOC_QUEUE: classid=0x%x parent_classid=0x%x rate=%lld ceil=%lld prio=%lld\n", + classid, parent_classid, rate, ceil, prio); + + if (prio > OTX2_QOS_MAX_PRIO) { + NL_SET_ERR_MSG_MOD(extack, "Valid priority range 0 to 7"); + ret = -EOPNOTSUPP; + goto out; + } + + /* get parent node */ + parent = otx2_sw_node_find(pfvf, parent_classid); + if (!parent) { + NL_SET_ERR_MSG_MOD(extack, "parent node not found"); + ret = -ENOENT; + goto out; 
+ } + if (parent->level == NIX_TXSCH_LVL_MDQ) { + NL_SET_ERR_MSG_MOD(extack, "HTB qos max levels reached"); + ret = -EOPNOTSUPP; + goto out; + } + + ret = otx2_qos_validate_configuration(parent, extack, pfvf, prio); + if (ret) + goto out; + + set_bit(prio, parent->prio_bmap); + + /* read current txschq configuration */ + old_cfg = kzalloc(sizeof(*old_cfg), GFP_KERNEL); + if (!old_cfg) { + NL_SET_ERR_MSG_MOD(extack, "Memory allocation error"); + ret = -ENOMEM; + goto reset_prio; + } + otx2_qos_read_txschq_cfg(pfvf, parent, old_cfg); + + /* allocate a new sq */ + qid = otx2_qos_get_qid(pfvf); + if (qid < 0) { + NL_SET_ERR_MSG_MOD(extack, "Reached max supported QOS SQ's"); + ret = -ENOMEM; + goto free_old_cfg; + } + + /* Actual SQ mapping will be updated after SMQ alloc */ + pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ; + + /* allocate and initialize a new child node */ + node = otx2_qos_sw_create_leaf_node(pfvf, parent, classid, prio, rate, + ceil, qid); + if (IS_ERR(node)) { + NL_SET_ERR_MSG_MOD(extack, "Unable to allocate leaf node"); + ret = PTR_ERR(node); + goto free_old_cfg; + } + + /* push new txschq config to hw */ + new_cfg = kzalloc(sizeof(*new_cfg), GFP_KERNEL); + if (!new_cfg) { + NL_SET_ERR_MSG_MOD(extack, "Memory allocation error"); + ret = -ENOMEM; + goto free_node; + } + ret = otx2_qos_update_tree(pfvf, node, new_cfg); + if (ret) { + NL_SET_ERR_MSG_MOD(extack, "HTB HW configuration error"); + kfree(new_cfg); + otx2_qos_sw_node_delete(pfvf, node); + /* restore the old qos tree */ + err = otx2_qos_txschq_update_config(pfvf, parent, old_cfg); + if (err) { + netdev_err(pfvf->netdev, + "Failed to restore txcshq configuration"); + goto free_old_cfg; + } + + otx2_qos_update_smq(pfvf, parent, QOS_CFG_SQ); + goto free_old_cfg; + } + + /* update tx_real_queues */ + otx2_qos_update_tx_netdev_queues(pfvf); + + /* free new txschq config */ + kfree(new_cfg); + + /* free old txschq config */ + otx2_qos_free_cfg(pfvf, old_cfg); + kfree(old_cfg); + + return 
pfvf->hw.tx_queues + qid; + +free_node: + otx2_qos_sw_node_delete(pfvf, node); +free_old_cfg: + kfree(old_cfg); +reset_prio: + clear_bit(prio, parent->prio_bmap); +out: + return ret; +} + +static int otx2_qos_leaf_to_inner(struct otx2_nic *pfvf, u16 classid, + u16 child_classid, u64 rate, u64 ceil, u64 prio, + struct netlink_ext_ack *extack) +{ + struct otx2_qos_cfg *old_cfg, *new_cfg; + struct otx2_qos_node *node, *child; + int ret, err; + u16 qid; + + netdev_dbg(pfvf->netdev, + "TC_HTB_LEAF_TO_INNER classid %04x, child %04x, rate %llu, ceil %llu\n", + classid, child_classid, rate, ceil); + + if (prio > OTX2_QOS_MAX_PRIO) { + NL_SET_ERR_MSG_MOD(extack, "Valid priority range 0 to 7"); + ret = -EOPNOTSUPP; + goto out; + } + + /* find node related to classid */ + node = otx2_sw_node_find(pfvf, classid); + if (!node) { + NL_SET_ERR_MSG_MOD(extack, "HTB node not found"); + ret = -ENOENT; + goto out; + } + /* check max qos txschq level */ + if (node->level == NIX_TXSCH_LVL_MDQ) { + NL_SET_ERR_MSG_MOD(extack, "HTB qos level not supported"); + ret = -EOPNOTSUPP; + goto out; + } + + set_bit(prio, node->prio_bmap); + + /* store the qid to assign to leaf node */ + qid = node->qid; + + /* read current txschq configuration */ + old_cfg = kzalloc(sizeof(*old_cfg), GFP_KERNEL); + if (!old_cfg) { + NL_SET_ERR_MSG_MOD(extack, "Memory allocation error"); + ret = -ENOMEM; + goto reset_prio; + } + otx2_qos_read_txschq_cfg(pfvf, node, old_cfg); + + /* delete the txschq nodes allocated for this node */ + otx2_qos_free_sw_node_schq(pfvf, node); + + /* mark this node as htb inner node */ + WRITE_ONCE(node->qid, OTX2_QOS_QID_INNER); + + /* allocate and initialize a new child node */ + child = otx2_qos_sw_create_leaf_node(pfvf, node, child_classid, + prio, rate, ceil, qid); + if (IS_ERR(child)) { + NL_SET_ERR_MSG_MOD(extack, "Unable to allocate leaf node"); + ret = PTR_ERR(child); + goto free_old_cfg; + } + + /* push new txschq config to hw */ + new_cfg = kzalloc(sizeof(*new_cfg), 
GFP_KERNEL); + if (!new_cfg) { + NL_SET_ERR_MSG_MOD(extack, "Memory allocation error"); + ret = -ENOMEM; + goto free_node; + } + ret = otx2_qos_update_tree(pfvf, child, new_cfg); + if (ret) { + NL_SET_ERR_MSG_MOD(extack, "HTB HW configuration error"); + kfree(new_cfg); + otx2_qos_sw_node_delete(pfvf, child); + /* restore the old qos tree */ + WRITE_ONCE(node->qid, qid); + err = otx2_qos_alloc_txschq_node(pfvf, node); + if (err) { + netdev_err(pfvf->netdev, + "Failed to restore old leaf node"); + goto free_old_cfg; + } + err = otx2_qos_txschq_update_config(pfvf, node, old_cfg); + if (err) { + netdev_err(pfvf->netdev, + "Failed to restore txcshq configuration"); + goto free_old_cfg; + } + otx2_qos_update_smq(pfvf, node, QOS_CFG_SQ); + goto free_old_cfg; + } + + /* free new txschq config */ + kfree(new_cfg); + + /* free old txschq config */ + otx2_qos_free_cfg(pfvf, old_cfg); + kfree(old_cfg); + + return 0; + +free_node: + otx2_qos_sw_node_delete(pfvf, child); +free_old_cfg: + kfree(old_cfg); +reset_prio: + clear_bit(prio, node->prio_bmap); +out: + return ret; +} + +static int otx2_qos_leaf_del(struct otx2_nic *pfvf, u16 *classid, + struct netlink_ext_ack *extack) +{ + struct otx2_qos_node *node, *parent; + u64 prio; + u16 qid; + + netdev_dbg(pfvf->netdev, "TC_HTB_LEAF_DEL classid %04x\n", *classid); + + /* find node related to classid */ + node = otx2_sw_node_find(pfvf, *classid); + if (!node) { + NL_SET_ERR_MSG_MOD(extack, "HTB node not found"); + return -ENOENT; + } + parent = node->parent; + prio = node->prio; + qid = node->qid; + + otx2_qos_disable_sq(pfvf, node->qid); + + otx2_qos_destroy_node(pfvf, node); + pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ; + + clear_bit(prio, parent->prio_bmap); + + return 0; +} + +static int otx2_qos_leaf_del_last(struct otx2_nic *pfvf, u16 classid, bool force, + struct netlink_ext_ack *extack) +{ + struct otx2_qos_node *node, *parent; + struct otx2_qos_cfg *new_cfg; + u64 prio; + int err; + u16 qid; + + 
netdev_dbg(pfvf->netdev, + "TC_HTB_LEAF_DEL_LAST classid %04x\n", classid); + + /* find node related to classid */ + node = otx2_sw_node_find(pfvf, classid); + if (!node) { + NL_SET_ERR_MSG_MOD(extack, "HTB node not found"); + return -ENOENT; + } + + /* save qid for use by parent */ + qid = node->qid; + prio = node->prio; + + parent = otx2_sw_node_find(pfvf, node->parent->classid); + if (!parent) { + NL_SET_ERR_MSG_MOD(extack, "parent node not found"); + return -ENOENT; + } + + /* destroy the leaf node */ + otx2_qos_destroy_node(pfvf, node); + pfvf->qos.qid_to_sqmap[qid] = OTX2_QOS_INVALID_SQ; + + clear_bit(prio, parent->prio_bmap); + + /* create downstream txschq entries to parent */ + err = otx2_qos_alloc_txschq_node(pfvf, parent); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "HTB failed to create txsch configuration"); + return err; + } + WRITE_ONCE(parent->qid, qid); + __set_bit(qid, pfvf->qos.qos_sq_bmap); + + /* push new txschq config to hw */ + new_cfg = kzalloc(sizeof(*new_cfg), GFP_KERNEL); + if (!new_cfg) { + NL_SET_ERR_MSG_MOD(extack, "Memory allocation error"); + return -ENOMEM; + } + /* fill txschq cfg and push txschq cfg to hw */ + otx2_qos_fill_cfg_schq(parent, new_cfg); + err = otx2_qos_push_txschq_cfg(pfvf, parent, new_cfg); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "HTB HW configuration error"); + kfree(new_cfg); + return err; + } + kfree(new_cfg); + + /* update tx_real_queues */ + otx2_qos_update_tx_netdev_queues(pfvf); + + return 0; +} + +void otx2_clean_qos_queues(struct otx2_nic *pfvf) +{ + struct otx2_qos_node *root; + + root = otx2_sw_node_find(pfvf, OTX2_QOS_ROOT_CLASSID); + if (!root) + return; + + otx2_qos_update_smq(pfvf, root, QOS_SMQ_FLUSH); +} + +void otx2_qos_config_txschq(struct otx2_nic *pfvf) +{ + struct otx2_qos_node *root; + int err; + + root = otx2_sw_node_find(pfvf, OTX2_QOS_ROOT_CLASSID); + if (!root) + return; + + err = otx2_qos_txschq_config(pfvf, root); + if (err) { + netdev_err(pfvf->netdev, "Error update txschq 
configuration\n"); + goto root_destroy; + } + + err = otx2_qos_txschq_push_cfg_tl(pfvf, root, NULL); + if (err) { + netdev_err(pfvf->netdev, "Error update txschq configuration\n"); + goto root_destroy; + } + + otx2_qos_update_smq(pfvf, root, QOS_CFG_SQ); + return; + +root_destroy: + netdev_err(pfvf->netdev, "Failed to update Scheduler/Shaping config in Hardware\n"); + /* Free resources allocated */ + otx2_qos_root_destroy(pfvf); +} + +int otx2_setup_tc_htb(struct net_device *ndev, struct tc_htb_qopt_offload *htb) +{ + struct otx2_nic *pfvf = netdev_priv(ndev); + int res; + + switch (htb->command) { + case TC_HTB_CREATE: + return otx2_qos_root_add(pfvf, htb->parent_classid, + htb->classid, htb->extack); + case TC_HTB_DESTROY: + return otx2_qos_root_destroy(pfvf); + case TC_HTB_LEAF_ALLOC_QUEUE: + res = otx2_qos_leaf_alloc_queue(pfvf, htb->classid, + htb->parent_classid, + htb->rate, htb->ceil, + htb->prio, htb->extack); + if (res < 0) + return res; + htb->qid = res; + return 0; + case TC_HTB_LEAF_TO_INNER: + return otx2_qos_leaf_to_inner(pfvf, htb->parent_classid, + htb->classid, htb->rate, + htb->ceil, htb->prio, + htb->extack); + case TC_HTB_LEAF_DEL: + return otx2_qos_leaf_del(pfvf, &htb->classid, htb->extack); + case TC_HTB_LEAF_DEL_LAST: + case TC_HTB_LEAF_DEL_LAST_FORCE: + return otx2_qos_leaf_del_last(pfvf, htb->classid, + htb->command == TC_HTB_LEAF_DEL_LAST_FORCE, + htb->extack); + case TC_HTB_LEAF_QUERY_QUEUE: + res = otx2_get_txq_by_classid(pfvf, htb->classid); + htb->qid = res; + return 0; + case TC_HTB_NODE_MODIFY: + fallthrough; + default: + return -EOPNOTSUPP; + } +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos.h b/drivers/net/ethernet/marvell/octeontx2/nic/qos.h index 73a62d092e99..19773284be27 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/qos.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos.h @@ -7,13 +7,63 @@ #ifndef OTX2_QOS_H #define OTX2_QOS_H +#include +#include +#include + +#define OTX2_QOS_MAX_LVL 4 +#define 
OTX2_QOS_MAX_PRIO 7 #define OTX2_QOS_MAX_LEAF_NODES 16 -int otx2_qos_enable_sq(struct otx2_nic *pfvf, int qidx, u16 smq); -void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx, u16 mdq); +enum qos_smq_operations { + QOS_CFG_SQ, + QOS_SMQ_FLUSH, +}; + +u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic, u64 maxrate, u32 burst); + +int otx2_setup_tc_htb(struct net_device *ndev, struct tc_htb_qopt_offload *htb); +int otx2_qos_get_qid(struct otx2_nic *pfvf); +void otx2_qos_free_qid(struct otx2_nic *pfvf, int qidx); +int otx2_qos_enable_sq(struct otx2_nic *pfvf, int qidx); +void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx); + +struct otx2_qos_cfg { + u16 schq[NIX_TXSCH_LVL_CNT]; + u16 schq_contig[NIX_TXSCH_LVL_CNT]; + int static_node_pos[NIX_TXSCH_LVL_CNT]; + int dwrr_node_pos[NIX_TXSCH_LVL_CNT]; + u16 schq_contig_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC]; + u16 schq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC]; +}; struct otx2_qos { - u16 qid_to_sqmap[OTX2_QOS_MAX_LEAF_NODES]; - }; + DECLARE_HASHTABLE(qos_hlist, order_base_2(OTX2_QOS_MAX_LEAF_NODES)); + struct mutex qos_lock; /* child list lock */ + u16 qid_to_sqmap[OTX2_QOS_MAX_LEAF_NODES]; + struct list_head qos_tree; + DECLARE_BITMAP(qos_sq_bmap, OTX2_QOS_MAX_LEAF_NODES); + u16 maj_id; + u16 defcls; + u8 link_cfg_lvl; /* LINKX_CFG CSRs mapped to TL3 or TL2's index ? 
*/ +}; + +struct otx2_qos_node { + struct list_head list; /* list management */ + struct list_head child_list; + struct list_head child_schq_list; + struct hlist_node hlist; + DECLARE_BITMAP(prio_bmap, OTX2_QOS_MAX_PRIO + 1); + struct otx2_qos_node *parent; /* parent qos node */ + u64 rate; /* htb params */ + u64 ceil; + u32 classid; + u32 prio; + u16 schq; /* hw txschq */ + u16 qid; + u16 prio_anchor; + u8 level; +}; + #endif diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c index e142d43f5a62..d96ed29c1567 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c @@ -217,7 +217,22 @@ static int otx2_qos_ctx_disable(struct otx2_nic *pfvf, u16 qidx, int aura_id) return otx2_sync_mbox_msg(&pfvf->mbox); } -int otx2_qos_enable_sq(struct otx2_nic *pfvf, int qidx, u16 smq) +int otx2_qos_get_qid(struct otx2_nic *pfvf) +{ + int qidx; + + qidx = find_first_zero_bit(pfvf->qos.qos_sq_bmap, + pfvf->hw.tc_tx_queues); + + return qidx == pfvf->hw.tc_tx_queues ? 
-ENOSPC : qidx; +} + +void otx2_qos_free_qid(struct otx2_nic *pfvf, int qidx) +{ + clear_bit(qidx, pfvf->qos.qos_sq_bmap); +} + +int otx2_qos_enable_sq(struct otx2_nic *pfvf, int qidx) { struct otx2_hw *hw = &pfvf->hw; int pool_id, sq_idx, err; @@ -233,7 +248,6 @@ int otx2_qos_enable_sq(struct otx2_nic *pfvf, int qidx, u16 smq) goto out; pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, sq_idx); - pfvf->qos.qid_to_sqmap[qidx] = smq; err = otx2_sq_init(pfvf, sq_idx, pool_id); if (err) goto out; @@ -242,7 +256,7 @@ out: return err; } -void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx, u16 mdq) +void otx2_qos_disable_sq(struct otx2_nic *pfvf, int qidx) { struct otx2_qset *qset = &pfvf->qset; struct otx2_hw *hw = &pfvf->hw; -- cgit v1.2.3 From 6cebb6a4b114783af5f4747ffe3ec87d94eccf52 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Sat, 13 May 2023 14:21:42 +0530 Subject: octeontx2-pf: ethtool expose qos stats This patch extends ethtool stats support for QoS send queues as well. On a request to change the number of transmit channels, it ensures that the real number of transmit queues equals the number of active QoS send queues plus the configured transmit queues. ethtool -S eth0 txq_qos0: bytes: 3021391800 txq_qos0: frames: 1998275 txq_qos1: bytes: 4619766312 txq_qos1: frames: 3055401 ... ... Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. 
Miller --- .../ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 29 +++++++++++++++------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 0f8d1a69139f..c47d91da32dc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -92,10 +92,16 @@ static void otx2_get_qset_strings(struct otx2_nic *pfvf, u8 **data, int qset) *data += ETH_GSTRING_LEN; } } - for (qidx = 0; qidx < pfvf->hw.tx_queues; qidx++) { + + for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) { for (stats = 0; stats < otx2_n_queue_stats; stats++) { - sprintf(*data, "txq%d: %s", qidx + start_qidx, - otx2_queue_stats[stats].name); + if (qidx >= pfvf->hw.non_qos_queues) + sprintf(*data, "txq_qos%d: %s", + qidx + start_qidx - pfvf->hw.non_qos_queues, + otx2_queue_stats[stats].name); + else + sprintf(*data, "txq%d: %s", qidx + start_qidx, + otx2_queue_stats[stats].name); *data += ETH_GSTRING_LEN; } } @@ -159,7 +165,7 @@ static void otx2_get_qset_stats(struct otx2_nic *pfvf, [otx2_queue_stats[stat].index]; } - for (qidx = 0; qidx < pfvf->hw.tx_queues; qidx++) { + for (qidx = 0; qidx < otx2_get_total_tx_queues(pfvf); qidx++) { if (!otx2_update_sq_stats(pfvf, qidx)) { for (stat = 0; stat < otx2_n_queue_stats; stat++) *((*data)++) = 0; @@ -254,7 +260,7 @@ static int otx2_get_sset_count(struct net_device *netdev, int sset) return -EINVAL; qstats_count = otx2_n_queue_stats * - (pfvf->hw.rx_queues + pfvf->hw.tx_queues); + (pfvf->hw.rx_queues + otx2_get_total_tx_queues(pfvf)); if (!test_bit(CN10K_RPM, &pfvf->hw.cap_flag)) mac_stats = CGX_RX_STATS_COUNT + CGX_TX_STATS_COUNT; otx2_update_lmac_fec_stats(pfvf); @@ -282,7 +288,7 @@ static int otx2_set_channels(struct net_device *dev, { struct otx2_nic *pfvf = netdev_priv(dev); bool if_up = netif_running(dev); - 
int err = 0; + int err, qos_txqs; if (!channel->rx_count || !channel->tx_count) return -EINVAL; @@ -296,14 +302,19 @@ static int otx2_set_channels(struct net_device *dev, if (if_up) dev->netdev_ops->ndo_stop(dev); - err = otx2_set_real_num_queues(dev, channel->tx_count, + qos_txqs = bitmap_weight(pfvf->qos.qos_sq_bmap, + OTX2_QOS_MAX_LEAF_NODES); + + err = otx2_set_real_num_queues(dev, channel->tx_count + qos_txqs, channel->rx_count); if (err) return err; pfvf->hw.rx_queues = channel->rx_count; pfvf->hw.tx_queues = channel->tx_count; - pfvf->qset.cq_cnt = pfvf->hw.tx_queues + pfvf->hw.rx_queues; + if (pfvf->xdp_prog) + pfvf->hw.xdp_queues = channel->rx_count; + pfvf->hw.non_qos_queues = pfvf->hw.tx_queues + pfvf->hw.xdp_queues; if (if_up) err = dev->netdev_ops->ndo_open(dev); @@ -1405,7 +1416,7 @@ static int otx2vf_get_sset_count(struct net_device *netdev, int sset) return -EINVAL; qstats_count = otx2_n_queue_stats * - (vf->hw.rx_queues + vf->hw.tx_queues); + (vf->hw.rx_queues + otx2_get_total_tx_queues(vf)); return otx2_n_dev_stats + otx2_n_drv_stats + qstats_count + 1; } -- cgit v1.2.3 From c515a4443cb8c8802751223b13855d1575db8cad Mon Sep 17 00:00:00 2001 From: Anup Sharma Date: Sat, 13 May 2023 16:36:54 +0530 Subject: net: ethernet: microchip: vcap: Remove extra semicolon Remove the extra semicolon at end. Issue identified using semicolon.cocci Coccinelle semantic patch. drivers/net/ethernet/microchip/vcap/vcap_api.c:1124:3-4: Unneeded semicolon drivers/net/ethernet/microchip/vcap/vcap_api.c:1165:3-4: Unneeded semicolon drivers/net/ethernet/microchip/vcap/vcap_api.c:1239:3-4: Unneeded semicolon drivers/net/ethernet/microchip/vcap/vcap_api.c:1287:3-4: Unneeded semicolon Signed-off-by: Anup Sharma Changes: V1 -> V2: Target tree included in the subject line. Signed-off-by: David S. 
Miller --- drivers/net/ethernet/microchip/vcap/vcap_api.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.c b/drivers/net/ethernet/microchip/vcap/vcap_api.c index 5675b0962bc3..a418ad8e8770 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api.c @@ -1121,7 +1121,7 @@ static void vcap_copy_to_client_actionfield(struct vcap_rule_internal *ri, vcap_copy_from_w32be(field->data.u128.value, value, field_size, width); break; - }; + } } else { switch (field->ctrl.type) { case VCAP_FIELD_BIT: @@ -1162,7 +1162,7 @@ static void vcap_copy_to_client_actionfield(struct vcap_rule_internal *ri, value, width, field_size); break; - }; + } } } @@ -1236,7 +1236,7 @@ static void vcap_copy_to_client_keyfield(struct vcap_rule_internal *ri, vcap_copy_from_w32be(field->data.u128.mask, mask, field_size, width); break; - }; + } } else { switch (field->ctrl.type) { case VCAP_FIELD_BIT: @@ -1284,7 +1284,7 @@ static void vcap_copy_to_client_keyfield(struct vcap_rule_internal *ri, value, mask, width, field_size); break; - }; + } } } -- cgit v1.2.3 From d1e4632b304c594d6f0d4cb7581350e5a6fc33b7 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Mon, 15 May 2023 16:56:45 +0800 Subject: octeontx2-pf: mcs: Remove unneeded semicolon ./drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c:242:2-3: Unneeded semicolon ./drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c:476:2-3: Unneeded semicolon Reported-by: Abaci Robot Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4947 Signed-off-by: Yang Li Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index 8eaa50d0f668..b59532cf53ce 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -239,7 +239,7 @@ static int cn10k_mcs_write_rx_secy(struct otx2_nic *pfvf, cipher = MCS_GCM_AES_128; dev_warn(pfvf->dev, "Unsupported key length\n"); break; - }; + } policy |= FIELD_PREP(MCS_RX_SECY_PLCY_CIP, cipher); policy |= FIELD_PREP(MCS_RX_SECY_PLCY_VAL, secy->validate_frames); @@ -473,7 +473,7 @@ static int cn10k_mcs_write_tx_secy(struct otx2_nic *pfvf, cipher = MCS_GCM_AES_128; dev_warn(pfvf->dev, "Unsupported key length\n"); break; - }; + } policy |= FIELD_PREP(MCS_TX_SECY_PLCY_CIP, cipher); -- cgit v1.2.3 From dc3eb2f4ec09e90b77ee8c28f73912b022462e8e Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Mon, 15 May 2023 13:07:12 +0700 Subject: net: ethernet: 8390: Convert unversioned GPL notice to SPDX license identifier Replace boilerplate notice for unversioned GPL to SPDX tag for GPL 1.0+. For ne2k-pci.c, only add SPDX tag and keep the boilerplate instead, since the boilerplate notes that it must be preserved. Cc: David A. 
Hinds Cc: Donald Becker Cc: Alan Cox Cc: Geert Uytterhoeven Cc: Richard Fontana Signed-off-by: Bagas Sanjaya Signed-off-by: Paolo Abeni --- drivers/net/ethernet/8390/8390.h | 2 ++ drivers/net/ethernet/8390/axnet_cs.c | 6 +++--- drivers/net/ethernet/8390/lib8390.c | 5 ++--- drivers/net/ethernet/8390/mac8390.c | 6 ++---- drivers/net/ethernet/8390/ne.c | 4 +--- drivers/net/ethernet/8390/ne2k-pci.c | 1 + drivers/net/ethernet/8390/pcnet_cs.c | 5 ++--- drivers/net/ethernet/8390/smc-ultra.c | 4 +--- drivers/net/ethernet/8390/wd.c | 4 +--- 9 files changed, 15 insertions(+), 22 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/8390/8390.h b/drivers/net/ethernet/8390/8390.h index e52264465998..f784a6e2ab0e 100644 --- a/drivers/net/ethernet/8390/8390.h +++ b/drivers/net/ethernet/8390/8390.h @@ -1,3 +1,5 @@ +/* SPDX-License-Identifier: GPL-1.0+ */ + /* Generic NS8390 register definitions. */ /* This file is part of Donald Becker's 8390 drivers, and is distributed diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 78f985885547..fea489af72fb 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: GPL-1.0+ + /*====================================================================== A PCMCIA ethernet driver for Asix AX88190-based cards @@ -17,9 +19,7 @@ Written 1992,1993 by Donald Becker. Copyright 1993 United States Government as represented by the - Director, National Security Agency. This software may be used and - distributed according to the terms of the GNU General Public License, - incorporated herein by reference. + Director, National Security Agency. 
Donald Becker may be reached at becker@scyld.com ======================================================================*/ diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c index e84021282edf..84aeb8054304 100644 --- a/drivers/net/ethernet/8390/lib8390.c +++ b/drivers/net/ethernet/8390/lib8390.c @@ -1,3 +1,5 @@ +// SPDX-License-Identifier: GPL-1.0+ + /* 8390.c: A general NS8390 ethernet driver core for linux. */ /* Written 1992-94 by Donald Becker. @@ -5,9 +7,6 @@ Copyright 1993 United States Government as represented by the Director, National Security Agency. - This software may be used and distributed according to the terms - of the GNU General Public License, incorporated herein by reference. - The author may be reached as becker@scyld.com, or C/O Scyld Computing Corporation 410 Severn Ave., Suite 210 diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c index 7fb819b9b89a..4a0a095a1a8a 100644 --- a/drivers/net/ethernet/8390/mac8390.c +++ b/drivers/net/ethernet/8390/mac8390.c @@ -1,11 +1,9 @@ +// SPDX-License-Identifier: GPL-1.0+ /* mac8390.c: New driver for 8390-based Nubus (or Nubus-alike) Ethernet cards on Linux */ /* Based on the former daynaport.c driver, by Alan Cox. Some code taken from or inspired by skeleton.c by Donald Becker, acenic.c by - Jes Sorensen, and ne2k-pci.c by Donald Becker and Paul Gortmaker. - - This software may be used and distributed according to the terms of - the GNU Public License, incorporated herein by reference. */ + Jes Sorensen, and ne2k-pci.c by Donald Becker and Paul Gortmaker. 
*/ /* 2000-02-28: support added for Dayna and Kinetics cards by A.G.deWijn@phys.uu.nl */ diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c index 0a9118b8be0c..cb04a3071f92 100644 --- a/drivers/net/ethernet/8390/ne.c +++ b/drivers/net/ethernet/8390/ne.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* ne.c: A general non-shared-memory NS8390 ethernet driver for linux. */ /* Written 1992-94 by Donald Becker. @@ -5,9 +6,6 @@ Copyright 1993 United States Government as represented by the Director, National Security Agency. - This software may be used and distributed according to the terms - of the GNU General Public License, incorporated herein by reference. - The author may be reached as becker@scyld.com, or C/O Scyld Computing Corporation, 410 Severn Ave., Suite 210, Annapolis MD 21403 diff --git a/drivers/net/ethernet/8390/ne2k-pci.c b/drivers/net/ethernet/8390/ne2k-pci.c index 6a0a2039600a..2c6bd36d2f31 100644 --- a/drivers/net/ethernet/8390/ne2k-pci.c +++ b/drivers/net/ethernet/8390/ne2k-pci.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* A Linux device driver for PCI NE2000 clones. * * Authors and other copyright holders: diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index 0f07fe03da98..9bd5e991f1e5 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /*====================================================================== A PCMCIA ethernet driver for NS8390-based cards @@ -17,9 +18,7 @@ Written 1992,1993 by Donald Becker. Copyright 1993 United States Government as represented by the - Director, National Security Agency. This software may be used and - distributed according to the terms of the GNU General Public License, - incorporated herein by reference. + Director, National Security Agency. 
Donald Becker may be reached at becker@scyld.com Based also on Keith Moore's changes to Don Becker's code, for IBM diff --git a/drivers/net/ethernet/8390/smc-ultra.c b/drivers/net/ethernet/8390/smc-ultra.c index 6e62c37c9400..ae10b7de41e8 100644 --- a/drivers/net/ethernet/8390/smc-ultra.c +++ b/drivers/net/ethernet/8390/smc-ultra.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* smc-ultra.c: A SMC Ultra ethernet driver for linux. */ /* This is a driver for the SMC Ultra and SMC EtherEZ ISA ethercards. @@ -7,9 +8,6 @@ Copyright 1993 United States Government as represented by the Director, National Security Agency. - This software may be used and distributed according to the terms - of the GNU General Public License, incorporated herein by reference. - The author may be reached as becker@scyld.com, or C/O Scyld Computing Corporation 410 Severn Ave., Suite 210 diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c index 5b00c452bede..9a36667d00b6 100644 --- a/drivers/net/ethernet/8390/wd.c +++ b/drivers/net/ethernet/8390/wd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* wd.c: A WD80x3 ethernet driver for linux. */ /* Written 1993-94 by Donald Becker. @@ -5,9 +6,6 @@ Copyright 1993 United States Government as represented by the Director, National Security Agency. - This software may be used and distributed according to the terms - of the GNU General Public License, incorporated herein by reference. - The author may be reached as becker@scyld.com, or C/O Scyld Computing Corporation 410 Severn Ave., Suite 210 -- cgit v1.2.3 From 9f07af05d0e4255ae9da2dfe240484a73433c5e4 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Mon, 15 May 2023 13:07:13 +0700 Subject: net: ethernet: 8390: Replace GPL 2.0 boilerplate with SPDX identifier The boilerplate refers to COPYING in the top-level directory of kernel tree. Replace it with corresponding SPDX license identifier. 
Cc: Donald Becker Cc: Peter De Schrijver Cc: Topi Kanerva Cc: Alain Malek Cc: Bruce Abbott Cc: Geert Uytterhoeven Cc: Richard Fontana Acked-by: Greg Ungerer Signed-off-by: Bagas Sanjaya Signed-off-by: Paolo Abeni --- drivers/net/ethernet/8390/apne.c | 7 +------ drivers/net/ethernet/8390/hydra.c | 6 ++---- drivers/net/ethernet/8390/mcf8390.c | 4 +--- drivers/net/ethernet/8390/stnic.c | 5 +---- drivers/net/ethernet/8390/zorro8390.c | 7 +------ 5 files changed, 6 insertions(+), 23 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/8390/apne.c b/drivers/net/ethernet/8390/apne.c index 991ad953aa79..a09f383dd249 100644 --- a/drivers/net/ethernet/8390/apne.c +++ b/drivers/net/ethernet/8390/apne.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Amiga Linux/68k 8390 based PCMCIA Ethernet Driver for the Amiga 1200 * @@ -19,12 +20,6 @@ * * ---------------------------------------------------------------------------- * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. - * - * ---------------------------------------------------------------------------- - * */ diff --git a/drivers/net/ethernet/8390/hydra.c b/drivers/net/ethernet/8390/hydra.c index 1df7601af86a..24f49a8ff903 100644 --- a/drivers/net/ethernet/8390/hydra.c +++ b/drivers/net/ethernet/8390/hydra.c @@ -1,10 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only + /* New Hydra driver using generic 8390 core */ /* Based on old hydra driver by Topi Kanerva (topi@susanna.oulu.fi) */ -/* This file is subject to the terms and conditions of the GNU General */ -/* Public License. See the file COPYING in the main directory of the */ -/* Linux distribution for more details. 
*/ - /* Peter De Schrijver (p2@mind.be) */ /* Oldenburg 2000 */ diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c index 8a7918d33419..217838b28220 100644 --- a/drivers/net/ethernet/8390/mcf8390.c +++ b/drivers/net/ethernet/8390/mcf8390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Support for ColdFire CPU based boards using a NS8390 Ethernet device. * @@ -5,9 +6,6 @@ * * (C) Copyright 2012, Greg Ungerer * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. */ #include diff --git a/drivers/net/ethernet/8390/stnic.c b/drivers/net/ethernet/8390/stnic.c index bd89ca8a92df..265976e3b64a 100644 --- a/drivers/net/ethernet/8390/stnic.c +++ b/drivers/net/ethernet/8390/stnic.c @@ -1,8 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-only /* stnic.c : A SH7750 specific part of driver for NS DP83902A ST-NIC. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. * * Copyright (C) 1999 kaz Kojima */ diff --git a/drivers/net/ethernet/8390/zorro8390.c b/drivers/net/ethernet/8390/zorro8390.c index e8b4fe813a08..d70390e9d03d 100644 --- a/drivers/net/ethernet/8390/zorro8390.c +++ b/drivers/net/ethernet/8390/zorro8390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Amiga Linux/m68k and Linux/PPC Zorro NS8390 Ethernet Driver * @@ -9,12 +10,6 @@ * * --------------------------------------------------------------------------- * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file COPYING in the main directory of the Linux - * distribution for more details. 
- * - * --------------------------------------------------------------------------- - * * The Ariadne II and X-Surf are Zorro-II boards containing Realtek RTL8019AS * Ethernet Controllers. */ -- cgit v1.2.3 From 9ac40d080befb4a0501e42ce31bdaa596f2b0f05 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Mon, 15 May 2023 13:07:14 +0700 Subject: net: ethernet: i825xx: Replace unversioned GPL (GPL 1.0) notice with SPDX identifier Replace unversioned GPL boilerplate notice with corresponding SPDX license identifier, which is GPL 1.0+. Cc: Donald Becker Cc: Richard Hirst Cc: Sam Creasey Signed-off-by: Bagas Sanjaya Signed-off-by: Paolo Abeni --- drivers/net/ethernet/i825xx/82596.c | 5 ++--- drivers/net/ethernet/i825xx/lasi_82596.c | 5 ++--- drivers/net/ethernet/i825xx/lib82596.c | 5 ++--- 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index 3ee89ae496d0..773d7aa29ef5 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* 82596.c: A generic 82596 ethernet driver for linux. */ /* Based on Apricot.c @@ -31,9 +32,7 @@ Driver skeleton Written 1993 by Donald Becker. Copyright 1993 United States Government as represented by the Director, - National Security Agency. This software may only be used and distributed - according to the terms of the GNU General Public License as modified by SRC, - incorporated herein by reference. + National Security Agency. 
The author may be reached as becker@scyld.com, or C/O Scyld Computing Corporation, 410 Severn Ave., Suite 210, Annapolis MD 21403 diff --git a/drivers/net/ethernet/i825xx/lasi_82596.c b/drivers/net/ethernet/i825xx/lasi_82596.c index 0af70094aba3..3e53e0c243ba 100644 --- a/drivers/net/ethernet/i825xx/lasi_82596.c +++ b/drivers/net/ethernet/i825xx/lasi_82596.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* lasi_82596.c -- driver for the intel 82596 ethernet controller, as munged into HPPA boxen . @@ -59,9 +60,7 @@ Driver skeleton Written 1993 by Donald Becker. Copyright 1993 United States Government as represented by the Director, - National Security Agency. This software may only be used and distributed - according to the terms of the GNU General Public License as modified by SRC, - incorporated herein by reference. + National Security Agency. The author may be reached as becker@scyld.com, or C/O Scyld Computing Corporation, 410 Severn Ave., Suite 210, Annapolis MD 21403 diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index ca2fb303fcc6..67d248a7a6f4 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* lasi_82596.c -- driver for the intel 82596 ethernet controller, as munged into HPPA boxen . @@ -59,9 +60,7 @@ Driver skeleton Written 1993 by Donald Becker. Copyright 1993 United States Government as represented by the Director, - National Security Agency. This software may only be used and distributed - according to the terms of the GNU General Public License as modified by SRC, - incorporated herein by reference. + National Security Agency. 
The author may be reached as becker@scyld.com, or C/O Scyld Computing Corporation, 410 Severn Ave., Suite 210, Annapolis MD 21403 -- cgit v1.2.3 From 4f693a8f5617f23aeedf0d6a91a516901eccf88b Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Mon, 15 May 2023 13:07:15 +0700 Subject: net: ethernet: i825xx: sun3_82586: Add SPDX license identifier The boilerplate reads that sun3_82586 driver is an extension to Linux kernel core, hence add SPDX license identifier for GPL 2.0. Cc: Greg Kroah-Hartman Cc: Michael Hipp Cc: Sam Creasey Signed-off-by: Bagas Sanjaya Signed-off-by: Paolo Abeni --- drivers/net/ethernet/i825xx/sun3_82586.c | 1 + drivers/net/ethernet/i825xx/sun3_82586.h | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c index 3909c6a0af89..5e27470c6b1e 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.c +++ b/drivers/net/ethernet/i825xx/sun3_82586.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Sun3 i82586 Ethernet driver * diff --git a/drivers/net/ethernet/i825xx/sun3_82586.h b/drivers/net/ethernet/i825xx/sun3_82586.h index d82eca563266..d8e249d704a7 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.h +++ b/drivers/net/ethernet/i825xx/sun3_82586.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Intel i82586 Ethernet definitions * -- cgit v1.2.3 From 38e97a98e371c615f67d144ee9e4f7087c0a41e8 Mon Sep 17 00:00:00 2001 From: Piotr Raczynski Date: Mon, 15 May 2023 21:03:12 +0200 Subject: ice: move interrupt related code to separate file Keep interrupt handling code in a dedicated file. This helps keep driver structured better and prepares for more functionality added to this file.
Reviewed-by: Jacob Keller Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Piotr Raczynski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/Makefile | 1 + drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_irq.c | 226 ++++++++++++++++++++++++++++++ drivers/net/ethernet/intel/ice/ice_irq.h | 10 ++ drivers/net/ethernet/intel/ice/ice_main.c | 218 ---------------------------- 5 files changed, 238 insertions(+), 218 deletions(-) create mode 100644 drivers/net/ethernet/intel/ice/ice_irq.c create mode 100644 drivers/net/ethernet/intel/ice/ice_irq.h (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 5d89392f969b..817977e3039d 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -18,6 +18,7 @@ ice-y := ice_main.o \ ice_txrx_lib.o \ ice_txrx.o \ ice_fltr.o \ + ice_irq.o \ ice_pf_vsi_vlan_ops.o \ ice_vsi_vlan_ops.o \ ice_vsi_vlan_lib.o \ diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index aa32111afd6e..4dc4c6bec081 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -74,6 +74,7 @@ #include "ice_lag.h" #include "ice_vsi_vlan_ops.h" #include "ice_gnss.h" +#include "ice_irq.h" #define ICE_BAR0 0 #define ICE_REQ_DESC_MULTIPLE 32 diff --git a/drivers/net/ethernet/intel/ice/ice_irq.c b/drivers/net/ethernet/intel/ice/ice_irq.c new file mode 100644 index 000000000000..1fc7daec9732 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_irq.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023, Intel Corporation. 
*/ + +#include "ice.h" +#include "ice_lib.h" +#include "ice_irq.h" + +/** + * ice_reduce_msix_usage - Reduce usage of MSI-X vectors + * @pf: board private structure + * @v_remain: number of remaining MSI-X vectors to be distributed + * + * Reduce the usage of MSI-X vectors when entire request cannot be fulfilled. + * pf->num_lan_msix and pf->num_rdma_msix values are set based on number of + * remaining vectors. + */ +static void ice_reduce_msix_usage(struct ice_pf *pf, int v_remain) +{ + int v_rdma; + + if (!ice_is_rdma_ena(pf)) { + pf->num_lan_msix = v_remain; + return; + } + + /* RDMA needs at least 1 interrupt in addition to AEQ MSIX */ + v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1; + + if (v_remain < ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_RDMA_MSIX) { + dev_warn(ice_pf_to_dev(pf), "Not enough MSI-X vectors to support RDMA.\n"); + clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); + + pf->num_rdma_msix = 0; + pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; + } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) || + (v_remain - v_rdma < v_rdma)) { + /* Support minimum RDMA and give remaining vectors to LAN MSIX + */ + pf->num_rdma_msix = ICE_MIN_RDMA_MSIX; + pf->num_lan_msix = v_remain - ICE_MIN_RDMA_MSIX; + } else { + /* Split remaining MSIX with RDMA after accounting for AEQ MSIX + */ + pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 + + ICE_RDMA_NUM_AEQ_MSIX; + pf->num_lan_msix = v_remain - pf->num_rdma_msix; + } +} + +/** + * ice_ena_msix_range - Request a range of MSIX vectors from the OS + * @pf: board private structure + * + * Compute the number of MSIX vectors wanted and request from the OS. Adjust + * device usage if there are not enough vectors. Return the number of vectors + * reserved or negative on failure. 
+ */ +static int ice_ena_msix_range(struct ice_pf *pf) +{ + int num_cpus, hw_num_msix, v_other, v_wanted, v_actual; + struct device *dev = ice_pf_to_dev(pf); + int err, i; + + hw_num_msix = pf->hw.func_caps.common_cap.num_msix_vectors; + num_cpus = num_online_cpus(); + + /* LAN miscellaneous handler */ + v_other = ICE_MIN_LAN_OICR_MSIX; + + /* Flow Director */ + if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) + v_other += ICE_FDIR_MSIX; + + /* switchdev */ + v_other += ICE_ESWITCH_MSIX; + + v_wanted = v_other; + + /* LAN traffic */ + pf->num_lan_msix = num_cpus; + v_wanted += pf->num_lan_msix; + + /* RDMA auxiliary driver */ + if (ice_is_rdma_ena(pf)) { + pf->num_rdma_msix = num_cpus + ICE_RDMA_NUM_AEQ_MSIX; + v_wanted += pf->num_rdma_msix; + } + + if (v_wanted > hw_num_msix) { + int v_remain; + + dev_warn(dev, "not enough device MSI-X vectors. wanted = %d, available = %d\n", + v_wanted, hw_num_msix); + + if (hw_num_msix < ICE_MIN_MSIX) { + err = -ERANGE; + goto exit_err; + } + + v_remain = hw_num_msix - v_other; + if (v_remain < ICE_MIN_LAN_TXRX_MSIX) { + v_other = ICE_MIN_MSIX - ICE_MIN_LAN_TXRX_MSIX; + v_remain = ICE_MIN_LAN_TXRX_MSIX; + } + + ice_reduce_msix_usage(pf, v_remain); + v_wanted = pf->num_lan_msix + pf->num_rdma_msix + v_other; + + dev_notice(dev, "Reducing request to %d MSI-X vectors for LAN traffic.\n", + pf->num_lan_msix); + if (ice_is_rdma_ena(pf)) + dev_notice(dev, "Reducing request to %d MSI-X vectors for RDMA.\n", + pf->num_rdma_msix); + } + + pf->msix_entries = devm_kcalloc(dev, v_wanted, + sizeof(*pf->msix_entries), GFP_KERNEL); + if (!pf->msix_entries) { + err = -ENOMEM; + goto exit_err; + } + + for (i = 0; i < v_wanted; i++) + pf->msix_entries[i].entry = i; + + /* actually reserve the vectors */ + v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries, + ICE_MIN_MSIX, v_wanted); + if (v_actual < 0) { + dev_err(dev, "unable to reserve MSI-X vectors\n"); + err = v_actual; + goto msix_err; + } + + if (v_actual < v_wanted) { + dev_warn(dev, 
"not enough OS MSI-X vectors. requested = %d, obtained = %d\n", + v_wanted, v_actual); + + if (v_actual < ICE_MIN_MSIX) { + /* error if we can't get minimum vectors */ + pci_disable_msix(pf->pdev); + err = -ERANGE; + goto msix_err; + } else { + int v_remain = v_actual - v_other; + + if (v_remain < ICE_MIN_LAN_TXRX_MSIX) + v_remain = ICE_MIN_LAN_TXRX_MSIX; + + ice_reduce_msix_usage(pf, v_remain); + + dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n", + pf->num_lan_msix); + + if (ice_is_rdma_ena(pf)) + dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n", + pf->num_rdma_msix); + } + } + + return v_actual; + +msix_err: + devm_kfree(dev, pf->msix_entries); + +exit_err: + pf->num_rdma_msix = 0; + pf->num_lan_msix = 0; + return err; +} + +/** + * ice_dis_msix - Disable MSI-X interrupt setup in OS + * @pf: board private structure + */ +static void ice_dis_msix(struct ice_pf *pf) +{ + pci_disable_msix(pf->pdev); + devm_kfree(ice_pf_to_dev(pf), pf->msix_entries); + pf->msix_entries = NULL; +} + +/** + * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme + * @pf: board private structure + */ +void ice_clear_interrupt_scheme(struct ice_pf *pf) +{ + ice_dis_msix(pf); + + if (pf->irq_tracker) { + devm_kfree(ice_pf_to_dev(pf), pf->irq_tracker); + pf->irq_tracker = NULL; + } +} + +/** + * ice_init_interrupt_scheme - Determine proper interrupt scheme + * @pf: board private structure to initialize + */ +int ice_init_interrupt_scheme(struct ice_pf *pf) +{ + int vectors; + + vectors = ice_ena_msix_range(pf); + + if (vectors < 0) + return vectors; + + /* set up vector assignment tracking */ + pf->irq_tracker = devm_kzalloc(ice_pf_to_dev(pf), + struct_size(pf->irq_tracker, list, + vectors), + GFP_KERNEL); + if (!pf->irq_tracker) { + ice_dis_msix(pf); + return -ENOMEM; + } + + /* populate SW interrupts pool with number of OS granted IRQs. 
*/ + pf->num_avail_sw_msix = (u16)vectors; + pf->irq_tracker->num_entries = (u16)vectors; + pf->irq_tracker->end = pf->irq_tracker->num_entries; + + return 0; +} diff --git a/drivers/net/ethernet/intel/ice/ice_irq.h b/drivers/net/ethernet/intel/ice/ice_irq.h new file mode 100644 index 000000000000..82475162ab70 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_irq.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2023, Intel Corporation. */ + +#ifndef _ICE_IRQ_H_ +#define _ICE_IRQ_H_ + +int ice_init_interrupt_scheme(struct ice_pf *pf); +void ice_clear_interrupt_scheme(struct ice_pf *pf); + +#endif diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index a1f7c8edc22f..c377bacc5e2e 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3900,224 +3900,6 @@ static int ice_init_pf(struct ice_pf *pf) return 0; } -/** - * ice_reduce_msix_usage - Reduce usage of MSI-X vectors - * @pf: board private structure - * @v_remain: number of remaining MSI-X vectors to be distributed - * - * Reduce the usage of MSI-X vectors when entire request cannot be fulfilled. - * pf->num_lan_msix and pf->num_rdma_msix values are set based on number of - * remaining vectors. 
- */ -static void ice_reduce_msix_usage(struct ice_pf *pf, int v_remain) -{ - int v_rdma; - - if (!ice_is_rdma_ena(pf)) { - pf->num_lan_msix = v_remain; - return; - } - - /* RDMA needs at least 1 interrupt in addition to AEQ MSIX */ - v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1; - - if (v_remain < ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_RDMA_MSIX) { - dev_warn(ice_pf_to_dev(pf), "Not enough MSI-X vectors to support RDMA.\n"); - clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); - - pf->num_rdma_msix = 0; - pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; - } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) || - (v_remain - v_rdma < v_rdma)) { - /* Support minimum RDMA and give remaining vectors to LAN MSIX */ - pf->num_rdma_msix = ICE_MIN_RDMA_MSIX; - pf->num_lan_msix = v_remain - ICE_MIN_RDMA_MSIX; - } else { - /* Split remaining MSIX with RDMA after accounting for AEQ MSIX - */ - pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 + - ICE_RDMA_NUM_AEQ_MSIX; - pf->num_lan_msix = v_remain - pf->num_rdma_msix; - } -} - -/** - * ice_ena_msix_range - Request a range of MSIX vectors from the OS - * @pf: board private structure - * - * Compute the number of MSIX vectors wanted and request from the OS. Adjust - * device usage if there are not enough vectors. Return the number of vectors - * reserved or negative on failure. 
- */ -static int ice_ena_msix_range(struct ice_pf *pf) -{ - int num_cpus, hw_num_msix, v_other, v_wanted, v_actual; - struct device *dev = ice_pf_to_dev(pf); - int err, i; - - hw_num_msix = pf->hw.func_caps.common_cap.num_msix_vectors; - num_cpus = num_online_cpus(); - - /* LAN miscellaneous handler */ - v_other = ICE_MIN_LAN_OICR_MSIX; - - /* Flow Director */ - if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) - v_other += ICE_FDIR_MSIX; - - /* switchdev */ - v_other += ICE_ESWITCH_MSIX; - - v_wanted = v_other; - - /* LAN traffic */ - pf->num_lan_msix = num_cpus; - v_wanted += pf->num_lan_msix; - - /* RDMA auxiliary driver */ - if (ice_is_rdma_ena(pf)) { - pf->num_rdma_msix = num_cpus + ICE_RDMA_NUM_AEQ_MSIX; - v_wanted += pf->num_rdma_msix; - } - - if (v_wanted > hw_num_msix) { - int v_remain; - - dev_warn(dev, "not enough device MSI-X vectors. wanted = %d, available = %d\n", - v_wanted, hw_num_msix); - - if (hw_num_msix < ICE_MIN_MSIX) { - err = -ERANGE; - goto exit_err; - } - - v_remain = hw_num_msix - v_other; - if (v_remain < ICE_MIN_LAN_TXRX_MSIX) { - v_other = ICE_MIN_MSIX - ICE_MIN_LAN_TXRX_MSIX; - v_remain = ICE_MIN_LAN_TXRX_MSIX; - } - - ice_reduce_msix_usage(pf, v_remain); - v_wanted = pf->num_lan_msix + pf->num_rdma_msix + v_other; - - dev_notice(dev, "Reducing request to %d MSI-X vectors for LAN traffic.\n", - pf->num_lan_msix); - if (ice_is_rdma_ena(pf)) - dev_notice(dev, "Reducing request to %d MSI-X vectors for RDMA.\n", - pf->num_rdma_msix); - } - - pf->msix_entries = devm_kcalloc(dev, v_wanted, - sizeof(*pf->msix_entries), GFP_KERNEL); - if (!pf->msix_entries) { - err = -ENOMEM; - goto exit_err; - } - - for (i = 0; i < v_wanted; i++) - pf->msix_entries[i].entry = i; - - /* actually reserve the vectors */ - v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries, - ICE_MIN_MSIX, v_wanted); - if (v_actual < 0) { - dev_err(dev, "unable to reserve MSI-X vectors\n"); - err = v_actual; - goto msix_err; - } - - if (v_actual < v_wanted) { - dev_warn(dev, 
"not enough OS MSI-X vectors. requested = %d, obtained = %d\n", - v_wanted, v_actual); - - if (v_actual < ICE_MIN_MSIX) { - /* error if we can't get minimum vectors */ - pci_disable_msix(pf->pdev); - err = -ERANGE; - goto msix_err; - } else { - int v_remain = v_actual - v_other; - - if (v_remain < ICE_MIN_LAN_TXRX_MSIX) - v_remain = ICE_MIN_LAN_TXRX_MSIX; - - ice_reduce_msix_usage(pf, v_remain); - - dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n", - pf->num_lan_msix); - - if (ice_is_rdma_ena(pf)) - dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n", - pf->num_rdma_msix); - } - } - - return v_actual; - -msix_err: - devm_kfree(dev, pf->msix_entries); - -exit_err: - pf->num_rdma_msix = 0; - pf->num_lan_msix = 0; - return err; -} - -/** - * ice_dis_msix - Disable MSI-X interrupt setup in OS - * @pf: board private structure - */ -static void ice_dis_msix(struct ice_pf *pf) -{ - pci_disable_msix(pf->pdev); - devm_kfree(ice_pf_to_dev(pf), pf->msix_entries); - pf->msix_entries = NULL; -} - -/** - * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme - * @pf: board private structure - */ -static void ice_clear_interrupt_scheme(struct ice_pf *pf) -{ - ice_dis_msix(pf); - - if (pf->irq_tracker) { - devm_kfree(ice_pf_to_dev(pf), pf->irq_tracker); - pf->irq_tracker = NULL; - } -} - -/** - * ice_init_interrupt_scheme - Determine proper interrupt scheme - * @pf: board private structure to initialize - */ -static int ice_init_interrupt_scheme(struct ice_pf *pf) -{ - int vectors; - - vectors = ice_ena_msix_range(pf); - - if (vectors < 0) - return vectors; - - /* set up vector assignment tracking */ - pf->irq_tracker = devm_kzalloc(ice_pf_to_dev(pf), - struct_size(pf->irq_tracker, list, vectors), - GFP_KERNEL); - if (!pf->irq_tracker) { - ice_dis_msix(pf); - return -ENOMEM; - } - - /* populate SW interrupts pool with number of OS granted IRQs. 
*/ - pf->num_avail_sw_msix = (u16)vectors; - pf->irq_tracker->num_entries = (u16)vectors; - pf->irq_tracker->end = pf->irq_tracker->num_entries; - - return 0; -} - /** * ice_is_wol_supported - check if WoL is supported * @hw: pointer to hardware info -- cgit v1.2.3 From afe87cfe820e74cc5fc59359445bcfd93ff0ab07 Mon Sep 17 00:00:00 2001 From: Piotr Raczynski Date: Mon, 15 May 2023 21:03:13 +0200 Subject: ice: use pci_irq_vector helper function Currently, driver gets interrupt number directly from ice_pf::msix_entries array. Use helper function dedicated to do just that. While at it use a variable to store interrupt number in ice_free_irq_msix_misc instead of calling the helper function twice. Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Piotr Raczynski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_arfs.c | 2 +- drivers/net/ethernet/intel/ice/ice_lib.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_main.c | 12 ++++++------ drivers/net/ethernet/intel/ice/ice_ptp.c | 2 +- drivers/net/ethernet/intel/ice/ice_xsk.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index fba178e07600..e81797344f5e 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -616,7 +616,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) base_idx = vsi->base_vector; ice_for_each_q_vector(vsi, i) if (irq_cpu_rmap_add(netdev->rx_cpu_rmap, - pf->msix_entries[base_idx + i].vector)) { + pci_irq_vector(pf->pdev, base_idx + i))) { ice_free_cpu_rx_rmap(vsi); return -EINVAL; } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 450317dfcca7..79e1557f77e8 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ 
-3056,7 +3056,7 @@ void ice_vsi_free_irq(struct ice_vsi *vsi) u16 vector = i + base; int irq_num; - irq_num = pf->msix_entries[vector].vector; + irq_num = pci_irq_vector(pf->pdev, vector); /* free only the irqs that were actually requested */ if (!vsi->q_vectors[i] || @@ -3235,7 +3235,7 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi) return; ice_for_each_q_vector(vsi, i) - synchronize_irq(pf->msix_entries[i + base].vector); + synchronize_irq(pci_irq_vector(pf->pdev, i + base)); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c377bacc5e2e..c103be660a9c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2501,7 +2501,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) for (vector = 0; vector < q_vectors; vector++) { struct ice_q_vector *q_vector = vsi->q_vectors[vector]; - irq_num = pf->msix_entries[base + vector].vector; + irq_num = pci_irq_vector(pf->pdev, base + vector); if (q_vector->tx.tx_ring && q_vector->rx.rx_ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, @@ -2557,7 +2557,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) free_q_irqs: while (vector) { vector--; - irq_num = pf->msix_entries[base + vector].vector; + irq_num = pci_irq_vector(pf->pdev, base + vector); if (!IS_ENABLED(CONFIG_RFS_ACCEL)) irq_set_affinity_notifier(irq_num, NULL); irq_set_affinity_hint(irq_num, NULL); @@ -3234,6 +3234,7 @@ static void ice_dis_ctrlq_interrupts(struct ice_hw *hw) */ static void ice_free_irq_msix_misc(struct ice_pf *pf) { + int misc_irq_num = pci_irq_vector(pf->pdev, pf->oicr_idx); struct ice_hw *hw = &pf->hw; ice_dis_ctrlq_interrupts(hw); @@ -3243,9 +3244,8 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf) ice_flush(hw); if (pf->msix_entries) { - synchronize_irq(pf->msix_entries[pf->oicr_idx].vector); - devm_free_irq(ice_pf_to_dev(pf), - pf->msix_entries[pf->oicr_idx].vector, pf); + 
synchronize_irq(misc_irq_num); + devm_free_irq(ice_pf_to_dev(pf), misc_irq_num, pf); } pf->num_avail_sw_msix += 1; @@ -3317,7 +3317,7 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) pf->oicr_idx = (u16)oicr_idx; err = devm_request_threaded_irq(dev, - pf->msix_entries[pf->oicr_idx].vector, + pci_irq_vector(pf->pdev, pf->oicr_idx), ice_misc_intr, ice_misc_intr_thread_fn, 0, pf->int_name, pf); if (err) { diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index ac6f06f9a2ed..972d4f6fd615 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -911,7 +911,7 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) spin_unlock(&tx->lock); /* wait for potentially outstanding interrupt to complete */ - synchronize_irq(pf->msix_entries[pf->oicr_idx].vector); + synchronize_irq(pci_irq_vector(pf->pdev, pf->oicr_idx)); ice_ptp_flush_tx_tracker(pf, tx); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index d1e489da7363..4102416d7a41 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -107,7 +107,7 @@ ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0); ice_flush(hw); - synchronize_irq(pf->msix_entries[v_idx + base].vector); + synchronize_irq(pci_irq_vector(pf->pdev, v_idx + base)); } } -- cgit v1.2.3 From 05018936a1fe07b92a612d1fd1511224ba8a1a34 Mon Sep 17 00:00:00 2001 From: Piotr Raczynski Date: Mon, 15 May 2023 21:03:14 +0200 Subject: ice: use preferred MSIX allocation api Move away from using pci_enable_msix_range/pci_disable_msix and use pci_alloc_irq_vectors/pci_free_irq_vectors instead. As a result stop tracking msix_entries since with newer API entries are handled by MSIX core. 
However, due to current design of communication with RDMA driver which accesses ice_pf::msix_entries directly, keep using the array just for RDMA driver use. Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Piotr Raczynski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_idc.c | 29 ++++++++++++++++++++-- drivers/net/ethernet/intel/ice/ice_irq.c | 40 +++++++------------------------ drivers/net/ethernet/intel/ice/ice_main.c | 6 ++--- 3 files changed, 37 insertions(+), 38 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index e6bc2285071e..1000759505d7 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -235,14 +235,33 @@ EXPORT_SYMBOL_GPL(ice_get_qos_params); static int ice_reserve_rdma_qvector(struct ice_pf *pf) { if (ice_is_rdma_ena(pf)) { - int index; + int index, i; index = ice_get_res(pf, pf->irq_tracker, pf->num_rdma_msix, ICE_RES_RDMA_VEC_ID); if (index < 0) return index; + + pf->msix_entries = kcalloc(pf->num_rdma_msix, + sizeof(*pf->msix_entries), + GFP_KERNEL); + if (!pf->msix_entries) { + ice_free_res(pf->irq_tracker, pf->rdma_base_vector, + ICE_RES_RDMA_VEC_ID); + return -ENOMEM; + } + pf->num_avail_sw_msix -= pf->num_rdma_msix; - pf->rdma_base_vector = (u16)index; + + /* RDMA is the only user of pf->msix_entries array */ + pf->rdma_base_vector = 0; + + for (i = 0; i < pf->num_rdma_msix; i++, index++) { + struct msix_entry *entry = &pf->msix_entries[i]; + + entry->entry = index; + entry->vector = pci_irq_vector(pf->pdev, index); + } } return 0; } @@ -253,6 +272,12 @@ static int ice_reserve_rdma_qvector(struct ice_pf *pf) */ static void ice_free_rdma_qvector(struct ice_pf *pf) { + if (!pf->msix_entries) + return; + + kfree(pf->msix_entries); + pf->msix_entries = NULL; + pf->num_avail_sw_msix -= 
pf->num_rdma_msix; ice_free_res(pf->irq_tracker, pf->rdma_base_vector, ICE_RES_RDMA_VEC_ID); diff --git a/drivers/net/ethernet/intel/ice/ice_irq.c b/drivers/net/ethernet/intel/ice/ice_irq.c index 1fc7daec9732..f61be5d76373 100644 --- a/drivers/net/ethernet/intel/ice/ice_irq.c +++ b/drivers/net/ethernet/intel/ice/ice_irq.c @@ -59,7 +59,7 @@ static int ice_ena_msix_range(struct ice_pf *pf) { int num_cpus, hw_num_msix, v_other, v_wanted, v_actual; struct device *dev = ice_pf_to_dev(pf); - int err, i; + int err; hw_num_msix = pf->hw.func_caps.common_cap.num_msix_vectors; num_cpus = num_online_cpus(); @@ -113,23 +113,13 @@ static int ice_ena_msix_range(struct ice_pf *pf) pf->num_rdma_msix); } - pf->msix_entries = devm_kcalloc(dev, v_wanted, - sizeof(*pf->msix_entries), GFP_KERNEL); - if (!pf->msix_entries) { - err = -ENOMEM; - goto exit_err; - } - - for (i = 0; i < v_wanted; i++) - pf->msix_entries[i].entry = i; - /* actually reserve the vectors */ - v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries, - ICE_MIN_MSIX, v_wanted); + v_actual = pci_alloc_irq_vectors(pf->pdev, ICE_MIN_MSIX, v_wanted, + PCI_IRQ_MSIX); if (v_actual < 0) { dev_err(dev, "unable to reserve MSI-X vectors\n"); err = v_actual; - goto msix_err; + goto exit_err; } if (v_actual < v_wanted) { @@ -138,9 +128,9 @@ static int ice_ena_msix_range(struct ice_pf *pf) if (v_actual < ICE_MIN_MSIX) { /* error if we can't get minimum vectors */ - pci_disable_msix(pf->pdev); + pci_free_irq_vectors(pf->pdev); err = -ERANGE; - goto msix_err; + goto exit_err; } else { int v_remain = v_actual - v_other; @@ -160,33 +150,19 @@ static int ice_ena_msix_range(struct ice_pf *pf) return v_actual; -msix_err: - devm_kfree(dev, pf->msix_entries); - exit_err: pf->num_rdma_msix = 0; pf->num_lan_msix = 0; return err; } -/** - * ice_dis_msix - Disable MSI-X interrupt setup in OS - * @pf: board private structure - */ -static void ice_dis_msix(struct ice_pf *pf) -{ - pci_disable_msix(pf->pdev); - devm_kfree(ice_pf_to_dev(pf), 
pf->msix_entries); - pf->msix_entries = NULL; -} - /** * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme * @pf: board private structure */ void ice_clear_interrupt_scheme(struct ice_pf *pf) { - ice_dis_msix(pf); + pci_free_irq_vectors(pf->pdev); if (pf->irq_tracker) { devm_kfree(ice_pf_to_dev(pf), pf->irq_tracker); @@ -213,7 +189,7 @@ int ice_init_interrupt_scheme(struct ice_pf *pf) vectors), GFP_KERNEL); if (!pf->irq_tracker) { - ice_dis_msix(pf); + pci_free_irq_vectors(pf->pdev); return -ENOMEM; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c103be660a9c..ce8cd49ae10c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3243,10 +3243,8 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf) wr32(hw, PFINT_OICR_ENA, 0); ice_flush(hw); - if (pf->msix_entries) { - synchronize_irq(misc_irq_num); - devm_free_irq(ice_pf_to_dev(pf), misc_irq_num, pf); - } + synchronize_irq(misc_irq_num); + devm_free_irq(ice_pf_to_dev(pf), misc_irq_num, pf); pf->num_avail_sw_msix += 1; ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID); -- cgit v1.2.3 From 369bb5a2a9a76ab6ea2945962d09baa21b4e1a4f Mon Sep 17 00:00:00 2001 From: Piotr Raczynski Date: Mon, 15 May 2023 21:03:15 +0200 Subject: ice: refactor VF control VSI interrupt handling All VF control VSIs share the same interrupt vector. Currently, a helper function dedicated for that directly sets ice_vsi::base_vector. Use helper that returns pointer to first found VF control VSI instead. 
Reviewed-by: Jacob Keller Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Piotr Raczynski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 85 +++++++---------------------- drivers/net/ethernet/intel/ice/ice_vf_lib.c | 32 +++++++++++ drivers/net/ethernet/intel/ice/ice_vf_lib.h | 7 +++ 3 files changed, 58 insertions(+), 66 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 79e1557f77e8..25511240685d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1473,36 +1473,6 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) return ice_search_res(res, needed, id); } -/** - * ice_get_vf_ctrl_res - Get VF control VSI resource - * @pf: pointer to the PF structure - * @vsi: the VSI to allocate a resource for - * - * Look up whether another VF has already allocated the control VSI resource. - * If so, re-use this resource so that we share it among all VFs. - * - * Otherwise, allocate the resource and return it. 
- */ -static int ice_get_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi) -{ - struct ice_vf *vf; - unsigned int bkt; - int base; - - rcu_read_lock(); - ice_for_each_vf_rcu(pf, bkt, vf) { - if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) { - base = pf->vsi[vf->ctrl_vsi_idx]->base_vector; - rcu_read_unlock(); - return base; - } - } - rcu_read_unlock(); - - return ice_get_res(pf, pf->irq_tracker, vsi->num_q_vectors, - ICE_RES_VF_CTRL_VEC_ID); -} - /** * ice_vsi_setup_vector_base - Set up the base vector for the given VSI * @vsi: ptr to the VSI @@ -1516,8 +1486,8 @@ static int ice_get_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi) static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; + u16 num_q_vectors, id; struct device *dev; - u16 num_q_vectors; int base; dev = ice_pf_to_dev(pf); @@ -1536,12 +1506,20 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) num_q_vectors = vsi->num_q_vectors; /* reserve slots from OS requested IRQs */ if (vsi->type == ICE_VSI_CTRL && vsi->vf) { - base = ice_get_vf_ctrl_res(pf, vsi); + struct ice_vsi *ctrl_vsi = ice_get_vf_ctrl_vsi(pf, vsi); + + /* reuse VF control VSI interrupt vector */ + if (ctrl_vsi) { + vsi->base_vector = ctrl_vsi->base_vector; + return 0; + } + + id = ICE_RES_VF_CTRL_VEC_ID; } else { - base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, - vsi->idx); + id = vsi->idx; } + base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, id); if (base < 0) { dev_err(dev, "%d MSI-X interrupts available. %s %d failed to get %d MSI-X vectors\n", ice_get_free_res_count(pf->irq_tracker), @@ -2611,37 +2589,6 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) vsi->agg_node->num_vsis); } -/** - * ice_free_vf_ctrl_res - Free the VF control VSI resource - * @pf: pointer to PF structure - * @vsi: the VSI to free resources for - * - * Check if the VF control VSI resource is still in use. If no VF is using it - * any more, release the VSI resource. 
Otherwise, leave it to be cleaned up - * once no other VF uses it. - */ -static void ice_free_vf_ctrl_res(struct ice_pf *pf, struct ice_vsi *vsi) -{ - struct ice_vf *vf; - unsigned int bkt; - - rcu_read_lock(); - ice_for_each_vf_rcu(pf, bkt, vf) { - if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) { - rcu_read_unlock(); - return; - } - } - rcu_read_unlock(); - - /* No other VFs left that have control VSI. It is now safe to reclaim - * SW interrupts back to the common pool. - */ - ice_free_res(pf->irq_tracker, vsi->base_vector, - ICE_RES_VF_CTRL_VEC_ID); - pf->num_avail_sw_msix += vsi->num_q_vectors; -} - static int ice_vsi_cfg_tc_lan(struct ice_pf *pf, struct ice_vsi *vsi) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; @@ -2916,7 +2863,13 @@ void ice_vsi_decfg(struct ice_vsi *vsi) * cleared in the same manner. */ if (vsi->type == ICE_VSI_CTRL && vsi->vf) { - ice_free_vf_ctrl_res(pf, vsi); + struct ice_vsi *ctrl_vsi = ice_get_vf_ctrl_vsi(pf, vsi); + + if (!ctrl_vsi) { + ice_free_res(pf->irq_tracker, vsi->base_vector, + ICE_RES_VF_CTRL_VEC_ID); + pf->num_avail_sw_msix += vsi->num_q_vectors; + } } else if (vsi->type != ICE_VSI_VF) { /* reclaim SW interrupts back to the common pool */ ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 89fd6982df09..68142facc85d 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -1310,3 +1310,35 @@ void ice_vf_set_initialized(struct ice_vf *vf) set_bit(ICE_VF_STATE_INIT, vf->vf_states); memset(&vf->vlan_v2_caps, 0, sizeof(vf->vlan_v2_caps)); } + +/** + * ice_get_vf_ctrl_vsi - Get first VF control VSI pointer + * @pf: the PF private structure + * @vsi: pointer to the VSI + * + * Return first found VF control VSI other than the vsi + * passed by parameter. 
This function is used to determine + * whether new resources have to be allocated for control VSI + * or they can be shared with existing one. + * + * Return found VF control VSI pointer other itself. Return + * NULL Otherwise. + * + */ +struct ice_vsi *ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi) +{ + struct ice_vsi *ctrl_vsi = NULL; + struct ice_vf *vf; + unsigned int bkt; + + rcu_read_lock(); + ice_for_each_vf_rcu(pf, bkt, vf) { + if (vf != vsi->vf && vf->ctrl_vsi_idx != ICE_NO_VSI) { + ctrl_vsi = pf->vsi[vf->ctrl_vsi_idx]; + break; + } + } + + rcu_read_unlock(); + return ctrl_vsi; +} diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index e3cda6fb71ab..48fea6fa0362 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -226,6 +226,7 @@ int ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m); int ice_reset_vf(struct ice_vf *vf, u32 flags); void ice_reset_all_vfs(struct ice_pf *pf); +struct ice_vsi *ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi); #else /* CONFIG_PCI_IOV */ static inline struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id) { @@ -290,6 +291,12 @@ static inline int ice_reset_vf(struct ice_vf *vf, u32 flags) static inline void ice_reset_all_vfs(struct ice_pf *pf) { } + +static inline struct ice_vsi * +ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi) +{ + return NULL; +} #endif /* !CONFIG_PCI_IOV */ #endif /* _ICE_VF_LIB_H_ */ -- cgit v1.2.3 From 524012c69ee1421d4a343291a0cfc1998ccba99a Mon Sep 17 00:00:00 2001 From: Piotr Raczynski Date: Mon, 15 May 2023 21:03:16 +0200 Subject: ice: remove redundant SRIOV code Remove redundant code from ice_get_max_valid_res_idx that has no effect. ice_pf::irq_tracker is initialized during driver probe, there is no reason to check it again. 
Also it is not possible for pf::sriov_base_vector to be lower than the tracker length, remove WARN_ON that will never happen. Get rid of ice_get_max_valid_res_idx helper function completely since it can never return negative value. Reviewed-by: Jacob Keller Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Piotr Raczynski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 36 ------------------------------ 1 file changed, 36 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index f1dca59bd844..65f971b74717 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -135,18 +135,9 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) */ static int ice_sriov_free_msix_res(struct ice_pf *pf) { - struct ice_res_tracker *res; - if (!pf) return -EINVAL; - res = pf->irq_tracker; - if (!res) - return -EINVAL; - - /* give back irq_tracker resources used */ - WARN_ON(pf->sriov_base_vector < res->num_entries); - pf->sriov_base_vector = 0; return 0; @@ -409,29 +400,6 @@ int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) q_vector->v_idx + 1; } -/** - * ice_get_max_valid_res_idx - Get the max valid resource index - * @res: pointer to the resource to find the max valid index for - * - * Start from the end of the ice_res_tracker and return right when we find the - * first res->list entry with the ICE_RES_VALID_BIT set. This function is only - * valid for SR-IOV because it is the only consumer that manipulates the - * res->end and this is always called when res->end is set to res->num_entries. 
- */ -static int ice_get_max_valid_res_idx(struct ice_res_tracker *res) -{ - int i; - - if (!res) - return -EINVAL; - - for (i = res->num_entries - 1; i >= 0; i--) - if (res->list[i] & ICE_RES_VALID_BIT) - return i; - - return 0; -} - /** * ice_sriov_set_msix_res - Set any used MSIX resources * @pf: pointer to PF structure @@ -490,7 +458,6 @@ static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) */ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) { - int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker); u16 num_msix_per_vf, num_txq, num_rxq, avail_qs; int msix_avail_per_vf, msix_avail_for_sriov; struct device *dev = ice_pf_to_dev(pf); @@ -501,9 +468,6 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) if (!num_vfs) return -EINVAL; - if (max_valid_res_idx < 0) - return -ENOSPC; - /* determine MSI-X resources per VF */ msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors - pf->irq_tracker->num_entries; -- cgit v1.2.3 From 4aad5335969f25c4dc966a15c5497db3718538bb Mon Sep 17 00:00:00 2001 From: Piotr Raczynski Date: Mon, 15 May 2023 21:03:17 +0200 Subject: ice: add individual interrupt allocation Currently, interrupt allocations are distributed in batches, depending on the feature. Also, after allocation there is a series of operations that distributes per-irq settings through that batch of interrupts. Although the driver does not yet support dynamic interrupt allocation, keep allocated interrupts in a pool and add allocation abstraction logic to make the code more flexible. Keep per-interrupt information in the ice_q_vector structure, which makes ice_vsi::base_vector redundant. Also, as a result there are a few functions that can be removed.
Reviewed-by: Jacob Keller Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Piotr Raczynski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 11 +- drivers/net/ethernet/intel/ice/ice_arfs.c | 5 +- drivers/net/ethernet/intel/ice/ice_base.c | 50 +++++- drivers/net/ethernet/intel/ice/ice_ethtool.c | 2 +- drivers/net/ethernet/intel/ice/ice_idc.c | 45 +++--- drivers/net/ethernet/intel/ice/ice_irq.c | 46 +++++- drivers/net/ethernet/intel/ice/ice_irq.h | 3 + drivers/net/ethernet/intel/ice/ice_lib.c | 228 +++------------------------ drivers/net/ethernet/intel/ice/ice_lib.h | 4 +- drivers/net/ethernet/intel/ice/ice_main.c | 44 +++--- drivers/net/ethernet/intel/ice/ice_ptp.c | 2 +- drivers/net/ethernet/intel/ice/ice_sriov.c | 2 +- drivers/net/ethernet/intel/ice/ice_xsk.c | 5 +- 13 files changed, 165 insertions(+), 282 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 4dc4c6bec081..d8dde291491e 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -105,10 +105,6 @@ #define ICE_Q_WAIT_MAX_RETRY (5 * ICE_Q_WAIT_RETRY_LIMIT) #define ICE_MAX_LG_RSS_QS 256 #define ICE_RES_VALID_BIT 0x8000 -#define ICE_RES_MISC_VEC_ID (ICE_RES_VALID_BIT - 1) -#define ICE_RES_RDMA_VEC_ID (ICE_RES_MISC_VEC_ID - 1) -/* All VF control VSIs share the same IRQ, so assign a unique ID for them */ -#define ICE_RES_VF_CTRL_VEC_ID (ICE_RES_RDMA_VEC_ID - 1) #define ICE_INVAL_Q_INDEX 0xffff #define ICE_MAX_RXQS_PER_TC 256 /* Used when setting VSI context per TC Rx queues */ @@ -349,7 +345,6 @@ struct ice_vsi { u32 rx_buf_failed; u32 rx_page_failed; u16 num_q_vectors; - u16 base_vector; /* IRQ base for OS reserved vectors */ enum ice_vsi_type type; u16 vsi_num; /* HW (absolute) index of this VSI */ u16 idx; /* software index in pf->vsi[] */ @@ -480,6 +475,7 @@ struct 
ice_q_vector { char name[ICE_INT_NAME_STR_LEN]; u16 total_events; /* net_dim(): number of interrupts processed */ + struct msi_map irq; } ____cacheline_internodealigned_in_smp; enum ice_pf_flags { @@ -584,8 +580,7 @@ struct ice_pf { u32 hw_csum_rx_error; u32 oicr_err_reg; - u16 oicr_idx; /* Other interrupt cause MSIX vector index */ - u16 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */ + struct msi_map oicr_irq; /* Other interrupt cause MSIX vector */ u16 max_pf_txqs; /* Total Tx queues PF wide */ u16 max_pf_rxqs; /* Total Rx queues PF wide */ u16 num_lan_msix; /* Total MSIX vectors for base driver */ @@ -671,7 +666,7 @@ ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi, struct ice_q_vector *q_vector) { u32 vector = (vsi && q_vector) ? q_vector->reg_idx : - ((struct ice_pf *)hw->back)->oicr_idx; + ((struct ice_pf *)hw->back)->oicr_irq.index; int itr = ICE_ITR_NONE; u32 val; diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index e81797344f5e..cca0e753f38f 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -596,7 +596,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) { struct net_device *netdev; struct ice_pf *pf; - int base_idx, i; + int i; if (!vsi || vsi->type != ICE_VSI_PF) return 0; @@ -613,10 +613,9 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) if (unlikely(!netdev->rx_cpu_rmap)) return -EINVAL; - base_idx = vsi->base_vector; ice_for_each_q_vector(vsi, i) if (irq_cpu_rmap_add(netdev->rx_cpu_rmap, - pci_irq_vector(pf->pdev, base_idx + i))) { + vsi->q_vectors[i]->irq.virq)) { ice_free_cpu_rx_rmap(vsi); return -EINVAL; } diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 1911d644dfa8..cb0913cb9741 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -103,10 +103,10 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 
v_idx) { struct ice_pf *pf = vsi->back; struct ice_q_vector *q_vector; + int err; /* allocate q_vector */ - q_vector = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*q_vector), - GFP_KERNEL); + q_vector = kzalloc(sizeof(*q_vector), GFP_KERNEL); if (!q_vector) return -ENOMEM; @@ -118,9 +118,34 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) q_vector->rx.itr_mode = ITR_DYNAMIC; q_vector->tx.type = ICE_TX_CONTAINER; q_vector->rx.type = ICE_RX_CONTAINER; + q_vector->irq.index = -ENOENT; - if (vsi->type == ICE_VSI_VF) + if (vsi->type == ICE_VSI_VF) { + q_vector->reg_idx = ice_calc_vf_reg_idx(vsi->vf, q_vector); goto out; + } else if (vsi->type == ICE_VSI_CTRL && vsi->vf) { + struct ice_vsi *ctrl_vsi = ice_get_vf_ctrl_vsi(pf, vsi); + + if (ctrl_vsi) { + if (unlikely(!ctrl_vsi->q_vectors)) { + err = -ENOENT; + goto err_free_q_vector; + } + + q_vector->irq = ctrl_vsi->q_vectors[0]->irq; + goto skip_alloc; + } + } + + q_vector->irq = ice_alloc_irq(pf); + if (q_vector->irq.index < 0) { + err = -ENOMEM; + goto err_free_q_vector; + } + +skip_alloc: + q_vector->reg_idx = q_vector->irq.index; + /* only set affinity_mask if the CPU is online */ if (cpu_online(v_idx)) cpumask_set_cpu(v_idx, &q_vector->affinity_mask); @@ -137,6 +162,11 @@ out: vsi->q_vectors[v_idx] = q_vector; return 0; + +err_free_q_vector: + kfree(q_vector); + + return err; } /** @@ -168,7 +198,19 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) if (vsi->netdev) netif_napi_del(&q_vector->napi); - devm_kfree(dev, q_vector); + /* release MSIX interrupt if q_vector had interrupt allocated */ + if (q_vector->irq.index < 0) + goto free_q_vector; + + /* only free last VF ctrl vsi interrupt */ + if (vsi->type == ICE_VSI_CTRL && vsi->vf && + ice_get_vf_ctrl_vsi(pf, vsi)) + goto free_q_vector; + + ice_free_irq(pf, q_vector->irq); + +free_q_vector: + kfree(q_vector); vsi->q_vectors[v_idx] = NULL; } diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c 
b/drivers/net/ethernet/intel/ice/ice_ethtool.c index f86e814354a3..8407c7175cf6 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -956,7 +956,7 @@ static u64 ice_intr_test(struct net_device *netdev) netdev_info(netdev, "interrupt test\n"); - wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_idx), + wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_irq.index), GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M | GLINT_DYN_CTL_SWINT_TRIG_M); diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index 1000759505d7..bc016bb4440c 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -229,38 +229,33 @@ void ice_get_qos_params(struct ice_pf *pf, struct iidc_qos_params *qos) EXPORT_SYMBOL_GPL(ice_get_qos_params); /** - * ice_reserve_rdma_qvector - Reserve vector resources for RDMA driver + * ice_alloc_rdma_qvectors - Allocate vector resources for RDMA driver * @pf: board private structure to initialize */ -static int ice_reserve_rdma_qvector(struct ice_pf *pf) +static int ice_alloc_rdma_qvectors(struct ice_pf *pf) { if (ice_is_rdma_ena(pf)) { - int index, i; - - index = ice_get_res(pf, pf->irq_tracker, pf->num_rdma_msix, - ICE_RES_RDMA_VEC_ID); - if (index < 0) - return index; + int i; pf->msix_entries = kcalloc(pf->num_rdma_msix, sizeof(*pf->msix_entries), GFP_KERNEL); - if (!pf->msix_entries) { - ice_free_res(pf->irq_tracker, pf->rdma_base_vector, - ICE_RES_RDMA_VEC_ID); + if (!pf->msix_entries) return -ENOMEM; - } - - pf->num_avail_sw_msix -= pf->num_rdma_msix; /* RDMA is the only user of pf->msix_entries array */ pf->rdma_base_vector = 0; - for (i = 0; i < pf->num_rdma_msix; i++, index++) { + for (i = 0; i < pf->num_rdma_msix; i++) { struct msix_entry *entry = &pf->msix_entries[i]; + struct msi_map map; - entry->entry = index; - entry->vector = pci_irq_vector(pf->pdev, index); + map = ice_alloc_irq(pf); + if (map.index < 0) + break; + + 
entry->entry = map.index; + entry->vector = map.virq; } } return 0; @@ -272,15 +267,21 @@ static int ice_reserve_rdma_qvector(struct ice_pf *pf) */ static void ice_free_rdma_qvector(struct ice_pf *pf) { + int i; + if (!pf->msix_entries) return; + for (i = 0; i < pf->num_rdma_msix; i++) { + struct msi_map map; + + map.index = pf->msix_entries[i].entry; + map.virq = pf->msix_entries[i].vector; + ice_free_irq(pf, map); + } + kfree(pf->msix_entries); pf->msix_entries = NULL; - - pf->num_avail_sw_msix -= pf->num_rdma_msix; - ice_free_res(pf->irq_tracker, pf->rdma_base_vector, - ICE_RES_RDMA_VEC_ID); } /** @@ -382,7 +383,7 @@ int ice_init_rdma(struct ice_pf *pf) } /* Reserve vector resources */ - ret = ice_reserve_rdma_qvector(pf); + ret = ice_alloc_rdma_qvectors(pf); if (ret < 0) { dev_err(dev, "failed to reserve vectors for RDMA\n"); goto err_reserve_rdma_qvector; diff --git a/drivers/net/ethernet/intel/ice/ice_irq.c b/drivers/net/ethernet/intel/ice/ice_irq.c index f61be5d76373..ca1a1de26766 100644 --- a/drivers/net/ethernet/intel/ice/ice_irq.c +++ b/drivers/net/ethernet/intel/ice/ice_irq.c @@ -194,9 +194,53 @@ int ice_init_interrupt_scheme(struct ice_pf *pf) } /* populate SW interrupts pool with number of OS granted IRQs. */ - pf->num_avail_sw_msix = (u16)vectors; pf->irq_tracker->num_entries = (u16)vectors; pf->irq_tracker->end = pf->irq_tracker->num_entries; return 0; } + +/** + * ice_alloc_irq - Allocate new interrupt vector + * @pf: board private structure + * + * Allocate new interrupt vector for a given owner id. + * return struct msi_map with interrupt details and track + * allocated interrupt appropriately. + * + * This function mimics individual interrupt allocation, + * even interrupts are actually already allocated with + * pci_alloc_irq_vectors. Individual allocation helps + * to track interrupts and simplifies interrupt related + * handling. + * + * On failure, return map with negative .index. The caller + * is expected to check returned map index. 
+ * + */ +struct msi_map ice_alloc_irq(struct ice_pf *pf) +{ + struct msi_map map = { .index = -ENOENT }; + int entry; + + entry = ice_get_res(pf, pf->irq_tracker); + if (entry < 0) + return map; + + map.index = entry; + map.virq = pci_irq_vector(pf->pdev, map.index); + + return map; +} + +/** + * ice_free_irq - Free interrupt vector + * @pf: board private structure + * @map: map with interrupt details + * + * Remove allocated interrupt from the interrupt tracker + */ +void ice_free_irq(struct ice_pf *pf, struct msi_map map) +{ + ice_free_res(pf->irq_tracker, map.index); +} diff --git a/drivers/net/ethernet/intel/ice/ice_irq.h b/drivers/net/ethernet/intel/ice/ice_irq.h index 82475162ab70..26e80dfe22b5 100644 --- a/drivers/net/ethernet/intel/ice/ice_irq.h +++ b/drivers/net/ethernet/intel/ice/ice_irq.h @@ -7,4 +7,7 @@ int ice_init_interrupt_scheme(struct ice_pf *pf); void ice_clear_interrupt_scheme(struct ice_pf *pf); +struct msi_map ice_alloc_irq(struct ice_pf *pf); +void ice_free_irq(struct ice_pf *pf, struct msi_map map); + #endif diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 25511240685d..fe908cf6da6a 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1374,162 +1374,45 @@ out: * ice_free_res - free a block of resources * @res: pointer to the resource * @index: starting index previously returned by ice_get_res - * @id: identifier to track owner * * Returns number of resources freed */ -int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id) +int ice_free_res(struct ice_res_tracker *res, u16 index) { - int count = 0; - int i; - if (!res || index >= res->end) return -EINVAL; - id |= ICE_RES_VALID_BIT; - for (i = index; i < res->end && res->list[i] == id; i++) { - res->list[i] = 0; - count++; - } - - return count; -} - -/** - * ice_search_res - Search the tracker for a block of resources - * @res: pointer to the resource - * @needed: size of the 
block needed - * @id: identifier to track owner - * - * Returns the base item index of the block, or -ENOMEM for error - */ -static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id) -{ - u16 start = 0, end = 0; - - if (needed > res->end) - return -ENOMEM; - - id |= ICE_RES_VALID_BIT; - - do { - /* skip already allocated entries */ - if (res->list[end++] & ICE_RES_VALID_BIT) { - start = end; - if ((start + needed) > res->end) - break; - } - - if (end == (start + needed)) { - int i = start; - - /* there was enough, so assign it to the requestor */ - while (i != end) - res->list[i++] = id; - - return start; - } - } while (end < res->end); - - return -ENOMEM; -} - -/** - * ice_get_free_res_count - Get free count from a resource tracker - * @res: Resource tracker instance - */ -static u16 ice_get_free_res_count(struct ice_res_tracker *res) -{ - u16 i, count = 0; + res->list[index] = 0; - for (i = 0; i < res->end; i++) - if (!(res->list[i] & ICE_RES_VALID_BIT)) - count++; - - return count; + return 0; } /** - * ice_get_res - get a block of resources + * ice_get_res - get a resource from the tracker * @pf: board private structure * @res: pointer to the resource - * @needed: size of the block needed - * @id: identifier to track owner * - * Returns the base item index of the block, or negative for error + * Returns the item index, or negative for error */ int -ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) +ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res) { - if (!res || !pf) - return -EINVAL; + u16 i; - if (!needed || needed > res->num_entries || id >= ICE_RES_VALID_BIT) { - dev_err(ice_pf_to_dev(pf), "param err: needed=%d, num_entries = %d id=0x%04x\n", - needed, res->num_entries, id); + if (!res || !pf) return -EINVAL; - } - - return ice_search_res(res, needed, id); -} - -/** - * ice_vsi_setup_vector_base - Set up the base vector for the given VSI - * @vsi: ptr to the VSI - * - * This should only be called 
after ice_vsi_alloc_def() which allocates the - * corresponding SW VSI structure and initializes num_queue_pairs for the - * newly allocated VSI. - * - * Returns 0 on success or negative on failure - */ -static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) -{ - struct ice_pf *pf = vsi->back; - u16 num_q_vectors, id; - struct device *dev; - int base; - dev = ice_pf_to_dev(pf); - /* SRIOV doesn't grab irq_tracker entries for each VSI */ - if (vsi->type == ICE_VSI_VF) - return 0; - if (vsi->type == ICE_VSI_CHNL) - return 0; - - if (vsi->base_vector) { - dev_dbg(dev, "VSI %d has non-zero base vector %d\n", - vsi->vsi_num, vsi->base_vector); - return -EEXIST; - } - - num_q_vectors = vsi->num_q_vectors; - /* reserve slots from OS requested IRQs */ - if (vsi->type == ICE_VSI_CTRL && vsi->vf) { - struct ice_vsi *ctrl_vsi = ice_get_vf_ctrl_vsi(pf, vsi); - - /* reuse VF control VSI interrupt vector */ - if (ctrl_vsi) { - vsi->base_vector = ctrl_vsi->base_vector; - return 0; - } + /* skip already allocated entries */ + for (i = 0; i < res->end; i++) + if (!(res->list[i] & ICE_RES_VALID_BIT)) + break; - id = ICE_RES_VF_CTRL_VEC_ID; + if (i < res->end) { + res->list[i] = ICE_RES_VALID_BIT; + return i; } else { - id = vsi->idx; - } - - base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, id); - if (base < 0) { - dev_err(dev, "%d MSI-X interrupts available. 
%s %d failed to get %d MSI-X vectors\n", - ice_get_free_res_count(pf->irq_tracker), - ice_vsi_type_str(vsi->type), vsi->idx, num_q_vectors); - return -ENOENT; + return -ENOMEM; } - vsi->base_vector = (u16)base; - pf->num_avail_sw_msix -= num_q_vectors; - - return 0; } /** @@ -2387,50 +2270,6 @@ static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi) ice_vsi_set_dcb_tc_cfg(vsi); } -/** - * ice_vsi_set_q_vectors_reg_idx - set the HW register index for all q_vectors - * @vsi: VSI to set the q_vectors register index on - */ -static int -ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi) -{ - u16 i; - - if (!vsi || !vsi->q_vectors) - return -EINVAL; - - ice_for_each_q_vector(vsi, i) { - struct ice_q_vector *q_vector = vsi->q_vectors[i]; - - if (!q_vector) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to set reg_idx on q_vector %d VSI %d\n", - i, vsi->vsi_num); - goto clear_reg_idx; - } - - if (vsi->type == ICE_VSI_VF) { - struct ice_vf *vf = vsi->vf; - - q_vector->reg_idx = ice_calc_vf_reg_idx(vf, q_vector); - } else { - q_vector->reg_idx = - q_vector->v_idx + vsi->base_vector; - } - } - - return 0; - -clear_reg_idx: - ice_for_each_q_vector(vsi, i) { - struct ice_q_vector *q_vector = vsi->q_vectors[i]; - - if (q_vector) - q_vector->reg_idx = 0; - } - - return -EINVAL; -} - /** * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling * @vsi: the VSI being configured @@ -2675,14 +2514,6 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) if (ret) goto unroll_vsi_init; - ret = ice_vsi_setup_vector_base(vsi); - if (ret) - goto unroll_alloc_q_vector; - - ret = ice_vsi_set_q_vectors_reg_idx(vsi); - if (ret) - goto unroll_vector_base; - ret = ice_vsi_alloc_rings(vsi); if (ret) goto unroll_vector_base; @@ -2733,10 +2564,6 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) if (ret) goto unroll_alloc_q_vector; - ret = ice_vsi_set_q_vectors_reg_idx(vsi); - if (ret) - goto unroll_vector_base; - ret = ice_vsi_alloc_ring_stats(vsi); 
if (ret) goto unroll_vector_base; @@ -2769,8 +2596,6 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) unroll_vector_base: /* reclaim SW interrupts back to the common pool */ - ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); - pf->num_avail_sw_msix += vsi->num_q_vectors; unroll_alloc_q_vector: ice_vsi_free_q_vectors(vsi); unroll_vsi_init: @@ -2862,20 +2687,6 @@ void ice_vsi_decfg(struct ice_vsi *vsi) * many interrupts each VF needs. SR-IOV MSIX resources are also * cleared in the same manner. */ - if (vsi->type == ICE_VSI_CTRL && vsi->vf) { - struct ice_vsi *ctrl_vsi = ice_get_vf_ctrl_vsi(pf, vsi); - - if (!ctrl_vsi) { - ice_free_res(pf->irq_tracker, vsi->base_vector, - ICE_RES_VF_CTRL_VEC_ID); - pf->num_avail_sw_msix += vsi->num_q_vectors; - } - } else if (vsi->type != ICE_VSI_VF) { - /* reclaim SW interrupts back to the common pool */ - ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); - pf->num_avail_sw_msix += vsi->num_q_vectors; - vsi->base_vector = 0; - } if (vsi->type == ICE_VSI_VF && vsi->agg_node && vsi->agg_node->valid) @@ -2992,7 +2803,6 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) void ice_vsi_free_irq(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; - int base = vsi->base_vector; int i; if (!vsi->q_vectors || !vsi->irqs_ready) @@ -3006,10 +2816,9 @@ void ice_vsi_free_irq(struct ice_vsi *vsi) ice_free_cpu_rx_rmap(vsi); ice_for_each_q_vector(vsi, i) { - u16 vector = i + base; int irq_num; - irq_num = pci_irq_vector(pf->pdev, vector); + irq_num = vsi->q_vectors[i]->irq.virq; /* free only the irqs that were actually requested */ if (!vsi->q_vectors[i] || @@ -3141,7 +2950,6 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) */ void ice_vsi_dis_irq(struct ice_vsi *vsi) { - int base = vsi->base_vector; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; u32 val; @@ -3188,7 +2996,7 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi) return; ice_for_each_q_vector(vsi, i) - 
synchronize_irq(pci_irq_vector(pf->pdev, i + base)); + synchronize_irq(vsi->q_vectors[i]->irq.virq); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 75221478f2dc..2f52f9e32858 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -104,10 +104,10 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked); void ice_vsi_decfg(struct ice_vsi *vsi); void ice_dis_vsi(struct ice_vsi *vsi, bool locked); -int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id); +int ice_free_res(struct ice_res_tracker *res, u16 index); int -ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id); +ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res); int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags); int ice_vsi_cfg(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ce8cd49ae10c..efc621c0bd6c 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2490,7 +2490,6 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) { int q_vectors = vsi->num_q_vectors; struct ice_pf *pf = vsi->back; - int base = vsi->base_vector; struct device *dev; int rx_int_idx = 0; int tx_int_idx = 0; @@ -2501,7 +2500,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) for (vector = 0; vector < q_vectors; vector++) { struct ice_q_vector *q_vector = vsi->q_vectors[vector]; - irq_num = pci_irq_vector(pf->pdev, base + vector); + irq_num = q_vector->irq.virq; if (q_vector->tx.tx_ring && q_vector->rx.rx_ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, @@ -2555,9 +2554,8 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) return 0; free_q_irqs: - while (vector) { - vector--; - irq_num = pci_irq_vector(pf->pdev, base + vector); + while (vector--) { 
+ irq_num = vsi->q_vectors[vector]->irq.virq; if (!IS_ENABLED(CONFIG_RFS_ACCEL)) irq_set_affinity_notifier(irq_num, NULL); irq_set_affinity_hint(irq_num, NULL); @@ -3047,7 +3045,7 @@ static void ice_ena_misc_vector(struct ice_pf *pf) wr32(hw, PFINT_OICR_ENA, val); /* SW_ITR_IDX = 0, but don't change INTENA */ - wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), + wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index), GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M); } @@ -3234,7 +3232,7 @@ static void ice_dis_ctrlq_interrupts(struct ice_hw *hw) */ static void ice_free_irq_msix_misc(struct ice_pf *pf) { - int misc_irq_num = pci_irq_vector(pf->pdev, pf->oicr_idx); + int misc_irq_num = pf->oicr_irq.virq; struct ice_hw *hw = &pf->hw; ice_dis_ctrlq_interrupts(hw); @@ -3246,8 +3244,7 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf) synchronize_irq(misc_irq_num); devm_free_irq(ice_pf_to_dev(pf), misc_irq_num, pf); - pf->num_avail_sw_msix += 1; - ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID); + ice_free_irq(pf, pf->oicr_irq); } /** @@ -3293,7 +3290,8 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) { struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - int oicr_idx, err = 0; + struct msi_map oicr_irq; + int err = 0; if (!pf->int_name[0]) snprintf(pf->int_name, sizeof(pf->int_name) - 1, "%s-%s:misc", @@ -3307,30 +3305,26 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) goto skip_req_irq; /* reserve one vector in irq_tracker for misc interrupts */ - oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); - if (oicr_idx < 0) - return oicr_idx; - - pf->num_avail_sw_msix -= 1; - pf->oicr_idx = (u16)oicr_idx; - - err = devm_request_threaded_irq(dev, - pci_irq_vector(pf->pdev, pf->oicr_idx), - ice_misc_intr, ice_misc_intr_thread_fn, - 0, pf->int_name, pf); + oicr_irq = ice_alloc_irq(pf); + if (oicr_irq.index < 0) + return oicr_irq.index; + + pf->oicr_irq = oicr_irq; + err = devm_request_threaded_irq(dev, pf->oicr_irq.virq, 
ice_misc_intr, + ice_misc_intr_thread_fn, 0, + pf->int_name, pf); if (err) { dev_err(dev, "devm_request_threaded_irq for %s failed: %d\n", pf->int_name, err); - ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); - pf->num_avail_sw_msix += 1; + ice_free_irq(pf, pf->oicr_irq); return err; } skip_req_irq: ice_ena_misc_vector(pf); - ice_ena_ctrlq_interrupts(hw, pf->oicr_idx); - wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx), + ice_ena_ctrlq_interrupts(hw, pf->oicr_irq.index); + wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_irq.index), ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S); ice_flush(hw); diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 972d4f6fd615..d4b6c997141d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -911,7 +911,7 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) spin_unlock(&tx->lock); /* wait for potentially outstanding interrupt to complete */ - synchronize_irq(pci_irq_vector(pf->pdev, pf->oicr_idx)); + synchronize_irq(pf->oicr_irq.virq); ice_ptp_flush_tx_tracker(pf, tx); diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 65f971b74717..0fc2b26a2fa6 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -835,7 +835,7 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) int ret; /* Disable global interrupt 0 so we don't try to handle the VFLR. 
*/ - wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), + wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index), ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S); set_bit(ICE_OICR_INTR_DIS, pf->state); ice_flush(hw); diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 4102416d7a41..a7fe2b4ce655 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -90,7 +90,6 @@ ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, { struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; - int base = vsi->base_vector; u16 reg; u32 val; @@ -103,11 +102,9 @@ ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, wr32(hw, QINT_RQCTL(reg), val); if (q_vector) { - u16 v_idx = q_vector->v_idx; - wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), 0); ice_flush(hw); - synchronize_irq(pci_irq_vector(pf->pdev, v_idx + base)); + synchronize_irq(q_vector->irq.virq); } } -- cgit v1.2.3 From cfebc0a36ea5518d6b32a6999da5accf0a94fafa Mon Sep 17 00:00:00 2001 From: Piotr Raczynski Date: Mon, 15 May 2023 21:03:18 +0200 Subject: ice: track interrupt vectors with xarray Replace custom interrupt tracker with generic xarray data structure. Remove all code responsible for searching for a new entry with xa_alloc, which always tries to allocate at the lowest possible index. As a result, the driver is always using a contiguous region of the MSIX vector table. The new tracker keeps ice_irq_entry entries in the xarray opaque to the rest of the driver, hiding the entry details from the caller.
Reviewed-by: Jacob Keller Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Piotr Raczynski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 9 +-- drivers/net/ethernet/intel/ice/ice_irq.c | 101 ++++++++++++++++++++++------- drivers/net/ethernet/intel/ice/ice_irq.h | 9 +++ drivers/net/ethernet/intel/ice/ice_lib.c | 45 ------------- drivers/net/ethernet/intel/ice/ice_lib.h | 5 -- drivers/net/ethernet/intel/ice/ice_sriov.c | 4 +- 6 files changed, 89 insertions(+), 84 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index d8dde291491e..8541d986ec7f 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -104,7 +104,6 @@ #define ICE_Q_WAIT_RETRY_LIMIT 10 #define ICE_Q_WAIT_MAX_RETRY (5 * ICE_Q_WAIT_RETRY_LIMIT) #define ICE_MAX_LG_RSS_QS 256 -#define ICE_RES_VALID_BIT 0x8000 #define ICE_INVAL_Q_INDEX 0xffff #define ICE_MAX_RXQS_PER_TC 256 /* Used when setting VSI context per TC Rx queues */ @@ -242,12 +241,6 @@ struct ice_tc_cfg { struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS]; }; -struct ice_res_tracker { - u16 num_entries; - u16 end; - u16 list[]; -}; - struct ice_qs_cfg { struct mutex *qs_mutex; /* will be assigned to &pf->avail_q_mutex */ unsigned long *pf_map; @@ -536,7 +529,7 @@ struct ice_pf { /* OS reserved IRQ details */ struct msix_entry *msix_entries; - struct ice_res_tracker *irq_tracker; + struct ice_irq_tracker irq_tracker; /* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the * number of MSIX vectors needed for all SR-IOV VFs from the number of * MSIX vectors allowed on this PF. 
diff --git a/drivers/net/ethernet/intel/ice/ice_irq.c b/drivers/net/ethernet/intel/ice/ice_irq.c index ca1a1de26766..1713347c577f 100644 --- a/drivers/net/ethernet/intel/ice/ice_irq.c +++ b/drivers/net/ethernet/intel/ice/ice_irq.c @@ -5,6 +5,75 @@ #include "ice_lib.h" #include "ice_irq.h" +/** + * ice_init_irq_tracker - initialize interrupt tracker + * @pf: board private structure + * @max_vectors: maximum number of vectors that tracker can hold + */ +static void +ice_init_irq_tracker(struct ice_pf *pf, unsigned int max_vectors) +{ + pf->irq_tracker.num_entries = max_vectors; + xa_init_flags(&pf->irq_tracker.entries, XA_FLAGS_ALLOC); +} + +/** + * ice_deinit_irq_tracker - free xarray tracker + * @pf: board private structure + */ +static void ice_deinit_irq_tracker(struct ice_pf *pf) +{ + xa_destroy(&pf->irq_tracker.entries); +} + +/** + * ice_free_irq_res - free a block of resources + * @pf: board private structure + * @index: starting index previously returned by ice_get_res + */ +static void ice_free_irq_res(struct ice_pf *pf, u16 index) +{ + struct ice_irq_entry *entry; + + entry = xa_erase(&pf->irq_tracker.entries, index); + kfree(entry); +} + +/** + * ice_get_irq_res - get an interrupt resource + * @pf: board private structure + * + * Allocate new irq entry in the free slot of the tracker. Since xarray + * is used, always allocate new entry at the lowest possible index. Set + * proper allocation limit for maximum tracker entries. + * + * Returns allocated irq entry or NULL on failure. 
+ */ +static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf) +{ + struct xa_limit limit = { .max = pf->irq_tracker.num_entries, + .min = 0 }; + struct ice_irq_entry *entry; + unsigned int index; + int ret; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + + ret = xa_alloc(&pf->irq_tracker.entries, &index, entry, limit, + GFP_KERNEL); + + if (ret) { + kfree(entry); + entry = NULL; + } else { + entry->index = index; + } + + return entry; +} + /** * ice_reduce_msix_usage - Reduce usage of MSI-X vectors * @pf: board private structure @@ -163,11 +232,7 @@ exit_err: void ice_clear_interrupt_scheme(struct ice_pf *pf) { pci_free_irq_vectors(pf->pdev); - - if (pf->irq_tracker) { - devm_kfree(ice_pf_to_dev(pf), pf->irq_tracker); - pf->irq_tracker = NULL; - } + ice_deinit_irq_tracker(pf); } /** @@ -183,19 +248,7 @@ int ice_init_interrupt_scheme(struct ice_pf *pf) if (vectors < 0) return vectors; - /* set up vector assignment tracking */ - pf->irq_tracker = devm_kzalloc(ice_pf_to_dev(pf), - struct_size(pf->irq_tracker, list, - vectors), - GFP_KERNEL); - if (!pf->irq_tracker) { - pci_free_irq_vectors(pf->pdev); - return -ENOMEM; - } - - /* populate SW interrupts pool with number of OS granted IRQs. 
*/ - pf->irq_tracker->num_entries = (u16)vectors; - pf->irq_tracker->end = pf->irq_tracker->num_entries; + ice_init_irq_tracker(pf, vectors); return 0; } @@ -221,13 +274,13 @@ int ice_init_interrupt_scheme(struct ice_pf *pf) struct msi_map ice_alloc_irq(struct ice_pf *pf) { struct msi_map map = { .index = -ENOENT }; - int entry; + struct ice_irq_entry *entry; - entry = ice_get_res(pf, pf->irq_tracker); - if (entry < 0) + entry = ice_get_irq_res(pf); + if (!entry) return map; - map.index = entry; + map.index = entry->index; map.virq = pci_irq_vector(pf->pdev, map.index); return map; @@ -238,9 +291,9 @@ struct msi_map ice_alloc_irq(struct ice_pf *pf) * @pf: board private structure * @map: map with interrupt details * - * Remove allocated interrupt from the interrupt tracker + * Remove allocated interrupt from the interrupt tracker. */ void ice_free_irq(struct ice_pf *pf, struct msi_map map) { - ice_free_res(pf->irq_tracker, map.index); + ice_free_irq_res(pf, map.index); } diff --git a/drivers/net/ethernet/intel/ice/ice_irq.h b/drivers/net/ethernet/intel/ice/ice_irq.h index 26e80dfe22b5..da5cdb1f0d3a 100644 --- a/drivers/net/ethernet/intel/ice/ice_irq.h +++ b/drivers/net/ethernet/intel/ice/ice_irq.h @@ -4,6 +4,15 @@ #ifndef _ICE_IRQ_H_ #define _ICE_IRQ_H_ +struct ice_irq_entry { + unsigned int index; +}; + +struct ice_irq_tracker { + struct xarray entries; + u16 num_entries; /* total vectors available */ +}; + int ice_init_interrupt_scheme(struct ice_pf *pf); void ice_clear_interrupt_scheme(struct ice_pf *pf); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index fe908cf6da6a..387bb9cbafbe 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1370,51 +1370,6 @@ out: return ret; } -/** - * ice_free_res - free a block of resources - * @res: pointer to the resource - * @index: starting index previously returned by ice_get_res - * - * Returns number of resources freed - */ 
-int ice_free_res(struct ice_res_tracker *res, u16 index) -{ - if (!res || index >= res->end) - return -EINVAL; - - res->list[index] = 0; - - return 0; -} - -/** - * ice_get_res - get a resource from the tracker - * @pf: board private structure - * @res: pointer to the resource - * - * Returns the item index, or negative for error - */ -int -ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res) -{ - u16 i; - - if (!res || !pf) - return -EINVAL; - - /* skip already allocated entries */ - for (i = 0; i < res->end; i++) - if (!(res->list[i] & ICE_RES_VALID_BIT)) - break; - - if (i < res->end) { - res->list[i] = ICE_RES_VALID_BIT; - return i; - } else { - return -ENOMEM; - } -} - /** * ice_vsi_clear_rings - Deallocates the Tx and Rx rings for VSI * @vsi: the VSI having rings deallocated diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 2f52f9e32858..e985766e6bb5 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -104,11 +104,6 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked); void ice_vsi_decfg(struct ice_vsi *vsi); void ice_dis_vsi(struct ice_vsi *vsi, bool locked); -int ice_free_res(struct ice_res_tracker *res, u16 index); - -int -ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res); - int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags); int ice_vsi_cfg(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params); diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 0fc2b26a2fa6..195105ce9039 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -418,7 +418,7 @@ int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) { u16 total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; - int vectors_used = pf->irq_tracker->num_entries; + int 
vectors_used = pf->irq_tracker.num_entries; int sriov_base_vector; sriov_base_vector = total_vectors - num_msix_needed; @@ -470,7 +470,7 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) /* determine MSI-X resources per VF */ msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors - - pf->irq_tracker->num_entries; + pf->irq_tracker.num_entries; msix_avail_per_vf = msix_avail_for_sriov / num_vfs; if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) { num_msix_per_vf = ICE_NUM_VF_MSIX_MED; -- cgit v1.2.3 From 011670cc340cbc1131677fe233b1a52acee969ee Mon Sep 17 00:00:00 2001 From: Piotr Raczynski Date: Mon, 15 May 2023 21:03:19 +0200 Subject: ice: add dynamic interrupt allocation Currently the driver can only allocate interrupt vectors during the init phase by calling pci_alloc_irq_vectors. Change that and make use of the new pci_msix_alloc_irq_at/pci_msix_free_irq API to allow allocating and freeing more interrupts after MSIX has been enabled. Since not all platforms support dynamic allocation, check for it with pci_msix_can_alloc_dyn. Extend the tracker to keep track of how many interrupts are allocated initially so that when all such vectors are already used, additional interrupts are automatically allocated dynamically. Remember each interrupt's allocation method so it can be freed appropriately. Since some features may require dynamically allocated interrupts, add an appropriate VSI flag and take it into account when allocating a new interrupt.
Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Piotr Raczynski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 3 + drivers/net/ethernet/intel/ice/ice_base.c | 2 +- drivers/net/ethernet/intel/ice/ice_idc.c | 2 +- drivers/net/ethernet/intel/ice/ice_irq.c | 109 +++++++++++++++++++++++++---- drivers/net/ethernet/intel/ice/ice_irq.h | 5 +- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- drivers/net/ethernet/intel/ice/ice_sriov.c | 5 +- 7 files changed, 107 insertions(+), 21 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 8541d986ec7f..d637032c8139 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -338,6 +338,9 @@ struct ice_vsi { u32 rx_buf_failed; u32 rx_page_failed; u16 num_q_vectors; + /* tell if only dynamic irq allocation is allowed */ + bool irq_dyn_alloc; + enum ice_vsi_type type; u16 vsi_num; /* HW (absolute) index of this VSI */ u16 idx; /* software index in pf->vsi[] */ diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index cb0913cb9741..4a12316f7b46 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -137,7 +137,7 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) } } - q_vector->irq = ice_alloc_irq(pf); + q_vector->irq = ice_alloc_irq(pf, vsi->irq_dyn_alloc); if (q_vector->irq.index < 0) { err = -ENOMEM; goto err_free_q_vector; diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index bc016bb4440c..145b27f2a4ce 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -250,7 +250,7 @@ static int ice_alloc_rdma_qvectors(struct ice_pf *pf) struct msix_entry *entry = &pf->msix_entries[i]; struct 
msi_map map; - map = ice_alloc_irq(pf); + map = ice_alloc_irq(pf, false); if (map.index < 0) break; diff --git a/drivers/net/ethernet/intel/ice/ice_irq.c b/drivers/net/ethernet/intel/ice/ice_irq.c index 1713347c577f..ad82ff7d1995 100644 --- a/drivers/net/ethernet/intel/ice/ice_irq.c +++ b/drivers/net/ethernet/intel/ice/ice_irq.c @@ -9,11 +9,14 @@ * ice_init_irq_tracker - initialize interrupt tracker * @pf: board private structure * @max_vectors: maximum number of vectors that tracker can hold + * @num_static: number of preallocated interrupts */ static void -ice_init_irq_tracker(struct ice_pf *pf, unsigned int max_vectors) +ice_init_irq_tracker(struct ice_pf *pf, unsigned int max_vectors, + unsigned int num_static) { pf->irq_tracker.num_entries = max_vectors; + pf->irq_tracker.num_static = num_static; xa_init_flags(&pf->irq_tracker.entries, XA_FLAGS_ALLOC); } @@ -42,6 +45,7 @@ static void ice_free_irq_res(struct ice_pf *pf, u16 index) /** * ice_get_irq_res - get an interrupt resource * @pf: board private structure + * @dyn_only: force entry to be dynamically allocated * * Allocate new irq entry in the free slot of the tracker. Since xarray * is used, always allocate new entry at the lowest possible index. Set @@ -49,10 +53,11 @@ static void ice_free_irq_res(struct ice_pf *pf, u16 index) * * Returns allocated irq entry or NULL on failure. 
*/ -static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf) +static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf, bool dyn_only) { struct xa_limit limit = { .max = pf->irq_tracker.num_entries, .min = 0 }; + unsigned int num_static = pf->irq_tracker.num_static; struct ice_irq_entry *entry; unsigned int index; int ret; @@ -61,6 +66,10 @@ static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf) if (!entry) return NULL; + /* skip preallocated entries if the caller says so */ + if (dyn_only) + limit.min = num_static; + ret = xa_alloc(&pf->irq_tracker.entries, &index, entry, limit, GFP_KERNEL); @@ -69,6 +78,7 @@ static struct ice_irq_entry *ice_get_irq_res(struct ice_pf *pf) entry = NULL; } else { entry->index = index; + entry->dynamic = index >= num_static; } return entry; @@ -241,14 +251,20 @@ void ice_clear_interrupt_scheme(struct ice_pf *pf) */ int ice_init_interrupt_scheme(struct ice_pf *pf) { - int vectors; + int total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; + int vectors, max_vectors; vectors = ice_ena_msix_range(pf); if (vectors < 0) - return vectors; + return -ENOMEM; + + if (pci_msix_can_alloc_dyn(pf->pdev)) + max_vectors = total_vectors; + else + max_vectors = vectors; - ice_init_irq_tracker(pf, vectors); + ice_init_irq_tracker(pf, max_vectors, vectors); return 0; } @@ -256,33 +272,55 @@ int ice_init_interrupt_scheme(struct ice_pf *pf) /** * ice_alloc_irq - Allocate new interrupt vector * @pf: board private structure + * @dyn_only: force dynamic allocation of the interrupt * * Allocate new interrupt vector for a given owner id. * return struct msi_map with interrupt details and track * allocated interrupt appropriately. * - * This function mimics individual interrupt allocation, - * even interrupts are actually already allocated with - * pci_alloc_irq_vectors. Individual allocation helps - * to track interrupts and simplifies interrupt related - * handling. + * This function reserves new irq entry from the irq_tracker. 
+ * if according to the tracker information all interrupts that + * were allocated with ice_pci_alloc_irq_vectors are already used + * and dynamically allocated interrupts are supported then new + * interrupt will be allocated with pci_msix_alloc_irq_at. + * + * Some callers may only support dynamically allocated interrupts. + * This is indicated with dyn_only flag. * * On failure, return map with negative .index. The caller * is expected to check returned map index. * */ -struct msi_map ice_alloc_irq(struct ice_pf *pf) +struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_only) { + int sriov_base_vector = pf->sriov_base_vector; struct msi_map map = { .index = -ENOENT }; + struct device *dev = ice_pf_to_dev(pf); struct ice_irq_entry *entry; - entry = ice_get_irq_res(pf); + entry = ice_get_irq_res(pf, dyn_only); if (!entry) return map; - map.index = entry->index; - map.virq = pci_irq_vector(pf->pdev, map.index); + /* fail if we're about to violate SRIOV vectors space */ + if (sriov_base_vector && entry->index >= sriov_base_vector) + goto exit_free_res; + + if (pci_msix_can_alloc_dyn(pf->pdev) && entry->dynamic) { + map = pci_msix_alloc_irq_at(pf->pdev, entry->index, NULL); + if (map.index < 0) + goto exit_free_res; + dev_dbg(dev, "allocated new irq at index %d\n", map.index); + } else { + map.index = entry->index; + map.virq = pci_irq_vector(pf->pdev, map.index); + } + + return map; +exit_free_res: + dev_err(dev, "Could not allocate irq at idx %d\n", entry->index); + ice_free_irq_res(pf, entry->index); return map; } @@ -291,9 +329,50 @@ struct msi_map ice_alloc_irq(struct ice_pf *pf) * @pf: board private structure * @map: map with interrupt details * - * Remove allocated interrupt from the interrupt tracker. + * Remove allocated interrupt from the interrupt tracker. If interrupt was + * allocated dynamically, free respective interrupt vector. 
*/ void ice_free_irq(struct ice_pf *pf, struct msi_map map) { + struct ice_irq_entry *entry; + + entry = xa_load(&pf->irq_tracker.entries, map.index); + + if (!entry) { + dev_err(ice_pf_to_dev(pf), "Failed to get MSIX interrupt entry at index %d", + map.index); + return; + } + + dev_dbg(ice_pf_to_dev(pf), "Free irq at index %d\n", map.index); + + if (entry->dynamic) + pci_msix_free_irq(pf->pdev, map); + ice_free_irq_res(pf, map.index); } + +/** + * ice_get_max_used_msix_vector - Get the max used interrupt vector + * @pf: board private structure + * + * Return index of maximum used interrupt vectors with respect to the + * beginning of the MSIX table. Take into account that some interrupts + * may have been dynamically allocated after MSIX was initially enabled. + */ +int ice_get_max_used_msix_vector(struct ice_pf *pf) +{ + unsigned long start, index, max_idx; + void *entry; + + /* Treat all preallocated interrupts as used */ + start = pf->irq_tracker.num_static; + max_idx = start - 1; + + xa_for_each_start(&pf->irq_tracker.entries, index, entry, start) { + if (index > max_idx) + max_idx = index; + } + + return max_idx; +} diff --git a/drivers/net/ethernet/intel/ice/ice_irq.h b/drivers/net/ethernet/intel/ice/ice_irq.h index da5cdb1f0d3a..f35efc08575e 100644 --- a/drivers/net/ethernet/intel/ice/ice_irq.h +++ b/drivers/net/ethernet/intel/ice/ice_irq.h @@ -6,17 +6,20 @@ struct ice_irq_entry { unsigned int index; + bool dynamic; /* allocation type flag */ }; struct ice_irq_tracker { struct xarray entries; u16 num_entries; /* total vectors available */ + u16 num_static; /* preallocated entries */ }; int ice_init_interrupt_scheme(struct ice_pf *pf); void ice_clear_interrupt_scheme(struct ice_pf *pf); -struct msi_map ice_alloc_irq(struct ice_pf *pf); +struct msi_map ice_alloc_irq(struct ice_pf *pf, bool dyn_only); void ice_free_irq(struct ice_pf *pf, struct msi_map map); +int ice_get_max_used_msix_vector(struct ice_pf *pf); #endif diff --git 
a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index efc621c0bd6c..62e91512aeab 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3305,7 +3305,7 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) goto skip_req_irq; /* reserve one vector in irq_tracker for misc interrupts */ - oicr_irq = ice_alloc_irq(pf); + oicr_irq = ice_alloc_irq(pf, false); if (oicr_irq.index < 0) return oicr_irq.index; diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 195105ce9039..80c643fb9f2f 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -418,7 +418,7 @@ int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) { u16 total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; - int vectors_used = pf->irq_tracker.num_entries; + int vectors_used = ice_get_max_used_msix_vector(pf); int sriov_base_vector; sriov_base_vector = total_vectors - num_msix_needed; @@ -458,6 +458,7 @@ static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) */ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) { + int vectors_used = ice_get_max_used_msix_vector(pf); u16 num_msix_per_vf, num_txq, num_rxq, avail_qs; int msix_avail_per_vf, msix_avail_for_sriov; struct device *dev = ice_pf_to_dev(pf); @@ -470,7 +471,7 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs) /* determine MSI-X resources per VF */ msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors - - pf->irq_tracker.num_entries; + vectors_used; msix_avail_per_vf = msix_avail_for_sriov / num_vfs; if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) { num_msix_per_vf = ICE_NUM_VF_MSIX_MED; -- cgit v1.2.3 From 030d71fd93b1f0fe6e844c1d790f70c80d828c79 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Tue, 
16 May 2023 17:10:31 +0530 Subject: octeontx2-pf: mcs: Support VLAN in clear text Detect whether macsec secy is running on top of VLAN which implies transmitting VLAN tag in clear text before macsec SecTag. In this case configure hardware to insert SecTag after VLAN tag. Signed-off-by: Subbaraya Sundeep Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c | 8 ++++++-- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index b59532cf53ce..6e2fb24be8c1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -426,13 +426,16 @@ static int cn10k_mcs_write_tx_secy(struct otx2_nic *pfvf, struct mcs_secy_plcy_write_req *req; struct mbox *mbox = &pfvf->mbox; struct macsec_tx_sc *sw_tx_sc; - /* Insert SecTag after 12 bytes (DA+SA)*/ - u8 tag_offset = 12; u8 sectag_tci = 0; + u8 tag_offset; u64 policy; u8 cipher; int ret; + /* Insert SecTag after 12 bytes (DA+SA) or 16 bytes + * if VLAN tag needs to be sent in clear text. + */ + tag_offset = txsc->vlan_dev ? 
16 : 12; sw_tx_sc = &secy->tx_sc; mutex_lock(&mbox->lock); @@ -1163,6 +1166,7 @@ static int cn10k_mdo_add_secy(struct macsec_context *ctx) txsc->encoding_sa = secy->tx_sc.encoding_sa; txsc->last_validate_frames = secy->validate_frames; txsc->last_replay_protect = secy->replay_protect; + txsc->vlan_dev = is_vlan_dev(ctx->netdev); list_add(&txsc->entry, &cfg->txsc_list); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 0f2b2a901225..b2267c8bec37 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -419,6 +419,7 @@ struct cn10k_mcs_txsc { u8 encoding_sa; u8 salt[CN10K_MCS_SA_PER_SC][MACSEC_SALT_LEN]; ssci_t ssci[CN10K_MCS_SA_PER_SC]; + bool vlan_dev; /* macsec running on VLAN ? */ }; struct cn10k_mcs_rxsc { -- cgit v1.2.3 From 578fb0926c127d1b11b01b0102605efde9be9f41 Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Wed, 26 Apr 2023 11:50:45 -0700 Subject: ice: update ICE_PHY_TYPE_HIGH_MAX_INDEX ICE_PHY_TYPE_HIGH_MAX_INDEX should be the maximum index value and not the length/number of ICE_PHY_TYPE_HIGH. This is not an issue because this define is only used when calling ice_get_link_speed_based_on_phy_type(), which will return ICE_AQ_LINK_SPEED_UNKNOWN for any invalid index. The caller of ice_get_link_speed_based_on_phy_type(), ice_update_phy_type() checks that the return value is a valid link speed before using it and ICE_AQ_LINK_SPEED_UNKNOWN is not. However, update the define to reflect the correct value. 
Signed-off-by: Paul Greenwalt Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 838d9b274d68..63d3e1dcbba5 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -1087,7 +1087,7 @@ struct ice_aqc_get_phy_caps { #define ICE_PHY_TYPE_HIGH_100G_CAUI2 BIT_ULL(2) #define ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC BIT_ULL(3) #define ICE_PHY_TYPE_HIGH_100G_AUI2 BIT_ULL(4) -#define ICE_PHY_TYPE_HIGH_MAX_INDEX 5 +#define ICE_PHY_TYPE_HIGH_MAX_INDEX 4 struct ice_aqc_get_phy_caps_data { __le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */ -- cgit v1.2.3 From 9136e1f1e5c3f3409b09764556f33fd6353cad60 Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Wed, 26 Apr 2023 11:50:46 -0700 Subject: ice: refactor PHY type to ethtool link mode Refactor ice_phy_type_to_ethtool to use phy_type_[low|high]_lkup table to map PHY type to AQ link speed and ethtool link mode. This removes complexity and simplifies future changes. 
Signed-off-by: Paul Greenwalt Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_ethtool.c | 309 +++------------------------ drivers/net/ethernet/intel/ice/ice_ethtool.h | 105 +++++++++ 3 files changed, 141 insertions(+), 274 deletions(-) create mode 100644 drivers/net/ethernet/intel/ice/ice_ethtool.h (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index d637032c8139..8b016511561f 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 8407c7175cf6..8d5cbbd0b3d5 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -4,6 +4,7 @@ /* ethtool support for ice */ #include "ice.h" +#include "ice_ethtool.h" #include "ice_flow.h" #include "ice_fltr.h" #include "ice_lib.h" @@ -1658,15 +1659,26 @@ ice_mask_min_supported_speeds(struct ice_hw *hw, *phy_types_low &= ~ICE_PHY_TYPE_LOW_MASK_MIN_1G; } -#define ice_ethtool_advertise_link_mode(aq_link_speed, ethtool_link_mode) \ - do { \ - if (req_speeds & (aq_link_speed) || \ - (!req_speeds && \ - (advert_phy_type_lo & phy_type_mask_lo || \ - advert_phy_type_hi & phy_type_mask_hi))) \ - ethtool_link_ksettings_add_link_mode(ks, advertising,\ - ethtool_link_mode); \ - } while (0) +/** + * ice_linkmode_set_bit - set link mode bit + * @phy_to_ethtool: PHY type to ethtool link mode struct to set + * @ks: ethtool link ksettings struct to fill out + * @req_speeds: speed requested by user + * @advert_phy_type: advertised PHY type + * @phy_type: PHY type + */ +static void +ice_linkmode_set_bit(const struct ice_phy_type_to_ethtool *phy_to_ethtool, + struct 
ethtool_link_ksettings *ks, u32 req_speeds, + u64 advert_phy_type, u32 phy_type) +{ + linkmode_set_bit(phy_to_ethtool->link_mode, ks->link_modes.supported); + + if (req_speeds & phy_to_ethtool->aq_link_speed || + (!req_speeds && advert_phy_type & BIT(phy_type))) + linkmode_set_bit(phy_to_ethtool->link_mode, + ks->link_modes.advertising); +} /** * ice_phy_type_to_ethtool - convert the phy_types to ethtool link modes @@ -1682,11 +1694,10 @@ ice_phy_type_to_ethtool(struct net_device *netdev, struct ice_pf *pf = vsi->back; u64 advert_phy_type_lo = 0; u64 advert_phy_type_hi = 0; - u64 phy_type_mask_lo = 0; - u64 phy_type_mask_hi = 0; u64 phy_types_high = 0; u64 phy_types_low = 0; - u16 req_speeds; + u32 req_speeds; + u32 i; req_speeds = vsi->port_info->phy.link_info.req_speeds; @@ -1743,272 +1754,22 @@ ice_phy_type_to_ethtool(struct net_device *netdev, advert_phy_type_hi = vsi->port_info->phy.phy_type_high; } - ethtool_link_ksettings_zero_link_mode(ks, supported); - ethtool_link_ksettings_zero_link_mode(ks, advertising); - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_100BASE_TX | - ICE_PHY_TYPE_LOW_100M_SGMII; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100baseT_Full); - - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100MB, - 100baseT_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_T | - ICE_PHY_TYPE_LOW_1G_SGMII; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 1000baseT_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB, - 1000baseT_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_KX; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 1000baseKX_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB, - 1000baseKX_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_1000BASE_SX | - ICE_PHY_TYPE_LOW_1000BASE_LX; - if (phy_types_low & phy_type_mask_lo) { - 
ethtool_link_ksettings_add_link_mode(ks, supported, - 1000baseX_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_1000MB, - 1000baseX_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_2500BASE_T; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 2500baseT_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_2500MB, - 2500baseT_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_2500BASE_X | - ICE_PHY_TYPE_LOW_2500BASE_KX; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 2500baseX_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_2500MB, - 2500baseX_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_5GBASE_T | - ICE_PHY_TYPE_LOW_5GBASE_KR; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 5000baseT_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_5GB, - 5000baseT_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_T | - ICE_PHY_TYPE_LOW_10G_SFI_DA | - ICE_PHY_TYPE_LOW_10G_SFI_AOC_ACC | - ICE_PHY_TYPE_LOW_10G_SFI_C2C; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseT_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB, - 10000baseT_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_KR_CR1; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseKR_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB, - 10000baseKR_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_SR; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseSR_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB, - 10000baseSR_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_10GBASE_LR; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 10000baseLR_Full); - 
ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_10GB, - 10000baseLR_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_T | - ICE_PHY_TYPE_LOW_25GBASE_CR | - ICE_PHY_TYPE_LOW_25GBASE_CR_S | - ICE_PHY_TYPE_LOW_25GBASE_CR1 | - ICE_PHY_TYPE_LOW_25G_AUI_AOC_ACC | - ICE_PHY_TYPE_LOW_25G_AUI_C2C; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 25000baseCR_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB, - 25000baseCR_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_SR | - ICE_PHY_TYPE_LOW_25GBASE_LR; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 25000baseSR_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB, - 25000baseSR_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_25GBASE_KR | - ICE_PHY_TYPE_LOW_25GBASE_KR_S | - ICE_PHY_TYPE_LOW_25GBASE_KR1; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 25000baseKR_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_25GB, - 25000baseKR_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_KR4; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseKR4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB, - 40000baseKR4_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_CR4 | - ICE_PHY_TYPE_LOW_40G_XLAUI_AOC_ACC | - ICE_PHY_TYPE_LOW_40G_XLAUI; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseCR4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB, - 40000baseCR4_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_SR4; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseSR4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB, - 40000baseSR4_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_40GBASE_LR4; - if 
(phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 40000baseLR4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_40GB, - 40000baseLR4_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_CR2 | - ICE_PHY_TYPE_LOW_50G_LAUI2_AOC_ACC | - ICE_PHY_TYPE_LOW_50G_LAUI2 | - ICE_PHY_TYPE_LOW_50G_AUI2_AOC_ACC | - ICE_PHY_TYPE_LOW_50G_AUI2 | - ICE_PHY_TYPE_LOW_50GBASE_CP | - ICE_PHY_TYPE_LOW_50GBASE_SR | - ICE_PHY_TYPE_LOW_50G_AUI1_AOC_ACC | - ICE_PHY_TYPE_LOW_50G_AUI1; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 50000baseCR2_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB, - 50000baseCR2_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_KR2 | - ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 50000baseKR2_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB, - 50000baseKR2_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_50GBASE_SR2 | - ICE_PHY_TYPE_LOW_50GBASE_LR2 | - ICE_PHY_TYPE_LOW_50GBASE_FR | - ICE_PHY_TYPE_LOW_50GBASE_LR; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 50000baseSR2_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_50GB, - 50000baseSR2_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_CR4 | - ICE_PHY_TYPE_LOW_100G_CAUI4_AOC_ACC | - ICE_PHY_TYPE_LOW_100G_CAUI4 | - ICE_PHY_TYPE_LOW_100G_AUI4_AOC_ACC | - ICE_PHY_TYPE_LOW_100G_AUI4 | - ICE_PHY_TYPE_LOW_100GBASE_CR_PAM4; - phy_type_mask_hi = ICE_PHY_TYPE_HIGH_100G_CAUI2_AOC_ACC | - ICE_PHY_TYPE_HIGH_100G_CAUI2 | - ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC | - ICE_PHY_TYPE_HIGH_100G_AUI2; - if (phy_types_low & phy_type_mask_lo || - phy_types_high & phy_type_mask_hi) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseCR4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB, - 
100000baseCR4_Full); - } - - if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CP2) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseCR2_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB, - 100000baseCR2_Full); - } - - if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR4) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseSR4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB, - 100000baseSR4_Full); - } - - if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR2) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseSR2_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB, - 100000baseSR2_Full); - } - - phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_LR4 | - ICE_PHY_TYPE_LOW_100GBASE_DR; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseLR4_ER4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB, - 100000baseLR4_ER4_Full); - } + linkmode_zero(ks->link_modes.supported); + linkmode_zero(ks->link_modes.advertising); - phy_type_mask_lo = ICE_PHY_TYPE_LOW_100GBASE_KR4 | - ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4; - if (phy_types_low & phy_type_mask_lo) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseKR4_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB, - 100000baseKR4_Full); + for (i = 0; i < BITS_PER_TYPE(u64); i++) { + if (phy_types_low & BIT_ULL(i)) + ice_linkmode_set_bit(&phy_type_low_lkup[i], ks, + req_speeds, advert_phy_type_lo, + i); } - if (phy_types_high & ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4) { - ethtool_link_ksettings_add_link_mode(ks, supported, - 100000baseKR2_Full); - ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB, - 100000baseKR2_Full); + for (i = 0; i < BITS_PER_TYPE(u64); i++) { + if (phy_types_high & BIT_ULL(i)) + ice_linkmode_set_bit(&phy_type_high_lkup[i], ks, + req_speeds, advert_phy_type_hi, + i); } - } #define TEST_SET_BITS_TIMEOUT 50 diff --git 
a/drivers/net/ethernet/intel/ice/ice_ethtool.h b/drivers/net/ethernet/intel/ice/ice_ethtool.h new file mode 100644 index 000000000000..00043ea9469a --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2023 Intel Corporation */ + +#ifndef _ICE_ETHTOOL_H_ +#define _ICE_ETHTOOL_H_ + +struct ice_phy_type_to_ethtool { + u64 aq_link_speed; + u8 link_mode; +}; + +/* Macro to make PHY type to Ethtool link mode table entry. + * The index is the PHY type. + */ +#define ICE_PHY_TYPE(LINK_SPEED, ETHTOOL_LINK_MODE) {\ + .aq_link_speed = ICE_AQ_LINK_SPEED_##LINK_SPEED, \ + .link_mode = ETHTOOL_LINK_MODE_##ETHTOOL_LINK_MODE##_BIT, \ +} + +/* Lookup table mapping PHY type low to link speed and Ethtool link modes. + * Array index corresponds to HW PHY type bit, see + * ice_adminq_cmd.h:ICE_PHY_TYPE_LOW_*. + */ +static const struct ice_phy_type_to_ethtool +phy_type_low_lkup[] = { + [0] = ICE_PHY_TYPE(100MB, 100baseT_Full), + [1] = ICE_PHY_TYPE(100MB, 100baseT_Full), + [2] = ICE_PHY_TYPE(1000MB, 1000baseT_Full), + [3] = ICE_PHY_TYPE(1000MB, 1000baseX_Full), + [4] = ICE_PHY_TYPE(1000MB, 1000baseX_Full), + [5] = ICE_PHY_TYPE(1000MB, 1000baseKX_Full), + [6] = ICE_PHY_TYPE(1000MB, 1000baseT_Full), + [7] = ICE_PHY_TYPE(2500MB, 2500baseT_Full), + [8] = ICE_PHY_TYPE(2500MB, 2500baseX_Full), + [9] = ICE_PHY_TYPE(2500MB, 2500baseX_Full), + [10] = ICE_PHY_TYPE(5GB, 5000baseT_Full), + [11] = ICE_PHY_TYPE(5GB, 5000baseT_Full), + [12] = ICE_PHY_TYPE(10GB, 10000baseT_Full), + [13] = ICE_PHY_TYPE(10GB, 10000baseT_Full), + [14] = ICE_PHY_TYPE(10GB, 10000baseSR_Full), + [15] = ICE_PHY_TYPE(10GB, 10000baseLR_Full), + [16] = ICE_PHY_TYPE(10GB, 10000baseKR_Full), + [17] = ICE_PHY_TYPE(10GB, 10000baseT_Full), + [18] = ICE_PHY_TYPE(10GB, 10000baseKR_Full), + [19] = ICE_PHY_TYPE(25GB, 25000baseCR_Full), + [20] = ICE_PHY_TYPE(25GB, 25000baseCR_Full), + [21] = ICE_PHY_TYPE(25GB, 25000baseCR_Full), + [22] = 
ICE_PHY_TYPE(25GB, 25000baseCR_Full), + [23] = ICE_PHY_TYPE(25GB, 25000baseSR_Full), + [24] = ICE_PHY_TYPE(25GB, 25000baseSR_Full), + [25] = ICE_PHY_TYPE(25GB, 25000baseKR_Full), + [26] = ICE_PHY_TYPE(25GB, 25000baseKR_Full), + [27] = ICE_PHY_TYPE(25GB, 25000baseKR_Full), + [28] = ICE_PHY_TYPE(25GB, 25000baseCR_Full), + [29] = ICE_PHY_TYPE(25GB, 25000baseKR_Full), + [30] = ICE_PHY_TYPE(40GB, 40000baseCR4_Full), + [31] = ICE_PHY_TYPE(40GB, 40000baseSR4_Full), + [32] = ICE_PHY_TYPE(40GB, 40000baseLR4_Full), + [33] = ICE_PHY_TYPE(40GB, 40000baseKR4_Full), + [34] = ICE_PHY_TYPE(40GB, 40000baseCR4_Full), + [35] = ICE_PHY_TYPE(40GB, 40000baseCR4_Full), + [36] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), + [37] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full), + [38] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full), + [39] = ICE_PHY_TYPE(50GB, 50000baseKR2_Full), + [40] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), + [41] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), + [42] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), + [43] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), + [44] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), + [45] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), + [46] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full), + [47] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full), + [48] = ICE_PHY_TYPE(50GB, 50000baseKR2_Full), + [49] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), + [50] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), + [51] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [52] = ICE_PHY_TYPE(100GB, 100000baseSR4_Full), + [53] = ICE_PHY_TYPE(100GB, 100000baseLR4_ER4_Full), + [54] = ICE_PHY_TYPE(100GB, 100000baseKR4_Full), + [55] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [56] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [57] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [58] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [59] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [60] = ICE_PHY_TYPE(100GB, 100000baseKR4_Full), + [61] = ICE_PHY_TYPE(100GB, 100000baseCR2_Full), + [62] = ICE_PHY_TYPE(100GB, 100000baseSR2_Full), + [63] = 
ICE_PHY_TYPE(100GB, 100000baseLR4_ER4_Full), +}; + +/* Lookup table mapping PHY type high to link speed and Ethtool link modes. + * Array index corresponds to HW PHY type bit, see + * ice_adminq_cmd.h:ICE_PHY_TYPE_HIGH_* + */ +static const struct ice_phy_type_to_ethtool +phy_type_high_lkup[] = { + [0] = ICE_PHY_TYPE(100GB, 100000baseKR2_Full), + [1] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [2] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [3] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [4] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), +}; + +#endif /* !_ICE_ETHTOOL_H_ */ -- cgit v1.2.3 From 49eb1c1f2f05fe948b209ad359283355d3428d89 Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Wed, 26 Apr 2023 11:50:47 -0700 Subject: ice: update PHY type to ethtool link mode mapping Some link modes can be more accurately reported due to newer link mode values that have been added to the kernel; update those PHY types to report modes that better reflect the link mode. Signed-off-by: Paul Greenwalt Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ethtool.h | 38 ++++++++++++++-------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.h b/drivers/net/ethernet/intel/ice/ice_ethtool.h index 00043ea9469a..b403ee79cd5e 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.h +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.h @@ -36,11 +36,11 @@ phy_type_low_lkup[] = { [10] = ICE_PHY_TYPE(5GB, 5000baseT_Full), [11] = ICE_PHY_TYPE(5GB, 5000baseT_Full), [12] = ICE_PHY_TYPE(10GB, 10000baseT_Full), - [13] = ICE_PHY_TYPE(10GB, 10000baseT_Full), + [13] = ICE_PHY_TYPE(10GB, 10000baseCR_Full), [14] = ICE_PHY_TYPE(10GB, 10000baseSR_Full), [15] = ICE_PHY_TYPE(10GB, 10000baseLR_Full), [16] = ICE_PHY_TYPE(10GB, 10000baseKR_Full), - [17] = ICE_PHY_TYPE(10GB, 10000baseT_Full), + [17] = ICE_PHY_TYPE(10GB,
10000baseCR_Full), [18] = ICE_PHY_TYPE(10GB, 10000baseKR_Full), [19] = ICE_PHY_TYPE(25GB, 25000baseCR_Full), [20] = ICE_PHY_TYPE(25GB, 25000baseCR_Full), @@ -51,36 +51,36 @@ phy_type_low_lkup[] = { [25] = ICE_PHY_TYPE(25GB, 25000baseKR_Full), [26] = ICE_PHY_TYPE(25GB, 25000baseKR_Full), [27] = ICE_PHY_TYPE(25GB, 25000baseKR_Full), - [28] = ICE_PHY_TYPE(25GB, 25000baseCR_Full), - [29] = ICE_PHY_TYPE(25GB, 25000baseKR_Full), + [28] = ICE_PHY_TYPE(25GB, 25000baseSR_Full), + [29] = ICE_PHY_TYPE(25GB, 25000baseCR_Full), [30] = ICE_PHY_TYPE(40GB, 40000baseCR4_Full), [31] = ICE_PHY_TYPE(40GB, 40000baseSR4_Full), [32] = ICE_PHY_TYPE(40GB, 40000baseLR4_Full), [33] = ICE_PHY_TYPE(40GB, 40000baseKR4_Full), - [34] = ICE_PHY_TYPE(40GB, 40000baseCR4_Full), + [34] = ICE_PHY_TYPE(40GB, 40000baseSR4_Full), [35] = ICE_PHY_TYPE(40GB, 40000baseCR4_Full), [36] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), [37] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full), [38] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full), [39] = ICE_PHY_TYPE(50GB, 50000baseKR2_Full), - [40] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), + [40] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full), [41] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), - [42] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), + [42] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full), [43] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), - [44] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), - [45] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), - [46] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full), - [47] = ICE_PHY_TYPE(50GB, 50000baseSR2_Full), - [48] = ICE_PHY_TYPE(50GB, 50000baseKR2_Full), - [49] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), - [50] = ICE_PHY_TYPE(50GB, 50000baseCR2_Full), + [44] = ICE_PHY_TYPE(50GB, 50000baseCR_Full), + [45] = ICE_PHY_TYPE(50GB, 50000baseSR_Full), + [46] = ICE_PHY_TYPE(50GB, 50000baseLR_ER_FR_Full), + [47] = ICE_PHY_TYPE(50GB, 50000baseLR_ER_FR_Full), + [48] = ICE_PHY_TYPE(50GB, 50000baseKR_Full), + [49] = ICE_PHY_TYPE(50GB, 50000baseSR_Full), + [50] = ICE_PHY_TYPE(50GB, 50000baseCR_Full), [51] = 
ICE_PHY_TYPE(100GB, 100000baseCR4_Full), [52] = ICE_PHY_TYPE(100GB, 100000baseSR4_Full), [53] = ICE_PHY_TYPE(100GB, 100000baseLR4_ER4_Full), [54] = ICE_PHY_TYPE(100GB, 100000baseKR4_Full), [55] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), [56] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), - [57] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [57] = ICE_PHY_TYPE(100GB, 100000baseSR4_Full), [58] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), [59] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), [60] = ICE_PHY_TYPE(100GB, 100000baseKR4_Full), @@ -96,10 +96,10 @@ phy_type_low_lkup[] = { static const struct ice_phy_type_to_ethtool phy_type_high_lkup[] = { [0] = ICE_PHY_TYPE(100GB, 100000baseKR2_Full), - [1] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), - [2] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), - [3] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), - [4] = ICE_PHY_TYPE(100GB, 100000baseCR4_Full), + [1] = ICE_PHY_TYPE(100GB, 100000baseSR2_Full), + [2] = ICE_PHY_TYPE(100GB, 100000baseCR2_Full), + [3] = ICE_PHY_TYPE(100GB, 100000baseSR2_Full), + [4] = ICE_PHY_TYPE(100GB, 100000baseCR2_Full), }; #endif /* !_ICE_ETHTOOL_H_ */ -- cgit v1.2.3 From 1c769b1a303f7a3b447fc7244340b77823bdbfdc Mon Sep 17 00:00:00 2001 From: Dave Ertman Date: Tue, 16 May 2023 13:30:55 +0200 Subject: ice: Remove LAG+SRIOV mutual exclusion There was a change previously to stop SR-IOV and LAG from existing on the same interface. This was to prevent the violation of LACP (Link Aggregation Control Protocol). The method to achieve this was to add a no-op Rx handler onto the netdev when SR-IOV VFs were present, thus blocking bonding, bridging, etc. from claiming the interface by adding its own Rx handler. Also, when an interface was added into an aggregate, then the SR-IOV capability was set to false. There are some users that have in-house solutions using both SR-IOV and bridging/bonding that this method interferes with (e.g.
creating duplicate VFs on the bonded interfaces and failing between them when the interface fails over). It makes more sense to provide the most functionality possible, so the restriction on co-existence of these features will be removed. No additional functionality is currently being provided beyond what existed before the co-existence restriction was put into place. It is up to the end user to not implement a solution that would interfere with existing network protocols. Reviewed-by: Michal Swiatkowski Signed-off-by: Dave Ertman Signed-off-by: Wojciech Drewek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- .../device_drivers/ethernet/intel/ice.rst | 18 -------- drivers/net/ethernet/intel/ice/ice.h | 19 -------- drivers/net/ethernet/intel/ice/ice_lag.c | 12 ----- drivers/net/ethernet/intel/ice/ice_lag.h | 54 ---------------------- drivers/net/ethernet/intel/ice/ice_lib.c | 2 - drivers/net/ethernet/intel/ice/ice_sriov.c | 4 -- 6 files changed, 109 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst b/Documentation/networking/device_drivers/ethernet/intel/ice.rst index 69695e5511f4..e4d065c55ea8 100644 --- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst +++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst @@ -84,24 +84,6 @@ Once the VM shuts down, or otherwise releases the VF, the command will complete. -Important notes for SR-IOV and Link Aggregation ------------------------------------------------ -Link Aggregation is mutually exclusive with SR-IOV. - -- If Link Aggregation is active, SR-IOV VFs cannot be created on the PF. -- If SR-IOV is active, you cannot set up Link Aggregation on the interface. - -Bridging and MACVLAN are also affected by this. If you wish to use bridging or -MACVLAN with SR-IOV, you must set up bridging or MACVLAN before enabling -SR-IOV.
If you are using bridging or MACVLAN in conjunction with SR-IOV, and -you want to remove the interface from the bridge or MACVLAN, you must follow -these steps: - -1. Destroy SR-IOV VFs if they exist -2. Remove the interface from the bridge or MACVLAN -3. Recreate SRIOV VFs as needed - - Additional Features and Configurations ====================================== diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 8b016511561f..b4bca1d964a9 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -814,25 +814,6 @@ static inline bool ice_is_switchdev_running(struct ice_pf *pf) return pf->switchdev.is_running; } -/** - * ice_set_sriov_cap - enable SRIOV in PF flags - * @pf: PF struct - */ -static inline void ice_set_sriov_cap(struct ice_pf *pf) -{ - if (pf->hw.func_caps.common_cap.sr_iov_1_1) - set_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); -} - -/** - * ice_clear_sriov_cap - disable SRIOV in PF flags - * @pf: PF struct - */ -static inline void ice_clear_sriov_cap(struct ice_pf *pf) -{ - clear_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags); -} - #define ICE_FD_STAT_CTR_BLOCK_COUNT 256 #define ICE_FD_STAT_PF_IDX(base_idx) \ ((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT) diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index ee5b36941ba3..5a7753bda324 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -6,15 +6,6 @@ #include "ice.h" #include "ice_lag.h" -/** - * ice_lag_nop_handler - no-op Rx handler to disable LAG - * @pskb: pointer to skb pointer - */ -rx_handler_result_t ice_lag_nop_handler(struct sk_buff __always_unused **pskb) -{ - return RX_HANDLER_PASS; -} - /** * ice_lag_set_primary - set PF LAG state as Primary * @lag: LAG info struct @@ -158,7 +149,6 @@ ice_lag_link(struct ice_lag *lag, struct netdev_notifier_changeupper_info *info) lag->upper_netdev = upper; } - ice_clear_sriov_cap(pf); 
ice_clear_rdma_cap(pf); lag->bonded = true; @@ -205,7 +195,6 @@ ice_lag_unlink(struct ice_lag *lag, } lag->peer_netdev = NULL; - ice_set_sriov_cap(pf); ice_set_rdma_cap(pf); lag->bonded = false; lag->role = ICE_LAG_NONE; @@ -229,7 +218,6 @@ static void ice_lag_unregister(struct ice_lag *lag, struct net_device *netdev) if (lag->upper_netdev) { dev_put(lag->upper_netdev); lag->upper_netdev = NULL; - ice_set_sriov_cap(pf); ice_set_rdma_cap(pf); } /* perform some cleanup in case we come back */ diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h index 51b5cf467ce2..2c373676c42f 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.h +++ b/drivers/net/ethernet/intel/ice/ice_lag.h @@ -25,63 +25,9 @@ struct ice_lag { struct notifier_block notif_block; u8 bonded:1; /* currently bonded */ u8 primary:1; /* this is primary */ - u8 handler:1; /* did we register a rx_netdev_handler */ - /* each thing blocking bonding will increment this value by one. - * If this value is zero, then bonding is allowed. 
- */ - u16 dis_lag; u8 role; }; int ice_init_lag(struct ice_pf *pf); void ice_deinit_lag(struct ice_pf *pf); -rx_handler_result_t ice_lag_nop_handler(struct sk_buff **pskb); - -/** - * ice_disable_lag - increment LAG disable count - * @lag: LAG struct - */ -static inline void ice_disable_lag(struct ice_lag *lag) -{ - /* If LAG this PF is not already disabled, disable it */ - rtnl_lock(); - if (!netdev_is_rx_handler_busy(lag->netdev)) { - if (!netdev_rx_handler_register(lag->netdev, - ice_lag_nop_handler, - NULL)) - lag->handler = true; - } - rtnl_unlock(); - lag->dis_lag++; -} - -/** - * ice_enable_lag - decrement disable count for a PF - * @lag: LAG struct - * - * Decrement the disable counter for a port, and if that count reaches - * zero, then remove the no-op Rx handler from that netdev - */ -static inline void ice_enable_lag(struct ice_lag *lag) -{ - if (lag->dis_lag) - lag->dis_lag--; - if (!lag->dis_lag && lag->handler) { - rtnl_lock(); - netdev_rx_handler_unregister(lag->netdev); - rtnl_unlock(); - lag->handler = false; - } -} - -/** - * ice_is_lag_dis - is LAG disabled - * @lag: LAG struct - * - * Return true if bonding is disabled - */ -static inline bool ice_is_lag_dis(struct ice_lag *lag) -{ - return !!(lag->dis_lag); -} #endif /* _ICE_LAG_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 387bb9cbafbe..3de9556b89ac 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2707,8 +2707,6 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params) return vsi; err_vsi_cfg: - if (params->type == ICE_VSI_VF) - ice_enable_lag(pf->lag); ice_vsi_free(vsi); return NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 80c643fb9f2f..a7e7debb1428 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -979,8 +979,6 @@ int 
ice_sriov_configure(struct pci_dev *pdev, int num_vfs) if (!num_vfs) { if (!pci_vfs_assigned(pdev)) { ice_free_vfs(pf); - if (pf->lag) - ice_enable_lag(pf->lag); return 0; } @@ -992,8 +990,6 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs) if (err) return err; - if (pf->lag) - ice_disable_lag(pf->lag); return num_vfs; } -- cgit v1.2.3 From fe6559fab328972a2c8687d322fa54ab6d08f209 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 15 May 2023 13:23:46 -0600 Subject: net: libwx: Replace zero-length array with flexible-array member Zero-length arrays as fake flexible arrays are deprecated, and we are moving towards adopting C99 flexible-array members instead. Transform zero-length array into flexible-array member in struct wx_q_vector. Link: https://github.com/KSPP/linux/issues/21 Link: https://github.com/KSPP/linux/issues/286 Link: https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html Signed-off-by: Gustavo A. R. Silva Reviewed-by: Simon Horman Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/ZGKGwtsobVZecWa4@work Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 32f952d93009..cbe7f184b50e 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -598,7 +598,7 @@ struct wx_q_vector { char name[IFNAMSIZ + 17]; /* for dynamic allocation of rings associated with this q_vector */ - struct wx_ring ring[0] ____cacheline_internodealigned_in_smp; + struct wx_ring ring[] ____cacheline_internodealigned_in_smp; }; enum wx_isb_idx { -- cgit v1.2.3 From b1cf7a5615157e958c2bdac9aa981676c07a10d9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Mon, 15 May 2023 13:22:48 -0600 Subject: mlxfw: Replace zero-length array with DECLARE_FLEX_ARRAY() helper Zero-length arrays are deprecated and we are moving towards adopting C99 flexible-array members, instead. So, replace zero-length arrays declarations alone in structs with the new DECLARE_FLEX_ARRAY() helper macro. This helper allows for flexible-array members alone in structs. Link: https://github.com/KSPP/linux/issues/193 Link: https://github.com/KSPP/linux/issues/285 Link: https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html Signed-off-by: Gustavo A. R. Silva Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/ZGKGiBxP0zHo6XSK@work Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h index b001e5258091..47f6cc0401c3 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2_format.h @@ -44,7 +44,7 @@ MLXFW_MFA2_TLV(multi, struct mlxfw_mfa2_tlv_multi, MLXFW_MFA2_TLV_MULTI_PART); struct mlxfw_mfa2_tlv_psid { - u8 psid[0]; + DECLARE_FLEX_ARRAY(u8, psid); } __packed; MLXFW_MFA2_TLV_VARSIZE(psid, struct mlxfw_mfa2_tlv_psid, -- cgit v1.2.3 From 1fd22211354a94cb5afa7d7dab1a8e2c5ec1eed8 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 16 May 2023 22:14:02 +0200 Subject: net: lan966x: Add registers to configure PCP, DEI, DSCP Add the registers that are needed to configure the PCP, DEI and DSCP of the switch both at ingress and also at egress. 
Reviewed-by: Daniel Machon Reviewed-by: Piotr Raczynski Signed-off-by: Horatiu Vultur Signed-off-by: Paolo Abeni --- .../net/ethernet/microchip/lan966x/lan966x_regs.h | 132 +++++++++++++++++++++ 1 file changed, 132 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h index 222039180276..4b553927d2e0 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_regs.h @@ -283,6 +283,18 @@ enum lan966x_target { #define ANA_VLAN_CFG_VLAN_POP_CNT_GET(x)\ FIELD_GET(ANA_VLAN_CFG_VLAN_POP_CNT, x) +#define ANA_VLAN_CFG_VLAN_PCP GENMASK(15, 13) +#define ANA_VLAN_CFG_VLAN_PCP_SET(x)\ + FIELD_PREP(ANA_VLAN_CFG_VLAN_PCP, x) +#define ANA_VLAN_CFG_VLAN_PCP_GET(x)\ + FIELD_GET(ANA_VLAN_CFG_VLAN_PCP, x) + +#define ANA_VLAN_CFG_VLAN_DEI BIT(12) +#define ANA_VLAN_CFG_VLAN_DEI_SET(x)\ + FIELD_PREP(ANA_VLAN_CFG_VLAN_DEI, x) +#define ANA_VLAN_CFG_VLAN_DEI_GET(x)\ + FIELD_GET(ANA_VLAN_CFG_VLAN_DEI, x) + #define ANA_VLAN_CFG_VLAN_VID GENMASK(11, 0) #define ANA_VLAN_CFG_VLAN_VID_SET(x)\ FIELD_PREP(ANA_VLAN_CFG_VLAN_VID, x) @@ -316,6 +328,39 @@ enum lan966x_target { #define ANA_DROP_CFG_DROP_MC_SMAC_ENA_GET(x)\ FIELD_GET(ANA_DROP_CFG_DROP_MC_SMAC_ENA, x) +/* ANA:PORT:QOS_CFG */ +#define ANA_QOS_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 8, 0, 1, 4) + +#define ANA_QOS_CFG_DP_DEFAULT_VAL BIT(8) +#define ANA_QOS_CFG_DP_DEFAULT_VAL_SET(x)\ + FIELD_PREP(ANA_QOS_CFG_DP_DEFAULT_VAL, x) +#define ANA_QOS_CFG_DP_DEFAULT_VAL_GET(x)\ + FIELD_GET(ANA_QOS_CFG_DP_DEFAULT_VAL, x) + +#define ANA_QOS_CFG_QOS_DEFAULT_VAL GENMASK(7, 5) +#define ANA_QOS_CFG_QOS_DEFAULT_VAL_SET(x)\ + FIELD_PREP(ANA_QOS_CFG_QOS_DEFAULT_VAL, x) +#define ANA_QOS_CFG_QOS_DEFAULT_VAL_GET(x)\ + FIELD_GET(ANA_QOS_CFG_QOS_DEFAULT_VAL, x) + +#define ANA_QOS_CFG_QOS_DSCP_ENA BIT(4) +#define ANA_QOS_CFG_QOS_DSCP_ENA_SET(x)\ + 
FIELD_PREP(ANA_QOS_CFG_QOS_DSCP_ENA, x) +#define ANA_QOS_CFG_QOS_DSCP_ENA_GET(x)\ + FIELD_GET(ANA_QOS_CFG_QOS_DSCP_ENA, x) + +#define ANA_QOS_CFG_QOS_PCP_ENA BIT(3) +#define ANA_QOS_CFG_QOS_PCP_ENA_SET(x)\ + FIELD_PREP(ANA_QOS_CFG_QOS_PCP_ENA, x) +#define ANA_QOS_CFG_QOS_PCP_ENA_GET(x)\ + FIELD_GET(ANA_QOS_CFG_QOS_PCP_ENA, x) + +#define ANA_QOS_CFG_DSCP_REWR_CFG GENMASK(1, 0) +#define ANA_QOS_CFG_DSCP_REWR_CFG_SET(x)\ + FIELD_PREP(ANA_QOS_CFG_DSCP_REWR_CFG, x) +#define ANA_QOS_CFG_DSCP_REWR_CFG_GET(x)\ + FIELD_GET(ANA_QOS_CFG_DSCP_REWR_CFG, x) + /* ANA:PORT:VCAP_CFG */ #define ANA_VCAP_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 12, 0, 1, 4) @@ -415,6 +460,21 @@ enum lan966x_target { #define ANA_VCAP_S2_CFG_OAM_DIS_GET(x)\ FIELD_GET(ANA_VCAP_S2_CFG_OAM_DIS, x) +/* ANA:PORT:QOS_PCP_DEI_MAP_CFG */ +#define ANA_PCP_DEI_CFG(g, r) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 32, r, 16, 4) + +#define ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL BIT(3) +#define ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL_SET(x)\ + FIELD_PREP(ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL, x) +#define ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL_GET(x)\ + FIELD_GET(ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL, x) + +#define ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL GENMASK(2, 0) +#define ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL_SET(x)\ + FIELD_PREP(ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL, x) +#define ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL_GET(x)\ + FIELD_GET(ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL, x) + /* ANA:PORT:CPU_FWD_CFG */ #define ANA_CPU_FWD_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 96, 0, 1, 4) @@ -478,6 +538,15 @@ enum lan966x_target { #define ANA_PORT_CFG_PORTID_VAL_GET(x)\ FIELD_GET(ANA_PORT_CFG_PORTID_VAL, x) +/* ANA:COMMON:DSCP_REWR_CFG */ +#define ANA_DSCP_REWR_CFG(r) __REG(TARGET_ANA, 0, 1, 31232, 0, 1, 552, 332, r, 16, 4) + +#define ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL GENMASK(5, 0) +#define ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL_SET(x)\ + FIELD_PREP(ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL, x) +#define ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL_GET(x)\ + 
FIELD_GET(ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL, x) + /* ANA:PORT:POL_CFG */ #define ANA_POL_CFG(g) __REG(TARGET_ANA, 0, 1, 28672, g, 9, 128, 116, 0, 1, 4) @@ -547,6 +616,33 @@ enum lan966x_target { #define ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA_GET(x)\ FIELD_GET(ANA_AGGR_CFG_AC_IP4_TCPUDP_ENA, x) +/* ANA:COMMON:DSCP_CFG */ +#define ANA_DSCP_CFG(r) __REG(TARGET_ANA, 0, 1, 31232, 0, 1, 552, 76, r, 64, 4) + +#define ANA_DSCP_CFG_DP_DSCP_VAL BIT(11) +#define ANA_DSCP_CFG_DP_DSCP_VAL_SET(x)\ + FIELD_PREP(ANA_DSCP_CFG_DP_DSCP_VAL, x) +#define ANA_DSCP_CFG_DP_DSCP_VAL_GET(x)\ + FIELD_GET(ANA_DSCP_CFG_DP_DSCP_VAL, x) + +#define ANA_DSCP_CFG_QOS_DSCP_VAL GENMASK(10, 8) +#define ANA_DSCP_CFG_QOS_DSCP_VAL_SET(x)\ + FIELD_PREP(ANA_DSCP_CFG_QOS_DSCP_VAL, x) +#define ANA_DSCP_CFG_QOS_DSCP_VAL_GET(x)\ + FIELD_GET(ANA_DSCP_CFG_QOS_DSCP_VAL, x) + +#define ANA_DSCP_CFG_DSCP_TRUST_ENA BIT(1) +#define ANA_DSCP_CFG_DSCP_TRUST_ENA_SET(x)\ + FIELD_PREP(ANA_DSCP_CFG_DSCP_TRUST_ENA, x) +#define ANA_DSCP_CFG_DSCP_TRUST_ENA_GET(x)\ + FIELD_GET(ANA_DSCP_CFG_DSCP_TRUST_ENA, x) + +#define ANA_DSCP_CFG_DSCP_REWR_ENA BIT(0) +#define ANA_DSCP_CFG_DSCP_REWR_ENA_SET(x)\ + FIELD_PREP(ANA_DSCP_CFG_DSCP_REWR_ENA, x) +#define ANA_DSCP_CFG_DSCP_REWR_ENA_GET(x)\ + FIELD_GET(ANA_DSCP_CFG_DSCP_REWR_ENA, x) + /* ANA:POL:POL_PIR_CFG */ #define ANA_POL_PIR_CFG(g) __REG(TARGET_ANA, 0, 1, 16384, g, 345, 32, 0, 0, 1, 4) @@ -1468,6 +1564,18 @@ enum lan966x_target { #define REW_TAG_CFG_TAG_TPID_CFG_GET(x)\ FIELD_GET(REW_TAG_CFG_TAG_TPID_CFG, x) +#define REW_TAG_CFG_TAG_PCP_CFG GENMASK(3, 2) +#define REW_TAG_CFG_TAG_PCP_CFG_SET(x)\ + FIELD_PREP(REW_TAG_CFG_TAG_PCP_CFG, x) +#define REW_TAG_CFG_TAG_PCP_CFG_GET(x)\ + FIELD_GET(REW_TAG_CFG_TAG_PCP_CFG, x) + +#define REW_TAG_CFG_TAG_DEI_CFG GENMASK(1, 0) +#define REW_TAG_CFG_TAG_DEI_CFG_SET(x)\ + FIELD_PREP(REW_TAG_CFG_TAG_DEI_CFG, x) +#define REW_TAG_CFG_TAG_DEI_CFG_GET(x)\ + FIELD_GET(REW_TAG_CFG_TAG_DEI_CFG, x) + /* REW:PORT:PORT_CFG */ #define REW_PORT_CFG(g) 
__REG(TARGET_REW, 0, 1, 0, g, 10, 128, 8, 0, 1, 4) @@ -1483,6 +1591,30 @@ enum lan966x_target { #define REW_PORT_CFG_NO_REWRITE_GET(x)\ FIELD_GET(REW_PORT_CFG_NO_REWRITE, x) +/* REW:PORT:DSCP_CFG */ +#define REW_DSCP_CFG(g) __REG(TARGET_REW, 0, 1, 0, g, 10, 128, 12, 0, 1, 4) + +#define REW_DSCP_CFG_DSCP_REWR_CFG GENMASK(1, 0) +#define REW_DSCP_CFG_DSCP_REWR_CFG_SET(x)\ + FIELD_PREP(REW_DSCP_CFG_DSCP_REWR_CFG, x) +#define REW_DSCP_CFG_DSCP_REWR_CFG_GET(x)\ + FIELD_GET(REW_DSCP_CFG_DSCP_REWR_CFG, x) + +/* REW:PORT:PCP_DEI_QOS_MAP_CFG */ +#define REW_PCP_DEI_CFG(g, r) __REG(TARGET_REW, 0, 1, 0, g, 10, 128, 16, r, 16, 4) + +#define REW_PCP_DEI_CFG_DEI_QOS_VAL BIT(3) +#define REW_PCP_DEI_CFG_DEI_QOS_VAL_SET(x)\ + FIELD_PREP(REW_PCP_DEI_CFG_DEI_QOS_VAL, x) +#define REW_PCP_DEI_CFG_DEI_QOS_VAL_GET(x)\ + FIELD_GET(REW_PCP_DEI_CFG_DEI_QOS_VAL, x) + +#define REW_PCP_DEI_CFG_PCP_QOS_VAL GENMASK(2, 0) +#define REW_PCP_DEI_CFG_PCP_QOS_VAL_SET(x)\ + FIELD_PREP(REW_PCP_DEI_CFG_PCP_QOS_VAL, x) +#define REW_PCP_DEI_CFG_PCP_QOS_VAL_GET(x)\ + FIELD_GET(REW_PCP_DEI_CFG_PCP_QOS_VAL, x) + /* REW:COMMON:STAT_CFG */ #define REW_STAT_CFG __REG(TARGET_REW, 0, 1, 3072, 0, 1, 528, 520, 0, 1, 4) -- cgit v1.2.3 From a83e463036ef491ba0f7b99f82a12c902a285245 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 16 May 2023 22:14:03 +0200 Subject: net: lan966x: Add support for offloading pcp table Add support for offloading pcp app entries. Lan966x has 8 priority queues per port and for each priority it also has a drop precedence. 
Reviewed-by: Daniel Machon Reviewed-by: Piotr Raczynski Signed-off-by: Horatiu Vultur Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microchip/lan966x/Kconfig | 11 +++ drivers/net/ethernet/microchip/lan966x/Makefile | 1 + .../net/ethernet/microchip/lan966x/lan966x_dcb.c | 103 +++++++++++++++++++++ .../net/ethernet/microchip/lan966x/lan966x_main.c | 2 + .../net/ethernet/microchip/lan966x/lan966x_main.h | 25 +++++ .../net/ethernet/microchip/lan966x/lan966x_port.c | 30 ++++++ 6 files changed, 172 insertions(+) create mode 100644 drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/Kconfig b/drivers/net/ethernet/microchip/lan966x/Kconfig index 571e6d4da1e9..f9ebffc04eb8 100644 --- a/drivers/net/ethernet/microchip/lan966x/Kconfig +++ b/drivers/net/ethernet/microchip/lan966x/Kconfig @@ -10,3 +10,14 @@ config LAN966X_SWITCH select VCAP help This driver supports the Lan966x network switch device. + +config LAN966X_DCB + bool "Data Center Bridging (DCB) support" + depends on LAN966X_SWITCH && DCB + default y + help + Say Y here if you want to use Data Center Bridging (DCB) in the + driver. This can be used to assign priority to traffic, based on + DSCP and PCP. + + If unsure, set to Y. 
diff --git a/drivers/net/ethernet/microchip/lan966x/Makefile b/drivers/net/ethernet/microchip/lan966x/Makefile index 7b0cda4ffa6b..3b6ac331691d 100644 --- a/drivers/net/ethernet/microchip/lan966x/Makefile +++ b/drivers/net/ethernet/microchip/lan966x/Makefile @@ -15,6 +15,7 @@ lan966x-switch-objs := lan966x_main.o lan966x_phylink.o lan966x_port.o \ lan966x_xdp.o lan966x_vcap_impl.o lan966x_vcap_ag_api.o \ lan966x_tc_flower.o lan966x_goto.o +lan966x-switch-$(CONFIG_LAN966X_DCB) += lan966x_dcb.o lan966x-switch-$(CONFIG_DEBUG_FS) += lan966x_vcap_debugfs.o # Provide include files diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c new file mode 100644 index 000000000000..e0d49421812f --- /dev/null +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include "lan966x_main.h" + +static void lan966x_dcb_app_update(struct net_device *dev, bool enable) +{ + struct lan966x_port *port = netdev_priv(dev); + struct lan966x_port_qos qos = {0}; + struct dcb_app app_itr; + + /* Get pcp ingress mapping */ + for (int i = 0; i < ARRAY_SIZE(qos.pcp.map); i++) { + app_itr.selector = DCB_APP_SEL_PCP; + app_itr.protocol = i; + qos.pcp.map[i] = dcb_getapp(dev, &app_itr); + } + + qos.pcp.enable = enable; + lan966x_port_qos_set(port, &qos); +} + +static int lan966x_dcb_app_validate(struct net_device *dev, + const struct dcb_app *app) +{ + int err = 0; + + switch (app->selector) { + /* Pcp checks */ + case DCB_APP_SEL_PCP: + if (app->protocol >= LAN966X_PORT_QOS_PCP_DEI_COUNT) + err = -EINVAL; + else if (app->priority >= NUM_PRIO_QUEUES) + err = -ERANGE; + break; + default: + err = -EINVAL; + break; + } + + if (err) + netdev_err(dev, "Invalid entry: %d:%d\n", app->protocol, + app->priority); + + return err; +} + +static int lan966x_dcb_ieee_delapp(struct net_device *dev, struct dcb_app *app) +{ + int err; + + err = dcb_ieee_delapp(dev, app); + if (err < 0) + 
return err; + + lan966x_dcb_app_update(dev, false); + + return 0; +} + +static int lan966x_dcb_ieee_setapp(struct net_device *dev, struct dcb_app *app) +{ + struct dcb_app app_itr; + int err; + u8 prio; + + err = lan966x_dcb_app_validate(dev, app); + if (err) + return err; + + /* Delete current mapping, if it exists */ + prio = dcb_getapp(dev, app); + if (prio) { + app_itr = *app; + app_itr.priority = prio; + dcb_ieee_delapp(dev, &app_itr); + } + + err = dcb_ieee_setapp(dev, app); + if (err) + return err; + + lan966x_dcb_app_update(dev, true); + + return 0; +} + +static const struct dcbnl_rtnl_ops lan966x_dcbnl_ops = { + .ieee_setapp = lan966x_dcb_ieee_setapp, + .ieee_delapp = lan966x_dcb_ieee_delapp, +}; + +void lan966x_dcb_init(struct lan966x *lan966x) +{ + for (int p = 0; p < lan966x->num_phys_ports; ++p) { + struct lan966x_port *port; + + port = lan966x->ports[p]; + if (!port) + continue; + + port->dev->dcbnl_ops = &lan966x_dcbnl_ops; + } +} diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 2b6e046e1d10..5f01b21acdd1 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -1213,6 +1213,8 @@ static int lan966x_probe(struct platform_device *pdev) if (err) goto cleanup_fdma; + lan966x_dcb_init(lan966x); + return 0; cleanup_fdma: diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 882d5a08e7d5..b9ca47ab6e8b 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -104,6 +104,11 @@ #define LAN966X_VCAP_CID_ES0_L0 VCAP_CID_EGRESS_L0 /* ES0 lookup 0 */ #define LAN966X_VCAP_CID_ES0_MAX (VCAP_CID_EGRESS_L1 - 1) /* ES0 Max */ +#define LAN966X_PORT_QOS_PCP_COUNT 8 +#define LAN966X_PORT_QOS_DEI_COUNT 8 +#define LAN966X_PORT_QOS_PCP_DEI_COUNT \ + (LAN966X_PORT_QOS_PCP_COUNT + 
LAN966X_PORT_QOS_DEI_COUNT) + /* MAC table entry types. * ENTRYTYPE_NORMAL is subject to aging. * ENTRYTYPE_LOCKED is not subject to aging. @@ -392,6 +397,15 @@ struct lan966x_port_tc { struct flow_stats mirror_stat; }; +struct lan966x_port_qos_pcp { + u8 map[LAN966X_PORT_QOS_PCP_DEI_COUNT]; + bool enable; +}; + +struct lan966x_port_qos { + struct lan966x_port_qos_pcp pcp; +}; + struct lan966x_port { struct net_device *dev; struct lan966x *lan966x; @@ -456,6 +470,9 @@ int lan966x_port_pcs_set(struct lan966x_port *port, struct lan966x_port_config *config); void lan966x_port_init(struct lan966x_port *port); +void lan966x_port_qos_set(struct lan966x_port *port, + struct lan966x_port_qos *qos); + int lan966x_mac_ip_learn(struct lan966x *lan966x, bool cpu_copy, const unsigned char mac[ETH_ALEN], @@ -680,6 +697,14 @@ int lan966x_goto_port_del(struct lan966x_port *port, unsigned long goto_id, struct netlink_ext_ack *extack); +#ifdef CONFIG_LAN966X_DCB +void lan966x_dcb_init(struct lan966x *lan966x); +#else +static inline void lan966x_dcb_init(struct lan966x *lan966x) +{ +} +#endif + static inline void __iomem *lan_addr(void __iomem *base[], int id, int tinst, int tcnt, int gbase, int ginst, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index 0050fcb988b7..0cee8127c48e 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -394,6 +394,36 @@ int lan966x_port_pcs_set(struct lan966x_port *port, return 0; } +static void lan966x_port_qos_pcp_set(struct lan966x_port *port, + struct lan966x_port_qos_pcp *qos) +{ + u8 *pcp_itr = qos->map; + u8 pcp, dp; + + lan_rmw(ANA_QOS_CFG_QOS_PCP_ENA_SET(qos->enable), + ANA_QOS_CFG_QOS_PCP_ENA, + port->lan966x, ANA_QOS_CFG(port->chip_port)); + + /* Map PCP and DEI to priority */ + for (int i = 0; i < ARRAY_SIZE(qos->map); i++) { + pcp = *(pcp_itr + i); + dp = (i < LAN966X_PORT_QOS_PCP_COUNT) ? 
0 : 1; + + lan_rmw(ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL_SET(pcp) | + ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL_SET(dp), + ANA_PCP_DEI_CFG_QOS_PCP_DEI_VAL | + ANA_PCP_DEI_CFG_DP_PCP_DEI_VAL, + port->lan966x, + ANA_PCP_DEI_CFG(port->chip_port, i)); + } +} + +void lan966x_port_qos_set(struct lan966x_port *port, + struct lan966x_port_qos *qos) +{ + lan966x_port_qos_pcp_set(port, &qos->pcp); +} + void lan966x_port_init(struct lan966x_port *port) { struct lan966x_port_config *config = &port->config; -- cgit v1.2.3 From 10c71a97eeeb0fb703225203059d2aeac79acb2a Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 16 May 2023 22:14:04 +0200 Subject: net: lan966x: Add support for apptrust Make use of set/getapptrust() to implement per-selector trust and trust order. Reviewed-by: Daniel Machon Signed-off-by: Horatiu Vultur Signed-off-by: Paolo Abeni --- .../net/ethernet/microchip/lan966x/lan966x_dcb.c | 118 ++++++++++++++++++++- 1 file changed, 114 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c index e0d49421812f..d6210c70171e 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c @@ -2,7 +2,49 @@ #include "lan966x_main.h" -static void lan966x_dcb_app_update(struct net_device *dev, bool enable) +enum lan966x_dcb_apptrust_values { + LAN966X_DCB_APPTRUST_EMPTY, + LAN966X_DCB_APPTRUST_DSCP, + LAN966X_DCB_APPTRUST_PCP, + LAN966X_DCB_APPTRUST_DSCP_PCP, + __LAN966X_DCB_APPTRUST_MAX +}; + +static const struct lan966x_dcb_apptrust { + u8 selectors[IEEE_8021QAZ_APP_SEL_MAX + 1]; + int nselectors; +} *lan966x_port_apptrust[NUM_PHYS_PORTS]; + +static const char *lan966x_dcb_apptrust_names[__LAN966X_DCB_APPTRUST_MAX] = { + [LAN966X_DCB_APPTRUST_EMPTY] = "empty", + [LAN966X_DCB_APPTRUST_DSCP] = "dscp", + [LAN966X_DCB_APPTRUST_PCP] = "pcp", + [LAN966X_DCB_APPTRUST_DSCP_PCP] = "dscp pcp" +}; + 
+/* Lan966x supported apptrust policies */ +static const struct lan966x_dcb_apptrust + lan966x_dcb_apptrust_policies[__LAN966X_DCB_APPTRUST_MAX] = { + /* Empty *must* be first */ + [LAN966X_DCB_APPTRUST_EMPTY] = { { 0 }, 0 }, + [LAN966X_DCB_APPTRUST_DSCP] = { { IEEE_8021QAZ_APP_SEL_DSCP }, 1 }, + [LAN966X_DCB_APPTRUST_PCP] = { { DCB_APP_SEL_PCP }, 1 }, + [LAN966X_DCB_APPTRUST_DSCP_PCP] = { { IEEE_8021QAZ_APP_SEL_DSCP, + DCB_APP_SEL_PCP }, 2 }, +}; + +static bool lan966x_dcb_apptrust_contains(int portno, u8 selector) +{ + const struct lan966x_dcb_apptrust *conf = lan966x_port_apptrust[portno]; + + for (int i = 0; i < conf->nselectors; i++) + if (conf->selectors[i] == selector) + return true; + + return false; +} + +static void lan966x_dcb_app_update(struct net_device *dev) { struct lan966x_port *port = netdev_priv(dev); struct lan966x_port_qos qos = {0}; @@ -15,7 +57,10 @@ static void lan966x_dcb_app_update(struct net_device *dev, bool enable) qos.pcp.map[i] = dcb_getapp(dev, &app_itr); } - qos.pcp.enable = enable; + /* Enable use of pcp for queue classification */ + if (lan966x_dcb_apptrust_contains(port->chip_port, DCB_APP_SEL_PCP)) + qos.pcp.enable = true; + lan966x_port_qos_set(port, &qos); } @@ -52,7 +97,7 @@ static int lan966x_dcb_ieee_delapp(struct net_device *dev, struct dcb_app *app) if (err < 0) return err; - lan966x_dcb_app_update(dev, false); + lan966x_dcb_app_update(dev); return 0; } @@ -79,7 +124,67 @@ static int lan966x_dcb_ieee_setapp(struct net_device *dev, struct dcb_app *app) if (err) return err; - lan966x_dcb_app_update(dev, true); + lan966x_dcb_app_update(dev); + + return 0; +} + +static int lan966x_dcb_apptrust_validate(struct net_device *dev, + u8 *selectors, + int nselectors) +{ + for (int i = 0; i < ARRAY_SIZE(lan966x_dcb_apptrust_policies); i++) { + bool match; + + if (lan966x_dcb_apptrust_policies[i].nselectors != nselectors) + continue; + + match = true; + for (int j = 0; j < nselectors; j++) { + if 
(lan966x_dcb_apptrust_policies[i].selectors[j] != + *(selectors + j)) { + match = false; + break; + } + } + if (match) + return i; + } + + netdev_err(dev, "Valid apptrust configurations are:\n"); + for (int i = 0; i < ARRAY_SIZE(lan966x_dcb_apptrust_names); i++) + pr_info("order: %s\n", lan966x_dcb_apptrust_names[i]); + + return -EOPNOTSUPP; +} + +static int lan966x_dcb_setapptrust(struct net_device *dev, + u8 *selectors, + int nselectors) +{ + struct lan966x_port *port = netdev_priv(dev); + int idx; + + idx = lan966x_dcb_apptrust_validate(dev, selectors, nselectors); + if (idx < 0) + return idx; + + lan966x_port_apptrust[port->chip_port] = &lan966x_dcb_apptrust_policies[idx]; + lan966x_dcb_app_update(dev); + + return 0; +} + +static int lan966x_dcb_getapptrust(struct net_device *dev, u8 *selectors, + int *nselectors) +{ + struct lan966x_port *port = netdev_priv(dev); + const struct lan966x_dcb_apptrust *trust; + + trust = lan966x_port_apptrust[port->chip_port]; + + memcpy(selectors, trust->selectors, trust->nselectors); + *nselectors = trust->nselectors; return 0; } @@ -87,6 +192,8 @@ static int lan966x_dcb_ieee_setapp(struct net_device *dev, struct dcb_app *app) static const struct dcbnl_rtnl_ops lan966x_dcbnl_ops = { .ieee_setapp = lan966x_dcb_ieee_setapp, .ieee_delapp = lan966x_dcb_ieee_delapp, + .dcbnl_setapptrust = lan966x_dcb_setapptrust, + .dcbnl_getapptrust = lan966x_dcb_getapptrust, }; void lan966x_dcb_init(struct lan966x *lan966x) @@ -99,5 +206,8 @@ void lan966x_dcb_init(struct lan966x *lan966x) continue; port->dev->dcbnl_ops = &lan966x_dcbnl_ops; + + lan966x_port_apptrust[port->chip_port] = + &lan966x_dcb_apptrust_policies[LAN966X_DCB_APPTRUST_DSCP_PCP]; } } -- cgit v1.2.3 From 0c88d98108c615d9a8c1325857d44792c8924b16 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 16 May 2023 22:14:05 +0200 Subject: net: lan966x: Add support for offloading dscp table Add support for offloading dscp app entries. The dscp values are global for all lan966x ports. 
Reviewed-by: Daniel Machon Signed-off-by: Horatiu Vultur Signed-off-by: Paolo Abeni --- .../net/ethernet/microchip/lan966x/lan966x_dcb.c | 59 ++++++++++++++++++++-- .../net/ethernet/microchip/lan966x/lan966x_main.h | 8 +++ .../net/ethernet/microchip/lan966x/lan966x_port.c | 26 ++++++++++ 3 files changed, 89 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c index d6210c70171e..17cec9ec5ed2 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c @@ -57,19 +57,62 @@ static void lan966x_dcb_app_update(struct net_device *dev) qos.pcp.map[i] = dcb_getapp(dev, &app_itr); } + /* Get dscp ingress mapping */ + for (int i = 0; i < ARRAY_SIZE(qos.dscp.map); i++) { + app_itr.selector = IEEE_8021QAZ_APP_SEL_DSCP; + app_itr.protocol = i; + qos.dscp.map[i] = dcb_getapp(dev, &app_itr); + } + /* Enable use of pcp for queue classification */ if (lan966x_dcb_apptrust_contains(port->chip_port, DCB_APP_SEL_PCP)) qos.pcp.enable = true; + /* Enable use of dscp for queue classification */ + if (lan966x_dcb_apptrust_contains(port->chip_port, IEEE_8021QAZ_APP_SEL_DSCP)) + qos.dscp.enable = true; + lan966x_port_qos_set(port, &qos); } +/* DSCP mapping is global for all ports, so set and delete app entries are + * replicated for each port. 
+ */ +static int lan966x_dcb_ieee_dscp_setdel(struct net_device *dev, + struct dcb_app *app, + int (*setdel)(struct net_device *, + struct dcb_app *)) +{ + struct lan966x_port *port = netdev_priv(dev); + struct lan966x *lan966x = port->lan966x; + int err; + + for (int i = 0; i < NUM_PHYS_PORTS; i++) { + port = lan966x->ports[i]; + if (!port) + continue; + + err = setdel(port->dev, app); + if (err) + return err; + } + + return 0; +} + static int lan966x_dcb_app_validate(struct net_device *dev, const struct dcb_app *app) { int err = 0; switch (app->selector) { + /* Dscp checks */ + case IEEE_8021QAZ_APP_SEL_DSCP: + if (app->protocol >= LAN966X_PORT_QOS_DSCP_COUNT) + err = -EINVAL; + else if (app->priority >= NUM_PRIO_QUEUES) + err = -ERANGE; + break; /* Pcp checks */ case DCB_APP_SEL_PCP: if (app->protocol >= LAN966X_PORT_QOS_PCP_DEI_COUNT) @@ -93,8 +136,12 @@ static int lan966x_dcb_ieee_delapp(struct net_device *dev, struct dcb_app *app) { int err; - err = dcb_ieee_delapp(dev, app); - if (err < 0) + if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP) + err = lan966x_dcb_ieee_dscp_setdel(dev, app, dcb_ieee_delapp); + else + err = dcb_ieee_delapp(dev, app); + + if (err) return err; lan966x_dcb_app_update(dev); @@ -117,10 +164,14 @@ static int lan966x_dcb_ieee_setapp(struct net_device *dev, struct dcb_app *app) if (prio) { app_itr = *app; app_itr.priority = prio; - dcb_ieee_delapp(dev, &app_itr); + lan966x_dcb_ieee_delapp(dev, &app_itr); } - err = dcb_ieee_setapp(dev, app); + if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP) + err = lan966x_dcb_ieee_dscp_setdel(dev, app, dcb_ieee_setapp); + else + err = dcb_ieee_setapp(dev, app); + if (err) return err; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index b9ca47ab6e8b..8213440e0867 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -109,6 +109,8 @@ #define 
LAN966X_PORT_QOS_PCP_DEI_COUNT \ (LAN966X_PORT_QOS_PCP_COUNT + LAN966X_PORT_QOS_DEI_COUNT) +#define LAN966X_PORT_QOS_DSCP_COUNT 64 + /* MAC table entry types. * ENTRYTYPE_NORMAL is subject to aging. * ENTRYTYPE_LOCKED is not subject to aging. @@ -402,8 +404,14 @@ struct lan966x_port_qos_pcp { bool enable; }; +struct lan966x_port_qos_dscp { + u8 map[LAN966X_PORT_QOS_DSCP_COUNT]; + bool enable; +}; + struct lan966x_port_qos { struct lan966x_port_qos_pcp pcp; + struct lan966x_port_qos_dscp dscp; }; struct lan966x_port { diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index 0cee8127c48e..11c552e87ee4 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -418,10 +418,36 @@ static void lan966x_port_qos_pcp_set(struct lan966x_port *port, } } +static void lan966x_port_qos_dscp_set(struct lan966x_port *port, + struct lan966x_port_qos_dscp *qos) +{ + struct lan966x *lan966x = port->lan966x; + + /* Enable/disable dscp for qos classification. 
*/ + lan_rmw(ANA_QOS_CFG_QOS_DSCP_ENA_SET(qos->enable), + ANA_QOS_CFG_QOS_DSCP_ENA, + lan966x, ANA_QOS_CFG(port->chip_port)); + + /* Map each dscp value to priority and dp */ + for (int i = 0; i < ARRAY_SIZE(qos->map); i++) + lan_rmw(ANA_DSCP_CFG_DP_DSCP_VAL_SET(0) | + ANA_DSCP_CFG_QOS_DSCP_VAL_SET(*(qos->map + i)), + ANA_DSCP_CFG_DP_DSCP_VAL | + ANA_DSCP_CFG_QOS_DSCP_VAL, + lan966x, ANA_DSCP_CFG(i)); + + /* Set per-dscp trust */ + for (int i = 0; i < ARRAY_SIZE(qos->map); i++) + lan_rmw(ANA_DSCP_CFG_DSCP_TRUST_ENA_SET(qos->enable), + ANA_DSCP_CFG_DSCP_TRUST_ENA, + lan966x, ANA_DSCP_CFG(i)); +} + void lan966x_port_qos_set(struct lan966x_port *port, struct lan966x_port_qos *qos) { lan966x_port_qos_pcp_set(port, &qos->pcp); + lan966x_port_qos_dscp_set(port, &qos->dscp); } void lan966x_port_init(struct lan966x_port *port) -- cgit v1.2.3 From f8ba50ea13fb38da26aea8e1cba2ab30493e2c71 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 16 May 2023 22:14:06 +0200 Subject: net: lan966x: Add support for offloading default prio Add support for offloading default prio. 
Reviewed-by: Daniel Machon Reviewed-by: Piotr Raczynski Signed-off-by: Horatiu Vultur Signed-off-by: Paolo Abeni --- .../net/ethernet/microchip/lan966x/lan966x_dcb.c | 12 ++++++++++++ .../net/ethernet/microchip/lan966x/lan966x_main.h | 1 + .../net/ethernet/microchip/lan966x/lan966x_port.c | 21 +++++++++++++++++++++ 3 files changed, 34 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c index 17cec9ec5ed2..273e3bfb2389 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c @@ -64,6 +64,11 @@ static void lan966x_dcb_app_update(struct net_device *dev) qos.dscp.map[i] = dcb_getapp(dev, &app_itr); } + /* Get default prio */ + qos.default_prio = dcb_ieee_getapp_default_prio_mask(dev); + if (qos.default_prio) + qos.default_prio = fls(qos.default_prio) - 1; + /* Enable use of pcp for queue classification */ if (lan966x_dcb_apptrust_contains(port->chip_port, DCB_APP_SEL_PCP)) qos.pcp.enable = true; @@ -106,6 +111,13 @@ static int lan966x_dcb_app_validate(struct net_device *dev, int err = 0; switch (app->selector) { + /* Default priority checks */ + case IEEE_8021QAZ_APP_SEL_ETHERTYPE: + if (app->protocol) + err = -EINVAL; + else if (app->priority >= NUM_PRIO_QUEUES) + err = -ERANGE; + break; /* Dscp checks */ case IEEE_8021QAZ_APP_SEL_DSCP: if (app->protocol >= LAN966X_PORT_QOS_DSCP_COUNT) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 8213440e0867..53711d538016 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -412,6 +412,7 @@ struct lan966x_port_qos_dscp { struct lan966x_port_qos { struct lan966x_port_qos_pcp pcp; struct lan966x_port_qos_dscp dscp; + u8 default_prio; }; struct lan966x_port { diff --git 
a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index 11c552e87ee4..a6608876b71e 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -443,11 +443,32 @@ static void lan966x_port_qos_dscp_set(struct lan966x_port *port, lan966x, ANA_DSCP_CFG(i)); } +static int lan966x_port_qos_default_set(struct lan966x_port *port, + struct lan966x_port_qos *qos) +{ + /* Set default prio and dp level */ + lan_rmw(ANA_QOS_CFG_DP_DEFAULT_VAL_SET(0) | + ANA_QOS_CFG_QOS_DEFAULT_VAL_SET(qos->default_prio), + ANA_QOS_CFG_DP_DEFAULT_VAL | + ANA_QOS_CFG_QOS_DEFAULT_VAL, + port->lan966x, ANA_QOS_CFG(port->chip_port)); + + /* Set default pcp and dei for untagged frames */ + lan_rmw(ANA_VLAN_CFG_VLAN_DEI_SET(0) | + ANA_VLAN_CFG_VLAN_PCP_SET(0), + ANA_VLAN_CFG_VLAN_DEI | + ANA_VLAN_CFG_VLAN_PCP, + port->lan966x, ANA_VLAN_CFG(port->chip_port)); + + return 0; +} + void lan966x_port_qos_set(struct lan966x_port *port, struct lan966x_port_qos *qos) { lan966x_port_qos_pcp_set(port, &qos->pcp); lan966x_port_qos_dscp_set(port, &qos->dscp); + lan966x_port_qos_default_set(port, qos); } void lan966x_port_init(struct lan966x_port *port) -- cgit v1.2.3 From 363f98b96a43f11cb4c6e4d69199d656d2e5b373 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 16 May 2023 22:14:07 +0200 Subject: net: lan966x: Add support for PCP rewrite Add support for rewrite of PCP and DEI value, based on QoS and DP level. The DCB rewrite table is queried for mappings between priority and PCP/DEI. The classified DP level is then encoded in the DEI bit, if a mapping for DEI exists. 
Reviewed-by: Daniel Machon Signed-off-by: Horatiu Vultur Signed-off-by: Paolo Abeni --- .../net/ethernet/microchip/lan966x/lan966x_dcb.c | 61 +++++++++++++++++++++- .../net/ethernet/microchip/lan966x/lan966x_main.h | 10 ++++ .../net/ethernet/microchip/lan966x/lan966x_port.c | 37 +++++++++++++ 3 files changed, 107 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c index 273e3bfb2389..0ea650943653 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c @@ -46,9 +46,11 @@ static bool lan966x_dcb_apptrust_contains(int portno, u8 selector) static void lan966x_dcb_app_update(struct net_device *dev) { + struct dcb_rewr_prio_pcp_map pcp_rewr_map = {0}; struct lan966x_port *port = netdev_priv(dev); struct lan966x_port_qos qos = {0}; struct dcb_app app_itr; + bool pcp_rewr = false; /* Get pcp ingress mapping */ for (int i = 0; i < ARRAY_SIZE(qos.pcp.map); i++) { @@ -69,10 +71,24 @@ static void lan966x_dcb_app_update(struct net_device *dev) if (qos.default_prio) qos.default_prio = fls(qos.default_prio) - 1; + /* Get pcp rewrite mapping */ + dcb_getrewr_prio_pcp_mask_map(dev, &pcp_rewr_map); + for (int i = 0; i < ARRAY_SIZE(pcp_rewr_map.map); i++) { + if (!pcp_rewr_map.map[i]) + continue; + + pcp_rewr = true; + qos.pcp_rewr.map[i] = fls(pcp_rewr_map.map[i]) - 1; + } + /* Enable use of pcp for queue classification */ - if (lan966x_dcb_apptrust_contains(port->chip_port, DCB_APP_SEL_PCP)) + if (lan966x_dcb_apptrust_contains(port->chip_port, DCB_APP_SEL_PCP)) { qos.pcp.enable = true; + if (pcp_rewr) + qos.pcp_rewr.enable = true; + } + /* Enable use of dscp for queue classification */ if (lan966x_dcb_apptrust_contains(port->chip_port, IEEE_8021QAZ_APP_SEL_DSCP)) qos.dscp.enable = true; @@ -252,11 +268,54 @@ static int lan966x_dcb_getapptrust(struct net_device *dev, u8 *selectors, 
return 0; } +static int lan966x_dcb_delrewr(struct net_device *dev, struct dcb_app *app) +{ + int err; + + err = dcb_delrewr(dev, app); + if (err < 0) + return err; + + lan966x_dcb_app_update(dev); + + return 0; +} + +static int lan966x_dcb_setrewr(struct net_device *dev, struct dcb_app *app) +{ + struct dcb_app app_itr; + u16 proto; + int err; + + err = lan966x_dcb_app_validate(dev, app); + if (err) + goto out; + + /* Delete current mapping, if it exists. */ + proto = dcb_getrewr(dev, app); + if (proto) { + app_itr = *app; + app_itr.protocol = proto; + lan966x_dcb_delrewr(dev, &app_itr); + } + + err = dcb_setrewr(dev, app); + if (err) + goto out; + + lan966x_dcb_app_update(dev); + +out: + return err; +} + static const struct dcbnl_rtnl_ops lan966x_dcbnl_ops = { .ieee_setapp = lan966x_dcb_ieee_setapp, .ieee_delapp = lan966x_dcb_ieee_delapp, .dcbnl_setapptrust = lan966x_dcb_setapptrust, .dcbnl_getapptrust = lan966x_dcb_getapptrust, + .dcbnl_setrewr = lan966x_dcb_setrewr, + .dcbnl_delrewr = lan966x_dcb_delrewr, }; void lan966x_dcb_init(struct lan966x *lan966x) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 53711d538016..16b0149ac2b5 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -111,6 +111,10 @@ #define LAN966X_PORT_QOS_DSCP_COUNT 64 +/* Port PCP rewrite mode */ +#define LAN966X_PORT_REW_TAG_CTRL_CLASSIFIED 0 +#define LAN966X_PORT_REW_TAG_CTRL_MAPPED 2 + /* MAC table entry types. * ENTRYTYPE_NORMAL is subject to aging. * ENTRYTYPE_LOCKED is not subject to aging. 
@@ -409,9 +413,15 @@ struct lan966x_port_qos_dscp { bool enable; }; +struct lan966x_port_qos_pcp_rewr { + u16 map[NUM_PRIO_QUEUES]; + bool enable; +}; + struct lan966x_port_qos { struct lan966x_port_qos_pcp pcp; struct lan966x_port_qos_dscp dscp; + struct lan966x_port_qos_pcp_rewr pcp_rewr; u8 default_prio; }; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index a6608876b71e..6887746d081f 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -463,12 +463,49 @@ static int lan966x_port_qos_default_set(struct lan966x_port *port, return 0; } +static void lan966x_port_qos_pcp_rewr_set(struct lan966x_port *port, + struct lan966x_port_qos_pcp_rewr *qos) +{ + u8 mode = LAN966X_PORT_REW_TAG_CTRL_CLASSIFIED; + u8 pcp, dei; + + if (qos->enable) + mode = LAN966X_PORT_REW_TAG_CTRL_MAPPED; + + /* Map the values only if it is enabled otherwise will be the classified + * value + */ + lan_rmw(REW_TAG_CFG_TAG_PCP_CFG_SET(mode) | + REW_TAG_CFG_TAG_DEI_CFG_SET(mode), + REW_TAG_CFG_TAG_PCP_CFG | + REW_TAG_CFG_TAG_DEI_CFG, + port->lan966x, REW_TAG_CFG(port->chip_port)); + + /* Map each value to pcp and dei */ + for (int i = 0; i < ARRAY_SIZE(qos->map); i++) { + pcp = qos->map[i]; + if (pcp > LAN966X_PORT_QOS_PCP_COUNT) + dei = 1; + else + dei = 0; + + lan_rmw(REW_PCP_DEI_CFG_DEI_QOS_VAL_SET(dei) | + REW_PCP_DEI_CFG_PCP_QOS_VAL_SET(pcp), + REW_PCP_DEI_CFG_DEI_QOS_VAL | + REW_PCP_DEI_CFG_PCP_QOS_VAL, + port->lan966x, + REW_PCP_DEI_CFG(port->chip_port, + i + dei * LAN966X_PORT_QOS_PCP_COUNT)); + } +} + void lan966x_port_qos_set(struct lan966x_port *port, struct lan966x_port_qos *qos) { lan966x_port_qos_pcp_set(port, &qos->pcp); lan966x_port_qos_dscp_set(port, &qos->dscp); lan966x_port_qos_default_set(port, qos); + lan966x_port_qos_pcp_rewr_set(port, &qos->pcp_rewr); } void lan966x_port_init(struct lan966x_port *port) -- cgit v1.2.3 From 
d38ddd56d90eb156b2708637403fd8a936c0113a Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Tue, 16 May 2023 22:14:08 +0200 Subject: net: lan966x: Add support for DSCP rewrite Add support for DSCP rewrite in lan966x driver. On egress DSCP is rewritten from either classified DSCP, or frame DSCP. Classified DSCP is determined by the Analyzer Classifier on ingress, and is mapped from classified QoS class and DP level. Classification of DSCP is by default enabled for all ports. It is required that DSCP is trusted for the egress port *and* rewrite table is not empty, in order to rewrite DSCP based on classified DSCP, otherwise DSCP is always rewritten from frame DSCP. Reviewed-by: Daniel Machon Signed-off-by: Horatiu Vultur Signed-off-by: Paolo Abeni --- .../net/ethernet/microchip/lan966x/lan966x_dcb.c | 36 ++++++++++++++++++++-- .../net/ethernet/microchip/lan966x/lan966x_main.h | 13 ++++++++ .../net/ethernet/microchip/lan966x/lan966x_port.c | 35 +++++++++++++++++++++ 3 files changed, 81 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c b/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c index 0ea650943653..ed2d96d7908e 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_dcb.c @@ -46,10 +46,12 @@ static bool lan966x_dcb_apptrust_contains(int portno, u8 selector) static void lan966x_dcb_app_update(struct net_device *dev) { + struct dcb_ieee_app_prio_map dscp_rewr_map = {0}; struct dcb_rewr_prio_pcp_map pcp_rewr_map = {0}; struct lan966x_port *port = netdev_priv(dev); struct lan966x_port_qos qos = {0}; struct dcb_app app_itr; + bool dscp_rewr = false; bool pcp_rewr = false; /* Get pcp ingress mapping */ @@ -81,6 +83,16 @@ static void lan966x_dcb_app_update(struct net_device *dev) qos.pcp_rewr.map[i] = fls(pcp_rewr_map.map[i]) - 1; } + /* Get dscp rewrite mapping */ + dcb_getrewr_prio_dscp_mask_map(dev, &dscp_rewr_map); + 
for (int i = 0; i < ARRAY_SIZE(dscp_rewr_map.map); i++) { + if (!dscp_rewr_map.map[i]) + continue; + + dscp_rewr = true; + qos.dscp_rewr.map[i] = fls64(dscp_rewr_map.map[i]) - 1; + } + /* Enable use of pcp for queue classification */ if (lan966x_dcb_apptrust_contains(port->chip_port, DCB_APP_SEL_PCP)) { qos.pcp.enable = true; @@ -90,9 +102,13 @@ static void lan966x_dcb_app_update(struct net_device *dev) } /* Enable use of dscp for queue classification */ - if (lan966x_dcb_apptrust_contains(port->chip_port, IEEE_8021QAZ_APP_SEL_DSCP)) + if (lan966x_dcb_apptrust_contains(port->chip_port, IEEE_8021QAZ_APP_SEL_DSCP)) { qos.dscp.enable = true; + if (dscp_rewr) + qos.dscp_rewr.enable = true; + } + lan966x_port_qos_set(port, &qos); } @@ -272,7 +288,11 @@ static int lan966x_dcb_delrewr(struct net_device *dev, struct dcb_app *app) { int err; - err = dcb_delrewr(dev, app); + if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP) + err = lan966x_dcb_ieee_dscp_setdel(dev, app, dcb_delrewr); + else + err = dcb_delrewr(dev, app); + if (err < 0) return err; @@ -299,7 +319,11 @@ static int lan966x_dcb_setrewr(struct net_device *dev, struct dcb_app *app) lan966x_dcb_delrewr(dev, &app_itr); } - err = dcb_setrewr(dev, app); + if (app->selector == IEEE_8021QAZ_APP_SEL_DSCP) + err = lan966x_dcb_ieee_dscp_setdel(dev, app, dcb_setrewr); + else + err = dcb_setrewr(dev, app); + if (err) goto out; @@ -331,5 +355,11 @@ void lan966x_dcb_init(struct lan966x *lan966x) lan966x_port_apptrust[port->chip_port] = &lan966x_dcb_apptrust_policies[LAN966X_DCB_APPTRUST_DSCP_PCP]; + + /* Enable DSCP classification based on classified QoS class and + * DP, for all DSCP values, for all ports. 
+ */ + lan966x_port_qos_dscp_rewr_mode_set(port, + LAN966X_PORT_QOS_REWR_DSCP_ALL); } } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 16b0149ac2b5..27f272831ea5 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -115,6 +115,11 @@ #define LAN966X_PORT_REW_TAG_CTRL_CLASSIFIED 0 #define LAN966X_PORT_REW_TAG_CTRL_MAPPED 2 +/* Port DSCP rewrite mode */ +#define LAN966X_PORT_REW_DSCP_FRAME 0 +#define LAN966X_PORT_REW_DSCP_ANALIZER 1 +#define LAN966X_PORT_QOS_REWR_DSCP_ALL 3 + /* MAC table entry types. * ENTRYTYPE_NORMAL is subject to aging. * ENTRYTYPE_LOCKED is not subject to aging. @@ -418,10 +423,16 @@ struct lan966x_port_qos_pcp_rewr { bool enable; }; +struct lan966x_port_qos_dscp_rewr { + u16 map[LAN966X_PORT_QOS_DSCP_COUNT]; + bool enable; +}; + struct lan966x_port_qos { struct lan966x_port_qos_pcp pcp; struct lan966x_port_qos_dscp dscp; struct lan966x_port_qos_pcp_rewr pcp_rewr; + struct lan966x_port_qos_dscp_rewr dscp_rewr; u8 default_prio; }; @@ -491,6 +502,8 @@ void lan966x_port_init(struct lan966x_port *port); void lan966x_port_qos_set(struct lan966x_port *port, struct lan966x_port_qos *qos); +void lan966x_port_qos_dscp_rewr_mode_set(struct lan966x_port *port, + int mode); int lan966x_mac_ip_learn(struct lan966x *lan966x, bool cpu_copy, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index 6887746d081f..92108d354051 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -499,6 +499,40 @@ static void lan966x_port_qos_pcp_rewr_set(struct lan966x_port *port, } } +static void lan966x_port_qos_dscp_rewr_set(struct lan966x_port *port, + struct lan966x_port_qos_dscp_rewr *qos) +{ + u16 dscp; + u8 mode; + + if (qos->enable) + mode = 
LAN966X_PORT_REW_DSCP_ANALIZER; + else + mode = LAN966X_PORT_REW_DSCP_FRAME; + + /* Enable the rewrite otherwise will use the values from the frame */ + lan_rmw(REW_DSCP_CFG_DSCP_REWR_CFG_SET(mode), + REW_DSCP_CFG_DSCP_REWR_CFG, + port->lan966x, REW_DSCP_CFG(port->chip_port)); + + /* Map each classified Qos class and DP to classified DSCP value */ + for (int i = 0; i < ARRAY_SIZE(qos->map); i++) { + dscp = qos->map[i]; + + lan_rmw(ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL_SET(dscp), + ANA_DSCP_REWR_CFG_DSCP_QOS_REWR_VAL, + port->lan966x, ANA_DSCP_REWR_CFG(i)); + } +} + +void lan966x_port_qos_dscp_rewr_mode_set(struct lan966x_port *port, + int mode) +{ + lan_rmw(ANA_QOS_CFG_DSCP_REWR_CFG_SET(mode), + ANA_QOS_CFG_DSCP_REWR_CFG, + port->lan966x, ANA_QOS_CFG(port->chip_port)); +} + void lan966x_port_qos_set(struct lan966x_port *port, struct lan966x_port_qos *qos) { @@ -506,6 +540,7 @@ void lan966x_port_qos_set(struct lan966x_port *port, lan966x_port_qos_dscp_set(port, &qos->dscp); lan966x_port_qos_default_set(port, qos); lan966x_port_qos_pcp_rewr_set(port, &qos->pcp_rewr); + lan966x_port_qos_dscp_rewr_set(port, &qos->dscp_rewr); } void lan966x_port_init(struct lan966x_port *port) -- cgit v1.2.3 From 95b681485563c64585de78662ee52d06b7fa47d9 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Wed, 12 Apr 2023 09:36:11 +0200 Subject: igc: Avoid transmit queue timeout for XDP High XDP load triggers the netdev watchdog: |NETDEV WATCHDOG: enp3s0 (igc): transmit queue 2 timed out The reason is the Tx queue transmission start (txq->trans_start) is not updated in XDP code path. Therefore, add it for all XDP transmission functions. 
Signed-off-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 38d113b48111..c5ef1edcf548 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2411,6 +2411,8 @@ static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) nq = txring_txq(ring); __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); res = igc_xdp_init_tx_descriptor(ring, xdpf); __netif_tx_unlock(nq); return res; @@ -2829,6 +2831,9 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring) __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); + budget = igc_desc_unused(ring); while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) { @@ -6354,6 +6359,9 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); + drops = 0; for (i = 0; i < num_frames; i++) { int err; -- cgit v1.2.3 From 7271522b729b80d9581f4b3debef0e942d3a1049 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 28 Apr 2023 16:00:09 -0400 Subject: igb: Define igb_pm_ops conditionally on CONFIG_PM For s390, gcc with W=1 reports drivers/net/ethernet/intel/igb/igb_main.c:186:32: error: 'igb_pm_ops' defined but not used [-Werror=unused-const-variable=] 186 | static const struct dev_pm_ops igb_pm_ops = { | ^~~~~~~~~~ The only use of igb_pm_ops is conditional on CONFIG_PM. 
The definition of igb_pm_ops should also be conditional on CONFIG_PM Signed-off-by: Tom Rix Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 58872a4c2540..c5cdb880774d 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -183,11 +183,13 @@ static int igb_resume(struct device *); static int igb_runtime_suspend(struct device *dev); static int igb_runtime_resume(struct device *dev); static int igb_runtime_idle(struct device *dev); +#ifdef CONFIG_PM static const struct dev_pm_ops igb_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume) SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume, igb_runtime_idle) }; +#endif static void igb_shutdown(struct pci_dev *); static int igb_pci_sriov_configure(struct pci_dev *dev, int num_vfs); #ifdef CONFIG_IGB_DCA -- cgit v1.2.3 From c4dc8dc32bd1fa0ed04d25f2e4004d854c163c39 Mon Sep 17 00:00:00 2001 From: Baozhu Ni Date: Wed, 17 May 2023 09:27:26 +0800 Subject: e1000e: Add @adapter description to kdoc Provide a description for the kernel doc of the @adapter of e1000e_trigger_lsc() Signed-off-by: Baozhu Ni Reviewed-by: Simon Horman Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/e1000e/netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index bd7ef59b1f2e..771a3c909c45 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -4198,7 +4198,7 @@ void e1000e_reset(struct e1000_adapter *adapter) /** * e1000e_trigger_lsc - trigger an LSC interrupt - * @adapter: + * @adapter: board 
private structure * * Fire a link status change interrupt to start the watchdog. **/ -- cgit v1.2.3 From afbed3f74830163f9559579dee382cac3cff82da Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 16 May 2023 18:59:35 -0700 Subject: net/mlx5e: do as little as possible in napi poll when budget is 0 NAPI gets called with budget of 0 from netpoll, which has interrupts disabled. We should try to free some space on Tx rings and nothing else. Specifically do not try to handle XDP TX or try to refill Rx buffers - we can't use the page pool from IRQ context. Don't check if IRQs moved, either, that makes no sense in netpoll. Netpoll calls _all_ the rings from whatever CPU it happens to be invoked on. In general do as little as possible, the work quickly adds up when there's tens of rings to poll. The immediate stack trace I was seeing is: __do_softirq+0xd1/0x2c0 __local_bh_enable_ip+0xc7/0x120 page_pool_put_defragged_page+0x267/0x320 mlx5e_free_xdpsq_desc+0x99/0xd0 mlx5e_poll_xdpsq_cq+0x138/0x3b0 mlx5e_napi_poll+0xc3/0x8b0 netpoll_poll_dev+0xce/0x150 AFAIU page pool takes a BH lock, releases it and since BH is now enabled tries to run softirqs. Reviewed-by: Tariq Toukan Fixes: 60bbf7eeef10 ("mlx5: use page_pool for xdp_return_frame call") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index a50bfda18e96..fbb2d963fb7e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -161,20 +161,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) } } + /* budget=0 means we may be in IRQ context, do as little as possible */ + if (unlikely(!budget)) + goto out; + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); if (c->xdp) busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq); - if (likely(budget)) { /* budget=0 means: don't poll rx rings */ - if (xsk_open) - work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget); + if (xsk_open) + work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget); - if (likely(budget - work_done)) - work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done); + if (likely(budget - work_done)) + work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done); - busy |= work_done == budget; - } + busy |= work_done == budget; mlx5e_poll_ico_cq(&c->icosq.cq); if (mlx5e_poll_ico_cq(&c->async_icosq.cq)) -- cgit v1.2.3 From cfcb942863f6fce9266e1957a021e6c7295dee42 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Thu, 18 May 2023 06:48:22 +0100 Subject: sfc: fix devlink info error handling Avoid early devlink info return if errors arise with MCDI commands executed for getting the required info from the device. The rationale is some commands can fail but later ones could still give useful data. Moreover, some nvram partitions could not be present which needs to be handled as a non error. The specific errors are reported through system messages and if any error appears, it will be reported generically through extack. 
Fixes: 14743ddd2495 ("sfc: add devlink info support for ef100") Signed-off-by: Alejandro Lucero Acked-by: Martin Habets Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx_devlink.c | 95 ++++++++++++++++------------------ 1 file changed, 45 insertions(+), 50 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/efx_devlink.c b/drivers/net/ethernet/sfc/efx_devlink.c index 381b805659d3..ef9971cbb695 100644 --- a/drivers/net/ethernet/sfc/efx_devlink.c +++ b/drivers/net/ethernet/sfc/efx_devlink.c @@ -171,9 +171,14 @@ static int efx_devlink_info_nvram_partition(struct efx_nic *efx, rc = efx_mcdi_nvram_metadata(efx, partition_type, NULL, version, NULL, 0); + + /* If the partition does not exist, that is not an error. */ + if (rc == -ENOENT) + return 0; + if (rc) { - netif_err(efx, drv, efx->net_dev, "mcdi nvram %s: failed\n", - version_name); + netif_err(efx, drv, efx->net_dev, "mcdi nvram %s: failed (rc=%d)\n", + version_name, rc); return rc; } @@ -187,36 +192,33 @@ static int efx_devlink_info_nvram_partition(struct efx_nic *efx, static int efx_devlink_info_stored_versions(struct efx_nic *efx, struct devlink_info_req *req) { - int rc; - - rc = efx_devlink_info_nvram_partition(efx, req, - NVRAM_PARTITION_TYPE_BUNDLE, - DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID); - if (rc) - return rc; - - rc = efx_devlink_info_nvram_partition(efx, req, - NVRAM_PARTITION_TYPE_MC_FIRMWARE, - DEVLINK_INFO_VERSION_GENERIC_FW_MGMT); - if (rc) - return rc; - - rc = efx_devlink_info_nvram_partition(efx, req, - NVRAM_PARTITION_TYPE_SUC_FIRMWARE, - EFX_DEVLINK_INFO_VERSION_FW_MGMT_SUC); - if (rc) - return rc; - - rc = efx_devlink_info_nvram_partition(efx, req, - NVRAM_PARTITION_TYPE_EXPANSION_ROM, - EFX_DEVLINK_INFO_VERSION_FW_EXPROM); - if (rc) - return rc; + int err; - rc = efx_devlink_info_nvram_partition(efx, req, - NVRAM_PARTITION_TYPE_EXPANSION_UEFI, - EFX_DEVLINK_INFO_VERSION_FW_UEFI); - return rc; + /* We do not care here about the specific 
error but just if an error + * happened. The specific error will be reported inside the call + * through system messages, and if any error happened in any call + * below, we report it through extack. + */ + err = efx_devlink_info_nvram_partition(efx, req, + NVRAM_PARTITION_TYPE_BUNDLE, + DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID); + + err |= efx_devlink_info_nvram_partition(efx, req, + NVRAM_PARTITION_TYPE_MC_FIRMWARE, + DEVLINK_INFO_VERSION_GENERIC_FW_MGMT); + + err |= efx_devlink_info_nvram_partition(efx, req, + NVRAM_PARTITION_TYPE_SUC_FIRMWARE, + EFX_DEVLINK_INFO_VERSION_FW_MGMT_SUC); + + err |= efx_devlink_info_nvram_partition(efx, req, + NVRAM_PARTITION_TYPE_EXPANSION_ROM, + EFX_DEVLINK_INFO_VERSION_FW_EXPROM); + + err |= efx_devlink_info_nvram_partition(efx, req, + NVRAM_PARTITION_TYPE_EXPANSION_UEFI, + EFX_DEVLINK_INFO_VERSION_FW_UEFI); + return err; } #define EFX_VER_FLAG(_f) \ @@ -587,27 +589,20 @@ static int efx_devlink_info_get(struct devlink *devlink, { struct efx_devlink *devlink_private = devlink_priv(devlink); struct efx_nic *efx = devlink_private->efx; - int rc; + int err; - /* Several different MCDI commands are used. We report first error - * through extack returning at that point. Specific error - * information via system messages. + /* Several different MCDI commands are used. We report if errors + * happened through extack. Specific error information via system + * messages inside the calls. 
*/ - rc = efx_devlink_info_board_cfg(efx, req); - if (rc) { - NL_SET_ERR_MSG_MOD(extack, "Getting board info failed"); - return rc; - } - rc = efx_devlink_info_stored_versions(efx, req); - if (rc) { - NL_SET_ERR_MSG_MOD(extack, "Getting stored versions failed"); - return rc; - } - rc = efx_devlink_info_running_versions(efx, req); - if (rc) { - NL_SET_ERR_MSG_MOD(extack, "Getting running versions failed"); - return rc; - } + err = efx_devlink_info_board_cfg(efx, req); + + err |= efx_devlink_info_stored_versions(efx, req); + + err |= efx_devlink_info_running_versions(efx, req); + + if (err) + NL_SET_ERR_MSG_MOD(extack, "Errors when getting device info. Check system messages"); return 0; } -- cgit v1.2.3 From de678ca38861f2eb58814048076dcf95ed1b5bf9 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Thu, 18 May 2023 12:10:42 +0530 Subject: octeontx2-pf: Fix TSOv6 offload HW adds segment size to the payload length in the IPv6 header. Fix payload length to just TCP header length instead of 'TCP header size + IPv6 header size'. Fixes: 86d7476078b8 ("octeontx2-pf: TCP segmentation offload support") Signed-off-by: Sunil Goutham Signed-off-by: Ratheesh Kannoth Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 7045fedfd73a..7af223b0a37f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -652,9 +652,7 @@ static void otx2_sqe_add_ext(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, htons(ext->lso_sb - skb_network_offset(skb)); } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { ext->lso_format = pfvf->hw.lso_tsov6_idx; - - ipv6_hdr(skb)->payload_len = - htons(ext->lso_sb - skb_network_offset(skb)); + ipv6_hdr(skb)->payload_len = htons(tcp_hdrlen(skb)); } else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { __be16 l3_proto = vlan_get_protocol(skb); struct udphdr *udph = udp_hdr(skb); -- cgit v1.2.3 From 9025944fddfed5966c8f102f1fe921ab3aee2c12 Mon Sep 17 00:00:00 2001 From: Shenwei Wang Date: Thu, 18 May 2023 10:02:02 -0500 Subject: net: fec: add dma_wmb to ensure correct descriptor values Two dma_wmb() are added in the XDP TX path to ensure proper ordering of descriptor and buffer updates: 1. A dma_wmb() is added after updating the last BD to make sure the updates to rest of the descriptor are visible before transferring ownership to FEC. 2. A dma_wmb() is also added after updating the bdp to ensure these updates are visible before updating txq->bd.cur. 3. Start the xmit of the frame immediately right after configuring the tx descriptor. Fixes: 6d6b39f180b8 ("net: fec: add initial XDP support") Signed-off-by: Shenwei Wang Reviewed-by: Wei Fang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/fec_main.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 577d94821b3e..38e5b5abe067 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3834,6 +3834,11 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, index = fec_enet_get_bd_index(last_bdp, &txq->bd); txq->tx_skbuff[index] = NULL; + /* Make sure the updates to rest of the descriptor are performed before + * transferring ownership. + */ + dma_wmb(); + /* Send it on its way. Tell FEC it's ready, interrupt when done, * it's the last BD of the frame, and to put the CRC on the end. */ @@ -3843,8 +3848,14 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, /* If this was the last BD in the ring, start at the beginning again. */ bdp = fec_enet_get_nextdesc(last_bdp, &txq->bd); + /* Make sure the update to bdp are performed before txq->bd.cur. */ + dma_wmb(); + txq->bd.cur = bdp; + /* Trigger transmission start */ + writel(0, txq->bd.reg_desc_active); + return 0; } @@ -3873,12 +3884,6 @@ static int fec_enet_xdp_xmit(struct net_device *dev, sent_frames++; } - /* Make sure the update to bdp and tx_skbuff are performed. */ - wmb(); - - /* Trigger transmission start */ - writel(0, txq->bd.reg_desc_active); - __netif_tx_unlock(nq); return sent_frames; -- cgit v1.2.3 From 20d5e0ef252a151ea6585cfccf32def81a624666 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 18 May 2023 22:30:49 +0200 Subject: net: arc: Make arc_emac_remove() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function returns zero unconditionally. Change it to return void instead which simplifies its callers as error handing becomes unnecessary. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/arc/emac.h | 2 +- drivers/net/ethernet/arc/emac_arc.c | 6 +++--- drivers/net/ethernet/arc/emac_main.c | 4 +--- drivers/net/ethernet/arc/emac_rockchip.c | 5 ++--- 4 files changed, 7 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/arc/emac.h b/drivers/net/ethernet/arc/emac.h index d820ae03a966..0e244f0e25fd 100644 --- a/drivers/net/ethernet/arc/emac.h +++ b/drivers/net/ethernet/arc/emac.h @@ -220,6 +220,6 @@ static inline void arc_reg_clr(struct arc_emac_priv *priv, int reg, int mask) int arc_mdio_probe(struct arc_emac_priv *priv); int arc_mdio_remove(struct arc_emac_priv *priv); int arc_emac_probe(struct net_device *ndev, int interface); -int arc_emac_remove(struct net_device *ndev); +void arc_emac_remove(struct net_device *ndev); #endif /* ARC_EMAC_H */ diff --git a/drivers/net/ethernet/arc/emac_arc.c b/drivers/net/ethernet/arc/emac_arc.c index 800620b8f10d..ce3147e886a1 100644 --- a/drivers/net/ethernet/arc/emac_arc.c +++ b/drivers/net/ethernet/arc/emac_arc.c @@ -61,11 +61,11 @@ out_netdev: static int emac_arc_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); - int err; - err = arc_emac_remove(ndev); + arc_emac_remove(ndev); free_netdev(ndev); - return err; + + return 0; } static const struct of_device_id emac_arc_dt_ids[] = { diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index ba0646b3b122..2b427d8a1831 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -1008,7 +1008,7 @@ out_put_node: } EXPORT_SYMBOL_GPL(arc_emac_probe); -int arc_emac_remove(struct net_device *ndev) +void arc_emac_remove(struct net_device *ndev) { struct arc_emac_priv *priv = netdev_priv(ndev); @@ -1019,8 +1019,6 @@ int arc_emac_remove(struct net_device *ndev) if (!IS_ERR(priv->clk)) 
clk_disable_unprepare(priv->clk); - - return 0; } EXPORT_SYMBOL_GPL(arc_emac_remove); diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c index 1c9ca3bcb871..509101112279 100644 --- a/drivers/net/ethernet/arc/emac_rockchip.c +++ b/drivers/net/ethernet/arc/emac_rockchip.c @@ -248,9 +248,8 @@ static int emac_rockchip_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct rockchip_priv_data *priv = netdev_priv(ndev); - int err; - err = arc_emac_remove(ndev); + arc_emac_remove(ndev); clk_disable_unprepare(priv->refclk); @@ -261,7 +260,7 @@ static int emac_rockchip_remove(struct platform_device *pdev) clk_disable_unprepare(priv->macclk); free_netdev(ndev); - return err; + return 0; } static struct platform_driver emac_rockchip_driver = { -- cgit v1.2.3 From ecd01b69a5f8edda731d8a7cfe33c9ffa0c85700 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 7 Apr 2023 18:52:15 +0200 Subject: ice: define meta data to match in switch Add description for each meta data. Redefine tunnel mask to match only tunneled MAC and tunneled VLAN. It shouldn't try to match other flags (previously it was 0xff, it is redundant). VLAN mask was 0xd000, change it to 0xf000. 4 last bits are flags depending on the same field in packets (VLAN tag). Because of that, It isn't harmful to match also on ITAG. Group all MDID and MDID offsets into enums to keep things organized. 
Signed-off-by: Michal Swiatkowski Reviewed-by: Piotr Raczynski Reviewed-by: Simon Horman Reviewed-by: Leon Romanovsky Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_protocol_type.h | 186 +++++++++++++++++++-- drivers/net/ethernet/intel/ice/ice_switch.c | 11 +- drivers/net/ethernet/intel/ice/ice_vlan_mode.c | 2 +- 3 files changed, 183 insertions(+), 16 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index 02a4e1cf624e..8a84f106bd4d 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -115,17 +115,7 @@ enum ice_prot_id { #define ICE_L2TPV3_HW 104 #define ICE_UDP_OF_HW 52 /* UDP Tunnels */ -#define ICE_META_DATA_ID_HW 255 /* this is used for tunnel and VLAN type */ -#define ICE_MDID_SIZE 2 - -#define ICE_TUN_FLAG_MDID 21 -#define ICE_TUN_FLAG_MDID_OFF (ICE_MDID_SIZE * ICE_TUN_FLAG_MDID) -#define ICE_TUN_FLAG_MASK 0xFF - -#define ICE_VLAN_FLAG_MDID 20 -#define ICE_VLAN_FLAG_MDID_OFF (ICE_MDID_SIZE * ICE_VLAN_FLAG_MDID) -#define ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK 0xD000 #define ICE_TUN_FLAG_FV_IND 2 @@ -230,6 +220,181 @@ struct ice_nvgre_hdr { __be32 tni_flow; }; +/* Metadata information + * + * Not all MDIDs can be used by switch block. It depends on package version. + * + * MDID 16 (Rx offset) + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | A | B | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * A = Source port where the transaction came from (3b). + * + * B = Destination TC of the packet. The TC is relative to a port (5b). + * + * MDID 17 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | PTYPE | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * PTYPE = Encodes the packet type (10b). 
+ * + * MDID 18 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Packet length | R | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Packet length = Length of the packet in bytes + * (packet always carries CRC) (14b). + * R = Reserved (2b). + * + * MDID 19 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Source VSI | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Source VSI = Source VSI of packet loopbacked in switch (for egress) (10b). + * + * MDID 20 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |A|B|C|D|E|F|R|R|G|H|I|J|K|L|M|N| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * A = DSI - set for DSI RX pkts. + * B = ipsec_decrypted - invalid on NIC. + * C = marker - this is a marker packet. + * D = from_network - for TX sets to 0 + * for RX: + * * 1 - packet is from external link + * * 0 - packet source is from internal + * E = source_interface_is_rx - reflect the physical interface from where the + * packet was received: + * * 1 - Rx + * * 0 - Tx + * F = from_mng - The bit signals that the packet's origin is the management. + * G = ucast - Outer L2 MAC address is unicast. + * H = mcast - Outer L2 MAC address is multicast. + * I = bcast - Outer L2 MAC address is broadcast. + * J = second_outer_mac_present - 2 outer MAC headers are present in the packet. + * K = STAG or BVLAN - Outer L2 header has STAG (ethernet type 0x88a8) or + * BVLAN (ethernet type 0x88a8). 
+ * L = ITAG - Outer L2 header has ITAG (ethernet type 0x88e7) + * M = EVLAN (0x8100) - Outer L2 header has EVLAN (ethernet type 0x8100) + * N = EVLAN (0x9100) - Outer L2 header has EVLAN (ethernet type 0x9100) + */ +#define ICE_PKT_VLAN_STAG BIT(12) +#define ICE_PKT_VLAN_ITAG BIT(13) +#define ICE_PKT_VLAN_EVLAN (BIT(14) | BIT(15)) +#define ICE_PKT_VLAN_MASK (ICE_PKT_VLAN_STAG | ICE_PKT_VLAN_ITAG | \ + ICE_PKT_VLAN_EVLAN) +/* MDID 21 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |A|B|C|D|E|F|G|H|I|J|R|R|K|L|M|N| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * A = VLAN (0x8100) - Outer L2 header has VLAN (ethernet type 0x8100) + * B = NSHoE - Outer L2 header has NSH (ethernet type 0x894f) + * C = MPLS (0x8847) - There is at least 1 MPLS tag in the outer header + * (ethernet type 0x8847) + * D = MPLS (0x8848) - There is at least 1 MPLS tag in the outer header + * (ethernet type 0x8848) + * E = multi MPLS - There is more than a single MPLS tag in the outer header + * F = inner MPLS - There is inner MPLS tag in the packet + * G = tunneled MAC - Set if the packet includes a tunneled MAC + * H = tunneled VLAN - Same as VLAN, but for a tunneled header + * I = pkt_is_frag - Packet is fragmented (ipv4 or ipv6) + * J = ipv6_ext - The packet has routing or destination ipv6 extension in inner + * or outer ipv6 headers + * K = RoCE - UDP packet detected as RoCEv2 + * L = UDP_XSUM_0 - Set to 1 if L4 checksum is 0 in a UDP packet + * M = ESP - This is a ESP packet + * N = NAT_ESP - This is a ESP packet encapsulated in UDP NAT + */ +#define ICE_PKT_TUNNEL_MAC BIT(6) +#define ICE_PKT_TUNNEL_VLAN BIT(7) +#define ICE_PKT_TUNNEL_MASK (ICE_PKT_TUNNEL_MAC | ICE_PKT_TUNNEL_VLAN) + +/* MDID 22 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |A|B|C|D|E|F| G |H|I|J| K |L|M| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * A = fin - fin flag in tcp header + * B = sync - sync flag in tcp header + * C = rst - rst flag in tcp header + * D = psh - psh flag in tcp header + * E = ack - ack flag in tcp header + * 
F = urg - urg flag in tcp header + * G = tunnel type (3b) - Flags used to decode tunnel type: + * * b000 - not a VXLAN/Geneve/GRE tunnel + * * b001 - VXLAN-GPE + * * b010 - VXLAN (non-GPE) + * * b011 - Geneve + * * b100 - GRE (no key, no xsum) + * * b101 - GREK (key, no xsum) + * * b110 - GREC (no key, xsum) + * * b111 - GREKC (key, xsum) + * H = UDP_GRE - Packet is UDP (VXLAN or VLAN_GPE or Geneve or MPLSoUDP or GRE) + * tunnel + * I = OAM - VXLAN/Geneve/tunneled NSH packet with the OAM bit set + * J = tunneled NSH - Packet has NSHoGRE or NSHoUDP + * K = switch (2b) - Direction on switch + * * b00 - normal + * * b01 - TX force only LAN + * * b10 - TX disable LAN + * * b11 - direct to VSI + * L = swpe - Represents SWPE bit in TX command + * M = sw_cmd - Switch command + * + * MDID 23 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |A|B|C|D| R |E|F|R| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * A = MAC error - Produced by MAC according to L2 error conditions + * B = PPRS no offload - FIFO overflow in PPRS or any problematic condition in + * PPRS ANA + * C = abort - Set when malicious packet is detected + * D = partial analysis - ANA's analysing got cut in the middle + * (header > 504B etc.) 
+ * E = FLM - Flow director hit indication + * F = FDLONG - Flow direector long bucket indication + * + */ +#define ICE_MDID_SIZE 2 +#define ICE_META_DATA_ID_HW 255 + +enum ice_hw_metadata_id { + ICE_SOURCE_PORT_MDID = 16, + ICE_PTYPE_MDID = 17, + ICE_PACKET_LENGTH_MDID = 18, + ICE_SOURCE_VSI_MDID = 19, + ICE_PKT_VLAN_MDID = 20, + ICE_PKT_TUNNEL_MDID = 21, + ICE_PKT_TCP_MDID = 22, + ICE_PKT_ERROR_MDID = 23, +}; + +enum ice_hw_metadata_offset { + ICE_SOURCE_PORT_MDID_OFFSET = ICE_MDID_SIZE * ICE_SOURCE_PORT_MDID, + ICE_PTYPE_MDID_OFFSET = ICE_MDID_SIZE * ICE_PTYPE_MDID, + ICE_PACKET_LENGTH_MDID_OFFSET = ICE_MDID_SIZE * ICE_PACKET_LENGTH_MDID, + ICE_SOURCE_VSI_MDID_OFFSET = ICE_MDID_SIZE * ICE_SOURCE_VSI_MDID, + ICE_PKT_VLAN_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_VLAN_MDID, + ICE_PKT_TUNNEL_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_TUNNEL_MDID, + ICE_PKT_TCP_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_TCP_MDID, + ICE_PKT_ERROR_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_ERROR_MDID, +}; + +struct ice_hw_metadata { + __be16 source_port; + __be16 ptype; + __be16 packet_length; + __be16 source_vsi; + __be16 flags[4]; +}; + union ice_prot_hdr { struct ice_ether_hdr eth_hdr; struct ice_ethtype_hdr ethertype; @@ -243,6 +408,7 @@ union ice_prot_hdr { struct ice_udp_gtp_hdr gtp_hdr; struct ice_pppoe_hdr pppoe_hdr; struct ice_l2tpv3_sess_hdr l2tpv3_sess_hdr; + struct ice_hw_metadata metadata; }; /* This is mapping table entry that maps every word within a given protocol diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 46b36851af46..5c3f266fa80f 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -5268,7 +5268,7 @@ static bool ice_tun_type_match_word(enum ice_sw_tunnel_type tun_type, u16 *mask) case ICE_SW_TUN_NVGRE: case ICE_SW_TUN_GTPU: case ICE_SW_TUN_GTPC: - *mask = ICE_TUN_FLAG_MASK; + *mask = ICE_PKT_TUNNEL_MASK; return true; default: @@ -5297,7 +5297,8 @@ 
ice_add_special_words(struct ice_adv_rule_info *rinfo, u8 word = lkup_exts->n_val_words++; lkup_exts->fv_words[word].prot_id = ICE_META_DATA_ID_HW; - lkup_exts->fv_words[word].off = ICE_TUN_FLAG_MDID_OFF; + lkup_exts->fv_words[word].off = + ICE_PKT_TUNNEL_MDID_OFFSET; lkup_exts->field_mask[word] = mask; } else { return -ENOSPC; @@ -5309,9 +5310,9 @@ ice_add_special_words(struct ice_adv_rule_info *rinfo, u8 word = lkup_exts->n_val_words++; lkup_exts->fv_words[word].prot_id = ICE_META_DATA_ID_HW; - lkup_exts->fv_words[word].off = ICE_VLAN_FLAG_MDID_OFF; - lkup_exts->field_mask[word] = - ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK; + lkup_exts->fv_words[word].off = + ICE_PKT_VLAN_MDID_OFFSET; + lkup_exts->field_mask[word] = ICE_PKT_VLAN_MASK; } else { return -ENOSPC; } diff --git a/drivers/net/ethernet/intel/ice/ice_vlan_mode.c b/drivers/net/ethernet/intel/ice/ice_vlan_mode.c index bcda2e004807..1279c1ffe31c 100644 --- a/drivers/net/ethernet/intel/ice/ice_vlan_mode.c +++ b/drivers/net/ethernet/intel/ice/ice_vlan_mode.c @@ -219,7 +219,7 @@ static struct ice_update_recipe_lkup_idx_params ice_dvm_dflt_recipes[] = { .rid = ICE_SW_LKUP_VLAN, .fv_idx = ICE_PKT_FLAGS_0_TO_15_FV_IDX, .ignore_valid = false, - .mask = ICE_PKT_FLAGS_0_TO_15_VLAN_FLAGS_MASK, + .mask = ICE_PKT_VLAN_MASK, .mask_valid = true, .lkup_idx = ICE_SW_LKUP_VLAN_PKT_FLAGS_LKUP_IDX, }, -- cgit v1.2.3 From 40fd749245f2ee32874e563051957cc614cf11e4 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 7 Apr 2023 18:52:16 +0200 Subject: ice: remove redundant Rx field from rule info Information about the direction is currently stored in sw_act.flag. There is no need to duplicate it in another field. Setting direction flag doesn't mean that there is a match criteria for direction in rule. It is only a information for HW from where switch id should be collected (VSI or port). 
In the current implementation of advanced rule handling, without matching on direction meta data, we can always set the same flag and everything will work the same. Ability to match on direction meta data will be added in follow up patches. Recipes 0, 3 and 9 loaded from the package have direction match criteria, but they are handled in another function. Move ice_adv_rule_info fields to avoid holes. Signed-off-by: Michal Swiatkowski Reviewed-by: Piotr Raczynski Reviewed-by: Simon Horman Reviewed-by: Leon Romanovsky Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 1 - drivers/net/ethernet/intel/ice/ice_switch.c | 22 +++++++++++----------- drivers/net/ethernet/intel/ice/ice_switch.h | 8 +++----- drivers/net/ethernet/intel/ice/ice_tc_lib.c | 5 ----- 4 files changed, 14 insertions(+), 22 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index f6dd3f8fd936..2c80d57331d0 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -39,7 +39,6 @@ ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, const u8 *mac) rule_info.sw_act.flag |= ICE_FLTR_TX; rule_info.sw_act.vsi_handle = ctrl_vsi->idx; rule_info.sw_act.fltr_act = ICE_FWD_TO_Q; - rule_info.rx = false; rule_info.sw_act.fwd_id.q_id = hw->func_caps.common_cap.rxq_first_id + ctrl_vsi->rxq_map[vf->vf_id]; rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 5c3f266fa80f..e806dfe69b90 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -6121,8 +6121,7 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI) rinfo->sw_act.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); - if
(rinfo->sw_act.flag & ICE_FLTR_TX) - rinfo->sw_act.src = ice_get_hw_vsi_num(hw, vsi_handle); + rinfo->sw_act.src = ice_get_hw_vsi_num(hw, vsi_handle); status = ice_add_adv_recipe(hw, lkups, lkups_cnt, rinfo, &rid); if (status) @@ -6190,19 +6189,20 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, goto err_ice_add_adv_rule; } - /* set the rule LOOKUP type based on caller specified 'Rx' - * instead of hardcoding it to be either LOOKUP_TX/RX + /* If there is no matching criteria for direction there + * is only one difference between Rx and Tx: + * - get switch id base on VSI number from source field (Tx) + * - get switch id base on port number (Rx) * - * for 'Rx' set the source to be the port number - * for 'Tx' set the source to be the source HW VSI number (determined - * by caller) + * If matching on direction metadata is chose rule direction is + * extracted from type value set here. */ - if (rinfo->rx) { - s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); - s_rule->src = cpu_to_le16(hw->port_info->lport); - } else { + if (rinfo->sw_act.flag & ICE_FLTR_TX) { s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX); s_rule->src = cpu_to_le16(rinfo->sw_act.src); + } else { + s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); + s_rule->src = cpu_to_le16(hw->port_info->lport); } s_rule->recipe_id = cpu_to_le16(rid); diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 68d8e8a6a189..8e77868d6dca 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -10,7 +10,6 @@ #define ICE_DFLT_VSI_INVAL 0xff #define ICE_FLTR_RX BIT(0) #define ICE_FLTR_TX BIT(1) -#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX) #define ICE_VSI_INVAL_ID 0xffff #define ICE_INVAL_Q_HANDLE 0xFFFF @@ -188,11 +187,10 @@ struct ice_adv_rule_flags_info { struct ice_adv_rule_info { enum ice_sw_tunnel_type tun_type; - struct ice_sw_act_ctrl sw_act; - u32 
priority; - u8 rx; /* true means LOOKUP_RX otherwise LOOKUP_TX */ - u16 fltr_rule_id; u16 vlan_type; + u16 fltr_rule_id; + u32 priority; + struct ice_sw_act_ctrl sw_act; struct ice_adv_rule_flags_info flags_info; }; diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index d1a31f236d26..2b1a34586f47 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -698,12 +698,10 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) if (fltr->direction == ICE_ESWITCH_FLTR_INGRESS) { rule_info.sw_act.flag |= ICE_FLTR_RX; rule_info.sw_act.src = hw->pf_id; - rule_info.rx = true; rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE; } else { rule_info.sw_act.flag |= ICE_FLTR_TX; rule_info.sw_act.src = vsi->idx; - rule_info.rx = false; rule_info.flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE; } @@ -910,7 +908,6 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, rule_info.sw_act.vsi_handle = dest_vsi->idx; rule_info.priority = ICE_SWITCH_FLTR_PRIO_VSI; rule_info.sw_act.src = hw->pf_id; - rule_info.rx = true; dev_dbg(dev, "add switch rule for TC:%u vsi_idx:%u, lkups_cnt:%u\n", tc_fltr->action.fwd.tc.tc_class, rule_info.sw_act.vsi_handle, lkups_cnt); @@ -921,7 +918,6 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, rule_info.sw_act.vsi_handle = dest_vsi->idx; rule_info.priority = ICE_SWITCH_FLTR_PRIO_QUEUE; rule_info.sw_act.src = hw->pf_id; - rule_info.rx = true; dev_dbg(dev, "add switch rule action to forward to queue:%u (HW queue %u), lkups_cnt:%u\n", tc_fltr->action.fwd.q.queue, tc_fltr->action.fwd.q.hw_queue, lkups_cnt); @@ -929,7 +925,6 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, case ICE_DROP_PACKET: rule_info.sw_act.flag |= ICE_FLTR_RX; rule_info.sw_act.src = hw->pf_id; - rule_info.rx = true; rule_info.priority = ICE_SWITCH_FLTR_PRIO_VSI; break; default: -- cgit v1.2.3 From 17c6d8357da1ae6a5d92c15efe68f877c3e8b968 Mon Sep 17 00:00:00 2001 
From: Michal Swiatkowski Date: Fri, 7 Apr 2023 18:52:17 +0200 Subject: ice: specify field names in ice_prot_ext init Anonymous initializers are now discouraged. Define the ICE_PROTOCOL_ENTRY macro to rewrite anonymous initializers as named ones. No functional changes here. Suggested-by: Alexander Lobakin Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Reviewed-by: Leon Romanovsky Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_switch.c | 51 ++++++++++++++++------------- 1 file changed, 28 insertions(+), 23 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index e806dfe69b90..baa61a2b82f0 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -4540,6 +4540,11 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, return status; } +#define ICE_PROTOCOL_ENTRY(id, ...) { \ + .prot_type = id, \ + .offs = {__VA_ARGS__}, \ +} + /* This is mapping table entry that maps every word within a given protocol * structure to the real byte offset as per the specification of that * protocol header. @@ -4550,29 +4555,29 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, * structure is added to that union.
*/ static const struct ice_prot_ext_tbl_entry ice_prot_ext[ICE_PROTOCOL_LAST] = { - { ICE_MAC_OFOS, { 0, 2, 4, 6, 8, 10, 12 } }, - { ICE_MAC_IL, { 0, 2, 4, 6, 8, 10, 12 } }, - { ICE_ETYPE_OL, { 0 } }, - { ICE_ETYPE_IL, { 0 } }, - { ICE_VLAN_OFOS, { 2, 0 } }, - { ICE_IPV4_OFOS, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18 } }, - { ICE_IPV4_IL, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18 } }, - { ICE_IPV6_OFOS, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, - 26, 28, 30, 32, 34, 36, 38 } }, - { ICE_IPV6_IL, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, - 26, 28, 30, 32, 34, 36, 38 } }, - { ICE_TCP_IL, { 0, 2 } }, - { ICE_UDP_OF, { 0, 2 } }, - { ICE_UDP_ILOS, { 0, 2 } }, - { ICE_VXLAN, { 8, 10, 12, 14 } }, - { ICE_GENEVE, { 8, 10, 12, 14 } }, - { ICE_NVGRE, { 0, 2, 4, 6 } }, - { ICE_GTP, { 8, 10, 12, 14, 16, 18, 20, 22 } }, - { ICE_GTP_NO_PAY, { 8, 10, 12, 14 } }, - { ICE_PPPOE, { 0, 2, 4, 6 } }, - { ICE_L2TPV3, { 0, 2, 4, 6, 8, 10 } }, - { ICE_VLAN_EX, { 2, 0 } }, - { ICE_VLAN_IN, { 2, 0 } }, + ICE_PROTOCOL_ENTRY(ICE_MAC_OFOS, 0, 2, 4, 6, 8, 10, 12), + ICE_PROTOCOL_ENTRY(ICE_MAC_IL, 0, 2, 4, 6, 8, 10, 12), + ICE_PROTOCOL_ENTRY(ICE_ETYPE_OL, 0), + ICE_PROTOCOL_ENTRY(ICE_ETYPE_IL, 0), + ICE_PROTOCOL_ENTRY(ICE_VLAN_OFOS, 2, 0), + ICE_PROTOCOL_ENTRY(ICE_IPV4_OFOS, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18), + ICE_PROTOCOL_ENTRY(ICE_IPV4_IL, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18), + ICE_PROTOCOL_ENTRY(ICE_IPV6_OFOS, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, + 20, 22, 24, 26, 28, 30, 32, 34, 36, 38), + ICE_PROTOCOL_ENTRY(ICE_IPV6_IL, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, + 22, 24, 26, 28, 30, 32, 34, 36, 38), + ICE_PROTOCOL_ENTRY(ICE_TCP_IL, 0, 2), + ICE_PROTOCOL_ENTRY(ICE_UDP_OF, 0, 2), + ICE_PROTOCOL_ENTRY(ICE_UDP_ILOS, 0, 2), + ICE_PROTOCOL_ENTRY(ICE_VXLAN, 8, 10, 12, 14), + ICE_PROTOCOL_ENTRY(ICE_GENEVE, 8, 10, 12, 14), + ICE_PROTOCOL_ENTRY(ICE_NVGRE, 0, 2, 4, 6), + ICE_PROTOCOL_ENTRY(ICE_GTP, 8, 10, 12, 14, 16, 18, 20, 22), + ICE_PROTOCOL_ENTRY(ICE_GTP_NO_PAY, 8, 10, 12, 14), + 
ICE_PROTOCOL_ENTRY(ICE_PPPOE, 0, 2, 4, 6), + ICE_PROTOCOL_ENTRY(ICE_L2TPV3, 0, 2, 4, 6, 8, 10), + ICE_PROTOCOL_ENTRY(ICE_VLAN_EX, 2, 0), + ICE_PROTOCOL_ENTRY(ICE_VLAN_IN, 2, 0), }; static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { -- cgit v1.2.3 From 03592a14b9383bbe1c0d56e7ac4005cea10e711a Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 7 Apr 2023 18:52:18 +0200 Subject: ice: allow matching on meta data Add meta data matching criteria in the same place as protocol matching criteria. There is no need to add meta data as special words after parsing all lookups. Treat meta data in the same way as other lookups. The one difference between meta data lookups and protocol lookups is that meta data doesn't impact what the packet looks like. Because of that, ignore it when filling the testing packet. Always match on tunnel type meta data if the tunnel type is different than TNL_LAST. Signed-off-by: Michal Swiatkowski Reviewed-by: Piotr Raczynski Reviewed-by: Simon Horman Reviewed-by: Leon Romanovsky Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_protocol_type.h | 8 ++ drivers/net/ethernet/intel/ice/ice_switch.c | 158 ++++++++------------- drivers/net/ethernet/intel/ice/ice_switch.h | 4 + drivers/net/ethernet/intel/ice/ice_tc_lib.c | 29 +++- drivers/net/ethernet/intel/ice/ice_tc_lib.h | 1 + 5 files changed, 95 insertions(+), 105 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index 8a84f106bd4d..ed0ab8177c61 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -47,6 +47,7 @@ enum ice_protocol_type { ICE_L2TPV3, ICE_VLAN_EX, ICE_VLAN_IN, + ICE_HW_METADATA, ICE_VXLAN_GPE, ICE_SCTP_IL, ICE_PROTOCOL_LAST @@ -387,6 +388,13 @@ enum ice_hw_metadata_offset { ICE_PKT_ERROR_MDID_OFFSET = ICE_MDID_SIZE * ICE_PKT_ERROR_MDID,
}; +enum ice_pkt_flags { + ICE_PKT_FLAGS_VLAN = 0, + ICE_PKT_FLAGS_TUNNEL = 1, + ICE_PKT_FLAGS_TCP = 2, + ICE_PKT_FLAGS_ERROR = 3, +}; + struct ice_hw_metadata { __be16 source_port; __be16 ptype; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index baa61a2b82f0..9578bd0a2d65 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -4578,6 +4578,15 @@ static const struct ice_prot_ext_tbl_entry ice_prot_ext[ICE_PROTOCOL_LAST] = { ICE_PROTOCOL_ENTRY(ICE_L2TPV3, 0, 2, 4, 6, 8, 10), ICE_PROTOCOL_ENTRY(ICE_VLAN_EX, 2, 0), ICE_PROTOCOL_ENTRY(ICE_VLAN_IN, 2, 0), + ICE_PROTOCOL_ENTRY(ICE_HW_METADATA, + ICE_SOURCE_PORT_MDID_OFFSET, + ICE_PTYPE_MDID_OFFSET, + ICE_PACKET_LENGTH_MDID_OFFSET, + ICE_SOURCE_VSI_MDID_OFFSET, + ICE_PKT_VLAN_MDID_OFFSET, + ICE_PKT_TUNNEL_MDID_OFFSET, + ICE_PKT_TCP_MDID_OFFSET, + ICE_PKT_ERROR_MDID_OFFSET), }; static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { @@ -4602,6 +4611,7 @@ static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { { ICE_L2TPV3, ICE_L2TPV3_HW }, { ICE_VLAN_EX, ICE_VLAN_OF_HW }, { ICE_VLAN_IN, ICE_VLAN_OL_HW }, + { ICE_HW_METADATA, ICE_META_DATA_ID_HW }, }; /** @@ -5260,72 +5270,6 @@ ice_create_recipe_group(struct ice_hw *hw, struct ice_sw_recipe *rm, return status; } -/** - * ice_tun_type_match_word - determine if tun type needs a match mask - * @tun_type: tunnel type - * @mask: mask to be used for the tunnel - */ -static bool ice_tun_type_match_word(enum ice_sw_tunnel_type tun_type, u16 *mask) -{ - switch (tun_type) { - case ICE_SW_TUN_GENEVE: - case ICE_SW_TUN_VXLAN: - case ICE_SW_TUN_NVGRE: - case ICE_SW_TUN_GTPU: - case ICE_SW_TUN_GTPC: - *mask = ICE_PKT_TUNNEL_MASK; - return true; - - default: - *mask = 0; - return false; - } -} - -/** - * ice_add_special_words - Add words that are not protocols, such as metadata - * @rinfo: other information regarding the rule e.g. 
priority and action info - * @lkup_exts: lookup word structure - * @dvm_ena: is double VLAN mode enabled - */ -static int -ice_add_special_words(struct ice_adv_rule_info *rinfo, - struct ice_prot_lkup_ext *lkup_exts, bool dvm_ena) -{ - u16 mask; - - /* If this is a tunneled packet, then add recipe index to match the - * tunnel bit in the packet metadata flags. - */ - if (ice_tun_type_match_word(rinfo->tun_type, &mask)) { - if (lkup_exts->n_val_words < ICE_MAX_CHAIN_WORDS) { - u8 word = lkup_exts->n_val_words++; - - lkup_exts->fv_words[word].prot_id = ICE_META_DATA_ID_HW; - lkup_exts->fv_words[word].off = - ICE_PKT_TUNNEL_MDID_OFFSET; - lkup_exts->field_mask[word] = mask; - } else { - return -ENOSPC; - } - } - - if (rinfo->vlan_type != 0 && dvm_ena) { - if (lkup_exts->n_val_words < ICE_MAX_CHAIN_WORDS) { - u8 word = lkup_exts->n_val_words++; - - lkup_exts->fv_words[word].prot_id = ICE_META_DATA_ID_HW; - lkup_exts->fv_words[word].off = - ICE_PKT_VLAN_MDID_OFFSET; - lkup_exts->field_mask[word] = ICE_PKT_VLAN_MASK; - } else { - return -ENOSPC; - } - } - - return 0; -} - /* ice_get_compat_fv_bitmap - Get compatible field vector bitmap for rule * @hw: pointer to hardware structure * @rinfo: other information regarding the rule e.g. priority and action info @@ -5439,13 +5383,6 @@ ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, if (status) goto err_unroll; - /* Create any special protocol/offset pairs, such as looking at tunnel - * bits by extracting metadata - */ - status = ice_add_special_words(rinfo, lkup_exts, ice_is_dvm_ena(hw)); - if (status) - goto err_unroll; - /* Group match words into recipes using preferred recipe grouping * criteria. 
*/ @@ -5731,6 +5668,10 @@ ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, * was already checked when search for the dummy packet */ type = lkups[i].type; + /* metadata isn't present in the packet */ + if (type == ICE_HW_METADATA) + continue; + for (j = 0; offsets[j].type != ICE_PROTOCOL_LAST; j++) { if (type == offsets[j].type) { offset = offsets[j].offset; @@ -5866,16 +5807,21 @@ ice_fill_adv_packet_tun(struct ice_hw *hw, enum ice_sw_tunnel_type tun_type, /** * ice_fill_adv_packet_vlan - fill dummy packet with VLAN tag type + * @hw: pointer to hw structure * @vlan_type: VLAN tag type * @pkt: dummy packet to fill in * @offsets: offset info for the dummy packet */ static int -ice_fill_adv_packet_vlan(u16 vlan_type, u8 *pkt, +ice_fill_adv_packet_vlan(struct ice_hw *hw, u16 vlan_type, u8 *pkt, const struct ice_dummy_pkt_offsets *offsets) { u16 i; + /* Check if there is something to do */ + if (!vlan_type || !ice_is_dvm_ena(hw)) + return 0; + /* Find VLAN header and insert VLAN TPID */ for (i = 0; offsets[i].type != ICE_PROTOCOL_LAST; i++) { if (offsets[i].type == ICE_VLAN_OFOS || @@ -5894,6 +5840,15 @@ ice_fill_adv_packet_vlan(u16 vlan_type, u8 *pkt, return -EIO; } +static bool ice_rules_equal(const struct ice_adv_rule_info *first, + const struct ice_adv_rule_info *second) +{ + return first->sw_act.flag == second->sw_act.flag && + first->tun_type == second->tun_type && + first->vlan_type == second->vlan_type && + first->src_vsi == second->src_vsi; +} + /** * ice_find_adv_rule_entry - Search a rule entry * @hw: pointer to the hardware structure @@ -5927,9 +5882,7 @@ ice_find_adv_rule_entry(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, lkups_matched = false; break; } - if (rinfo->sw_act.flag == list_itr->rule_info.sw_act.flag && - rinfo->tun_type == list_itr->rule_info.tun_type && - rinfo->vlan_type == list_itr->rule_info.vlan_type && + if (ice_rules_equal(rinfo, &list_itr->rule_info) && lkups_matched) return list_itr; } @@ -6045,6 
+5998,20 @@ ice_adv_add_update_vsi_list(struct ice_hw *hw, return status; } +void ice_rule_add_tunnel_metadata(struct ice_adv_lkup_elem *lkup) +{ + lkup->type = ICE_HW_METADATA; + lkup->m_u.metadata.flags[ICE_PKT_FLAGS_TUNNEL] = + cpu_to_be16(ICE_PKT_TUNNEL_MASK); +} + +void ice_rule_add_vlan_metadata(struct ice_adv_lkup_elem *lkup) +{ + lkup->type = ICE_HW_METADATA; + lkup->m_u.metadata.flags[ICE_PKT_FLAGS_VLAN] = + cpu_to_be16(ICE_PKT_VLAN_MASK); +} + /** * ice_add_adv_rule - helper function to create an advanced switch rule * @hw: pointer to the hardware structure @@ -6126,7 +6093,11 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI) rinfo->sw_act.fwd_id.hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); - rinfo->sw_act.src = ice_get_hw_vsi_num(hw, vsi_handle); + + if (rinfo->src_vsi) + rinfo->sw_act.src = ice_get_hw_vsi_num(hw, rinfo->src_vsi); + else + rinfo->sw_act.src = ice_get_hw_vsi_num(hw, vsi_handle); status = ice_add_adv_recipe(hw, lkups, lkups_cnt, rinfo, &rid); if (status) @@ -6217,22 +6188,16 @@ ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, if (status) goto err_ice_add_adv_rule; - if (rinfo->tun_type != ICE_NON_TUN && - rinfo->tun_type != ICE_SW_TUN_AND_NON_TUN) { - status = ice_fill_adv_packet_tun(hw, rinfo->tun_type, - s_rule->hdr_data, - profile->offsets); - if (status) - goto err_ice_add_adv_rule; - } + status = ice_fill_adv_packet_tun(hw, rinfo->tun_type, s_rule->hdr_data, + profile->offsets); + if (status) + goto err_ice_add_adv_rule; - if (rinfo->vlan_type != 0 && ice_is_dvm_ena(hw)) { - status = ice_fill_adv_packet_vlan(rinfo->vlan_type, - s_rule->hdr_data, - profile->offsets); - if (status) - goto err_ice_add_adv_rule; - } + status = ice_fill_adv_packet_vlan(hw, rinfo->vlan_type, + s_rule->hdr_data, + profile->offsets); + if (status) + goto err_ice_add_adv_rule; status = ice_aq_sw_rules(hw, (struct ice_aqc_sw_rules *)s_rule, rule_buf_sz, 1, 
ice_aqc_opc_add_sw_rules, @@ -6475,13 +6440,6 @@ ice_rem_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, return -EIO; } - /* Create any special protocol/offset pairs, such as looking at tunnel - * bits by extracting metadata - */ - status = ice_add_special_words(rinfo, &lkup_exts, ice_is_dvm_ena(hw)); - if (status) - return status; - rid = ice_find_recp(hw, &lkup_exts, rinfo->tun_type); /* If did not find a recipe that match the existing criteria */ if (rid == ICE_MAX_NUM_RECIPES) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 8e77868d6dca..bbd759f94187 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -186,10 +186,12 @@ struct ice_adv_rule_flags_info { }; struct ice_adv_rule_info { + /* Store metadata values in rule info */ enum ice_sw_tunnel_type tun_type; u16 vlan_type; u16 fltr_rule_id; u32 priority; + u16 src_vsi; struct ice_sw_act_ctrl sw_act; struct ice_adv_rule_flags_info flags_info; }; @@ -340,6 +342,8 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, u16 counter_id); /* Switch/bridge related commands */ +void ice_rule_add_tunnel_metadata(struct ice_adv_lkup_elem *lkup); +void ice_rule_add_vlan_metadata(struct ice_adv_lkup_elem *lkup); int ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, struct ice_adv_rule_info *rinfo, diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 2b1a34586f47..b54052ef6050 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -54,6 +54,10 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, if (flags & (ICE_TC_FLWR_FIELD_VLAN | ICE_TC_FLWR_FIELD_VLAN_PRIO)) lkups_cnt++; + /* is VLAN TPID specified */ + if (flags & ICE_TC_FLWR_FIELD_VLAN_TPID) + lkups_cnt++; + /* is CVLAN specified? 
*/ if (flags & (ICE_TC_FLWR_FIELD_CVLAN | ICE_TC_FLWR_FIELD_CVLAN_PRIO)) lkups_cnt++; @@ -80,6 +84,10 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, ICE_TC_FLWR_FIELD_SRC_L4_PORT)) lkups_cnt++; + /* matching for tunneled packets in metadata */ + if (fltr->tunnel_type != TNL_LAST) + lkups_cnt++; + return lkups_cnt; } @@ -320,6 +328,10 @@ ice_tc_fill_tunnel_outer(u32 flags, struct ice_tc_flower_fltr *fltr, i++; } + /* always fill matching on tunneled packets in metadata */ + ice_rule_add_tunnel_metadata(&list[i]); + i++; + return i; } @@ -390,10 +402,6 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, /* copy VLAN info */ if (flags & (ICE_TC_FLWR_FIELD_VLAN | ICE_TC_FLWR_FIELD_VLAN_PRIO)) { - vlan_tpid = be16_to_cpu(headers->vlan_hdr.vlan_tpid); - rule_info->vlan_type = - ice_check_supported_vlan_tpid(vlan_tpid); - if (flags & ICE_TC_FLWR_FIELD_CVLAN) list[i].type = ICE_VLAN_EX; else @@ -418,6 +426,15 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags, i++; } + if (flags & ICE_TC_FLWR_FIELD_VLAN_TPID) { + vlan_tpid = be16_to_cpu(headers->vlan_hdr.vlan_tpid); + rule_info->vlan_type = + ice_check_supported_vlan_tpid(vlan_tpid); + + ice_rule_add_vlan_metadata(&list[i]); + i++; + } + if (flags & (ICE_TC_FLWR_FIELD_CVLAN | ICE_TC_FLWR_FIELD_CVLAN_PRIO)) { list[i].type = ICE_VLAN_IN; @@ -1455,8 +1472,10 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, VLAN_PRIO_MASK); } - if (match.mask->vlan_tpid) + if (match.mask->vlan_tpid) { headers->vlan_hdr.vlan_tpid = match.key->vlan_tpid; + fltr->flags |= ICE_TC_FLWR_FIELD_VLAN_TPID; + } } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index 8d5e22ac7023..8bbc1a62bdb1 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -33,6 +33,7 @@ #define ICE_TC_FLWR_FIELD_L2TPV3_SESSID BIT(26) #define 
ICE_TC_FLWR_FIELD_VLAN_PRIO BIT(27) #define ICE_TC_FLWR_FIELD_CVLAN_PRIO BIT(28) +#define ICE_TC_FLWR_FIELD_VLAN_TPID BIT(29) #define ICE_TC_FLOWER_MASK_32 0xFFFFFFFF -- cgit v1.2.3 From 0ef4479d13af4c5516920520d9cf7bcfe801b353 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Fri, 7 Apr 2023 18:52:19 +0200 Subject: ice: use src VSI instead of src MAC in slow-path The use of a source MAC to direct packets from the VF to the corresponding port representor is only ok if there is only one MAC on a VF. To support this functionality when the number of MACs on a VF is greater, it is necessary to match a source VSI instead of a source MAC. Let's use the new switch API that allows matching on metadata. If MAC isn't used in match criteria there is no need to handle adding rule after virtchnl command. Instead add new rule while port representor is being configured. Remove rule_added field, checking for sp_rule can be used instead. Remove also checking for switchdev running in deleting rule as it can be called from unroll context when running flag isn't set. Checking for sp_rule covers both context (with and without running flag). Rules are added in eswitch configuration flow, so there is no need to have replay function. 
Signed-off-by: Michal Swiatkowski Reviewed-by: Piotr Raczynski Reviewed-by: Simon Horman Reviewed-by: Leon Romanovsky Tested-by: Sujai Buvaneswaran Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_eswitch.c | 83 +++++++--------------- drivers/net/ethernet/intel/ice/ice_eswitch.h | 14 ---- drivers/net/ethernet/intel/ice/ice_protocol_type.h | 5 +- drivers/net/ethernet/intel/ice/ice_repr.c | 17 ----- drivers/net/ethernet/intel/ice/ice_repr.h | 5 +- drivers/net/ethernet/intel/ice/ice_switch.c | 6 ++ drivers/net/ethernet/intel/ice/ice_switch.h | 1 + drivers/net/ethernet/intel/ice/ice_vf_lib.c | 3 - drivers/net/ethernet/intel/ice/ice_virtchnl.c | 8 --- 9 files changed, 40 insertions(+), 102 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index 2c80d57331d0..ad0a007b7398 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -10,16 +10,15 @@ #include "ice_tc_lib.h" /** - * ice_eswitch_add_vf_mac_rule - add adv rule with VF's MAC + * ice_eswitch_add_vf_sp_rule - add adv rule with VF's VSI index * @pf: pointer to PF struct * @vf: pointer to VF struct - * @mac: VF's MAC address * * This function adds advanced rule that forwards packets with - * VF's MAC address (src MAC) to the corresponding switchdev ctrl VSI queue. + * VF's VSI index to the corresponding switchdev ctrl VSI queue. 
*/ -int -ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, const u8 *mac) +static int +ice_eswitch_add_vf_sp_rule(struct ice_pf *pf, struct ice_vf *vf) { struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; struct ice_adv_rule_info rule_info = { 0 }; @@ -32,11 +31,9 @@ ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, const u8 *mac) if (!list) return -ENOMEM; - list[0].type = ICE_MAC_OFOS; - ether_addr_copy(list[0].h_u.eth_hdr.src_addr, mac); - eth_broadcast_addr(list[0].m_u.eth_hdr.src_addr); + ice_rule_add_src_vsi_metadata(list); - rule_info.sw_act.flag |= ICE_FLTR_TX; + rule_info.sw_act.flag = ICE_FLTR_TX; rule_info.sw_act.vsi_handle = ctrl_vsi->idx; rule_info.sw_act.fltr_act = ICE_FWD_TO_Q; rule_info.sw_act.fwd_id.q_id = hw->func_caps.common_cap.rxq_first_id + @@ -44,63 +41,31 @@ ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, const u8 *mac) rule_info.flags_info.act |= ICE_SINGLE_ACT_LB_ENABLE; rule_info.flags_info.act_valid = true; rule_info.tun_type = ICE_SW_TUN_AND_NON_TUN; + rule_info.src_vsi = vf->lan_vsi_idx; err = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, - vf->repr->mac_rule); + &vf->repr->sp_rule); if (err) - dev_err(ice_pf_to_dev(pf), "Unable to add VF mac rule in switchdev mode for VF %d", + dev_err(ice_pf_to_dev(pf), "Unable to add VF slow-path rule in switchdev mode for VF %d", vf->vf_id); - else - vf->repr->rule_added = true; kfree(list); return err; } /** - * ice_eswitch_replay_vf_mac_rule - replay adv rule with VF's MAC - * @vf: pointer to vF struct - * - * This function replays VF's MAC rule after reset. 
- */ -void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf) -{ - int err; - - if (!ice_is_switchdev_running(vf->pf)) - return; - - if (is_valid_ether_addr(vf->hw_lan_addr)) { - err = ice_eswitch_add_vf_mac_rule(vf->pf, vf, - vf->hw_lan_addr); - if (err) { - dev_err(ice_pf_to_dev(vf->pf), "Failed to add MAC %pM for VF %d\n, error %d\n", - vf->hw_lan_addr, vf->vf_id, err); - return; - } - vf->num_mac++; - - ether_addr_copy(vf->dev_lan_addr, vf->hw_lan_addr); - } -} - -/** - * ice_eswitch_del_vf_mac_rule - delete adv rule with VF's MAC + * ice_eswitch_del_vf_sp_rule - delete adv rule with VF's VSI index * @vf: pointer to the VF struct * - * Delete the advanced rule that was used to forward packets with the VF's MAC - * address (src MAC) to the corresponding switchdev ctrl VSI queue. + * Delete the advanced rule that was used to forward packets with the VF's VSI + * index to the corresponding switchdev ctrl VSI queue. */ -void ice_eswitch_del_vf_mac_rule(struct ice_vf *vf) +static void ice_eswitch_del_vf_sp_rule(struct ice_vf *vf) { - if (!ice_is_switchdev_running(vf->pf)) - return; - - if (!vf->repr->rule_added) + if (!vf->repr) return; - ice_rem_adv_rule_by_id(&vf->pf->hw, vf->repr->mac_rule); - vf->repr->rule_added = false; + ice_rem_adv_rule_by_id(&vf->pf->hw, &vf->repr->sp_rule); } /** @@ -236,6 +201,7 @@ ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi) ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); metadata_dst_free(vf->repr->dst); vf->repr->dst = NULL; + ice_eswitch_del_vf_sp_rule(vf); ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI); @@ -263,25 +229,30 @@ static int ice_eswitch_setup_reprs(struct ice_pf *pf) vf->repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, GFP_KERNEL); if (!vf->repr->dst) { - ice_fltr_add_mac_and_broadcast(vsi, - vf->hw_lan_addr, + ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, + ICE_FWD_TO_VSI); + goto err; + } + + if (ice_eswitch_add_vf_sp_rule(pf, vf)) { + 
ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI); goto err; } if (ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof)) { - ice_fltr_add_mac_and_broadcast(vsi, - vf->hw_lan_addr, + ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI); + ice_eswitch_del_vf_sp_rule(vf); metadata_dst_free(vf->repr->dst); vf->repr->dst = NULL; goto err; } if (ice_vsi_add_vlan_zero(vsi)) { - ice_fltr_add_mac_and_broadcast(vsi, - vf->hw_lan_addr, + ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr, ICE_FWD_TO_VSI); + ice_eswitch_del_vf_sp_rule(vf); metadata_dst_free(vf->repr->dst); vf->repr->dst = NULL; ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h index 6a413331572b..b18bf83a2f5b 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.h +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h @@ -20,11 +20,6 @@ bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf); void ice_eswitch_update_repr(struct ice_vsi *vsi); void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf); -int -ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, - const u8 *mac); -void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf); -void ice_eswitch_del_vf_mac_rule(struct ice_vf *vf); void ice_eswitch_set_target_vsi(struct sk_buff *skb, struct ice_tx_offload_params *off); @@ -34,15 +29,6 @@ ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev); static inline void ice_eswitch_release(struct ice_pf *pf) { } static inline void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) { } -static inline void ice_eswitch_replay_vf_mac_rule(struct ice_vf *vf) { } -static inline void ice_eswitch_del_vf_mac_rule(struct ice_vf *vf) { } - -static inline int -ice_eswitch_add_vf_mac_rule(struct ice_pf *pf, struct ice_vf *vf, - const u8 *mac) -{ - return -EOPNOTSUPP; -} static inline void ice_eswitch_set_target_vsi(struct sk_buff *skb, diff --git 
a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index ed0ab8177c61..6a9364761165 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -256,7 +256,10 @@ struct ice_nvgre_hdr { * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * Source VSI = Source VSI of packet loopbacked in switch (for egress) (10b). - * + */ +#define ICE_MDID_SOURCE_VSI_MASK GENMASK(9, 0) + +/* * MDID 20 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |A|B|C|D|E|F|R|R|G|H|I|J|K|L|M|N| diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index fd1f8b0ad0ab..e30e12321abd 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -298,14 +298,6 @@ static int ice_repr_add(struct ice_vf *vf) if (!repr) return -ENOMEM; -#ifdef CONFIG_ICE_SWITCHDEV - repr->mac_rule = kzalloc(sizeof(*repr->mac_rule), GFP_KERNEL); - if (!repr->mac_rule) { - err = -ENOMEM; - goto err_alloc_rule; - } -#endif - repr->netdev = alloc_etherdev(sizeof(struct ice_netdev_priv)); if (!repr->netdev) { err = -ENOMEM; @@ -351,11 +343,6 @@ err_alloc_q_vector: free_netdev(repr->netdev); repr->netdev = NULL; err_alloc: -#ifdef CONFIG_ICE_SWITCHDEV - kfree(repr->mac_rule); - repr->mac_rule = NULL; -err_alloc_rule: -#endif kfree(repr); vf->repr = NULL; return err; @@ -376,10 +363,6 @@ static void ice_repr_rem(struct ice_vf *vf) ice_devlink_destroy_vf_port(vf); free_netdev(vf->repr->netdev); vf->repr->netdev = NULL; -#ifdef CONFIG_ICE_SWITCHDEV - kfree(vf->repr->mac_rule); - vf->repr->mac_rule = NULL; -#endif kfree(vf->repr); vf->repr = NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h index 378a45bfa256..9c2a6f496b3b 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.h +++ b/drivers/net/ethernet/intel/ice/ice_repr.h @@ -13,9 +13,8 @@ struct ice_repr { struct net_device *netdev; struct 
metadata_dst *dst; #ifdef CONFIG_ICE_SWITCHDEV - /* info about slow path MAC rule */ - struct ice_rule_query_data *mac_rule; - u8 rule_added; + /* info about slow path rule */ + struct ice_rule_query_data sp_rule; #endif }; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 9578bd0a2d65..2ea9e1ae5517 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -6012,6 +6012,12 @@ void ice_rule_add_vlan_metadata(struct ice_adv_lkup_elem *lkup) cpu_to_be16(ICE_PKT_VLAN_MASK); } +void ice_rule_add_src_vsi_metadata(struct ice_adv_lkup_elem *lkup) +{ + lkup->type = ICE_HW_METADATA; + lkup->m_u.metadata.source_vsi = cpu_to_be16(ICE_MDID_SOURCE_VSI_MASK); +} + /** * ice_add_adv_rule - helper function to create an advanced switch rule * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index bbd759f94187..c84b56fe84a5 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -344,6 +344,7 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, /* Switch/bridge related commands */ void ice_rule_add_tunnel_metadata(struct ice_adv_lkup_elem *lkup); void ice_rule_add_vlan_metadata(struct ice_adv_lkup_elem *lkup); +void ice_rule_add_src_vsi_metadata(struct ice_adv_lkup_elem *lkup); int ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, struct ice_adv_rule_info *rinfo, diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index e441968a70ae..b26ce4425f45 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -689,8 +689,6 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) */ ice_vf_clear_all_promisc_modes(vf, vsi); - ice_eswitch_del_vf_mac_rule(vf); - ice_vf_fdir_exit(vf); 
ice_vf_fdir_init(vf); /* clean VF control VSI when resetting VF since it should be setup @@ -716,7 +714,6 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) } ice_eswitch_update_repr(vsi); - ice_eswitch_replay_vf_mac_rule(vf); /* if the VF has been reset allow it to come up again */ ice_mbx_clear_malvf(&vf->mbx_info); diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index f4a524f80b11..efbc2968a7bf 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -3730,7 +3730,6 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg) for (i = 0; i < al->num_elements; i++) { u8 *mac_addr = al->list[i].addr; - int result; if (!is_unicast_ether_addr(mac_addr) || ether_addr_equal(mac_addr, vf->hw_lan_addr)) @@ -3742,13 +3741,6 @@ static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg) goto handle_mac_exit; } - result = ice_eswitch_add_vf_mac_rule(pf, vf, mac_addr); - if (result) { - dev_err(ice_pf_to_dev(pf), "Failed to add MAC %pM for VF %d\n, error %d\n", - mac_addr, vf->vf_id, result); - goto handle_mac_exit; - } - ice_vfhw_mac_add(vf, &al->list[i]); vf->num_mac++; break; -- cgit v1.2.3 From c511822fe2c96478d86b6bc3404bb45ebad7556b Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 22 Mar 2023 13:27:43 +0200 Subject: net/mlx5: Remove redundant esw multiport validate function The function didn't validate the value and doesn't require value validation as it will always be valid true or false values. 
Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 4b607785d694..0e07971e024a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -464,27 +464,6 @@ static int mlx5_devlink_esw_multiport_get(struct devlink *devlink, u32 id, ctx->val.vbool = mlx5_lag_is_mpesw(dev); return 0; } - -static int mlx5_devlink_esw_multiport_validate(struct devlink *devlink, u32 id, - union devlink_param_value val, - struct netlink_ext_ack *extack) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - - if (!MLX5_ESWITCH_MANAGER(dev)) { - NL_SET_ERR_MSG_MOD(extack, "E-Switch is unsupported"); - return -EOPNOTSUPP; - } - - if (mlx5_eswitch_mode(dev) != MLX5_ESWITCH_OFFLOADS) { - NL_SET_ERR_MSG_MOD(extack, - "E-Switch must be in switchdev mode"); - return -EBUSY; - } - - return 0; -} - #endif static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, @@ -563,7 +542,7 @@ static const struct devlink_param mlx5_devlink_params[] = { BIT(DEVLINK_PARAM_CMODE_RUNTIME), mlx5_devlink_esw_multiport_get, mlx5_devlink_esw_multiport_set, - mlx5_devlink_esw_multiport_validate), + NULL), #endif DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_eq_depth_validate), -- cgit v1.2.3 From 2abe501751ed8dbd390be5c2f8d51125e3662814 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 23 Mar 2023 10:01:38 +0200 Subject: net/mlx5: E-Switch, Remove redundant check The call to mlx5_eswitch_enable() also does the same check and if E-Switch not supported it returns 0 without any change. 
Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 20d7662c10fb..f07d00929162 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -74,9 +74,6 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) struct mlx5_core_sriov *sriov = &dev->priv.sriov; int err, vf, num_msix_count; - if (!MLX5_ESWITCH_MANAGER(dev)) - goto enable_vfs_hca; - err = mlx5_eswitch_enable(dev->priv.eswitch, num_vfs); if (err) { mlx5_core_warn(dev, @@ -84,7 +81,6 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) return err; } -enable_vfs_hca: num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs); for (vf = 0; vf < num_vfs; vf++) { /* Notify the VF before its enablement to let it set -- cgit v1.2.3 From edab80b89337b826566ff7fdbbc8ae2dcfb22fee Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 31 Jan 2023 12:06:45 +0200 Subject: net/mlx5e: E-Switch, Remove flow_source check for metadata matching There is no reason to check for flow_source cap to allow metadata matching. When flow_source match is being used the flow_source cap is being checked. 
Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 69215ffb9999..ecd12a0c6f07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2827,9 +2827,6 @@ bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw) MLX5_FDB_TO_VPORT_REG_C_0)) return false; - if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) - return false; - return true; } -- cgit v1.2.3 From 806815bf3c1d885e686ddc0e75d941078dcf56ae Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 30 Mar 2023 12:25:25 +0300 Subject: net/mlx5e: Remove redundant __func__ arg from fs_err() calls fs_err() already logs the function name. Remove the arg so the function name will not be logged twice.
Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 33bfe4d7338b..934b0d5ce1b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -283,7 +283,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_flow_steering *fs, if (IS_ERR(*rule_p)) { err = PTR_ERR(*rule_p); *rule_p = NULL; - fs_err(fs, "%s: add rule failed\n", __func__); + fs_err(fs, "add rule failed\n"); } return err; @@ -395,8 +395,7 @@ int mlx5e_add_vlan_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num if (IS_ERR(rule)) { err = PTR_ERR(rule); fs->vlan->trap_rule = NULL; - fs_err(fs, "%s: add VLAN trap rule failed, err %d\n", - __func__, err); + fs_err(fs, "add VLAN trap rule failed, err %d\n", err); return err; } fs->vlan->trap_rule = rule; @@ -421,8 +420,7 @@ int mlx5e_add_mac_trap(struct mlx5e_flow_steering *fs, int trap_id, int tir_num) if (IS_ERR(rule)) { err = PTR_ERR(rule); fs->l2.trap_rule = NULL; - fs_err(fs, "%s: add MAC trap rule failed, err %d\n", - __func__, err); + fs_err(fs, "add MAC trap rule failed, err %d\n", err); return err; } fs->l2.trap_rule = rule; @@ -763,7 +761,7 @@ static int mlx5e_add_promisc_rule(struct mlx5e_flow_steering *fs) if (IS_ERR(*rule_p)) { err = PTR_ERR(*rule_p); *rule_p = NULL; - fs_err(fs, "%s: add promiscuous rule failed\n", __func__); + fs_err(fs, "add promiscuous rule failed\n"); } kvfree(spec); return err; @@ -995,7 +993,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs, ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(ai->rule)) { - fs_err(fs, "%s: add l2 rule(mac:%pM) failed\n", __func__, mv_dmac); + fs_err(fs, "add l2 rule(mac:%pM) failed\n", mv_dmac); err = 
PTR_ERR(ai->rule); ai->rule = NULL; } -- cgit v1.2.3 From c97c9fe48ae3ccca75fb6001a3bd2bfcb094dbd7 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 2 Apr 2023 14:13:25 +0300 Subject: net/mlx5e: E-Switch, Update when to set other vport context Other vport context should be set if vport number is not 0. In case of ECPF, vport 0 represents the host PF representor so also need to set other vport context. Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c index 45b839116212..d599e50af346 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c @@ -35,7 +35,8 @@ esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns, } ft_attr.max_fte = size; - ft_attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT; + if (vport_num || mlx5_core_is_ecpf(esw->dev)) + ft_attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT; acl = mlx5_create_vport_flow_table(root_ns, &ft_attr, vport_num); if (IS_ERR(acl)) { err = PTR_ERR(acl); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 901c53751b0a..bf97a593d1d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -113,7 +113,8 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - 
MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + if (vport || mlx5_core_is_ecpf(dev)) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index ba7e3df22413..bc66b078a8a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -288,7 +288,8 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); - MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + if (vport || mlx5_core_is_ecpf(dev)) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); if (err) -- cgit v1.2.3 From 99db5669f6635a9719beb2e78ebf5887cde03a26 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 2 Apr 2023 14:19:54 +0300 Subject: net/mlx5e: E-Switch, Allow get vport api if esw exists We could have an esw manager device which is not a vport group manager. 
Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index bf97a593d1d4..692cea3a6383 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -92,7 +92,7 @@ mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_vport *vport; - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager)) + if (!esw) return ERR_PTR(-EPERM); vport = xa_load(&esw->vports, vport_num); -- cgit v1.2.3 From 29bcb6e4fe7072ccea2a1c8b357ffd8e88f334bb Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 2 Apr 2023 13:59:08 +0300 Subject: net/mlx5e: E-Switch, Use metadata for vport matching in send-to-vport rules Like other rules use metadata matching if supported instead of source_port. 
Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 62 +++++++++++++++------- 1 file changed, 43 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index ecd12a0c6f07..66a522d08be1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -838,6 +838,7 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, struct mlx5_flow_handle *flow_rule; struct mlx5_flow_spec *spec; void *misc; + u16 vport; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { @@ -847,20 +848,43 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); - /* source vport is the esw manager */ - MLX5_SET(fte_match_set_misc, misc, source_port, from_esw->manager_vport); - if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) - MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(from_esw->dev, vhca_id)); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + + /* source vport is the esw manager */ + vport = from_esw->manager_vport; + + if (mlx5_eswitch_vport_match_metadata_enabled(on_esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(from_esw, vport)); + + misc = 
MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(from_esw->dev, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport.num = rep->vport; dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id); @@ -1270,7 +1294,8 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) #define MAX_SQ_NVPORTS 32 static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, - u32 *flow_group_in) + u32 *flow_group_in, + int match_params) { void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, @@ -1279,7 +1304,7 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS_2); + MLX5_MATCH_MISC_PARAMETERS_2 | match_params); MLX5_SET(fte_match_param, match_criteria, misc_parameters_2.metadata_reg_c_0, @@ -1287,7 +1312,7 @@ static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, } else { MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); + MLX5_MATCH_MISC_PARAMETERS | match_params); MLX5_SET_TO_ONES(fte_match_param, match_criteria, 
misc_parameters.source_port); @@ -1463,14 +1488,13 @@ esw_create_send_to_vport_group(struct mlx5_eswitch *esw, memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); + esw_set_flow_group_source_port(esw, flow_group_in, MLX5_MATCH_MISC_PARAMETERS); match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_ESW(esw->dev, merged_eswitch)) { MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_eswitch_owner_vhca_id); MLX5_SET(create_flow_group_in, flow_group_in, @@ -1558,7 +1582,7 @@ esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw, memset(flow_group_in, 0, inlen); - esw_set_flow_group_source_port(esw, flow_group_in); + esw_set_flow_group_source_port(esw, flow_group_in, 0); if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { match_criteria = MLX5_ADDR_OF(create_flow_group_in, @@ -1845,7 +1869,7 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) return -ENOMEM; /* create vport rx group */ - esw_set_flow_group_source_port(esw, flow_group_in); + esw_set_flow_group_source_port(esw, flow_group_in, 0); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); -- cgit v1.2.3 From 6cb9318a2534b7081eb9f375a720972f811665f6 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 20 Apr 2023 12:14:41 +0300 Subject: net/mlx5: Remove redundant vport_group_manager cap check It's enough to check for esw_manager cap to get the esw flow table caps.
Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 7bb7be01225a..fb2035a5ec99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -196,14 +196,11 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } - if (MLX5_CAP_GEN(dev, vport_group_manager) && - MLX5_ESWITCH_MANAGER(dev)) { + if (MLX5_ESWITCH_MANAGER(dev)) { err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE); if (err) return err; - } - if (MLX5_ESWITCH_MANAGER(dev)) { err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH); if (err) return err; -- cgit v1.2.3 From bea416c7e970399b438b801a9f3ffa24c4ddf855 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 23 Apr 2023 10:53:18 +0300 Subject: net/mlx5e: E-Switch, Check device is PF when stopping esw offloads Checking sriov is done on the pci device so it can return true on other devices like SF but nothing should be done in this case. Add a check that the device is PF. Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 66a522d08be1..44b5d1359155 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3301,7 +3301,7 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, /* If changing from switchdev to legacy mode without sriov enabled, * no need to create legacy fdb. 
*/ - if (!mlx5_sriov_is_enabled(esw->dev)) + if (!mlx5_core_is_pf(esw->dev) || !mlx5_sriov_is_enabled(esw->dev)) return 0; err = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_IGNORE_NUM_VFS); -- cgit v1.2.3 From 292243d13b1821434599fef7b4ea62110ea771c1 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 23 Apr 2023 15:24:00 +0300 Subject: net/mlx5e: E-Switch: move debug print of adding mac to correct place Move the debug print inside the if clause that actually does the change. Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 692cea3a6383..3e3963424285 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -310,11 +310,12 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) fdb_add: /* SRIOV is enabled: Forward UC MAC to vport */ - if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY) + if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY) { vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); - esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n", - vport, mac, vaddr->flow_rule); + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n", + vport, mac, vaddr->flow_rule); + } return 0; } -- cgit v1.2.3 From 3d7c5f78b8cef1376de95443632212f9042d7e78 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 20 Apr 2023 12:19:25 +0300 Subject: net/mlx5e: E-Switch, Add a check that log_max_l2_table is valid If log_max_l2_table is 0 there is really no room for one L2 address, and this should be treated as not supported. Do the check in MPFS init and for vport context events, which are both used to update the L2 address.
Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 +++++- drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 3e3963424285..9b71819e049a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -712,6 +712,9 @@ void esw_vport_change_handle_locked(struct mlx5_vport *vport) struct mlx5_eswitch *esw = dev->priv.eswitch; u8 mac[ETH_ALEN]; + if (!MLX5_CAP_GEN(dev, log_max_l2_table)) + return; + mlx5_query_nic_vport_mac_address(dev, vport->vport, true, mac); esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", vport->vport, mac); @@ -948,7 +951,8 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, u16 vport_num) vport->enabled = false; /* Disable events from this vport */ - arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + if (MLX5_CAP_GEN(esw->dev, log_max_l2_table)) + arm_vport_context_events_cmd(esw->dev, vport->vport, 0); if (!mlx5_esw_is_manager_vport(esw, vport->vport) && MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index 8ff16318e32d..4450091e181a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -99,7 +99,7 @@ int mlx5_mpfs_init(struct mlx5_core_dev *dev) int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); struct mlx5_mpfs *mpfs; - if (!MLX5_ESWITCH_MANAGER(dev)) + if (!MLX5_ESWITCH_MANAGER(dev) || l2table_size == 1) return 0; mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL); -- cgit v1.2.3 From c24246d07a942887fb62cd76229c89efdee6d948 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 24 
Apr 2023 12:11:13 +0300 Subject: net/mlx5: E-Switch, Use RoCE version 2 for loopback traffic It could be that the port initializing the eswitch doesn't support RoCE version 1, but all ports should support RoCE version 2. Signed-off-by: Roi Dayan Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 540cf05f6373..15bb562b3846 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -115,7 +115,7 @@ free: static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev) { - mlx5_core_roce_gid_set(dev, 0, 0, 0, + mlx5_core_roce_gid_set(dev, 0, MLX5_ROCE_VERSION_2, 0, NULL, NULL, false, 0, 1); } @@ -135,7 +135,7 @@ static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev) mlx5_rdma_make_default_gid(dev, &gid); return mlx5_core_roce_gid_set(dev, 0, - MLX5_ROCE_VERSION_1, + MLX5_ROCE_VERSION_2, 0, gid.raw, mac, false, 0, 1); } -- cgit v1.2.3 From 7eb197fd83a355214346feddd55fe3336125953f Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 24 Apr 2023 12:15:54 +0300 Subject: net/mlx5: E-Switch, Use metadata matching for RoCE loopback rule Use metadata matching for RoCE loopback rule if device is configured to use metadata for source port matching.
Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 8 ++++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 46 +++++++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 20 ++-------- 3 files changed, 40 insertions(+), 34 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 1a042c981713..6c5c05c414fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -683,6 +683,14 @@ mlx5_esw_vporttbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr struct mlx5_flow_handle * esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag); +void mlx5_esw_set_flow_group_source_port(struct mlx5_eswitch *esw, + u32 *flow_group_in, + int match_params); + +void mlx5_esw_set_spec_source_port(struct mlx5_eswitch *esw, + u16 vport, + struct mlx5_flow_spec *spec); + int esw_offloads_load_rep(struct mlx5_eswitch *esw, u16 vport_num); void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 44b5d1359155..76db72f339d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1293,9 +1293,10 @@ esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 -static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, - u32 *flow_group_in, - int match_params) +void +mlx5_esw_set_flow_group_source_port(struct mlx5_eswitch *esw, + u32 *flow_group_in, + int match_params) { void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, @@ -1488,7 +1489,7 @@ esw_create_send_to_vport_group(struct mlx5_eswitch *esw, 
memset(flow_group_in, 0, inlen); - esw_set_flow_group_source_port(esw, flow_group_in, MLX5_MATCH_MISC_PARAMETERS); + mlx5_esw_set_flow_group_source_port(esw, flow_group_in, MLX5_MATCH_MISC_PARAMETERS); match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); @@ -1582,7 +1583,7 @@ esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw, memset(flow_group_in, 0, inlen); - esw_set_flow_group_source_port(esw, flow_group_in, 0); + mlx5_esw_set_flow_group_source_port(esw, flow_group_in, 0); if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { match_criteria = MLX5_ADDR_OF(create_flow_group_in, @@ -1869,7 +1870,7 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) return -ENOMEM; /* create vport rx group */ - esw_set_flow_group_source_port(esw, flow_group_in, 0); + mlx5_esw_set_flow_group_source_port(esw, flow_group_in, 0); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); @@ -1939,21 +1940,13 @@ static void esw_destroy_vport_rx_drop_group(struct mlx5_eswitch *esw) mlx5_destroy_flow_group(esw->offloads.vport_rx_drop_group); } -struct mlx5_flow_handle * -mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, - struct mlx5_flow_destination *dest) +void +mlx5_esw_set_spec_source_port(struct mlx5_eswitch *esw, + u16 vport, + struct mlx5_flow_spec *spec) { - struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_handle *flow_rule; - struct mlx5_flow_spec *spec; void *misc; - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) { - flow_rule = ERR_PTR(-ENOMEM); - goto out; - } - if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, @@ -1973,6 +1966,23 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 
vport, spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; } +} + +struct mlx5_flow_handle * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_spec *spec; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) { + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + mlx5_esw_set_spec_source_port(esw, vport, spec); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 15bb562b3846..a42f6cd99b74 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -30,9 +30,8 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) struct mlx5_flow_spec *spec; struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; - void *match_criteria; + struct mlx5_eswitch *esw; u32 *flow_group_in; - void *misc; int err; if (!(MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) && @@ -63,12 +62,8 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) goto free; } - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, - match_criteria); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_port); + esw = dev->priv.eswitch; + mlx5_esw_set_flow_group_source_port(esw, flow_group_in, 0); fg = mlx5_create_flow_group(ft, flow_group_in); if (IS_ERR(fg)) { @@ -77,14 +72,7 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) goto destroy_flow_table; } - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - MLX5_SET(fte_match_set_misc, misc, 
source_port, - dev->priv.eswitch->manager_vport); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + mlx5_esw_set_spec_source_port(esw, esw->manager_vport, spec); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, NULL, 0); -- cgit v1.2.3 From 0279b5454c0e8344f30fcac90db76cf17b6b76c7 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 2 May 2023 12:44:47 +0300 Subject: net/mlx5: devlink, Only show PF related devlink warning when needed Limit the PF related warning to show if device is actually a PF. Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 0e07971e024a..bfaec67abf0d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -162,9 +162,8 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, return -EOPNOTSUPP; } - if (pci_num_vf(pdev)) { + if (mlx5_core_is_pf(dev) && pci_num_vf(pdev)) NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable"); - } switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: -- cgit v1.2.3 From f5d87b47a1d9dc14c048c84935397d97833ac706 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 23 Apr 2023 13:39:57 +0300 Subject: net/mlx5e: E-Switch, Initialize E-Switch for eswitch manager Initialize eswitch instance for a function which is eswitch manager but not a vport group manager. 
Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9b71819e049a..31956cd9d1bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1622,7 +1622,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) struct mlx5_eswitch *esw; int err; - if (!MLX5_VPORT_MANAGER(dev)) + if (!MLX5_VPORT_MANAGER(dev) && !MLX5_ESWITCH_MANAGER(dev)) return 0; esw = kzalloc(sizeof(*esw), GFP_KERNEL); @@ -1692,7 +1692,7 @@ abort: void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) { - if (!esw || !MLX5_VPORT_MANAGER(esw->dev)) + if (!esw) return; esw_info(esw->dev, "cleanup\n"); -- cgit v1.2.3 From e4ac7cc6e5a45306049ac2337dea0e636adf36be Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Fri, 19 May 2023 09:48:25 +0800 Subject: net: fec: turn on XDP features The XDP features are supported since the commit 66c0e13ad236 ("drivers: net: turn on XDP features"). Currently, the fec driver supports NETDEV_XDP_ACT_BASIC, NETDEV_XDP_ACT_REDIRECT and NETDEV_XDP_ACT_NDO_XMIT. So turn on these XDP features for fec driver. Signed-off-by: Wei Fang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/fec_main.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 6d0b46c76924..87a431222af6 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -4030,6 +4030,11 @@ static int fec_enet_init(struct net_device *ndev) ndev->hw_features = ndev->features; + if (!(fep->quirks & FEC_QUIRK_SWAP_FRAME)) + ndev->xdp_features = NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT; + fec_restart(ndev); if (fep->quirks & FEC_QUIRK_MIB_CLEAR) -- cgit v1.2.3 From 2ae9c66b04554bf5b3eeaab8c12a0bfb9f28ebde Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Fri, 19 May 2023 10:01:13 +0800 Subject: net: fec: remove useless fec_enet_reset_skb() This patch is a cleanup for fec driver. The fec_enet_reset_skb() is used to free skb buffers for tx queues and is only invoked in fec_restart(). However, fec_enet_bd_init() also resets skb buffers and is invoked in fec_restart() too. So fec_enet_reset_skb() is redundant and useless. Signed-off-by: Wei Fang Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/fec_main.c | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 87a431222af6..3ecf20ee5851 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1011,24 +1011,6 @@ static void fec_enet_enable_ring(struct net_device *ndev) } } -static void fec_enet_reset_skb(struct net_device *ndev) -{ - struct fec_enet_private *fep = netdev_priv(ndev); - struct fec_enet_priv_tx_q *txq; - int i, j; - - for (i = 0; i < fep->num_tx_queues; i++) { - txq = fep->tx_queue[i]; - - for (j = 0; j < txq->bd.ring_size; j++) { - if (txq->tx_skbuff[j]) { - dev_kfree_skb_any(txq->tx_skbuff[j]); - txq->tx_skbuff[j] = NULL; - } - } - } -} - /* * This function is called to start or restart the FEC during a link * change, transmit timeout, or to reconfigure the FEC. The network @@ -1071,9 +1053,6 @@ fec_restart(struct net_device *ndev) fec_enet_enable_ring(ndev); - /* Reset tx SKB buffers. */ - fec_enet_reset_skb(ndev); - /* Enable MII mode */ if (fep->full_duplex == DUPLEX_FULL) { /* FD enable */ -- cgit v1.2.3 From 8b6b7c1190c3da1137d320c3de5e8d7f69baba5b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 19 May 2023 11:33:04 +0100 Subject: net: altera: tse: remove mac_an_restart() function The mac_an_restart() method will only be called if the driver sets legacy_pre_march2020, which the altera tse driver does not do. Therefore, providing a stub is unnecessary. Fixes: fef2998203e1 ("net: altera: tse: convert to phylink") Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/altera/altera_tse_main.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 66e3af73ec41..190ff1bcd94e 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1036,10 +1036,6 @@ static struct net_device_ops altera_tse_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; -static void alt_tse_mac_an_restart(struct phylink_config *config) -{ -} - static void alt_tse_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) { @@ -1096,7 +1092,6 @@ static struct phylink_pcs *alt_tse_select_pcs(struct phylink_config *config, } static const struct phylink_mac_ops alt_tse_phylink_ops = { - .mac_an_restart = alt_tse_mac_an_restart, .mac_config = alt_tse_mac_config, .mac_link_down = alt_tse_mac_link_down, .mac_link_up = alt_tse_mac_link_up, -- cgit v1.2.3 From 5b17a4971d3b2a073f4078dd65331efbe35baa2d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 20 May 2023 10:30:17 +0200 Subject: forcedeth: Fix an error handling path in nv_probe() If an error occurs after calling nv_mgmt_acquire_sema(), it should be undone with a corresponding nv_mgmt_release_sema() call. Add it in the error handling path of the probe as already done in the remove function. 
Fixes: cac1c52c3621 ("forcedeth: mgmt unit interface") Signed-off-by: Christophe JAILLET Acked-by: Zhu Yanjun Link: https://lore.kernel.org/r/355e9a7d351b32ad897251b6f81b5886fcdc6766.1684571393.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/nvidia/forcedeth.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index 0605d1ee490d..7a549b834e97 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -6138,6 +6138,7 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) return 0; out_error: + nv_mgmt_release_sema(dev); if (phystate_orig) writel(phystate|NVREG_ADAPTCTL_RUNNING, base + NvRegAdapterControl); out_freering: -- cgit v1.2.3 From 640bf95b2c7c2981fb471acdafbd3e0458f8390d Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 20 May 2023 11:48:55 +0200 Subject: 3c589_cs: Fix an error handling path in tc589_probe() Should tc589_config() fail, some resources need to be released as already done in the remove function. 
Fixes: 15b99ac17295 ("[PATCH] pcmcia: add return value to _config() functions") Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/d8593ae867b24c79063646e36f9b18b0790107cb.1684575975.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/3com/3c589_cs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c index 82f94b1635bf..5267e9dcd87e 100644 --- a/drivers/net/ethernet/3com/3c589_cs.c +++ b/drivers/net/ethernet/3com/3c589_cs.c @@ -195,6 +195,7 @@ static int tc589_probe(struct pcmcia_device *link) { struct el3_private *lp; struct net_device *dev; + int ret; dev_dbg(&link->dev, "3c589_attach()\n"); @@ -218,7 +219,15 @@ static int tc589_probe(struct pcmcia_device *link) dev->ethtool_ops = &netdev_ethtool_ops; - return tc589_config(link); + ret = tc589_config(link); + if (ret) + goto err_free_netdev; + + return 0; + +err_free_netdev: + free_netdev(dev); + return ret; } static void tc589_detach(struct pcmcia_device *link) -- cgit v1.2.3 From 2a0a935fb64ee8af253b9c6133bb6702fb152ac2 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 2 May 2023 11:03:53 +0300 Subject: net/mlx5: Collect command failures data only for known commands DEVX can issue a general command, which is not used by mlx5 driver. In case such command is failed, mlx5 is trying to collect the failure data, However, mlx5 doesn't create a storage for this command, since mlx5 doesn't use it. This lead to array-index-out-of-bounds error. Fix it by checking whether the command is known before collecting the failure data. 
Fixes: 34f46ae0d4b3 ("net/mlx5: Add command failures data to debugfs") Signed-off-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index d53de39539a8..d532883b42d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1920,9 +1920,10 @@ static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, u32 syndrome, int err) { + const char *namep = mlx5_command_str(opcode); struct mlx5_cmd_stats *stats; - if (!err) + if (!err || !(strcmp(namep, "unknown command opcode"))) return; stats = &dev->cmd.stats[opcode]; -- cgit v1.2.3 From 2be5bd42a5bba1a05daedc86cf0e248210009669 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 20 Mar 2023 13:07:53 +0200 Subject: net/mlx5: Handle pairing of E-switch via uplink un/load APIs In case user switch a device from switchdev mode to legacy mode, mlx5 first unpair the E-switch and afterwards unload the uplink vport. From the other hand, in case user remove or reload a device, mlx5 first unload the uplink vport and afterwards unpair the E-switch. The latter is causing a bug[1], hence, handle pairing of E-switch as part of uplink un/load APIs. [1] In case VF_LAG is used, every tc fdb flow is duplicated to the peer esw. However, the original esw keeps a pointer to this duplicated flow, not the peer esw. e.g.: if user create tc fdb flow over esw0, the flow is duplicated over esw1, in FW/HW, but in SW, esw0 keeps a pointer to the duplicated flow. 
During module unload while a peer tc fdb flow is still offloaded, in case the first device to be removed is the peer device (esw1 in the example above), the peer net-dev is destroyed, and so the mlx5e_priv is memset to 0. Afterwards, the peer device is trying to unpair himself from the original device (esw0 in the example above). Unpair API invoke the original device to clear peer flow from its eswitch (esw0), but the peer flow, which is stored over the original eswitch (esw0), is trying to use the peer mlx5e_priv, which is memset to 0 and result in below kernel-oops. [ 157.964081 ] BUG: unable to handle page fault for address: 000000000002ce60 [ 157.964662 ] #PF: supervisor read access in kernel mode [ 157.965123 ] #PF: error_code(0x0000) - not-present page [ 157.965582 ] PGD 0 P4D 0 [ 157.965866 ] Oops: 0000 [#1] SMP [ 157.967670 ] RIP: 0010:mlx5e_tc_del_fdb_flow+0x48/0x460 [mlx5_core] [ 157.976164 ] Call Trace: [ 157.976437 ] [ 157.976690 ] __mlx5e_tc_del_fdb_peer_flow+0xe6/0x100 [mlx5_core] [ 157.977230 ] mlx5e_tc_clean_fdb_peer_flows+0x67/0x90 [mlx5_core] [ 157.977767 ] mlx5_esw_offloads_unpair+0x2d/0x1e0 [mlx5_core] [ 157.984653 ] mlx5_esw_offloads_devcom_event+0xbf/0x130 [mlx5_core] [ 157.985212 ] mlx5_devcom_send_event+0xa3/0xb0 [mlx5_core] [ 157.985714 ] esw_offloads_disable+0x5a/0x110 [mlx5_core] [ 157.986209 ] mlx5_eswitch_disable_locked+0x152/0x170 [mlx5_core] [ 157.986757 ] mlx5_eswitch_disable+0x51/0x80 [mlx5_core] [ 157.987248 ] mlx5_unload+0x2a/0xb0 [mlx5_core] [ 157.987678 ] mlx5_uninit_one+0x5f/0xd0 [mlx5_core] [ 157.988127 ] remove_one+0x64/0xe0 [mlx5_core] [ 157.988549 ] pci_device_remove+0x31/0xa0 [ 157.988933 ] device_release_driver_internal+0x18f/0x1f0 [ 157.989402 ] driver_detach+0x3f/0x80 [ 157.989754 ] bus_remove_driver+0x70/0xf0 [ 157.990129 ] pci_unregister_driver+0x34/0x90 [ 157.990537 ] mlx5_cleanup+0xc/0x1c [mlx5_core] [ 157.990972 ] __x64_sys_delete_module+0x15a/0x250 [ 157.991398 ] ? 
exit_to_user_mode_prepare+0xea/0x110 [ 157.991840 ] do_syscall_64+0x3d/0x90 [ 157.992198 ] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: 04de7dda7394 ("net/mlx5e: Infrastructure for duplicated offloading of TC flows") Fixes: 1418ddd96afd ("net/mlx5e: Duplicate offloaded TC eswitch rules under uplink LAG") Signed-off-by: Shay Drory Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 ++----- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 728b82ce4031..65fe40f55d84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5301,6 +5301,8 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) goto err_action_counter; } + mlx5_esw_offloads_devcom_init(esw); + return 0; err_action_counter: @@ -5329,7 +5331,7 @@ void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv) priv = netdev_priv(rpriv->netdev); esw = priv->mdev->priv.eswitch; - mlx5e_tc_clean_fdb_peer_flows(esw); + mlx5_esw_offloads_devcom_cleanup(esw); mlx5e_tc_tun_cleanup(uplink_priv->encap); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 1a042c981713..9f007c5438ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -369,6 +369,8 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf); void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw); void mlx5_eswitch_disable(struct mlx5_eswitch *esw); +void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw); +void 
mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u16 vport, const u8 *mac); int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, @@ -767,6 +769,8 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; } static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {} static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {} +static inline void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) {} +static inline void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) {} static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 69215ffb9999..7c34c7cf506f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2779,7 +2779,7 @@ err_out: return err; } -static void esw_offloads_devcom_init(struct mlx5_eswitch *esw) +void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) { struct mlx5_devcom *devcom = esw->dev->priv.devcom; @@ -2802,7 +2802,7 @@ static void esw_offloads_devcom_init(struct mlx5_eswitch *esw) ESW_OFFLOADS_DEVCOM_PAIR, esw); } -static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) +void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) { struct mlx5_devcom *devcom = esw->dev->priv.devcom; @@ -3250,8 +3250,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) if (err) goto err_vports; - esw_offloads_devcom_init(esw); - return 0; err_vports: @@ -3292,7 +3290,6 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void 
esw_offloads_disable(struct mlx5_eswitch *esw) { - esw_offloads_devcom_cleanup(esw); mlx5_eswitch_disable_pf_vf_vports(esw); esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK); esw_set_passing_vport_metadata(esw, false); -- cgit v1.2.3 From 1e5daf5565b61a96e570865091589afc9156e3d3 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 9 Mar 2023 16:43:15 +0200 Subject: net/mlx5: DR, Fix crc32 calculation to work on big-endian (BE) CPUs When calculating crc for hash index we use the function crc32 that calculates for little-endian (LE) arch. Then we convert it to network endianness using htonl(), but it's wrong to do the conversion in BE archs since the crc32 value is already LE. The solution is to switch the bytes from the crc result for all types of arch. Fixes: 40416d8ede65 ("net/mlx5: DR, Replace CRC32 implementation to use kernel lib") Signed-off-by: Erez Shitrit Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 9413aaf51251..e94fbb015efa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -15,7 +15,8 @@ static u32 dr_ste_crc32_calc(const void *input_data, size_t length) { u32 crc = crc32(0, input_data, length); - return (__force u32)htonl(crc); + return (__force u32)((crc >> 24) & 0xff) | ((crc << 8) & 0xff0000) | + ((crc >> 8) & 0xff00) | ((crc << 24) & 0xff000000); } bool mlx5dr_ste_supp_ttl_cs_recalc(struct mlx5dr_cmd_caps *caps) -- cgit v1.2.3 From c7dd225bc224726c22db08e680bf787f60ebdee3 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 2 Apr 2023 17:14:10 +0300 Subject: net/mlx5: DR, Check force-loopback RC QP capability independently from RoCE SW Steering uses RC QP for 
writing STEs to ICM. This writing is done in LB (loopback), and FL (force-loopback) QP is preferred for performance. FL is available when RoCE is enabled or disabled based on RoCE caps. This patch adds reading of FL capability from HCA caps in addition to the existing reading from RoCE caps, thus fixing the case where we didn't have loopback enabled when RoCE was disabled. Fixes: 7304d603a57a ("net/mlx5: DR, Add support for force-loopback QP") Signed-off-by: Itamar Gozlan Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 4 +++- include/linux/mlx5/mlx5_ifc.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 3835ba3f4dda..1aa525e509f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -117,6 +117,8 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id); caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols); caps->sw_format_ver = MLX5_CAP_GEN(mdev, steering_format_version); + caps->roce_caps.fl_rc_qp_when_roce_disabled = + MLX5_CAP_GEN(mdev, fl_rc_qp_when_roce_disabled); if (MLX5_CAP_GEN(mdev, roce)) { err = dr_cmd_query_nic_vport_roce_en(mdev, 0, &roce_en); @@ -124,7 +126,7 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, return err; caps->roce_caps.roce_en = roce_en; - caps->roce_caps.fl_rc_qp_when_roce_disabled = + caps->roce_caps.fl_rc_qp_when_roce_disabled |= MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_disabled); caps->roce_caps.fl_rc_qp_when_roce_enabled = MLX5_CAP_ROCE(mdev, fl_rc_qp_when_roce_enabled); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index dc5e2cb302a5..b89778d0d326 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ 
b/include/linux/mlx5/mlx5_ifc.h @@ -1705,7 +1705,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 rc[0x1]; u8 uar_4k[0x1]; - u8 reserved_at_241[0x9]; + u8 reserved_at_241[0x7]; + u8 fl_rc_qp_when_roce_disabled[0x1]; + u8 regexp_params[0x1]; u8 uar_sz[0x6]; u8 port_selection_cap[0x1]; u8 reserved_at_248[0x1]; -- cgit v1.2.3 From be071cdb167fc3e25fe81922166b3d499d23e8ac Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 3 Apr 2023 22:26:00 +0200 Subject: net/mlx5e: Use correct encap attribute during invalidation With introduction of post action infrastructure most of the users of encap attribute had been modified in order to obtain the correct attribute by calling mlx5e_tc_get_encap_attr() helper instead of assuming encap action is always on default attribute. However, the cited commit didn't modify mlx5e_invalidate_encap() which prevents it from destroying correct modify header action which leads to a warning [0]. Fix the issue by using correct attribute. [0]: Feb 21 09:47:35 c-237-177-40-045 kernel: WARNING: CPU: 17 PID: 654 at drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:684 mlx5e_tc_attach_mod_hdr+0x1cc/0x230 [mlx5_core] Feb 21 09:47:35 c-237-177-40-045 kernel: RIP: 0010:mlx5e_tc_attach_mod_hdr+0x1cc/0x230 [mlx5_core] Feb 21 09:47:35 c-237-177-40-045 kernel: Call Trace: Feb 21 09:47:35 c-237-177-40-045 kernel: Feb 21 09:47:35 c-237-177-40-045 kernel: mlx5e_tc_fib_event_work+0x8e3/0x1f60 [mlx5_core] Feb 21 09:47:35 c-237-177-40-045 kernel: ? mlx5e_take_all_encap_flows+0xe0/0xe0 [mlx5_core] Feb 21 09:47:35 c-237-177-40-045 kernel: ? lock_downgrade+0x6d0/0x6d0 Feb 21 09:47:35 c-237-177-40-045 kernel: ? lockdep_hardirqs_on_prepare+0x273/0x3f0 Feb 21 09:47:35 c-237-177-40-045 kernel: ? lockdep_hardirqs_on_prepare+0x273/0x3f0 Feb 21 09:47:35 c-237-177-40-045 kernel: process_one_work+0x7c2/0x1310 Feb 21 09:47:35 c-237-177-40-045 kernel: ? lockdep_hardirqs_on_prepare+0x3f0/0x3f0 Feb 21 09:47:35 c-237-177-40-045 kernel: ? 
pwq_dec_nr_in_flight+0x230/0x230 Feb 21 09:47:35 c-237-177-40-045 kernel: ? rwlock_bug.part.0+0x90/0x90 Feb 21 09:47:35 c-237-177-40-045 kernel: worker_thread+0x59d/0xec0 Feb 21 09:47:35 c-237-177-40-045 kernel: ? __kthread_parkme+0xd9/0x1d0 Fixes: 8300f225268b ("net/mlx5e: Create new flow attr for multi table actions") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 20c2d2ecaf93..6a052c6cfc15 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -1369,11 +1369,13 @@ static void mlx5e_invalidate_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow; list_for_each_entry(flow, encap_flows, tmp_list) { - struct mlx5_flow_attr *attr = flow->attr; struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_flow_attr *attr; if (!mlx5e_is_offloaded_flow(flow)) continue; + + attr = mlx5e_tc_get_encap_attr(flow); esw_attr = attr->esw_attr; if (flow_flag_test(flow, SLOW)) -- cgit v1.2.3 From a65735148e0328f80c0f72f9f8d2f609bfcf4aff Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 1 May 2023 14:37:56 +0300 Subject: net/mlx5: Fix error message when failing to allocate device memory Fix spacing for the error and also the correct error code pointer. 
Fixes: c9b9dcb430b3 ("net/mlx5: Move device memory management to mlx5_core") Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 995eb2d5ace0..a7eb65cd0bdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1049,7 +1049,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) dev->dm = mlx5_dm_create(dev); if (IS_ERR(dev->dm)) - mlx5_core_warn(dev, "Failed to init device memory%d\n", err); + mlx5_core_warn(dev, "Failed to init device memory %ld\n", PTR_ERR(dev->dm)); dev->tracer = mlx5_fw_tracer_create(dev); dev->hv_vhca = mlx5_hv_vhca_create(dev); -- cgit v1.2.3 From 691c041bf20899fc13c793f92ba61ab660fa3a30 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 31 Mar 2023 14:20:51 +0200 Subject: net/mlx5e: Fix deadlock in tc route query code Cited commit causes ABBA deadlock[0] when peer flows are created while holding the devcom rw semaphore. Due to peer flows offload implementation the lock is taken much higher up the call chain and there is no obvious way to easily fix the deadlock. Instead, since tc route query code needs the peer eswitch structure only to perform a lookup in xarray and doesn't perform any sleeping operations with it, refactor the code for lockless execution in following ways: - RCUify the devcom 'data' pointer. When resetting the pointer synchronously wait for RCU grace period before returning. This is fine since devcom is currently only used for synchronization of pairing/unpairing of eswitches which is rare and already expensive as-is. - Wrap all usages of 'paired' boolean in {READ|WRITE}_ONCE(). The flag has already been used in some unlocked contexts without proper annotations (e.g. 
users of mlx5_devcom_is_paired() function), but it wasn't an issue since all relevant code paths checked it again after obtaining the devcom semaphore. Now it is also used by mlx5_devcom_get_peer_data_rcu() as "best effort" check to return NULL when devcom is being unpaired. Note that while RCU read lock doesn't prevent the unpaired flag from being changed concurrently it still guarantees that reader can continue to use 'data'. - Refactor mlx5e_tc_query_route_vport() function to use new mlx5_devcom_get_peer_data_rcu() API which fixes the deadlock. [0]: [ 164.599612] ====================================================== [ 164.600142] WARNING: possible circular locking dependency detected [ 164.600667] 6.3.0-rc3+ #1 Not tainted [ 164.601021] ------------------------------------------------------ [ 164.601557] handler1/3456 is trying to acquire lock: [ 164.601998] ffff88811f1714b0 (&esw->offloads.encap_tbl_lock){+.+.}-{3:3}, at: mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core] [ 164.603078] but task is already holding lock: [ 164.603617] ffff88810137fc98 (&comp->sem){++++}-{3:3}, at: mlx5_devcom_get_peer_data+0x37/0x80 [mlx5_core] [ 164.604459] which lock already depends on the new lock. 
[ 164.605190] the existing dependency chain (in reverse order) is: [ 164.605848] -> #1 (&comp->sem){++++}-{3:3}: [ 164.606380] down_read+0x39/0x50 [ 164.606772] mlx5_devcom_get_peer_data+0x37/0x80 [mlx5_core] [ 164.607336] mlx5e_tc_query_route_vport+0x86/0xc0 [mlx5_core] [ 164.607914] mlx5e_tc_tun_route_lookup+0x1a4/0x1d0 [mlx5_core] [ 164.608495] mlx5e_attach_decap_route+0xc6/0x1e0 [mlx5_core] [ 164.609063] mlx5e_tc_add_fdb_flow+0x1ea/0x360 [mlx5_core] [ 164.609627] __mlx5e_add_fdb_flow+0x2d2/0x430 [mlx5_core] [ 164.610175] mlx5e_configure_flower+0x952/0x1a20 [mlx5_core] [ 164.610741] tc_setup_cb_add+0xd4/0x200 [ 164.611146] fl_hw_replace_filter+0x14c/0x1f0 [cls_flower] [ 164.611661] fl_change+0xc95/0x18a0 [cls_flower] [ 164.612116] tc_new_tfilter+0x3fc/0xd20 [ 164.612516] rtnetlink_rcv_msg+0x418/0x5b0 [ 164.612936] netlink_rcv_skb+0x54/0x100 [ 164.613339] netlink_unicast+0x190/0x250 [ 164.613746] netlink_sendmsg+0x245/0x4a0 [ 164.614150] sock_sendmsg+0x38/0x60 [ 164.614522] ____sys_sendmsg+0x1d0/0x1e0 [ 164.614934] ___sys_sendmsg+0x80/0xc0 [ 164.615320] __sys_sendmsg+0x51/0x90 [ 164.615701] do_syscall_64+0x3d/0x90 [ 164.616083] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 164.616568] -> #0 (&esw->offloads.encap_tbl_lock){+.+.}-{3:3}: [ 164.617210] __lock_acquire+0x159e/0x26e0 [ 164.617638] lock_acquire+0xc2/0x2a0 [ 164.618018] __mutex_lock+0x92/0xcd0 [ 164.618401] mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core] [ 164.618943] post_process_attr+0x153/0x2d0 [mlx5_core] [ 164.619471] mlx5e_tc_add_fdb_flow+0x164/0x360 [mlx5_core] [ 164.620021] __mlx5e_add_fdb_flow+0x2d2/0x430 [mlx5_core] [ 164.620564] mlx5e_configure_flower+0xe33/0x1a20 [mlx5_core] [ 164.621125] tc_setup_cb_add+0xd4/0x200 [ 164.621531] fl_hw_replace_filter+0x14c/0x1f0 [cls_flower] [ 164.622047] fl_change+0xc95/0x18a0 [cls_flower] [ 164.622500] tc_new_tfilter+0x3fc/0xd20 [ 164.622906] rtnetlink_rcv_msg+0x418/0x5b0 [ 164.623324] netlink_rcv_skb+0x54/0x100 [ 164.623727] netlink_unicast+0x190/0x250 [ 164.624138] 
netlink_sendmsg+0x245/0x4a0 [ 164.624544] sock_sendmsg+0x38/0x60 [ 164.624919] ____sys_sendmsg+0x1d0/0x1e0 [ 164.625340] ___sys_sendmsg+0x80/0xc0 [ 164.625731] __sys_sendmsg+0x51/0x90 [ 164.626117] do_syscall_64+0x3d/0x90 [ 164.626502] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 164.626995] other info that might help us debug this: [ 164.627725] Possible unsafe locking scenario: [ 164.628268] CPU0 CPU1 [ 164.628683] ---- ---- [ 164.629098] lock(&comp->sem); [ 164.629421] lock(&esw->offloads.encap_tbl_lock); [ 164.630066] lock(&comp->sem); [ 164.630555] lock(&esw->offloads.encap_tbl_lock); [ 164.630993] *** DEADLOCK *** [ 164.631575] 3 locks held by handler1/3456: [ 164.631962] #0: ffff888124b75130 (&block->cb_lock){++++}-{3:3}, at: tc_setup_cb_add+0x5b/0x200 [ 164.632703] #1: ffff888116e512b8 (&esw->mode_lock){++++}-{3:3}, at: mlx5_esw_hold+0x39/0x50 [mlx5_core] [ 164.633552] #2: ffff88810137fc98 (&comp->sem){++++}-{3:3}, at: mlx5_devcom_get_peer_data+0x37/0x80 [mlx5_core] [ 164.634435] stack backtrace: [ 164.634883] CPU: 17 PID: 3456 Comm: handler1 Not tainted 6.3.0-rc3+ #1 [ 164.635431] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 164.636340] Call Trace: [ 164.636616] [ 164.636863] dump_stack_lvl+0x47/0x70 [ 164.637217] check_noncircular+0xfe/0x110 [ 164.637601] __lock_acquire+0x159e/0x26e0 [ 164.637977] ? mlx5_cmd_set_fte+0x5b0/0x830 [mlx5_core] [ 164.638472] lock_acquire+0xc2/0x2a0 [ 164.638828] ? mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core] [ 164.639339] ? lock_is_held_type+0x98/0x110 [ 164.639728] __mutex_lock+0x92/0xcd0 [ 164.640074] ? mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core] [ 164.640576] ? __lock_acquire+0x382/0x26e0 [ 164.640958] ? mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core] [ 164.641468] ? mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core] [ 164.641965] mlx5e_attach_encap+0xd8/0x8b0 [mlx5_core] [ 164.642454] ? 
lock_release+0xbf/0x240 [ 164.642819] post_process_attr+0x153/0x2d0 [mlx5_core] [ 164.643318] mlx5e_tc_add_fdb_flow+0x164/0x360 [mlx5_core] [ 164.643835] __mlx5e_add_fdb_flow+0x2d2/0x430 [mlx5_core] [ 164.644340] mlx5e_configure_flower+0xe33/0x1a20 [mlx5_core] [ 164.644862] ? lock_acquire+0xc2/0x2a0 [ 164.645219] tc_setup_cb_add+0xd4/0x200 [ 164.645588] fl_hw_replace_filter+0x14c/0x1f0 [cls_flower] [ 164.646067] fl_change+0xc95/0x18a0 [cls_flower] [ 164.646488] tc_new_tfilter+0x3fc/0xd20 [ 164.646861] ? tc_del_tfilter+0x810/0x810 [ 164.647236] rtnetlink_rcv_msg+0x418/0x5b0 [ 164.647621] ? rtnl_setlink+0x160/0x160 [ 164.647982] netlink_rcv_skb+0x54/0x100 [ 164.648348] netlink_unicast+0x190/0x250 [ 164.648722] netlink_sendmsg+0x245/0x4a0 [ 164.649090] sock_sendmsg+0x38/0x60 [ 164.649434] ____sys_sendmsg+0x1d0/0x1e0 [ 164.649804] ? copy_msghdr_from_user+0x6d/0xa0 [ 164.650213] ___sys_sendmsg+0x80/0xc0 [ 164.650563] ? lock_acquire+0xc2/0x2a0 [ 164.650926] ? lock_acquire+0xc2/0x2a0 [ 164.651286] ? __fget_files+0x5/0x190 [ 164.651644] ? find_held_lock+0x2b/0x80 [ 164.652006] ? __fget_files+0xb9/0x190 [ 164.652365] ? lock_release+0xbf/0x240 [ 164.652723] ? 
__fget_files+0xd3/0x190 [ 164.653079] __sys_sendmsg+0x51/0x90 [ 164.653435] do_syscall_64+0x3d/0x90 [ 164.653784] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 164.654229] RIP: 0033:0x7f378054f8bd [ 164.654577] Code: 28 89 54 24 1c 48 89 74 24 10 89 7c 24 08 e8 6a c3 f4 ff 8b 54 24 1c 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 33 44 89 c7 48 89 44 24 08 e8 be c3 f4 ff 48 [ 164.656041] RSP: 002b:00007f377fa114b0 EFLAGS: 00000293 ORIG_RAX: 000000000000002e [ 164.656701] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f378054f8bd [ 164.657297] RDX: 0000000000000000 RSI: 00007f377fa11540 RDI: 0000000000000014 [ 164.657885] RBP: 00007f377fa12278 R08: 0000000000000000 R09: 000000000000015c [ 164.658472] R10: 00007f377fa123d0 R11: 0000000000000293 R12: 0000560962d99bd0 [ 164.665317] R13: 0000000000000000 R14: 0000560962d99bd0 R15: 00007f377fa11540 Fixes: f9d196bd632b ("net/mlx5e: Use correct eswitch for stack devices with lag") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Reviewed-by: Shay Drory Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 19 +++++---- .../net/ethernet/mellanox/mlx5/core/lib/devcom.c | 48 +++++++++++++++++----- .../net/ethernet/mellanox/mlx5/core/lib/devcom.h | 1 + 3 files changed, 48 insertions(+), 20 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 65fe40f55d84..416ab6b6da97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1665,11 +1665,9 @@ bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport) { struct mlx5e_priv *out_priv, *route_priv; - struct mlx5_devcom *devcom = NULL; struct mlx5_core_dev *route_mdev; struct mlx5_eswitch *esw; u16 
vhca_id; - int err; out_priv = netdev_priv(out_dev); esw = out_priv->mdev->priv.eswitch; @@ -1678,6 +1676,9 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id); if (mlx5_lag_is_active(out_priv->mdev)) { + struct mlx5_devcom *devcom; + int err; + /* In lag case we may get devices from different eswitch instances. * If we failed to get vport num, it means, mostly, that we on the wrong * eswitch. @@ -1686,16 +1687,16 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro if (err != -ENOENT) return err; + rcu_read_lock(); devcom = out_priv->mdev->priv.devcom; - esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - if (!esw) - return -ENODEV; + esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV; + rcu_read_unlock(); + + return err; } - err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); - if (devcom) - mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - return err; + return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); } static int diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index adefde3ea941..070d55f13419 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -13,7 +13,7 @@ static LIST_HEAD(devcom_list); struct mlx5_devcom_component { struct { - void *data; + void __rcu *data; } device[MLX5_DEVCOM_PORTS_SUPPORTED]; mlx5_devcom_event_handler_t handler; @@ -162,7 +162,7 @@ void mlx5_devcom_register_component(struct mlx5_devcom *devcom, comp = &devcom->priv->components[id]; down_write(&comp->sem); comp->handler = handler; - comp->device[devcom->idx].data = data; + rcu_assign_pointer(comp->device[devcom->idx].data, data); up_write(&comp->sem); } @@ -176,8 +176,9 @@ void 
mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, comp = &devcom->priv->components[id]; down_write(&comp->sem); - comp->device[devcom->idx].data = NULL; + RCU_INIT_POINTER(comp->device[devcom->idx].data, NULL); up_write(&comp->sem); + synchronize_rcu(); } int mlx5_devcom_send_event(struct mlx5_devcom *devcom, @@ -193,12 +194,15 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom, comp = &devcom->priv->components[id]; down_write(&comp->sem); - for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) - if (i != devcom->idx && comp->device[i].data) { - err = comp->handler(event, comp->device[i].data, - event_data); + for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) { + void *data = rcu_dereference_protected(comp->device[i].data, + lockdep_is_held(&comp->sem)); + + if (i != devcom->idx && data) { + err = comp->handler(event, data, event_data); break; } + } up_write(&comp->sem); return err; @@ -213,7 +217,7 @@ void mlx5_devcom_set_paired(struct mlx5_devcom *devcom, comp = &devcom->priv->components[id]; WARN_ON(!rwsem_is_locked(&comp->sem)); - comp->paired = paired; + WRITE_ONCE(comp->paired, paired); } bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, @@ -222,7 +226,7 @@ bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, if (IS_ERR_OR_NULL(devcom)) return false; - return devcom->priv->components[id].paired; + return READ_ONCE(devcom->priv->components[id].paired); } void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, @@ -236,7 +240,7 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, comp = &devcom->priv->components[id]; down_read(&comp->sem); - if (!comp->paired) { + if (!READ_ONCE(comp->paired)) { up_read(&comp->sem); return NULL; } @@ -245,7 +249,29 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, if (i != devcom->idx) break; - return comp->device[i].data; + return rcu_dereference_protected(comp->device[i].data, lockdep_is_held(&comp->sem)); +} + +void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum 
mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp; + int i; + + if (IS_ERR_OR_NULL(devcom)) + return NULL; + + for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) + if (i != devcom->idx) + break; + + comp = &devcom->priv->components[id]; + /* This can change concurrently, however 'data' pointer will remain + * valid for the duration of RCU read section. + */ + if (!READ_ONCE(comp->paired)) + return NULL; + + return rcu_dereference(comp->device[i].data); } void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index 94313c18bb64..9a496f4722da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -41,6 +41,7 @@ bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, enum mlx5_devcom_components id); +void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id); void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, enum mlx5_devcom_components id); -- cgit v1.2.3 From 7aa50380191635e5897a773f272829cc961a2be5 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Tue, 21 Feb 2023 16:18:48 -0800 Subject: net/mlx5e: Fix SQ wake logic in ptp napi_poll context Check in the mlx5e_ptp_poll_ts_cq context if the ptp tx sq should be woken up. Before change, the ptp tx sq may never wake up if the ptp tx ts skb fifo is full when mlx5e_poll_tx_cq checks if the queue should be woken up. 
Fixes: 1880bc4e4a96 ("net/mlx5e: Add TX port timestamp support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 19 ++++++++++++------- 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index eb5abd0e55d9..3cbebfba582b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -175,6 +175,8 @@ static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget) /* ensure cq space is freed before enabling more cqes */ wmb(); + mlx5e_txqsq_wake(&ptpsq->txqsq); + return work_done == budget; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 47381e949f1f..879d698b6119 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -193,6 +193,8 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size) return pi; } +void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq); + static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index df5e780e8e6a..c7eb6b238c2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -762,6 +762,17 @@ static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_t } } +void mlx5e_txqsq_wake(struct mlx5e_txqsq *sq) +{ + if (netif_tx_queue_stopped(sq->txq) && + 
mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) && + mlx5e_ptpsq_fifo_has_room(sq) && + !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { + netif_tx_wake_queue(sq->txq); + sq->stats->wake++; + } +} + bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_sq_stats *stats; @@ -861,13 +872,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) netdev_tx_completed_queue(sq->txq, npkts, nbytes); - if (netif_tx_queue_stopped(sq->txq) && - mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) && - mlx5e_ptpsq_fifo_has_room(sq) && - !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { - netif_tx_wake_queue(sq->txq); - stats->wake++; - } + mlx5e_txqsq_wake(sq); return (i == MLX5E_TX_CQ_POLL_BUDGET); } -- cgit v1.2.3 From dfa1e46d6093831b9d49f0f350227a1d13644a2f Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 26 Apr 2023 16:04:48 +0300 Subject: net/mlx5e: TC, Fix using eswitch mapping in nic mode Cited patch is using the eswitch object mapping pool while in nic mode where it isn't initialized. This results in the trace below [0]. Fix that by using either nic or eswitch object mapping pool depending if eswitch is enabled or not. [0]: [ 826.446057] ================================================================== [ 826.446729] BUG: KASAN: slab-use-after-free in mlx5_add_flow_rules+0x30/0x490 [mlx5_core] [ 826.447515] Read of size 8 at addr ffff888194485830 by task tc/6233 [ 826.448243] CPU: 16 PID: 6233 Comm: tc Tainted: G W 6.3.0-rc6+ #1 [ 826.448890] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 826.449785] Call Trace: [ 826.450052] [ 826.450302] dump_stack_lvl+0x33/0x50 [ 826.450650] print_report+0xc2/0x610 [ 826.450998] ? __virt_addr_valid+0xb1/0x130 [ 826.451385] ? mlx5_add_flow_rules+0x30/0x490 [mlx5_core] [ 826.451935] kasan_report+0xae/0xe0 [ 826.452276] ? 
mlx5_add_flow_rules+0x30/0x490 [mlx5_core] [ 826.452829] mlx5_add_flow_rules+0x30/0x490 [mlx5_core] [ 826.453368] ? __kmalloc_node+0x5a/0x120 [ 826.453733] esw_add_restore_rule+0x20f/0x270 [mlx5_core] [ 826.454288] ? mlx5_eswitch_add_send_to_vport_meta_rule+0x260/0x260 [mlx5_core] [ 826.455011] ? mutex_unlock+0x80/0xd0 [ 826.455361] ? __mutex_unlock_slowpath.constprop.0+0x210/0x210 [ 826.455862] ? mapping_add+0x2cb/0x440 [mlx5_core] [ 826.456425] mlx5e_tc_action_miss_mapping_get+0x139/0x180 [mlx5_core] [ 826.457058] ? mlx5e_tc_update_skb_nic+0xb0/0xb0 [mlx5_core] [ 826.457636] ? __kasan_kmalloc+0x77/0x90 [ 826.458000] ? __kmalloc+0x57/0x120 [ 826.458336] mlx5_tc_ct_flow_offload+0x325/0xe40 [mlx5_core] [ 826.458916] ? ct_kernel_enter.constprop.0+0x48/0xa0 [ 826.459360] ? mlx5_tc_ct_parse_action+0xf0/0xf0 [mlx5_core] [ 826.459933] ? mlx5e_mod_hdr_attach+0x491/0x520 [mlx5_core] [ 826.460507] ? mlx5e_mod_hdr_get+0x12/0x20 [mlx5_core] [ 826.461046] ? mlx5e_tc_attach_mod_hdr+0x154/0x170 [mlx5_core] [ 826.461635] mlx5e_configure_flower+0x969/0x2110 [mlx5_core] [ 826.462217] ? _raw_spin_lock_bh+0x85/0xe0 [ 826.462597] ? __mlx5e_add_fdb_flow+0x750/0x750 [mlx5_core] [ 826.463163] ? kasan_save_stack+0x2e/0x40 [ 826.463534] ? down_read+0x115/0x1b0 [ 826.463878] ? down_write_killable+0x110/0x110 [ 826.464288] ? tc_setup_action.part.0+0x9f/0x3b0 [ 826.464701] ? mlx5e_is_uplink_rep+0x4c/0x90 [mlx5_core] [ 826.465253] ? mlx5e_tc_reoffload_flows_work+0x130/0x130 [mlx5_core] [ 826.465878] tc_setup_cb_add+0x112/0x250 [ 826.466247] fl_hw_replace_filter+0x230/0x310 [cls_flower] [ 826.466724] ? fl_hw_destroy_filter+0x1a0/0x1a0 [cls_flower] [ 826.467212] fl_change+0x14e1/0x2030 [cls_flower] [ 826.467636] ? sock_def_readable+0x89/0x120 [ 826.468019] ? fl_tmplt_create+0x2d0/0x2d0 [cls_flower] [ 826.468509] ? kasan_unpoison+0x23/0x50 [ 826.468873] ? get_random_u16+0x180/0x180 [ 826.469244] ? __radix_tree_lookup+0x2b/0x130 [ 826.469640] ? fl_get+0x7b/0x140 [cls_flower] [ 826.470042] ? 
fl_mask_put+0x200/0x200 [cls_flower] [ 826.470478] ? __mutex_unlock_slowpath.constprop.0+0x210/0x210 [ 826.470973] ? fl_tmplt_create+0x2d0/0x2d0 [cls_flower] [ 826.471427] tc_new_tfilter+0x644/0x1050 [ 826.471795] ? tc_get_tfilter+0x860/0x860 [ 826.472170] ? __thaw_task+0x130/0x130 [ 826.472525] ? arch_stack_walk+0x98/0xf0 [ 826.472892] ? cap_capable+0x9f/0xd0 [ 826.473235] ? security_capable+0x47/0x60 [ 826.473608] rtnetlink_rcv_msg+0x1d5/0x550 [ 826.473985] ? rtnl_calcit.isra.0+0x1f0/0x1f0 [ 826.474383] ? __stack_depot_save+0x35/0x4c0 [ 826.474779] ? kasan_save_stack+0x2e/0x40 [ 826.475149] ? kasan_save_stack+0x1e/0x40 [ 826.475518] ? __kasan_record_aux_stack+0x9f/0xb0 [ 826.475939] ? task_work_add+0x77/0x1c0 [ 826.476305] netlink_rcv_skb+0xe0/0x210 [ 826.476661] ? rtnl_calcit.isra.0+0x1f0/0x1f0 [ 826.477057] ? netlink_ack+0x7c0/0x7c0 [ 826.477412] ? rhashtable_jhash2+0xef/0x150 [ 826.477796] ? _copy_from_iter+0x105/0x770 [ 826.484386] netlink_unicast+0x346/0x490 [ 826.484755] ? netlink_attachskb+0x400/0x400 [ 826.485145] ? kernel_text_address+0xc2/0xd0 [ 826.485535] netlink_sendmsg+0x3b0/0x6c0 [ 826.485902] ? kernel_text_address+0xc2/0xd0 [ 826.486296] ? netlink_unicast+0x490/0x490 [ 826.486671] ? iovec_from_user.part.0+0x7a/0x1a0 [ 826.487083] ? netlink_unicast+0x490/0x490 [ 826.487461] sock_sendmsg+0x73/0xc0 [ 826.487803] ____sys_sendmsg+0x364/0x380 [ 826.488186] ? import_iovec+0x7/0x10 [ 826.488531] ? kernel_sendmsg+0x30/0x30 [ 826.488893] ? __copy_msghdr+0x180/0x180 [ 826.489258] ? kasan_save_stack+0x2e/0x40 [ 826.489629] ? kasan_save_stack+0x1e/0x40 [ 826.490002] ? __kasan_record_aux_stack+0x9f/0xb0 [ 826.490424] ? __call_rcu_common.constprop.0+0x46/0x580 [ 826.490876] ___sys_sendmsg+0xdf/0x140 [ 826.491231] ? copy_msghdr_from_user+0x110/0x110 [ 826.491649] ? fget_raw+0x120/0x120 [ 826.491988] ? ___sys_recvmsg+0xd9/0x130 [ 826.492355] ? folio_batch_add_and_move+0x80/0xa0 [ 826.492776] ? _raw_spin_lock+0x7a/0xd0 [ 826.493137] ? 
_raw_spin_lock+0x7a/0xd0 [ 826.493500] ? _raw_read_lock_irq+0x30/0x30 [ 826.493880] ? kasan_set_track+0x21/0x30 [ 826.494249] ? kasan_save_free_info+0x2a/0x40 [ 826.494650] ? do_sys_openat2+0xff/0x270 [ 826.495016] ? __fget_light+0x1b5/0x200 [ 826.495377] ? __virt_addr_valid+0xb1/0x130 [ 826.495763] __sys_sendmsg+0xb2/0x130 [ 826.496118] ? __sys_sendmsg_sock+0x20/0x20 [ 826.496501] ? __x64_sys_rseq+0x2e0/0x2e0 [ 826.496874] ? do_user_addr_fault+0x276/0x820 [ 826.497273] ? fpregs_assert_state_consistent+0x52/0x60 [ 826.497727] ? exit_to_user_mode_prepare+0x30/0x120 [ 826.498158] do_syscall_64+0x3d/0x90 [ 826.498502] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 826.498949] RIP: 0033:0x7f9b67f4f887 [ 826.499294] Code: 0a 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [ 826.500742] RSP: 002b:00007fff5d1a5498 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 826.501395] RAX: ffffffffffffffda RBX: 0000000064413ce6 RCX: 00007f9b67f4f887 [ 826.501975] RDX: 0000000000000000 RSI: 00007fff5d1a5500 RDI: 0000000000000003 [ 826.502556] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000001 [ 826.503135] R10: 00007f9b67e08708 R11: 0000000000000246 R12: 0000000000000001 [ 826.503714] R13: 0000000000000001 R14: 00007fff5d1a9800 R15: 0000000000485400 [ 826.504304] [ 826.504753] Allocated by task 3764: [ 826.505090] kasan_save_stack+0x1e/0x40 [ 826.505453] kasan_set_track+0x21/0x30 [ 826.505810] __kasan_kmalloc+0x77/0x90 [ 826.506164] __mlx5_create_flow_table+0x16d/0xbb0 [mlx5_core] [ 826.506742] esw_offloads_enable+0x60d/0xfb0 [mlx5_core] [ 826.507292] mlx5_eswitch_enable_locked+0x4d3/0x680 [mlx5_core] [ 826.507885] mlx5_devlink_eswitch_mode_set+0x2a3/0x580 [mlx5_core] [ 826.508513] devlink_nl_cmd_eswitch_set_doit+0xdf/0x1f0 [ 826.508969] genl_family_rcv_msg_doit.isra.0+0x146/0x1c0 [ 826.509427] genl_rcv_msg+0x28d/0x3e0 [ 826.509772] 
netlink_rcv_skb+0xe0/0x210 [ 826.510133] genl_rcv+0x24/0x40 [ 826.510448] netlink_unicast+0x346/0x490 [ 826.510810] netlink_sendmsg+0x3b0/0x6c0 [ 826.511179] sock_sendmsg+0x73/0xc0 [ 826.511519] __sys_sendto+0x18d/0x220 [ 826.511867] __x64_sys_sendto+0x72/0x80 [ 826.512232] do_syscall_64+0x3d/0x90 [ 826.512576] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 826.513220] Freed by task 5674: [ 826.513535] kasan_save_stack+0x1e/0x40 [ 826.513893] kasan_set_track+0x21/0x30 [ 826.514245] kasan_save_free_info+0x2a/0x40 [ 826.514629] ____kasan_slab_free+0x11a/0x1b0 [ 826.515021] __kmem_cache_free+0x14d/0x280 [ 826.515399] tree_put_node+0x109/0x1c0 [mlx5_core] [ 826.515907] mlx5_destroy_flow_table+0x119/0x630 [mlx5_core] [ 826.516481] esw_offloads_steering_cleanup+0xe7/0x150 [mlx5_core] [ 826.517084] esw_offloads_disable+0xe0/0x160 [mlx5_core] [ 826.517632] mlx5_eswitch_disable_locked+0x26c/0x290 [mlx5_core] [ 826.518225] mlx5_devlink_eswitch_mode_set+0x128/0x580 [mlx5_core] [ 826.518834] devlink_nl_cmd_eswitch_set_doit+0xdf/0x1f0 [ 826.519286] genl_family_rcv_msg_doit.isra.0+0x146/0x1c0 [ 826.519748] genl_rcv_msg+0x28d/0x3e0 [ 826.520101] netlink_rcv_skb+0xe0/0x210 [ 826.520458] genl_rcv+0x24/0x40 [ 826.520771] netlink_unicast+0x346/0x490 [ 826.521137] netlink_sendmsg+0x3b0/0x6c0 [ 826.521505] sock_sendmsg+0x73/0xc0 [ 826.521842] __sys_sendto+0x18d/0x220 [ 826.522191] __x64_sys_sendto+0x72/0x80 [ 826.522554] do_syscall_64+0x3d/0x90 [ 826.522894] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 826.523540] Last potentially related work creation: [ 826.523969] kasan_save_stack+0x1e/0x40 [ 826.524331] __kasan_record_aux_stack+0x9f/0xb0 [ 826.524739] insert_work+0x30/0x130 [ 826.525078] __queue_work+0x34b/0x690 [ 826.525426] queue_work_on+0x48/0x50 [ 826.525766] __rhashtable_remove_fast_one+0x4af/0x4d0 [mlx5_core] [ 826.526365] del_sw_flow_group+0x1b5/0x270 [mlx5_core] [ 826.526898] tree_put_node+0x109/0x1c0 [mlx5_core] [ 826.527407] esw_offloads_steering_cleanup+0xd3/0x150 [mlx5_core] 
[ 826.528009] esw_offloads_disable+0xe0/0x160 [mlx5_core] [ 826.528616] mlx5_eswitch_disable_locked+0x26c/0x290 [mlx5_core] [ 826.529218] mlx5_devlink_eswitch_mode_set+0x128/0x580 [mlx5_core] [ 826.529823] devlink_nl_cmd_eswitch_set_doit+0xdf/0x1f0 [ 826.530276] genl_family_rcv_msg_doit.isra.0+0x146/0x1c0 [ 826.530733] genl_rcv_msg+0x28d/0x3e0 [ 826.531079] netlink_rcv_skb+0xe0/0x210 [ 826.531439] genl_rcv+0x24/0x40 [ 826.531755] netlink_unicast+0x346/0x490 [ 826.532123] netlink_sendmsg+0x3b0/0x6c0 [ 826.532487] sock_sendmsg+0x73/0xc0 [ 826.532825] __sys_sendto+0x18d/0x220 [ 826.533175] __x64_sys_sendto+0x72/0x80 [ 826.533533] do_syscall_64+0x3d/0x90 [ 826.533877] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 826.534521] The buggy address belongs to the object at ffff888194485800 which belongs to the cache kmalloc-512 of size 512 [ 826.535506] The buggy address is located 48 bytes inside of freed 512-byte region [ffff888194485800, ffff888194485a00) [ 826.536666] The buggy address belongs to the physical page: [ 826.537138] page:00000000d75841dd refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x194480 [ 826.537915] head:00000000d75841dd order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0 [ 826.538595] flags: 0x200000000010200(slab|head|node=0|zone=2) [ 826.539089] raw: 0200000000010200 ffff888100042c80 ffffea0004523800 dead000000000002 [ 826.539755] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 [ 826.540417] page dumped because: kasan: bad access detected [ 826.541095] Memory state around the buggy address: [ 826.541519] ffff888194485700: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 826.542149] ffff888194485780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 826.542773] >ffff888194485800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 826.543400] ^ [ 826.543822] ffff888194485880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 826.544452] ffff888194485900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 
826.545079] ================================================================== Fixes: 6702782845a5 ("net/mlx5e: TC, Set CT miss to the specific ct action instance") Signed-off-by: Paul Blakey Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 34 ++++++++++++++++++++----- 1 file changed, 27 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 416ab6b6da97..e95414ef1f04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5646,22 +5646,43 @@ bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb) 0, NULL); } +static struct mapping_ctx * +mlx5e_get_priv_obj_mapping(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc; + struct mlx5_eswitch *esw; + struct mapping_ctx *ctx; + + if (is_mdev_switchdev_mode(priv->mdev)) { + esw = priv->mdev->priv.eswitch; + ctx = esw->offloads.reg_c0_obj_pool; + } else { + tc = mlx5e_fs_get_tc(priv->fs); + ctx = tc->mapping; + } + + return ctx; +} + int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, u64 act_miss_cookie, u32 *act_miss_mapping) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_mapped_obj mapped_obj = {}; + struct mlx5_eswitch *esw; struct mapping_ctx *ctx; int err; - ctx = esw->offloads.reg_c0_obj_pool; - + ctx = mlx5e_get_priv_obj_mapping(priv); mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS; mapped_obj.act_miss_cookie = act_miss_cookie; err = mapping_add(ctx, &mapped_obj, act_miss_mapping); if (err) return err; + if (!is_mdev_switchdev_mode(priv->mdev)) + return 0; + + esw = priv->mdev->priv.eswitch; attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping); if (IS_ERR(attr->act_id_restore_rule)) goto err_rule; @@ -5676,10 +5697,9 @@ err_rule: void 
mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, u32 act_miss_mapping) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mapping_ctx *ctx; + struct mapping_ctx *ctx = mlx5e_get_priv_obj_mapping(priv); - ctx = esw->offloads.reg_c0_obj_pool; - mlx5_del_flow_rules(attr->act_id_restore_rule); + if (is_mdev_switchdev_mode(priv->mdev)) + mlx5_del_flow_rules(attr->act_id_restore_rule); mapping_remove(ctx, act_miss_mapping); } -- cgit v1.2.3 From 8c253dfc89efde6b5faddf9e7400e5d17884e042 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 6 Feb 2023 11:52:02 +0200 Subject: net/mlx5: E-switch, Devcom, sync devcom events and devcom comp register devcom events are sent to all registered components. Following the cited patch, it is possible for two components, e.g.: two eswitches, to send devcom events, while both components are registered. This means the eswitch layer will do double un/pairing, i.e. double allocation and freeing of resources, even though only one un/pairing is needed. flow example: cpu0 cpu1 ---- ---- mlx5_devlink_eswitch_mode_set(dev0) esw_offloads_devcom_init() mlx5_devcom_register_component(esw0) mlx5_devlink_eswitch_mode_set(dev1) esw_offloads_devcom_init() mlx5_devcom_register_component(esw1) mlx5_devcom_send_event() mlx5_devcom_send_event() Hence, check whether the eswitches are already un/paired before free/allocation of resources.
Fixes: 09b278462f16 ("net: devlink: enable parallel ops on netlink interface") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 9f007c5438ee..add6cfa432a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -342,6 +342,7 @@ struct mlx5_eswitch { u32 large_group_num; } params; struct blocking_notifier_head n_head; + bool paired[MLX5_MAX_PORTS]; }; void esw_offloads_disable(struct mlx5_eswitch *esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7c34c7cf506f..8d19c20d3447 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2742,6 +2742,9 @@ static int mlx5_esw_offloads_devcom_event(int event, mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) break; + if (esw->paired[mlx5_get_dev_index(peer_esw->dev)]) + break; + err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true); if (err) goto err_out; @@ -2753,14 +2756,18 @@ static int mlx5_esw_offloads_devcom_event(int event, if (err) goto err_pair; + esw->paired[mlx5_get_dev_index(peer_esw->dev)] = true; + peer_esw->paired[mlx5_get_dev_index(esw->dev)] = true; mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true); break; case ESW_OFFLOADS_DEVCOM_UNPAIR: - if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) + if (!esw->paired[mlx5_get_dev_index(peer_esw->dev)]) break; mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); + esw->paired[mlx5_get_dev_index(peer_esw->dev)] = false; + 
peer_esw->paired[mlx5_get_dev_index(esw->dev)] = false; mlx5_esw_offloads_unpair(peer_esw); mlx5_esw_offloads_unpair(esw); mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); -- cgit v1.2.3 From af87194352cad882d787d06fb7efa714acd95427 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 2 May 2023 13:35:11 +0300 Subject: net/mlx5: Devcom, fix error flow in mlx5_devcom_register_device In case devcom allocation fails, mlx5 always frees the priv. However, this priv might have been allocated by a different thread, and freeing it might lead to use-after-free bugs. Fix it by freeing the priv only in case it was allocated by the running thread. Fixes: fadd59fc50d0 ("net/mlx5: Introduce inter-device communication mechanism") Signed-off-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index 070d55f13419..8f978491dd32 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -112,7 +112,8 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev) priv->devs[idx] = dev; devcom = mlx5_devcom_alloc(priv, idx); if (!devcom) { - kfree(priv); + if (new_priv) + kfree(priv); return ERR_PTR(-ENOMEM); } -- cgit v1.2.3 From 1f893f57a3bf9fe1f4bcb25b55aea7f7f9712fe7 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 2 May 2023 13:36:42 +0300 Subject: net/mlx5: Devcom, serialize devcom registration On the one hand, the mlx5 driver allows probing PFs in parallel. On the other hand, devcom, which is a shared resource between PFs, is registered without any lock. This might result in memory problems. Hence, use the global mlx5_dev_list_lock in order to serialize devcom registration. 
Fixes: fadd59fc50d0 ("net/mlx5: Introduce inter-device communication mechanism") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index 8f978491dd32..b7d779d08d83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -3,6 +3,7 @@ #include #include "lib/devcom.h" +#include "mlx5_core.h" static LIST_HEAD(devcom_list); @@ -77,6 +78,7 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED) return NULL; + mlx5_dev_list_lock(); sguid0 = mlx5_query_nic_system_image_guid(dev); list_for_each_entry(iter, &devcom_list, list) { struct mlx5_core_dev *tmp_dev = NULL; @@ -102,8 +104,10 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev) if (!priv) { priv = mlx5_devcom_list_alloc(); - if (!priv) - return ERR_PTR(-ENOMEM); + if (!priv) { + devcom = ERR_PTR(-ENOMEM); + goto out; + } idx = 0; new_priv = true; @@ -114,12 +118,14 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev) if (!devcom) { if (new_priv) kfree(priv); - return ERR_PTR(-ENOMEM); + devcom = ERR_PTR(-ENOMEM); + goto out; } if (new_priv) list_add(&priv->list, &devcom_list); - +out: + mlx5_dev_list_unlock(); return devcom; } @@ -132,6 +138,7 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom) if (IS_ERR_OR_NULL(devcom)) return; + mlx5_dev_list_lock(); priv = devcom->priv; priv->devs[devcom->idx] = NULL; @@ -142,10 +149,12 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom *devcom) break; if (i != MLX5_DEVCOM_PORTS_SUPPORTED) - return; + goto out; list_del(&priv->list); 
kfree(priv); +out: + mlx5_dev_list_unlock(); } void mlx5_devcom_register_component(struct mlx5_devcom *devcom, -- cgit v1.2.3 From 9c2d08010963a61a171e8cb2852d3ce015b60cb4 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 13 Apr 2023 22:15:31 +0300 Subject: net/mlx5: Free irqs only on shutdown callback Whenever a shutdown is invoked, free irqs only and keep mlx5_irq synthetic wrapper intact in order to avoid use-after-free on system shutdown. for example: ================================================================== BUG: KASAN: use-after-free in _find_first_bit+0x66/0x80 Read of size 8 at addr ffff88823fc0d318 by task kworker/u192:0/13608 CPU: 25 PID: 13608 Comm: kworker/u192:0 Tainted: G B W O 6.1.21-cloudflare-kasan-2023.3.21 #1 Hardware name: GIGABYTE R162-R2-GEN0/MZ12-HD2-CD, BIOS R14 05/03/2021 Workqueue: mlx5e mlx5e_tx_timeout_work [mlx5_core] Call Trace: dump_stack_lvl+0x34/0x48 print_report+0x170/0x473 ? _find_first_bit+0x66/0x80 kasan_report+0xad/0x130 ? _find_first_bit+0x66/0x80 _find_first_bit+0x66/0x80 mlx5e_open_channels+0x3c5/0x3a10 [mlx5_core] ? console_unlock+0x2fa/0x430 ? _raw_spin_lock_irqsave+0x8d/0xf0 ? _raw_spin_unlock_irqrestore+0x42/0x80 ? preempt_count_add+0x7d/0x150 ? __wake_up_klogd.part.0+0x7d/0xc0 ? vprintk_emit+0xfe/0x2c0 ? mlx5e_trigger_napi_sched+0x40/0x40 [mlx5_core] ? dev_attr_show.cold+0x35/0x35 ? devlink_health_do_dump.part.0+0x174/0x340 ? devlink_health_report+0x504/0x810 ? mlx5e_reporter_tx_timeout+0x29d/0x3a0 [mlx5_core] ? mlx5e_tx_timeout_work+0x17c/0x230 [mlx5_core] ? process_one_work+0x680/0x1050 mlx5e_safe_switch_params+0x156/0x220 [mlx5_core] ? mlx5e_switch_priv_channels+0x310/0x310 [mlx5_core] ? mlx5_eq_poll_irq_disabled+0xb6/0x100 [mlx5_core] mlx5e_tx_reporter_timeout_recover+0x123/0x240 [mlx5_core] ? __mutex_unlock_slowpath.constprop.0+0x2b0/0x2b0 devlink_health_reporter_recover+0xa6/0x1f0 devlink_health_report+0x2f7/0x810 ? vsnprintf+0x854/0x15e0 mlx5e_reporter_tx_timeout+0x29d/0x3a0 [mlx5_core] ? 
mlx5e_reporter_tx_err_cqe+0x1a0/0x1a0 [mlx5_core] ? mlx5e_tx_reporter_timeout_dump+0x50/0x50 [mlx5_core] ? mlx5e_tx_reporter_dump_sq+0x260/0x260 [mlx5_core] ? newidle_balance+0x9b7/0xe30 ? psi_group_change+0x6a7/0xb80 ? mutex_lock+0x96/0xf0 ? __mutex_lock_slowpath+0x10/0x10 mlx5e_tx_timeout_work+0x17c/0x230 [mlx5_core] process_one_work+0x680/0x1050 worker_thread+0x5a0/0xeb0 ? process_one_work+0x1050/0x1050 kthread+0x2a2/0x340 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 Freed by task 1: kasan_save_stack+0x23/0x50 kasan_set_track+0x21/0x30 kasan_save_free_info+0x2a/0x40 ____kasan_slab_free+0x169/0x1d0 slab_free_freelist_hook+0xd2/0x190 __kmem_cache_free+0x1a1/0x2f0 irq_pool_free+0x138/0x200 [mlx5_core] mlx5_irq_table_destroy+0xf6/0x170 [mlx5_core] mlx5_core_eq_free_irqs+0x74/0xf0 [mlx5_core] shutdown+0x194/0x1aa [mlx5_core] pci_device_shutdown+0x75/0x120 device_shutdown+0x35c/0x620 kernel_restart+0x60/0xa0 __do_sys_reboot+0x1cb/0x2c0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x4b/0xb5 The buggy address belongs to the object at ffff88823fc0d300 which belongs to the cache kmalloc-192 of size 192 The buggy address is located 24 bytes inside of 192-byte region [ffff88823fc0d300, ffff88823fc0d3c0) The buggy address belongs to the physical page: page:0000000010139587 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x23fc0c head:0000000010139587 order:1 compound_mapcount:0 compound_pincount:0 flags: 0x2ffff800010200(slab|head|node=0|zone=2|lastcpupid=0x1ffff) raw: 002ffff800010200 0000000000000000 dead000000000122 ffff88810004ca00 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88823fc0d200: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88823fc0d280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc >ffff88823fc0d300: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff88823fc0d380: fb fb fb fb fb fb fb fb fc 
fc fc fc fc fc fc fc ffff88823fc0d400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== general protection fault, probably for non-canonical address 0xdffffc005c40d7ac: 0000 [#1] PREEMPT SMP KASAN NOPTI KASAN: probably user-memory-access in range [0x00000002e206bd60-0x00000002e206bd67] CPU: 25 PID: 13608 Comm: kworker/u192:0 Tainted: G B W O 6.1.21-cloudflare-kasan-2023.3.21 #1 Hardware name: GIGABYTE R162-R2-GEN0/MZ12-HD2-CD, BIOS R14 05/03/2021 Workqueue: mlx5e mlx5e_tx_timeout_work [mlx5_core] RIP: 0010:__alloc_pages+0x141/0x5c0 Call Trace: ? sysvec_apic_timer_interrupt+0xa0/0xc0 ? asm_sysvec_apic_timer_interrupt+0x16/0x20 ? __alloc_pages_slowpath.constprop.0+0x1ec0/0x1ec0 ? _raw_spin_unlock_irqrestore+0x3d/0x80 __kmalloc_large_node+0x80/0x120 ? kvmalloc_node+0x4e/0x170 __kmalloc_node+0xd4/0x150 kvmalloc_node+0x4e/0x170 mlx5e_open_channels+0x631/0x3a10 [mlx5_core] ? console_unlock+0x2fa/0x430 ? _raw_spin_lock_irqsave+0x8d/0xf0 ? _raw_spin_unlock_irqrestore+0x42/0x80 ? preempt_count_add+0x7d/0x150 ? __wake_up_klogd.part.0+0x7d/0xc0 ? vprintk_emit+0xfe/0x2c0 ? mlx5e_trigger_napi_sched+0x40/0x40 [mlx5_core] ? dev_attr_show.cold+0x35/0x35 ? devlink_health_do_dump.part.0+0x174/0x340 ? devlink_health_report+0x504/0x810 ? mlx5e_reporter_tx_timeout+0x29d/0x3a0 [mlx5_core] ? mlx5e_tx_timeout_work+0x17c/0x230 [mlx5_core] ? process_one_work+0x680/0x1050 mlx5e_safe_switch_params+0x156/0x220 [mlx5_core] ? mlx5e_switch_priv_channels+0x310/0x310 [mlx5_core] ? mlx5_eq_poll_irq_disabled+0xb6/0x100 [mlx5_core] mlx5e_tx_reporter_timeout_recover+0x123/0x240 [mlx5_core] ? __mutex_unlock_slowpath.constprop.0+0x2b0/0x2b0 devlink_health_reporter_recover+0xa6/0x1f0 devlink_health_report+0x2f7/0x810 ? vsnprintf+0x854/0x15e0 mlx5e_reporter_tx_timeout+0x29d/0x3a0 [mlx5_core] ? mlx5e_reporter_tx_err_cqe+0x1a0/0x1a0 [mlx5_core] ? mlx5e_tx_reporter_timeout_dump+0x50/0x50 [mlx5_core] ? 
mlx5e_tx_reporter_dump_sq+0x260/0x260 [mlx5_core] ? newidle_balance+0x9b7/0xe30 ? psi_group_change+0x6a7/0xb80 ? mutex_lock+0x96/0xf0 ? __mutex_lock_slowpath+0x10/0x10 mlx5e_tx_timeout_work+0x17c/0x230 [mlx5_core] process_one_work+0x680/0x1050 worker_thread+0x5a0/0xeb0 ? process_one_work+0x1050/0x1050 kthread+0x2a2/0x340 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 ---[ end trace 0000000000000000 ]--- RIP: 0010:__alloc_pages+0x141/0x5c0 Code: e0 39 a3 96 89 e9 b8 22 01 32 01 83 e1 0f 48 89 fa 01 c9 48 c1 ea 03 d3 f8 83 e0 03 89 44 24 6c 48 b8 00 00 00 00 00 fc ff df <80> 3c 02 00 0f 85 fc 03 00 00 89 e8 4a 8b 14 f5 e0 39 a3 96 4c 89 RSP: 0018:ffff888251f0f438 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 1ffff1104a3e1e8b RCX: 0000000000000000 RDX: 000000005c40d7ac RSI: 0000000000000003 RDI: 00000002e206bd60 RBP: 0000000000052dc0 R08: ffff8882b0044218 R09: ffff8882b0045e8a R10: fffffbfff300fefc R11: ffff888167af4000 R12: 0000000000000003 R13: 0000000000000000 R14: 00000000696c7070 R15: ffff8882373f4380 FS: 0000000000000000(0000) GS:ffff88bf2be80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005641d031eee8 CR3: 0000002e7ca14000 CR4: 0000000000350ee0 Kernel panic - not syncing: Fatal exception Kernel Offset: 0x11000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) ---[ end Kernel panic - not syncing: Fatal exception ]---] Reported-by: Frederick Lawler Link: https://lore.kernel.org/netdev/be5b9271-7507-19c5-ded1-fa78f1980e69@cloudflare.com Signed-off-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 29 ++++++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 
1c35d721a31d..fe698c79616c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -1104,7 +1104,7 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ - mlx5_irq_table_destroy(dev); + mlx5_irq_table_free_irqs(dev); mutex_unlock(&table->lock); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h index efd0c299c5c7..aa403a5ea34e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h @@ -15,6 +15,7 @@ int mlx5_irq_table_init(struct mlx5_core_dev *dev); void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); int mlx5_irq_table_create(struct mlx5_core_dev *dev); void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); +void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev); int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table); int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table); struct mlx5_irq_table *mlx5_irq_table_get(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 2245d3b2f393..ac1304c2d205 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -691,6 +691,24 @@ static void irq_pools_destroy(struct mlx5_irq_table *table) irq_pool_free(table->pcif_pool); } +static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool) +{ + struct mlx5_irq *irq; + unsigned long index; + + xa_for_each(&pool->irqs, index, irq) + free_irq(irq->map.virq, &irq->nh); +} + +static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table) +{ + if (table->sf_ctrl_pool) { + mlx5_irq_pool_free_irqs(table->sf_comp_pool); + mlx5_irq_pool_free_irqs(table->sf_ctrl_pool); + } + 
mlx5_irq_pool_free_irqs(table->pcif_pool); +} + /* irq_table API */ int mlx5_irq_table_init(struct mlx5_core_dev *dev) @@ -774,6 +792,17 @@ void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) pci_free_irq_vectors(dev->pdev); } +void mlx5_irq_table_free_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + + if (mlx5_core_is_sf(dev)) + return; + + mlx5_irq_pools_free_irqs(table); + pci_free_irq_vectors(dev->pdev); +} + int mlx5_irq_table_get_sfs_vec(struct mlx5_irq_table *table) { if (table->sf_comp_pool) -- cgit v1.2.3 From ef8c063cf88e1a3d99ab4ada1cbab5ba7248a4f2 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 16 Apr 2023 08:54:04 +0300 Subject: net/mlx5: Fix irq affinity management The cited patch deny the user of changing the affinity of mlx5 irqs, which break backward compatibility. Hence, allow the user to change the affinity of mlx5 irqs. Fixes: bbac70c74183 ("net/mlx5: Use newer affinity descriptor") Signed-off-by: Shay Drory Reviewed-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index ac1304c2d205..86b528aae6d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -567,7 +567,7 @@ int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, struct mlx5_irq *irq; int i; - af_desc.is_managed = 1; + af_desc.is_managed = false; for (i = 0; i < nirqs; i++) { cpumask_set_cpu(cpus[i], &af_desc.mask); irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap); -- cgit v1.2.3 From 1da438c0ae02396dc5018b63237492cb5908608d Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 17 Apr 2023 10:57:50 +0300 Subject: net/mlx5: Fix indexing of mlx5_irq After the cited patch, mlx5_irq xarray index can be 
different than mlx5_irq MSIX table index. Fix it by storing both mlx5_irq xarray index and MSIX table index. Fixes: 3354822cde5a ("net/mlx5: Use dynamic msix vectors allocation") Signed-off-by: Shay Drory Reviewed-by: Eli Cohen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 86b528aae6d4..db5687d9fec9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -32,6 +32,7 @@ struct mlx5_irq { struct mlx5_irq_pool *pool; int refcount; struct msi_map map; + u32 pool_index; }; struct mlx5_irq_table { @@ -132,7 +133,7 @@ static void irq_release(struct mlx5_irq *irq) struct cpu_rmap *rmap; #endif - xa_erase(&pool->irqs, irq->map.index); + xa_erase(&pool->irqs, irq->pool_index); /* free_irq requires that affinity_hint and rmap will be cleared before * calling it. To satisfy this requirement, we call * irq_cpu_rmap_remove() to remove the notifier @@ -276,11 +277,11 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, } irq->pool = pool; irq->refcount = 1; - irq->map.index = i; - err = xa_err(xa_store(&pool->irqs, irq->map.index, irq, GFP_KERNEL)); + irq->pool_index = i; + err = xa_err(xa_store(&pool->irqs, irq->pool_index, irq, GFP_KERNEL)); if (err) { mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n", - irq->map.index, err); + irq->pool_index, err); goto err_xa; } return irq; -- cgit v1.2.3 From b2e3406a38f0f48b1dfb81e5bb73d243ff6af179 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Mon, 22 May 2023 07:34:04 +0530 Subject: octeontx2-pf: Add support for page pool A page pool for each rx queue enhances rx-side performance by reclaiming buffers back to each queue-specific pool. 
DMA mapping is done only for first allocation of buffers. As subsequent buffers allocation avoid DMA mapping, it results in performance improvement. Image | Performance ------------ | ------------ Vannila | 3Mpps | with this | 42Mpps change | --------------------------- Signed-off-by: Ratheesh Kannoth Link: https://lore.kernel.org/r/20230522020404.152020-1-rkannoth@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/Kconfig | 1 + .../ethernet/marvell/octeontx2/nic/otx2_common.c | 78 +++++++++++++++++++--- .../ethernet/marvell/octeontx2/nic/otx2_common.h | 6 +- .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 11 ++- .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 19 +++--- .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.h | 1 + .../net/ethernet/marvell/octeontx2/nic/qos_sq.c | 2 +- 7 files changed, 96 insertions(+), 22 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/Kconfig b/drivers/net/ethernet/marvell/octeontx2/Kconfig index 993ac180a5db..a32d85d6f599 100644 --- a/drivers/net/ethernet/marvell/octeontx2/Kconfig +++ b/drivers/net/ethernet/marvell/octeontx2/Kconfig @@ -32,6 +32,7 @@ config OCTEONTX2_PF tristate "Marvell OcteonTX2 NIC Physical Function driver" select OCTEONTX2_MBOX select NET_DEVLINK + select PAGE_POOL depends on (64BIT && COMPILE_TEST) || ARM64 select DIMLIB depends on PCI diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index f9286648e45c..a79cb680bb23 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -518,11 +518,32 @@ void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx) (pfvf->hw.cq_ecount_wait - 1)); } +static int otx2_alloc_pool_buf(struct otx2_nic *pfvf, struct otx2_pool *pool, + dma_addr_t *dma) +{ + unsigned int offset = 0; + struct page *page; + size_t sz; + + sz = 
SKB_DATA_ALIGN(pool->rbsize); + sz = ALIGN(sz, OTX2_ALIGN); + + page = page_pool_alloc_frag(pool->page_pool, &offset, sz, GFP_ATOMIC); + if (unlikely(!page)) + return -ENOMEM; + + *dma = page_pool_get_dma_addr(page) + offset; + return 0; +} + static int __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, dma_addr_t *dma) { u8 *buf; + if (pool->page_pool) + return otx2_alloc_pool_buf(pfvf, pool, dma); + buf = napi_alloc_frag_align(pool->rbsize, OTX2_ALIGN); if (unlikely(!buf)) return -ENOMEM; @@ -1205,10 +1226,31 @@ void otx2_sq_free_sqbs(struct otx2_nic *pfvf) } } +void otx2_free_bufs(struct otx2_nic *pfvf, struct otx2_pool *pool, + u64 iova, int size) +{ + struct page *page; + u64 pa; + + pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); + page = virt_to_head_page(phys_to_virt(pa)); + + if (pool->page_pool) { + page_pool_put_full_page(pool->page_pool, page, true); + } else { + dma_unmap_page_attrs(pfvf->dev, iova, size, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + + put_page(page); + } +} + void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type) { int pool_id, pool_start = 0, pool_end = 0, size = 0; - u64 iova, pa; + struct otx2_pool *pool; + u64 iova; if (type == AURA_NIX_SQ) { pool_start = otx2_get_pool_idx(pfvf, type, 0); @@ -1224,15 +1266,13 @@ void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type) /* Free SQB and RQB pointers from the aura pool */ for (pool_id = pool_start; pool_id < pool_end; pool_id++) { iova = otx2_aura_allocptr(pfvf, pool_id); + pool = &pfvf->qset.pool[pool_id]; while (iova) { if (type == AURA_NIX_RQ) iova -= OTX2_HEAD_ROOM; - pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); - dma_unmap_page_attrs(pfvf->dev, iova, size, - DMA_FROM_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); - put_page(virt_to_page(phys_to_virt(pa))); + otx2_free_bufs(pfvf, pool, iova, size); + iova = otx2_aura_allocptr(pfvf, pool_id); } } @@ -1250,6 +1290,8 @@ void otx2_aura_pool_free(struct otx2_nic *pfvf) pool = &pfvf->qset.pool[pool_id]; qmem_free(pfvf->dev, 
pool->stack); qmem_free(pfvf->dev, pool->fc_addr); + page_pool_destroy(pool->page_pool); + pool->page_pool = NULL; } devm_kfree(pfvf->dev, pfvf->qset.pool); pfvf->qset.pool = NULL; @@ -1333,8 +1375,9 @@ int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, } int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, - int stack_pages, int numptrs, int buf_size) + int stack_pages, int numptrs, int buf_size, int type) { + struct page_pool_params pp_params = { 0 }; struct npa_aq_enq_req *aq; struct otx2_pool *pool; int err; @@ -1378,6 +1421,22 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, aq->ctype = NPA_AQ_CTYPE_POOL; aq->op = NPA_AQ_INSTOP_INIT; + if (type != AURA_NIX_RQ) { + pool->page_pool = NULL; + return 0; + } + + pp_params.flags = PP_FLAG_PAGE_FRAG | PP_FLAG_DMA_MAP; + pp_params.pool_size = numptrs; + pp_params.nid = NUMA_NO_NODE; + pp_params.dev = pfvf->dev; + pp_params.dma_dir = DMA_FROM_DEVICE; + pool->page_pool = page_pool_create(&pp_params); + if (IS_ERR(pool->page_pool)) { + netdev_err(pfvf->netdev, "Creation of page pool failed\n"); + return PTR_ERR(pool->page_pool); + } + return 0; } @@ -1412,7 +1471,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) /* Initialize pool context */ err = otx2_pool_init(pfvf, pool_id, stack_pages, - num_sqbs, hw->sqb_size); + num_sqbs, hw->sqb_size, AURA_NIX_SQ); if (err) goto fail; } @@ -1475,7 +1534,7 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf) } for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) { err = otx2_pool_init(pfvf, pool_id, stack_pages, - num_ptrs, pfvf->rbsize); + num_ptrs, pfvf->rbsize, AURA_NIX_RQ); if (err) goto fail; } @@ -1659,7 +1718,6 @@ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable) req->bpid_per_chan = 0; #endif - return otx2_sync_mbox_msg(&pfvf->mbox); } EXPORT_SYMBOL(otx2_nix_config_bp); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index b2267c8bec37..a9ed15d1793a 100644 --- 
a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -976,7 +976,7 @@ int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable); void otx2_ctx_disable(struct mbox *mbox, int type, bool npa); int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable); -void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); +void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int qidx); void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura); int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); @@ -984,7 +984,7 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, dma_addr_t *dma); int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, - int stack_pages, int numptrs, int buf_size); + int stack_pages, int numptrs, int buf_size, int type); int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, int pool_id, int numptrs); @@ -1054,6 +1054,8 @@ u16 otx2_get_max_mtu(struct otx2_nic *pfvf); int otx2_handle_ntuple_tc_features(struct net_device *netdev, netdev_features_t features); int otx2_smq_flush(struct otx2_nic *pfvf, int smq); +void otx2_free_bufs(struct otx2_nic *pfvf, struct otx2_pool *pool, + u64 iova, int size); /* tc support */ int otx2_init_tc(struct otx2_nic *nic); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index e1883c3edda3..db3fcab1c8cd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1555,7 +1555,9 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) struct nix_lf_free_req *free_req; struct mbox *mbox = &pf->mbox; struct otx2_cq_queue *cq; + struct otx2_pool *pool; struct 
msg_req *req; + int pool_id; int qidx; /* Ensure all SQE are processed */ @@ -1584,7 +1586,7 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) for (qidx = 0; qidx < qset->cq_cnt; qidx++) { cq = &qset->cq[qidx]; if (cq->cq_type == CQ_RX) - otx2_cleanup_rx_cqes(pf, cq); + otx2_cleanup_rx_cqes(pf, cq, qidx); else otx2_cleanup_tx_cqes(pf, cq); } @@ -1594,6 +1596,13 @@ static void otx2_free_hw_resources(struct otx2_nic *pf) /* Free RQ buffer pointers*/ otx2_free_aura_ptr(pf, AURA_NIX_RQ); + for (qidx = 0; qidx < pf->hw.rx_queues; qidx++) { + pool_id = otx2_get_pool_idx(pf, AURA_NIX_RQ, qidx); + pool = &pf->qset.pool[pool_id]; + page_pool_destroy(pool->page_pool); + pool->page_pool = NULL; + } + otx2_free_cq_res(pf); /* Free all ingress bandwidth profiles allocated */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index e288f46b23a8..bd84144480a5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -217,9 +217,6 @@ static bool otx2_skb_add_frag(struct otx2_nic *pfvf, struct sk_buff *skb, skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, va - page_address(page) + off, len - off, pfvf->rbsize); - - otx2_dma_unmap_page(pfvf, iova - OTX2_HEAD_ROOM, - pfvf->rbsize, DMA_FROM_DEVICE); return true; } @@ -382,6 +379,8 @@ static void otx2_rcv_pkt_handler(struct otx2_nic *pfvf, if (pfvf->netdev->features & NETIF_F_RXCSUM) skb->ip_summed = CHECKSUM_UNNECESSARY; + skb_mark_for_recycle(skb); + napi_gro_frags(napi); } @@ -1186,11 +1185,13 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, } EXPORT_SYMBOL(otx2_sq_append_skb); -void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) +void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, int qidx) { struct nix_cqe_rx_s *cqe; + struct otx2_pool *pool; int processed_cqe = 0; - u64 iova, pa; + u16 pool_id; 
+ u64 iova; if (pfvf->xdp_prog) xdp_rxq_info_unreg(&cq->xdp_rxq); @@ -1198,6 +1199,9 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe) return; + pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_RQ, qidx); + pool = &pfvf->qset.pool[pool_id]; + while (cq->pend_cqe) { cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq); processed_cqe++; @@ -1210,9 +1214,8 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) continue; } iova = cqe->sg.seg_addr - OTX2_HEAD_ROOM; - pa = otx2_iova_to_phys(pfvf->iommu_domain, iova); - otx2_dma_unmap_page(pfvf, iova, pfvf->rbsize, DMA_FROM_DEVICE); - put_page(virt_to_page(phys_to_virt(pa))); + + otx2_free_bufs(pfvf, pool, iova, pfvf->rbsize); } /* Free CQEs to HW */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h index 7ab6db9a986f..b5d689eeff80 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -118,6 +118,7 @@ struct otx2_cq_poll { struct otx2_pool { struct qmem *stack; struct qmem *fc_addr; + struct page_pool *page_pool; u16 rbsize; }; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c index d96ed29c1567..9d887bfc3108 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/qos_sq.c @@ -63,7 +63,7 @@ static int otx2_qos_sq_aura_pool_init(struct otx2_nic *pfvf, int qidx) /* Initialize pool context */ err = otx2_pool_init(pfvf, pool_id, stack_pages, - num_sqbs, hw->sqb_size); + num_sqbs, hw->sqb_size, AURA_NIX_SQ); if (err) goto aura_free; -- cgit v1.2.3 From 600761245952d7f70280add6ce02894f1528992b Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 22 May 2023 14:00:38 +0200 Subject: lan966x: Fix unloading/loading of the driver It was noticing that after a 
while when unloading/loading the driver and sending traffic through the switch, it would stop working. It would stop forwarding any traffic and the only way to get out of this was to do a power cycle of the board. The root cause seems to be that the switch core is initialized twice. Apparently initializing the switch core twice disturbs the pointers in the queue systems in the HW, so after a while it would stop sending the traffic. Unfortunately, it is not possible to use a reset of the switch here, because the reset line is connected to multiple devices like MDIO, SGPIO, FAN, etc. So then all the devices will get reset when the network driver is loaded. So the fix is to check if the core is initialized already and if that is the case don't initialize it again. Fixes: db8bcaad5393 ("net: lan966x: add the basic lan966x driver") Signed-off-by: Horatiu Vultur Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230522120038.3749026-1-horatiu.vultur@microchip.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microchip/lan966x/lan966x_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 2b6e046e1d10..ee2698698d71 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -1039,6 +1039,16 @@ static int lan966x_reset_switch(struct lan966x *lan966x) reset_control_reset(switch_reset); + /* Don't reinitialize the switch core, if it is already initialized. In + * case it is initialized twice, some pointers inside the queue system + * in HW will get corrupted and then after a while the queue system gets + * full and no traffic is passing through the switch. The issue is seen + * when loading and unloading the driver and sending traffic through the + * switch. 
+ */ + if (lan_rd(lan966x, SYS_RESET_CFG) & SYS_RESET_CFG_CORE_ENA) + return 0; + lan_wr(SYS_RESET_CFG_CORE_ENA_SET(0), lan966x, SYS_RESET_CFG); lan_wr(SYS_RAM_INIT_RAM_INIT_SET(1), lan966x, SYS_RAM_INIT); ret = readx_poll_timeout(lan966x_ram_init, lan966x, -- cgit v1.2.3 From 57910a47ffe993c2724a916b9e003d84ff0c0df7 Mon Sep 17 00:00:00 2001 From: Jaco Coetzee Date: Mon, 22 May 2023 16:13:35 +0200 Subject: nfp: add L4 RSS hashing on UDP traffic Add layer 4 RSS hashing on UDP traffic to allow for the utilization of multiple queues for multiple connections on the same IP address. Previously, since the introduction of the driver, RSS hashing was only performed on the source and destination IP addresses of UDP packets thereby limiting UDP traffic to a single queue for multiple connections on the same IP address. The transport layer is now included in RSS hashing for UDP traffic, which was not previously the case. The reason behind the previous limitation is unclear - either a historic limitation of the NFP device, or an oversight. 
Signed-off-by: Jaco Coetzee Acked-by: Simon Horman Signed-off-by: Louis Peens Link: https://lore.kernel.org/r/20230522141335.22536-1-louis.peens@corigine.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 62f0bf91d1e1..b7cce746b5c0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2418,6 +2418,8 @@ static void nfp_net_rss_init(struct nfp_net *nn) /* Enable IPv4/IPv6 TCP by default */ nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP | NFP_NET_CFG_RSS_IPV6_TCP | + NFP_NET_CFG_RSS_IPV4_UDP | + NFP_NET_CFG_RSS_IPV6_UDP | FIELD_PREP(NFP_NET_CFG_RSS_HFUNC, nn->rss_hfunc) | NFP_NET_CFG_RSS_MASK; } -- cgit v1.2.3 From d6c36cbc5e533f48bd89a7b5f339bd82b8b4378a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 22 May 2023 15:41:21 +0200 Subject: r8169: Use a raw_spinlock_t for the register locks. The driver's interrupt service routine is requested with the IRQF_NO_THREAD if MSI is available. This means that the routine is invoked in hardirq context even on PREEMPT_RT. The routine itself is relatively short and schedules a worker, performs register access and schedules NAPI. On PREEMPT_RT, scheduling NAPI from hardirq results in waking ksoftirqd for further processing so using NAPI threads with this driver is highly recommended since it NULL routes the threaded-IRQ efforts. Adding rtl_hw_aspm_clkreq_enable() to the ISR is problematic on PREEMPT_RT because the function uses spinlock_t locks which become sleeping locks on PREEMPT_RT. The locks are only used to protect register access and don't nest into other functions or locks. They are also not used for unbounded period of time. Therefore it looks okay to convert them to raw_spinlock_t. 
Convert the three locks which are used from the interrupt service routine to raw_spinlock_t. Fixes: e1ed3e4d9111 ("r8169: disable ASPM during NAPI poll") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Heiner Kallweit Link: https://lore.kernel.org/r/20230522134121.uxjax0F5@linutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 44 +++++++++++++++---------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index a7e376e7e689..4b19803a7dd0 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -616,10 +616,10 @@ struct rtl8169_private { struct work_struct work; } wk; - spinlock_t config25_lock; - spinlock_t mac_ocp_lock; + raw_spinlock_t config25_lock; + raw_spinlock_t mac_ocp_lock; - spinlock_t cfg9346_usage_lock; + raw_spinlock_t cfg9346_usage_lock; int cfg9346_usage_count; unsigned supports_gmii:1; @@ -671,20 +671,20 @@ static void rtl_lock_config_regs(struct rtl8169_private *tp) { unsigned long flags; - spin_lock_irqsave(&tp->cfg9346_usage_lock, flags); + raw_spin_lock_irqsave(&tp->cfg9346_usage_lock, flags); if (!--tp->cfg9346_usage_count) RTL_W8(tp, Cfg9346, Cfg9346_Lock); - spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags); + raw_spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags); } static void rtl_unlock_config_regs(struct rtl8169_private *tp) { unsigned long flags; - spin_lock_irqsave(&tp->cfg9346_usage_lock, flags); + raw_spin_lock_irqsave(&tp->cfg9346_usage_lock, flags); if (!tp->cfg9346_usage_count++) RTL_W8(tp, Cfg9346, Cfg9346_Unlock); - spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags); + raw_spin_unlock_irqrestore(&tp->cfg9346_usage_lock, flags); } static void rtl_pci_commit(struct rtl8169_private *tp) @@ -698,10 +698,10 @@ static void rtl_mod_config2(struct rtl8169_private *tp, u8 clear, u8 set) unsigned 
long flags; u8 val; - spin_lock_irqsave(&tp->config25_lock, flags); + raw_spin_lock_irqsave(&tp->config25_lock, flags); val = RTL_R8(tp, Config2); RTL_W8(tp, Config2, (val & ~clear) | set); - spin_unlock_irqrestore(&tp->config25_lock, flags); + raw_spin_unlock_irqrestore(&tp->config25_lock, flags); } static void rtl_mod_config5(struct rtl8169_private *tp, u8 clear, u8 set) @@ -709,10 +709,10 @@ static void rtl_mod_config5(struct rtl8169_private *tp, u8 clear, u8 set) unsigned long flags; u8 val; - spin_lock_irqsave(&tp->config25_lock, flags); + raw_spin_lock_irqsave(&tp->config25_lock, flags); val = RTL_R8(tp, Config5); RTL_W8(tp, Config5, (val & ~clear) | set); - spin_unlock_irqrestore(&tp->config25_lock, flags); + raw_spin_unlock_irqrestore(&tp->config25_lock, flags); } static bool rtl_is_8125(struct rtl8169_private *tp) @@ -899,9 +899,9 @@ static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) { unsigned long flags; - spin_lock_irqsave(&tp->mac_ocp_lock, flags); + raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags); __r8168_mac_ocp_write(tp, reg, data); - spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); + raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); } static u16 __r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg) @@ -919,9 +919,9 @@ static u16 r8168_mac_ocp_read(struct rtl8169_private *tp, u32 reg) unsigned long flags; u16 val; - spin_lock_irqsave(&tp->mac_ocp_lock, flags); + raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags); val = __r8168_mac_ocp_read(tp, reg); - spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); + raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); return val; } @@ -932,10 +932,10 @@ static void r8168_mac_ocp_modify(struct rtl8169_private *tp, u32 reg, u16 mask, unsigned long flags; u16 data; - spin_lock_irqsave(&tp->mac_ocp_lock, flags); + raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags); data = __r8168_mac_ocp_read(tp, reg); __r8168_mac_ocp_write(tp, reg, (data & ~mask) | set); - 
spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); + raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags); } /* Work around a hw issue with RTL8168g PHY, the quirk disables @@ -1420,14 +1420,14 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) r8168_mac_ocp_modify(tp, 0xc0b6, BIT(0), 0); } - spin_lock_irqsave(&tp->config25_lock, flags); + raw_spin_lock_irqsave(&tp->config25_lock, flags); for (i = 0; i < tmp; i++) { options = RTL_R8(tp, cfg[i].reg) & ~cfg[i].mask; if (wolopts & cfg[i].opt) options |= cfg[i].mask; RTL_W8(tp, cfg[i].reg, options); } - spin_unlock_irqrestore(&tp->config25_lock, flags); + raw_spin_unlock_irqrestore(&tp->config25_lock, flags); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06: @@ -5179,9 +5179,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->eee_adv = -1; tp->ocp_base = OCP_STD_PHY_BASE; - spin_lock_init(&tp->cfg9346_usage_lock); - spin_lock_init(&tp->config25_lock); - spin_lock_init(&tp->mac_ocp_lock); + raw_spin_lock_init(&tp->cfg9346_usage_lock); + raw_spin_lock_init(&tp->config25_lock); + raw_spin_lock_init(&tp->mac_ocp_lock); dev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev, struct pcpu_sw_netstats); -- cgit v1.2.3 From a695641c8eaac268ad7e373c7e33c00b88b2bcbf Mon Sep 17 00:00:00 2001 From: Coco Li Date: Mon, 22 May 2023 13:15:52 -0700 Subject: gve: Support IPv6 Big TCP on DQ Add support for using IPv6 Big TCP on DQ which can handle large TSO/GRO packets. See https://lwn.net/Articles/895398/. This can improve the throughput and CPU usage. Perf test result: ip -d link show $DEV gso_max_size 185000 gso_max_segs 65535 tso_max_size 262143 tso_max_segs 65535 gro_max_size 185000 For performance, tested with neper using 9k MTU on hardware that supports 200Gb/s line rate. In single streams when line rate is not saturated, we expect throughput improvements. When the networking is performing at line rate, we expect cpu usage improvements. 
Tcp_stream (unidirectional stream test, T=thread, F=flow): skb=180kb, T=1, F=1, no zerocopy: throughput average=64576.88 Mb/s, sender stime=8.3, receiver stime=10.68 skb=64kb, T=1, F=1, no zerocopy: throughput average=64862.54 Mb/s, sender stime=9.96, receiver stime=12.67 skb=180kb, T=1, F=1, yes zerocopy: throughput average=146604.97 Mb/s, sender stime=10.61, receiver stime=5.52 skb=64kb, T=1, F=1, yes zerocopy: throughput average=131357.78 Mb/s, sender stime=12.11, receiver stime=12.25 skb=180kb, T=20, F=100, no zerocopy: throughput average=182411.37 Mb/s, sender stime=41.62, receiver stime=79.4 skb=64kb, T=20, F=100, no zerocopy: throughput average=182892.02 Mb/s, sender stime=57.39, receiver stime=72.69 skb=180kb, T=20, F=100, yes zerocopy: throughput average=182337.65 Mb/s, sender stime=27.94, receiver stime=39.7 skb=64kb, T=20, F=100, yes zerocopy: throughput average=182144.20 Mb/s, sender stime=47.06, receiver stime=39.01 Signed-off-by: Ziwei Xiao Signed-off-by: Coco Li Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230522201552.3585421-1-ziweixiao@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_main.c | 5 +++++ drivers/net/ethernet/google/gve/gve_tx_dqo.c | 4 ++++ 2 files changed, 9 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index caa00c72aeeb..8fb70db63b8b 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -31,6 +31,7 @@ // Minimum amount of time between queue kicks in msec (10 seconds) #define MIN_TX_TIMEOUT_GAP (1000 * 10) +#define DQO_TX_MAX 0x3FFFF const char gve_version_str[] = GVE_VERSION; static const char gve_version_prefix[] = GVE_VERSION_PREFIX; @@ -2047,6 +2048,10 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) goto err; } + /* Big TCP is only supported on DQ*/ + if (!gve_is_gqi(priv)) + 
netif_set_tso_max_size(priv->dev, DQO_TX_MAX); + priv->num_registered_pages = 0; priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; /* gvnic has one Notification Block per MSI-x vector, except for the diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index b76143bfd594..3c09e66ba1ab 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -8,6 +8,7 @@ #include "gve_adminq.h" #include "gve_utils.h" #include "gve_dqo.h" +#include #include #include #include @@ -646,6 +647,9 @@ static int gve_try_tx_skb(struct gve_priv *priv, struct gve_tx_ring *tx, goto drop; } + if (unlikely(ipv6_hopopt_jumbo_remove(skb))) + goto drop; + num_buffer_descs = gve_num_buffer_descs_needed(skb); } else { num_buffer_descs = gve_num_buffer_descs_needed(skb); -- cgit v1.2.3 From 04910d8cbfed65dad21c31723c6c1a8d9f990fb6 Mon Sep 17 00:00:00 2001 From: Arınç ÜNAL Date: Mon, 22 May 2023 13:57:43 +0300 Subject: net: ethernet: mtk_eth_soc: fix QoS on DSA MAC on non MTK_NETSYS_V2 SoCs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit c6d96df9fa2c ("net: ethernet: mtk_eth_soc: drop generic vlan rx offload, only use DSA untagging") makes VLAN RX offloading to be only used on the SoCs without the MTK_NETSYS_V2 ability (which are not just MT7621 and MT7622). The commit disables the proper handling of special tagged (DSA) frames, added with commit 87e3df4961f4 ("net-next: ethernet: mediatek: add CDM able to recognize the tag for DSA"), for non MTK_NETSYS_V2 SoCs when it finds a MAC that does not use DSA. So if the other MAC uses DSA, the CDMQ component transmits DSA tagged frames to the CPU improperly. This issue can be observed on frames with TCP, for example, a TCP speed test using iperf3 won't work. The commit disables the proper handling of special tagged (DSA) frames because it assumes that these SoCs don't use more than one MAC, which is wrong. 
Although I made Frank address this false assumption on the patch log when they sent the patch on behalf of Felix, the code still made changes with this assumption. Therefore, the proper handling of special tagged (DSA) frames must be kept enabled in all circumstances as it doesn't affect non DSA tagged frames. Hardware DSA untagging, introduced with the commit 2d7605a72906 ("net: ethernet: mtk_eth_soc: enable hardware DSA untagging"), and VLAN RX offloading are operations on the two CDM components of the frame engine, CDMP and CDMQ, which connect to Packet DMA (PDMA) and QoS DMA (QDMA) and are between the MACs and the CPU. These operations apply to all MACs of the SoC so if one MAC uses DSA and the other doesn't, the hardware DSA untagging operation will cause the CDMP component to transmit non DSA tagged frames to the CPU improperly. Since the VLAN RX offloading feature configuration was dropped, VLAN RX offloading can only be used along with hardware DSA untagging. So, for the case above, we need to disable both features and leave it to the CPU, therefore software, to untag the DSA and VLAN tags. So the correct way to handle this is: For all SoCs: Enable the proper handling of special tagged (DSA) frames (MTK_CDMQ_IG_CTRL). For non MTK_NETSYS_V2 SoCs: Enable hardware DSA untagging (MTK_CDMP_IG_CTRL). Enable VLAN RX offloading (MTK_CDMP_EG_CTRL). When a non MTK_NETSYS_V2 SoC MAC does not use DSA: Disable hardware DSA untagging (MTK_CDMP_IG_CTRL). Disable VLAN RX offloading (MTK_CDMP_EG_CTRL). Fixes: c6d96df9fa2c ("net: ethernet: mtk_eth_soc: drop generic vlan rx offload, only use DSA untagging") Signed-off-by: Arınç ÜNAL Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index a75fd072082c..834c644b67db 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -3269,18 +3269,14 @@ static int mtk_open(struct net_device *dev) eth->dsa_meta[i] = md_dst; } } else { - /* Hardware special tag parsing needs to be disabled if at least - * one MAC does not use DSA. + /* Hardware DSA untagging and VLAN RX offloading need to be + * disabled if at least one MAC does not use DSA. */ u32 val = mtk_r32(eth, MTK_CDMP_IG_CTRL); val &= ~MTK_CDMP_STAG_EN; mtk_w32(eth, val, MTK_CDMP_IG_CTRL); - val = mtk_r32(eth, MTK_CDMQ_IG_CTRL); - val &= ~MTK_CDMQ_STAG_EN; - mtk_w32(eth, val, MTK_CDMQ_IG_CTRL); - mtk_w32(eth, 0, MTK_CDMP_EG_CTRL); } -- cgit v1.2.3 From c496daeb863093a046e0bb8db7265bf45d91775a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 23 May 2023 14:37:59 +0200 Subject: devlink: remove duplicate port notification The notification about the created port is sent from the devl_port_register() function called from ops->port_new(). No need to send it again here, so remove the call and the helper function. Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx5/core/sf/devlink.c | 9 ++--- drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h | 3 +- include/net/devlink.h | 4 +- net/devlink/leftover.c | 45 +--------------------- 4 files changed, 6 insertions(+), 55 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 7d955a4d9f14..de15b9c85e1b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -282,8 +282,7 @@ out: static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, const struct devlink_port_new_attrs *new_attr, - struct netlink_ext_ack *extack, - unsigned int *new_port_index) + struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5_sf *sf; @@ -297,7 +296,6 @@ static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, new_attr->controller, new_attr->sfnum); if (err) goto esw_err; - *new_port_index = sf->port_index; trace_mlx5_sf_add(dev, sf->port_index, sf->controller, sf->hw_fn_id, new_attr->sfnum); return 0; @@ -338,8 +336,7 @@ mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_ int mlx5_devlink_sf_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *new_attr, - struct netlink_ext_ack *extack, - unsigned int *new_port_index) + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_sf_table *table; @@ -355,7 +352,7 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink, "Port add is only supported in eswitch switchdev mode or SF ports are disabled."); return -EOPNOTSUPP; } - err = mlx5_sf_add(dev, table, new_attr, extack, new_port_index); + err = mlx5_sf_add(dev, table, new_attr, extack); mlx5_sf_table_put(table); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h 
b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h index 3a480e06ecc0..1f7d8cbd72e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -20,8 +20,7 @@ void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev); int mlx5_devlink_sf_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *add_attr, - struct netlink_ext_ack *extack, - unsigned int *new_port_index); + struct netlink_ext_ack *extack); int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, struct netlink_ext_ack *extack); int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port, diff --git a/include/net/devlink.h b/include/net/devlink.h index 6a942e70e451..ccea6e079777 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1500,7 +1500,6 @@ struct devlink_ops { * @devlink: Devlink instance * @attrs: attributes of the new port * @extack: extack for reporting error messages - * @new_port_index: index of the new port * * Devlink core will call this device driver function upon user request * to create a new port function of a specified flavor and optional @@ -1515,8 +1514,7 @@ struct devlink_ops { */ int (*port_new)(struct devlink *devlink, const struct devlink_port_new_attrs *attrs, - struct netlink_ext_ack *extack, - unsigned int *new_port_index); + struct netlink_ext_ack *extack); /** * port_del() - Delete a port function * @devlink: Devlink instance diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c index cd0254968076..cb60e42b2761 100644 --- a/net/devlink/leftover.c +++ b/net/devlink/leftover.c @@ -1354,45 +1354,12 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, return devlink->ops->port_unsplit(devlink, devlink_port, info->extack); } -static int devlink_port_new_notify(struct devlink *devlink, - unsigned int port_index, - struct genl_info *info) -{ - struct devlink_port *devlink_port; - struct sk_buff *msg; - int err; - - msg = 
nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; - - lockdep_assert_held(&devlink->lock); - devlink_port = devlink_port_get_by_index(devlink, port_index); - if (!devlink_port) { - err = -ENODEV; - goto out; - } - - err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW, - info->snd_portid, info->snd_seq, 0, NULL); - if (err) - goto out; - - return genlmsg_reply(msg, info); - -out: - nlmsg_free(msg); - return err; -} - static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct genl_info *info) { struct netlink_ext_ack *extack = info->extack; struct devlink_port_new_attrs new_attrs = {}; struct devlink *devlink = info->user_ptr[0]; - unsigned int new_port_index; - int err; if (!devlink->ops->port_new || !devlink->ops->port_del) return -EOPNOTSUPP; @@ -1423,17 +1390,7 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, new_attrs.sfnum_valid = true; } - err = devlink->ops->port_new(devlink, &new_attrs, extack, - &new_port_index); - if (err) - return err; - - err = devlink_port_new_notify(devlink, new_port_index, info); - if (err && err != -ENODEV) { - /* Fail to send the response; destroy newly created port. */ - devlink->ops->port_del(devlink, new_port_index, extack); - } - return err; + return devlink->ops->port_new(devlink, &new_attrs, extack); } static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, -- cgit v1.2.3 From 9277649c66fe7cb0e2f8adb09621556bcfb052c7 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 23 May 2023 14:38:01 +0200 Subject: devlink: pass devlink_port pointer to ops->port_del() instead of index Historically there was a reason why port_del() along with for example port_split() did get port_index instead of the devlink_port pointer. With the locking changes that were done which ensured devlink instance mutex is held for every command, the port ops could get devlink_port pointer directly. 
Change the forgotten port_del() op to match the others and pass the devlink_port pointer instead of port_index. Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c | 5 +++-- drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h | 3 ++- include/net/devlink.h | 4 ++-- net/devlink/leftover.c | 11 +++-------- 4 files changed, 10 insertions(+), 13 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index de15b9c85e1b..c7d4691cb65a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -376,7 +376,8 @@ static void mlx5_sf_dealloc(struct mlx5_sf_table *table, struct mlx5_sf *sf) } } -int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, +int mlx5_devlink_sf_port_del(struct devlink *devlink, + struct devlink_port *dl_port, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); @@ -391,7 +392,7 @@ int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, "Port del is only supported in eswitch switchdev mode or SF ports are disabled."); return -EOPNOTSUPP; } - sf = mlx5_sf_lookup_by_index(table, port_index); + sf = mlx5_sf_lookup_by_index(table, dl_port->index); if (!sf) { err = -ENODEV; goto sf_err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h index 1f7d8cbd72e8..c5430b8dcdf6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -21,7 +21,8 @@ void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev); int mlx5_devlink_sf_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *add_attr, struct netlink_ext_ack *extack); -int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, +int 
mlx5_devlink_sf_port_del(struct devlink *devlink, + struct devlink_port *dl_port, struct netlink_ext_ack *extack); int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port, enum devlink_port_fn_state *state, diff --git a/include/net/devlink.h b/include/net/devlink.h index 24a48f3d4c35..1bd56c8d6f3c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1516,7 +1516,7 @@ struct devlink_ops { /** * port_del() - Delete a port function * @devlink: Devlink instance - * @port_index: port function index to delete + * @port: The devlink port * @extack: extack for reporting error messages * * Devlink core will call this device driver function upon user request @@ -1528,7 +1528,7 @@ struct devlink_ops { * * Return: 0 on success, negative value otherwise. */ - int (*port_del)(struct devlink *devlink, unsigned int port_index, + int (*port_del)(struct devlink *devlink, struct devlink_port *port, struct netlink_ext_ack *extack); /** * port_fn_state_get() - Get the state of a port function diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c index cb60e42b2761..0410137a4a31 100644 --- a/net/devlink/leftover.c +++ b/net/devlink/leftover.c @@ -1396,20 +1396,14 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct genl_info *info) { + struct devlink_port *devlink_port = info->user_ptr[1]; struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; - unsigned int port_index; if (!devlink->ops->port_del) return -EOPNOTSUPP; - if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_INDEX)) { - NL_SET_ERR_MSG(extack, "Port index is not specified"); - return -EINVAL; - } - port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]); - - return devlink->ops->port_del(devlink, port_index, extack); + return devlink->ops->port_del(devlink, devlink_port, extack); } static int @@ -6341,6 +6335,7 @@ const struct genl_small_ops devlink_nl_ops[56] = { .cmd = 
DEVLINK_CMD_PORT_DEL, .doit = devlink_nl_cmd_port_del_doit, .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_PORT, }, { .cmd = DEVLINK_CMD_LINECARD_GET, -- cgit v1.2.3 From e2ab5aa11f191b54514f063a5b5c29f3559f4ab7 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Wed, 1 Mar 2023 10:50:53 +0200 Subject: net/mlx5e: Extract remaining tunnel encap code to dedicated file Move set_encap_dests() and clean_encap_dests() to the tunnel encap dedicated file. And rename them to mlx5e_tc_tun_encap_dests_set() and mlx5e_tc_tun_encap_dests_unset(). No functional change in this patch. It is needed in the next patch. Signed-off-by: Chris Mi Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 83 ++++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en/tc_tun_encap.h | 9 +++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 89 +--------------------- 3 files changed, 94 insertions(+), 87 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 6a052c6cfc15..d516227b8fe8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -1016,6 +1016,89 @@ out_err: return err; } +int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack, + bool *vf_tun) +{ + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5_esw_flow_attr *esw_attr; + struct net_device *encap_dev = NULL; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_priv *out_priv; + int out_index; + int err = 0; + + if (!mlx5e_is_eswitch_flow(flow)) + return 0; + + parse_attr = attr->parse_attr; + esw_attr = attr->esw_attr; + *vf_tun = false; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + struct net_device *out_dev; + int mirred_ifindex; + + if 
(!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + mirred_ifindex = parse_attr->mirred_ifindex[out_index]; + out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex); + if (!out_dev) { + NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found"); + err = -ENODEV; + goto out; + } + err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index, + extack, &encap_dev); + dev_put(out_dev); + if (err) + goto out; + + if (esw_attr->dests[out_index].flags & + MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && + !esw_attr->dest_int_port) + *vf_tun = true; + + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + esw_attr->dests[out_index].rep = rpriv->rep; + esw_attr->dests[out_index].mdev = out_priv->mdev; + } + + if (*vf_tun && esw_attr->out_count > 1) { + NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported"); + err = -EOPNOTSUPP; + goto out; + } + +out: + return err; +} + +void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) +{ + struct mlx5_esw_flow_attr *esw_attr; + int out_index; + + if (!mlx5e_is_eswitch_flow(flow)) + return; + + esw_attr = attr->esw_attr; + + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { + if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) + continue; + + mlx5e_detach_encap(flow->priv, flow, attr, out_index); + kfree(attr->parse_attr->tun_info[out_index]); + } +} + static int cmp_route_info(struct mlx5e_route_key *a, struct mlx5e_route_key *b) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h index 8ad273dde40e..5d7d67687cbc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h @@ -30,6 +30,15 @@ int mlx5e_attach_decap_route(struct mlx5e_priv *priv, void mlx5e_detach_decap_route(struct mlx5e_priv *priv, struct 
mlx5e_tc_flow *flow); +int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + struct netlink_ext_ack *extack, + bool *vf_tun); +void mlx5e_tc_tun_encap_dests_unset(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); + struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info); int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e95414ef1f04..8935156f8f4e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1699,91 +1699,6 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); } -static int -set_encap_dests(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - struct mlx5_flow_attr *attr, - struct netlink_ext_ack *extack, - bool *vf_tun) -{ - struct mlx5e_tc_flow_parse_attr *parse_attr; - struct mlx5_esw_flow_attr *esw_attr; - struct net_device *encap_dev = NULL; - struct mlx5e_rep_priv *rpriv; - struct mlx5e_priv *out_priv; - int out_index; - int err = 0; - - if (!mlx5e_is_eswitch_flow(flow)) - return 0; - - parse_attr = attr->parse_attr; - esw_attr = attr->esw_attr; - *vf_tun = false; - - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { - struct net_device *out_dev; - int mirred_ifindex; - - if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) - continue; - - mirred_ifindex = parse_attr->mirred_ifindex[out_index]; - out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex); - if (!out_dev) { - NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found"); - err = -ENODEV; - goto out; - } - err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index, - extack, &encap_dev); - dev_put(out_dev); - if (err) - goto out; - - if 
(esw_attr->dests[out_index].flags & - MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE && - !esw_attr->dest_int_port) - *vf_tun = true; - - out_priv = netdev_priv(encap_dev); - rpriv = out_priv->ppriv; - esw_attr->dests[out_index].rep = rpriv->rep; - esw_attr->dests[out_index].mdev = out_priv->mdev; - } - - if (*vf_tun && esw_attr->out_count > 1) { - NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported"); - err = -EOPNOTSUPP; - goto out; - } - -out: - return err; -} - -static void -clean_encap_dests(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - struct mlx5_flow_attr *attr) -{ - struct mlx5_esw_flow_attr *esw_attr; - int out_index; - - if (!mlx5e_is_eswitch_flow(flow)) - return; - - esw_attr = attr->esw_attr; - - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { - if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)) - continue; - - mlx5e_detach_encap(priv, flow, attr, out_index); - kfree(attr->parse_attr->tun_info[out_index]); - } -} - static int verify_attr_actions(u32 actions, struct netlink_ext_ack *extack) { @@ -1820,7 +1735,7 @@ post_process_attr(struct mlx5e_tc_flow *flow, if (err) goto err_out; - err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun); + err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun); if (err) goto err_out; @@ -4324,7 +4239,7 @@ mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *a if (attr->post_act_handle) mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle); - clean_encap_dests(flow->priv, flow, attr); + mlx5e_tc_tun_encap_dests_unset(flow->priv, flow, attr); if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(counter_dev, attr->counter); -- cgit v1.2.3 From 37c3b9fa7ccf5caad6d87ba4d42bf00be46be1cf Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Tue, 21 Feb 2023 04:41:41 +0200 Subject: net/mlx5e: Prevent encap offload when neigh update is running The cited commit adds a completion to 
remove dependency on rtnl lock. But it causes a deadlock for multiple encapsulations: crash> bt ffff8aece8a64000 PID: 1514557 TASK: ffff8aece8a64000 CPU: 3 COMMAND: "tc" #0 [ffffa6d14183f368] __schedule at ffffffffb8ba7f45 #1 [ffffa6d14183f3f8] schedule at ffffffffb8ba8418 #2 [ffffa6d14183f418] schedule_preempt_disabled at ffffffffb8ba8898 #3 [ffffa6d14183f428] __mutex_lock at ffffffffb8baa7f8 #4 [ffffa6d14183f4d0] mutex_lock_nested at ffffffffb8baabeb #5 [ffffa6d14183f4e0] mlx5e_attach_encap at ffffffffc0f48c17 [mlx5_core] #6 [ffffa6d14183f628] mlx5e_tc_add_fdb_flow at ffffffffc0f39680 [mlx5_core] #7 [ffffa6d14183f688] __mlx5e_add_fdb_flow at ffffffffc0f3b636 [mlx5_core] #8 [ffffa6d14183f6f0] mlx5e_tc_add_flow at ffffffffc0f3bcdf [mlx5_core] #9 [ffffa6d14183f728] mlx5e_configure_flower at ffffffffc0f3c1d1 [mlx5_core] #10 [ffffa6d14183f790] mlx5e_rep_setup_tc_cls_flower at ffffffffc0f3d529 [mlx5_core] #11 [ffffa6d14183f7a0] mlx5e_rep_setup_tc_cb at ffffffffc0f3d714 [mlx5_core] #12 [ffffa6d14183f7b0] tc_setup_cb_add at ffffffffb8931bb8 #13 [ffffa6d14183f810] fl_hw_replace_filter at ffffffffc0dae901 [cls_flower] #14 [ffffa6d14183f8d8] fl_change at ffffffffc0db5c57 [cls_flower] #15 [ffffa6d14183f970] tc_new_tfilter at ffffffffb8936047 #16 [ffffa6d14183fac8] rtnetlink_rcv_msg at ffffffffb88c7c31 #17 [ffffa6d14183fb50] netlink_rcv_skb at ffffffffb8942853 #18 [ffffa6d14183fbc0] rtnetlink_rcv at ffffffffb88c1835 #19 [ffffa6d14183fbd0] netlink_unicast at ffffffffb8941f27 #20 [ffffa6d14183fc18] netlink_sendmsg at ffffffffb8942245 #21 [ffffa6d14183fc98] sock_sendmsg at ffffffffb887d482 #22 [ffffa6d14183fcb8] ____sys_sendmsg at ffffffffb887d81a #23 [ffffa6d14183fd38] ___sys_sendmsg at ffffffffb88806e2 #24 [ffffa6d14183fe90] __sys_sendmsg at ffffffffb88807a2 #25 [ffffa6d14183ff28] __x64_sys_sendmsg at ffffffffb888080f #26 [ffffa6d14183ff38] do_syscall_64 at ffffffffb8b9b6a8 #27 [ffffa6d14183ff50] entry_SYSCALL_64_after_hwframe at ffffffffb8c0007c crash> bt 0xffff8aeb07544000 
PID: 1110766 TASK: ffff8aeb07544000 CPU: 0 COMMAND: "kworker/u20:9" #0 [ffffa6d14e6b7bd8] __schedule at ffffffffb8ba7f45 #1 [ffffa6d14e6b7c68] schedule at ffffffffb8ba8418 #2 [ffffa6d14e6b7c88] schedule_timeout at ffffffffb8baef88 #3 [ffffa6d14e6b7d10] wait_for_completion at ffffffffb8ba968b #4 [ffffa6d14e6b7d60] mlx5e_take_all_encap_flows at ffffffffc0f47ec4 [mlx5_core] #5 [ffffa6d14e6b7da0] mlx5e_rep_update_flows at ffffffffc0f3e734 [mlx5_core] #6 [ffffa6d14e6b7df8] mlx5e_rep_neigh_update at ffffffffc0f400bb [mlx5_core] #7 [ffffa6d14e6b7e50] process_one_work at ffffffffb80acc9c #8 [ffffa6d14e6b7ed0] worker_thread at ffffffffb80ad012 #9 [ffffa6d14e6b7f10] kthread at ffffffffb80b615d #10 [ffffa6d14e6b7f50] ret_from_fork at ffffffffb8001b2f After the first encap is attached, flow will be added to encap entry's flows list. If neigh update is running at this time, the following encaps of the flow can't hold the encap_tbl_lock and sleep. If neigh update thread is waiting for that flow's init_done, deadlock happens. Fix it by holding lock outside of the for loop. If neigh update is running, prevent encap flows from offloading. Since the lock is held outside of the for loop, concurrent creation of encap entries is not allowed. So remove unnecessary wait_for_completion call for res_ready. 
Fixes: 95435ad7999b ("net/mlx5e: Only access fully initialized flows in neigh update") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 37 ++++++++++++---------- 1 file changed, 20 insertions(+), 17 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index d516227b8fe8..f0c3464f037f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -492,6 +492,19 @@ void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) mlx5e_encap_dealloc(priv, e); } +static void mlx5e_encap_put_locked(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + lockdep_assert_held(&esw->offloads.encap_tbl_lock); + + if (!refcount_dec_and_test(&e->refcnt)) + return; + list_del(&e->route_list); + hash_del_rcu(&e->encap_hlist); + mlx5e_encap_dealloc(priv, e); +} + static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -816,6 +829,8 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, uintptr_t hash_key; int err = 0; + lockdep_assert_held(&esw->offloads.encap_tbl_lock); + parse_attr = attr->parse_attr; tun_info = parse_attr->tun_info[out_index]; mpls_info = &parse_attr->mpls_info[out_index]; @@ -829,7 +844,6 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, hash_key = hash_encap_info(&key); - mutex_lock(&esw->offloads.encap_tbl_lock); e = mlx5e_encap_get(priv, &key, hash_key); /* must verify if encap is valid or not */ @@ -840,15 +854,6 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, goto out_err; } - mutex_unlock(&esw->offloads.encap_tbl_lock); - wait_for_completion(&e->res_ready); - - /* Protect against concurrent neigh 
update. */ - mutex_lock(&esw->offloads.encap_tbl_lock); - if (e->compl_result < 0) { - err = -EREMOTEIO; - goto out_err; - } goto attach_flow; } @@ -877,15 +882,12 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv, INIT_LIST_HEAD(&e->flows); hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); tbl_time_before = mlx5e_route_tbl_get_last_update(priv); - mutex_unlock(&esw->offloads.encap_tbl_lock); if (family == AF_INET) err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); else if (family == AF_INET6) err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e); - /* Protect against concurrent neigh update. */ - mutex_lock(&esw->offloads.encap_tbl_lock); complete_all(&e->res_ready); if (err) { e->compl_result = err; @@ -920,18 +922,15 @@ attach_flow: } else { flow_flag_set(flow, SLOW); } - mutex_unlock(&esw->offloads.encap_tbl_lock); return err; out_err: - mutex_unlock(&esw->offloads.encap_tbl_lock); if (e) - mlx5e_encap_put(priv, e); + mlx5e_encap_put_locked(priv, e); return err; out_err_init: - mutex_unlock(&esw->offloads.encap_tbl_lock); kfree(tun_info); kfree(e); return err; @@ -1027,6 +1026,7 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, struct net_device *encap_dev = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; + struct mlx5_eswitch *esw; int out_index; int err = 0; @@ -1037,6 +1037,8 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, esw_attr = attr->esw_attr; *vf_tun = false; + esw = priv->mdev->priv.eswitch; + mutex_lock(&esw->offloads.encap_tbl_lock); for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { struct net_device *out_dev; int mirred_ifindex; @@ -1075,6 +1077,7 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv, } out: + mutex_unlock(&esw->offloads.encap_tbl_lock); return err; } -- cgit v1.2.3 From 81fe2be062915e2a2fdc494c3cd90e946e946c25 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Mon, 1 May 2023 17:31:40 +0300 Subject: net/mlx5e: Consider internal 
buffers size in port buffer calculations Currently, when a user triggers a change in port buffer headroom (buffers 0-7), the driver checks that the requested headroom does not exceed the total port buffer size. However, this check does not take into account the internal buffers (buffers 8-9), which are also part of the total port buffer. This can result in treating invalid port buffer change requests as valid, causing unintended changes to the shared buffer. To address this, include the internal buffers size in the calculation of available port buffer space which ensures that port buffer requests do not exceed the correct limit. Furthermore, remove internal buffers (8-9) size from the total_size calculation as these buffers are reserved for internal use and are not exposed to the user. While at it, add verbosity to the debug prints in mlx5e_port_query_buffer() function to ease future debugging. Fixes: ecdf2dadee8e ("net/mlx5e: Receive buffer support for DCBX") Signed-off-by: Maher Sanalla Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/port_buffer.c | 42 ++++++++++++++-------- .../ethernet/mellanox/mlx5/core/en/port_buffer.h | 8 +++-- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 7 ++-- 3 files changed, 36 insertions(+), 21 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 7ac1ad9c46de..0d78527451bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -51,7 +51,7 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, if (err) goto out; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]); port_buffer->buffer[i].lossy = MLX5_GET(bufferx_reg, buffer, lossy); @@ -73,14 +73,24 @@ int mlx5e_port_query_buffer(struct 
mlx5e_priv *priv, port_buffer->buffer[i].lossy); } - port_buffer->headroom_size = total_used; + port_buffer->internal_buffers_size = 0; + for (i = MLX5E_MAX_NETWORK_BUFFER; i < MLX5E_TOTAL_BUFFERS; i++) { + buffer = MLX5_ADDR_OF(pbmc_reg, out, buffer[i]); + port_buffer->internal_buffers_size += + MLX5_GET(bufferx_reg, buffer, size) * port_buff_cell_sz; + } + port_buffer->port_buffer_size = MLX5_GET(pbmc_reg, out, port_buffer_size) * port_buff_cell_sz; - port_buffer->spare_buffer_size = - port_buffer->port_buffer_size - total_used; - - mlx5e_dbg(HW, priv, "total buffer size=%d, spare buffer size=%d\n", - port_buffer->port_buffer_size, + port_buffer->headroom_size = total_used; + port_buffer->spare_buffer_size = port_buffer->port_buffer_size - + port_buffer->internal_buffers_size - + port_buffer->headroom_size; + + mlx5e_dbg(HW, priv, + "total buffer size=%u, headroom buffer size=%u, internal buffers size=%u, spare buffer size=%u\n", + port_buffer->port_buffer_size, port_buffer->headroom_size, + port_buffer->internal_buffers_size, port_buffer->spare_buffer_size); out: kfree(out); @@ -206,11 +216,11 @@ static int port_update_pool_cfg(struct mlx5_core_dev *mdev, if (!MLX5_CAP_GEN(mdev, sbcam_reg)) return 0; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) lossless_buff_count += ((port_buffer->buffer[i].size) && (!(port_buffer->buffer[i].lossy))); - for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { p = select_sbcm_params(&port_buffer->buffer[i], lossless_buff_count); err = mlx5e_port_set_sbcm(mdev, 0, i, MLX5_INGRESS_DIR, @@ -293,7 +303,7 @@ static int port_set_buffer(struct mlx5e_priv *priv, if (err) goto out; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { void *buffer = MLX5_ADDR_OF(pbmc_reg, in, buffer[i]); u64 size = port_buffer->buffer[i].size; u64 xoff = port_buffer->buffer[i].xoff; @@ -351,7 +361,7 @@ static int 
update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, { int i; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { if (port_buffer->buffer[i].lossy) { port_buffer->buffer[i].xoff = 0; port_buffer->buffer[i].xon = 0; @@ -408,7 +418,7 @@ static int update_buffer_lossy(struct mlx5_core_dev *mdev, int err; int i; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { prio_count = 0; lossy_count = 0; @@ -515,7 +525,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) { update_prio2buffer = true; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) mlx5e_dbg(HW, priv, "%s: requested to map prio[%d] to buffer %d\n", __func__, i, prio2buffer[i]); @@ -530,7 +540,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, } if (change & MLX5E_PORT_BUFFER_SIZE) { - for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { mlx5e_dbg(HW, priv, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]); if (!port_buffer.buffer[i].lossy && !buffer_size[i]) { mlx5e_dbg(HW, priv, "%s: lossless buffer[%d] size cannot be zero\n", @@ -544,7 +554,9 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, mlx5e_dbg(HW, priv, "%s: total buffer requested=%d\n", __func__, total_used); - if (total_used > port_buffer.port_buffer_size) + if (total_used > port_buffer.headroom_size && + (total_used - port_buffer.headroom_size) > + port_buffer.spare_buffer_size) return -EINVAL; update_buffer = true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h index a6ef118de758..f4a19ffbb641 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h @@ -35,7 +35,8 @@ #include "en.h" #include "port.h" -#define MLX5E_MAX_BUFFER 8 +#define 
MLX5E_MAX_NETWORK_BUFFER 8 +#define MLX5E_TOTAL_BUFFERS 10 #define MLX5E_DEFAULT_CABLE_LEN 7 /* 7 meters */ #define MLX5_BUFFER_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, pcam_reg) && \ @@ -60,8 +61,9 @@ struct mlx5e_bufferx_reg { struct mlx5e_port_buffer { u32 port_buffer_size; u32 spare_buffer_size; - u32 headroom_size; - struct mlx5e_bufferx_reg buffer[MLX5E_MAX_BUFFER]; + u32 headroom_size; /* Buffers 0-7 */ + u32 internal_buffers_size; /* Buffers 8-9 */ + struct mlx5e_bufferx_reg buffer[MLX5E_MAX_NETWORK_BUFFER]; }; int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 89de92d06483..ebee52a8361a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -926,9 +926,10 @@ static int mlx5e_dcbnl_getbuffer(struct net_device *dev, if (err) return err; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) dcb_buffer->buffer_size[i] = port_buffer.buffer[i].size; - dcb_buffer->total_size = port_buffer.port_buffer_size; + dcb_buffer->total_size = port_buffer.port_buffer_size - + port_buffer.internal_buffers_size; return 0; } @@ -970,7 +971,7 @@ static int mlx5e_dcbnl_setbuffer(struct net_device *dev, if (err) return err; - for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { if (port_buffer.buffer[i].size != dcb_buffer->buffer_size[i]) { changed |= MLX5E_PORT_BUFFER_SIZE; buffer_size = dcb_buffer->buffer_size; -- cgit v1.2.3 From 623efc4cbd6115db36716e31037cb6d1f3ce6754 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Tue, 9 May 2023 17:56:01 +0300 Subject: net/mlx5e: Do not update SBCM when prio2buffer command is invalid The shared buffer pools configuration which are stored in the SBCM register are updated when the user changes the prio2buffer mapping. 
However, in case the user's desired prio2buffer change is invalid, which can occur due to mapping a lossless priority to a buffer that is not large enough, the SBCM update should not be performed, as the user command has failed. Thus, perform the SBCM update only after xoff threshold calculation is performed and the user prio2buffer mapping is validated. Fixes: a440030d8946 ("net/mlx5e: Update shared buffer along with device buffer changes") Signed-off-by: Maher Sanalla Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 0d78527451bc..7e8e96cc5cd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -442,11 +442,11 @@ static int update_buffer_lossy(struct mlx5_core_dev *mdev, } if (changed) { - err = port_update_pool_cfg(mdev, port_buffer); + err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz); if (err) return err; - err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz); + err = port_update_pool_cfg(mdev, port_buffer); if (err) return err; -- cgit v1.2.3 From 824c8dc4a470040bf0e56ba716543839c2498d49 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 24 Apr 2023 12:31:59 +0300 Subject: net/mlx5: Drain health before unregistering devlink mlx5 health mechanism is using devlink APIs, which are using devlink notify APIs. After the cited patch, using devlink notify APIs after devlink is unregistered triggers a WARN_ON(). Hence, drain health WQ before devlink is unregistered.
Fixes: cf530217408e ("devlink: Notify users when objects are accessible") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a7eb65cd0bdd..2132a6510639 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1802,15 +1802,16 @@ static void remove_one(struct pci_dev *pdev) struct devlink *devlink = priv_to_devlink(dev); set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); - /* mlx5_drain_fw_reset() is using devlink APIs. Hence, we must drain - * fw_reset before unregistering the devlink. + /* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using + * devlink notify APIs. + * Hence, we must drain them before unregistering the devlink. */ mlx5_drain_fw_reset(dev); + mlx5_drain_health_wq(dev); devlink_unregister(devlink); mlx5_sriov_disable(pdev); mlx5_thermal_uninit(dev); mlx5_crdump_disable(dev); - mlx5_drain_health_wq(dev); mlx5_uninit_one(dev); mlx5_pci_close(dev); mlx5_mdev_uninit(dev); -- cgit v1.2.3 From b4646da0573fae9dfa2b8f1f10936cb6eedd7230 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 24 Apr 2023 12:46:06 +0300 Subject: net/mlx5: SF, Drain health before removing device There is no point in recovery during device removal. Also, if health work has started, we need to wait for it to avoid races and NULL pointer access. Hence, drain health WQ before removing device.
Fixes: 1958fc2f0712 ("net/mlx5: SF, Add auxiliary device driver") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index e2f26d0bc615..0692363cf80e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -63,6 +63,7 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev) struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); struct devlink *devlink = priv_to_devlink(sf_dev->mdev); + mlx5_drain_health_wq(sf_dev->mdev); devlink_unregister(devlink); mlx5_uninit_one(sf_dev->mdev); iounmap(sf_dev->mdev->iseg); -- cgit v1.2.3 From 341a80de2468f481b1f771683709b5649cbfe513 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sat, 29 Apr 2023 20:41:41 +0300 Subject: net/mlx5: fw_tracer, Fix event handling mlx5 driver needs to parse traces with event_id inside the range of first_string_trace and num_string_trace. However, mlx5 is parsing all events with event_id >= first_string_trace. Fix it by checking for the correct range. 
Fixes: c71ad41ccb0c ("net/mlx5: FW tracer, events handling") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index f40497823e65..7c0f2adbea00 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -490,7 +490,7 @@ static void poll_trace(struct mlx5_fw_tracer *tracer, (u64)timestamp_low; break; default: - if (tracer_event->event_id >= tracer->str_db.first_string_trace || + if (tracer_event->event_id >= tracer->str_db.first_string_trace && tracer_event->event_id <= tracer->str_db.first_string_trace + tracer->str_db.num_string_trace) { tracer_event->type = TRACER_EVENT_TYPE_STRING; -- cgit v1.2.3 From 1db1f21caebbb1b6e9b1e7657df613616be3fb49 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 13 Apr 2023 15:48:30 +0300 Subject: net/mlx5e: Use query_special_contexts cmd only once per mdev Don't query the firmware so many times (num rqs * num wqes * wqe frags) because it slows down linearly the interface creation time when the product is larger. Do it only once per mdev and store the result in mlx5e_param. Due to helper function being called from different files, move it to an appropriate location. Rename the function with a proper prefix and add a small cleanup. This fix applies only for legacy rq. 
Fixes: 1b1e4868836a ("net/mlx5e: Use query_special_contexts for mkeys") Signed-off-by: Dragos Tatulea Reviewed-by: Or Har-Toov Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 24 +++-------------------- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 21 ++++++++++++++++++++ include/linux/mlx5/driver.h | 1 + 4 files changed, 26 insertions(+), 21 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b8987a404d75..8e999f238194 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -327,6 +327,7 @@ struct mlx5e_params { unsigned int sw_mtu; int hard_mtu; bool ptp_rx; + __be32 terminate_lkey_be; }; static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2944691f06ad..0235adcbc609 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -727,26 +727,6 @@ static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq) mlx5e_rq_shampo_hd_free(rq); } -static __be32 mlx5e_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev) -{ - u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; - u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; - int res; - - if (!MLX5_CAP_GEN(dev, terminate_scatter_list_mkey)) - return MLX5_TERMINATE_SCATTER_LIST_LKEY; - - MLX5_SET(query_special_contexts_in, in, opcode, - MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); - res = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out); - if (res) - return MLX5_TERMINATE_SCATTER_LIST_LKEY; - - res = MLX5_GET(query_special_contexts_out, out, - terminate_scatter_list_mkey); - return cpu_to_be32(res); -} - static int mlx5e_alloc_rq(struct 
mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct mlx5e_rq_param *rqp, @@ -908,7 +888,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, /* check if num_frags is not a pow of two */ if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) { wqe->data[f].byte_count = 0; - wqe->data[f].lkey = mlx5e_get_terminate_scatter_list_mkey(mdev); + wqe->data[f].lkey = params->terminate_lkey_be; wqe->data[f].addr = 0; } } @@ -5007,6 +4987,8 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 /* RQ */ mlx5e_build_rq_params(mdev, params); + params->terminate_lkey_be = mlx5_core_get_terminate_scatter_list_mkey(mdev); + params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 9d735c343a3b..678f0be81375 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -32,6 +32,7 @@ #include #include +#include #include "mlx5_core.h" int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in, @@ -122,3 +123,23 @@ int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num) return mlx5_cmd_exec_in(dev, destroy_psv, in); } EXPORT_SYMBOL(mlx5_core_destroy_psv); + +__be32 mlx5_core_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; + u32 mkey; + + if (!MLX5_CAP_GEN(dev, terminate_scatter_list_mkey)) + return MLX5_TERMINATE_SCATTER_LIST_LKEY; + + MLX5_SET(query_special_contexts_in, in, opcode, + MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); + if (mlx5_cmd_exec_inout(dev, query_special_contexts, in, out)) + return MLX5_TERMINATE_SCATTER_LIST_LKEY; + + mkey = MLX5_GET(query_special_contexts_out, out, + terminate_scatter_list_mkey); + return cpu_to_be32(mkey); +} 
+EXPORT_SYMBOL(mlx5_core_get_terminate_scatter_list_mkey); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a4c4f737f9c1..94d2be5848ae 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1093,6 +1093,7 @@ void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, int npsvs, u32 *sig_index); int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); +__be32 mlx5_core_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev); void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *odp_caps); -- cgit v1.2.3 From 5d862ec631f3d3cc3b4f8cdb5b9fc5879663f1d3 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 22 May 2023 14:48:52 +0200 Subject: net/mlx5: Fix post parse infra to only parse every action once Caller of mlx5e_tc_act_post_parse() needs it to parse only the subset of actions starting after previous split and ending at the current action. However, that range is not provided as arguments and mlx5e_tc_act_post_parse() uses generic flow_action_for_each() that iterates over all flow actions. Not only this is redundant, it also causes a bug when mlx5e_tc_act->post_parse() callback is not idempotent since it will be called for every split. For example, ct action tc_act_post_parse_ct() callback obtains a reference to mlx5_ct_ft instance and calling it several times during parsing stage will cause reference counter imbalance. Fix the issue by providing a proper action range of the current split subset to mlx5e_tc_act_post_parse() and only calling mlx5e_tc_act->post_parse() for actions inside the subset range. 
Fixes: 8300f225268b ("net/mlx5e: Create new flow attr for multi table actions") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c | 7 ++++++- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 8 +++++--- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c index fc923a99b6a4..0380a04c3691 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c @@ -84,7 +84,7 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, int mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, - struct flow_action *flow_action, + struct flow_action *flow_action, int from, int to, struct mlx5_flow_attr *attr, enum mlx5_flow_namespace_type ns_type) { @@ -96,6 +96,11 @@ mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, priv = parse_state->flow->priv; flow_action_for_each(i, act, flow_action) { + if (i < from) + continue; + else if (i > to) + break; + tc_act = mlx5e_tc_act_get(act->id, ns_type); if (!tc_act || !tc_act->post_parse) continue; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h index 0e6e1872ac62..d6c12d0ea55b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -112,7 +112,7 @@ mlx5e_tc_act_init_parse_state(struct mlx5e_tc_act_parse_state *parse_state, int mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, - struct flow_action *flow_action, + struct flow_action *flow_action, int from, int to, struct mlx5_flow_attr *attr, enum mlx5_flow_namespace_type ns_type); diff 
--git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 8935156f8f4e..8a5a8703f0a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -3859,8 +3859,8 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5_flow_attr *prev_attr; struct flow_action_entry *act; struct mlx5e_tc_act *tc_act; + int err, i, i_split = 0; bool is_missable; - int err, i; ns_type = mlx5e_get_flow_namespace(flow); list_add(&attr->list, &flow->attrs); @@ -3901,7 +3901,8 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, i < flow_action->num_entries - 1)) { is_missable = tc_act->is_missable ? tc_act->is_missable(act) : false; - err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type); + err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr, + ns_type); if (err) goto out_free_post_acts; @@ -3911,6 +3912,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, goto out_free_post_acts; } + i_split = i + 1; list_add(&attr->list, &flow->attrs); } @@ -3925,7 +3927,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, } } - err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type); + err = mlx5e_tc_act_post_parse(parse_state, flow_action, i_split, i, attr, ns_type); if (err) goto out_free_post_acts; -- cgit v1.2.3 From bdf274750fca17b289404ef03453c4070725302c Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 13 Oct 2021 14:39:24 +0300 Subject: net/mlx5e: Don't attach netdev profile while handling internal error As part of switchdev mode disablement, driver changes port netdevice profile from uplink to nic. If this process is triggered by health recovery flow (PCI reset, for ex.) profile attach would fail because all fw commands aborted when internal error flag is set. 
As a result, nic netdevice profile is not attached and driver fails to rollback to uplink profile, which leave driver in broken state and cause crash later. To handle broken state do netdevice profile initialization only instead of full attachment and release mdev resources on driver suspend as expected. Actual netdevice attachment is done during driver load. Fixes: c4d7eb57687f ("net/mxl5e: Add change profile method") Signed-off-by: Dmytro Linkin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 35 ++++++++++++++++++++--- 1 file changed, 31 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 0235adcbc609..a07bbe9a61be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5833,8 +5833,8 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv) } static int -mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, - const struct mlx5e_profile *new_profile, void *new_ppriv) +mlx5e_netdev_init_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, + const struct mlx5e_profile *new_profile, void *new_ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); int err; @@ -5850,6 +5850,25 @@ mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mde err = new_profile->init(priv->mdev, priv->netdev); if (err) goto priv_cleanup; + + return 0; + +priv_cleanup: + mlx5e_priv_cleanup(priv); + return err; +} + +static int +mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, + const struct mlx5e_profile *new_profile, void *new_ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + err = mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv); + if (err) + return err; + err = mlx5e_attach_netdev(priv); if (err) goto profile_cleanup; @@ 
-5857,7 +5876,6 @@ mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mde profile_cleanup: new_profile->cleanup(priv); -priv_cleanup: mlx5e_priv_cleanup(priv); return err; } @@ -5876,6 +5894,12 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, priv->profile->cleanup(priv); mlx5e_priv_cleanup(priv); + if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5e_netdev_init_profile(netdev, mdev, new_profile, new_ppriv); + set_bit(MLX5E_STATE_DESTROYING, &priv->state); + return -EIO; + } + err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv); if (err) { /* roll back to original profile */ netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err); @@ -5937,8 +5961,11 @@ static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state) struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; - if (!netif_device_present(netdev)) + if (!netif_device_present(netdev)) { + if (test_bit(MLX5E_STATE_DESTROYING, &priv->state)) + mlx5e_destroy_mdev_resources(mdev); return -ENODEV; + } mlx5e_detach_netdev(priv); mlx5e_destroy_mdev_resources(mdev); -- cgit v1.2.3 From c4c24fc30cc417ace332ceceaba4f70f81dcd521 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 16 May 2023 02:28:02 +0000 Subject: net/mlx5e: Move Ethernet driver debugfs to profile init callback As priv->dfs_root is cleared, and therefore missed, when changing eswitch mode, move the creation of the root debugfs to the init callback of mlx5e_nic_profile and mlx5e_uplink_rep_profile, and the destruction to the cleanup callback for symmetry.
Fixes: 288eca60cc31 ("net/mlx5e: Add Ethernet driver debugfs") Signed-off-by: Jianbo Liu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +++++----- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 6 ++++++ 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a07bbe9a61be..a7c526ee5024 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -5261,12 +5261,16 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, mlx5e_timestamp_init(priv); + priv->dfs_root = debugfs_create_dir("nic", + mlx5_debugfs_get_dev_root(mdev)); + fs = mlx5e_fs_init(priv->profile, mdev, !test_bit(MLX5E_STATE_DESTROYING, &priv->state), priv->dfs_root); if (!fs) { err = -ENOMEM; mlx5_core_err(mdev, "FS initialization failed, %d\n", err); + debugfs_remove_recursive(priv->dfs_root); return err; } priv->fs = fs; @@ -5287,6 +5291,7 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) mlx5e_health_destroy_reporters(priv); mlx5e_ktls_cleanup(priv); mlx5e_fs_cleanup(priv->fs); + debugfs_remove_recursive(priv->dfs_root); priv->fs = NULL; } @@ -6011,9 +6016,6 @@ static int mlx5e_probe(struct auxiliary_device *adev, priv->profile = profile; priv->ppriv = NULL; - priv->dfs_root = debugfs_create_dir("nic", - mlx5_debugfs_get_dev_root(priv->mdev)); - err = profile->init(mdev, netdev); if (err) { mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err); @@ -6042,7 +6044,6 @@ err_resume: err_profile_cleanup: profile->cleanup(priv); err_destroy_netdev: - debugfs_remove_recursive(priv->dfs_root); mlx5e_destroy_netdev(priv); err_devlink_port_unregister: mlx5e_devlink_port_unregister(mlx5e_dev); @@ -6062,7 +6063,6 @@ static void mlx5e_remove(struct auxiliary_device *adev) unregister_netdev(priv->netdev); mlx5e_suspend(adev, state); 
priv->profile->cleanup(priv); - debugfs_remove_recursive(priv->dfs_root); mlx5e_destroy_netdev(priv); mlx5e_devlink_port_unregister(mlx5e_dev); mlx5e_destroy_devlink(mlx5e_dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 1fc386eccaf8..3e7041bd5705 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include #include #include #include @@ -812,11 +813,15 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev, { struct mlx5e_priv *priv = netdev_priv(netdev); + priv->dfs_root = debugfs_create_dir("nic", + mlx5_debugfs_get_dev_root(mdev)); + priv->fs = mlx5e_fs_init(priv->profile, mdev, !test_bit(MLX5E_STATE_DESTROYING, &priv->state), priv->dfs_root); if (!priv->fs) { netdev_err(priv->netdev, "FS allocation failed\n"); + debugfs_remove_recursive(priv->dfs_root); return -ENOMEM; } @@ -829,6 +834,7 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev, static void mlx5e_cleanup_rep(struct mlx5e_priv *priv) { mlx5e_fs_cleanup(priv->fs); + debugfs_remove_recursive(priv->dfs_root); priv->fs = NULL; } -- cgit v1.2.3 From fe5c2d3aef9352ac36a6acbc2bff5f732211ce3b Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Wed, 17 May 2023 17:54:30 +0300 Subject: net/mlx5: DR, Add missing mutex init/destroy in pattern manager Add missing mutex init/destroy as caught by the lock's debug warning: DEBUG_LOCKS_WARN_ON(lock->magic != lock) Fixes: da5d0027d666 ("net/mlx5: DR, Add cache for modify header pattern") Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c index 13e06a6a6b22..d6947fe13d56 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c @@ -213,6 +213,8 @@ struct mlx5dr_ptrn_mgr *mlx5dr_ptrn_mgr_create(struct mlx5dr_domain *dmn) } INIT_LIST_HEAD(&mgr->ptrn_list); + mutex_init(&mgr->modify_hdr_mutex); + return mgr; free_mgr: @@ -237,5 +239,6 @@ void mlx5dr_ptrn_mgr_destroy(struct mlx5dr_ptrn_mgr *mgr) } mlx5dr_icm_pool_destroy(mgr->ptrn_icm_pool); + mutex_destroy(&mgr->modify_hdr_mutex); kfree(mgr); } -- cgit v1.2.3 From d5972f1000c1e6b5185ded0fc194f21984f26e14 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 19 May 2023 22:43:03 +0300 Subject: net/mlx5: Fix check for allocation failure in comp_irqs_request_pci() This function accidentally dereferences "cpus" instead of returning directly. Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202305200354.KV3jU94w-lkp@intel.com/ Fixes: b48a0f72bc3e ("net/mlx5: Refactor completion irq request/release code") Signed-off-by: Dan Carpenter Reviewed-by: Simon Horman Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index fe698c79616c..3db4866d7880 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -824,7 +824,7 @@ static int comp_irqs_request_pci(struct mlx5_core_dev *dev) ncomp_eqs = table->num_comp_eqs; cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL); if (!cpus) - ret = -ENOMEM; + return -ENOMEM; i = 0; rcu_read_lock(); -- cgit v1.2.3 From 623a71385312ee288d59e319b90922e6e6943766 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 23 May 2023 20:17:52 +0200 Subject: net/mlx4: Use bitmap_weight_and() Use bitmap_weight_and() instead of hand writing it. 
This saves a few LoC and is slightly faster, should it matter. Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/a29c2348a062408bec45cee2601b2417310e5ea7.1684865809.git.christophe.jaillet@wanadoo.fr Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx4/main.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 277738c50c56..28c435ce98d8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1374,16 +1374,13 @@ static int mlx4_mf_bond(struct mlx4_dev *dev) int nvfs; struct mlx4_slaves_pport slaves_port1; struct mlx4_slaves_pport slaves_port2; - DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX); slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1); slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2); - bitmap_and(slaves_port_1_2, - slaves_port1.slaves, slaves_port2.slaves, - dev->persist->num_vfs + 1); /* only single port vfs are allowed */ - if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) { + if (bitmap_weight_and(slaves_port1.slaves, slaves_port2.slaves, + dev->persist->num_vfs + 1) > 1) { mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n"); return -EINVAL; } -- cgit v1.2.3 From d68cb7cf1fd0ef4287bc0ecd1ed0b6ae8e05fc70 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Wed, 24 May 2023 21:49:08 +0200 Subject: net: mellanox: mlxbf_gige: Fix skb_panic splat under memory pressure Do skb_put() after a new skb has been successfully allocated, otherwise the reused skb leads to skb_panics or incorrect packet sizes.
Fixes: f92e1869d74e ("Add Mellanox BlueField Gigabit Ethernet driver") Signed-off-by: Thomas Bogendoerfer Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230524194908.147145-1-tbogendoerfer@suse.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c index afa3b92a6905..0d5a41a2ae01 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c @@ -245,12 +245,6 @@ static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts) skb = priv->rx_skb[rx_pi_rem]; - skb_put(skb, datalen); - - skb->ip_summed = CHECKSUM_NONE; /* device did not checksum packet */ - - skb->protocol = eth_type_trans(skb, netdev); - /* Alloc another RX SKB for this same index */ rx_skb = mlxbf_gige_alloc_skb(priv, MLXBF_GIGE_DEFAULT_BUF_SZ, &rx_buf_dma, DMA_FROM_DEVICE); @@ -259,6 +253,13 @@ static bool mlxbf_gige_rx_packet(struct mlxbf_gige *priv, int *rx_pkts) priv->rx_skb[rx_pi_rem] = rx_skb; dma_unmap_single(priv->dev, *rx_wqe_addr, MLXBF_GIGE_DEFAULT_BUF_SZ, DMA_FROM_DEVICE); + + skb_put(skb, datalen); + + skb->ip_summed = CHECKSUM_NONE; /* device did not checksum packet */ + + skb->protocol = eth_type_trans(skb, netdev); + *rx_wqe_addr = rx_buf_dma; } else if (rx_cqe & MLXBF_GIGE_RX_CQE_PKT_STATUS_MAC_ERR) { priv->stats.rx_mac_errors++; -- cgit v1.2.3 From ffb3322181d9e8db880202e4f00991764a35d812 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 24 May 2023 20:57:14 +0800 Subject: net: stmmac: fix call trace when stmmac_xdp_xmit() is invoked We encountered a kernel call trace issue which was related to ndo_xdp_xmit callback on our i.MX8MP platform. The reproduce steps show as follows. 1. 
The FEC port (eth0) connects to a PC port, and the PC uses pktgen_sample03_burst_single_flow.sh to generate packets and send these packets to the FEC port. Notice that the script must be executed before step 2. 2. Run the "./xdp_redirect eth0 eth1" command on i.MX8MP, the eth1 interface is the dwmac. Then there will be a call trace issue soon. Please see the log for more details. The root cause is that the NETDEV_XDP_ACT_NDO_XMIT feature is enabled by default, so when the step 2 command is executed and packets have already been sent to eth0, the stmmac_xdp_xmit() starts running before the stmmac_xdp_set_prog() finishes. To resolve this issue, we disable the NETDEV_XDP_ACT_NDO_XMIT feature by default and turn on/off this feature when the bpf program is installed/uninstalled, just like the other ethernet drivers do. Call Trace log: [ 306.311271] ------------[ cut here ]------------ [ 306.315910] WARNING: CPU: 0 PID: 15 at lib/timerqueue.c:55 timerqueue_del+0x68/0x70 [ 306.323590] Modules linked in: [ 306.326654] CPU: 0 PID: 15 Comm: ksoftirqd/0 Not tainted 6.4.0-rc1+ #37 [ 306.333277] Hardware name: NXP i.MX8MPlus EVK board (DT) [ 306.338591] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 306.345561] pc : timerqueue_del+0x68/0x70 [ 306.349577] lr : __remove_hrtimer+0x5c/0xa0 [ 306.353777] sp : ffff80000b7c3920 [ 306.357094] x29: ffff80000b7c3920 x28: 0000000000000000 x27: 0000000000000001 [ 306.364244] x26: ffff80000a763a40 x25: ffff0000d0285a00 x24: 0000000000000001 [ 306.371390] x23: 0000000000000001 x22: ffff000179389a40 x21: 0000000000000000 [ 306.378537] x20: ffff000179389aa0 x19: ffff0000d2951308 x18: 0000000000001000 [ 306.385686] x17: f1d3000000000000 x16: 00000000c39c1000 x15: 55e99bbe00001a00 [ 306.392835] x14: 09000900120aa8c0 x13: e49af1d300000000 x12: 000000000000c39c [ 306.399987] x11: 100055e99bbe0000 x10: ffff8000090b1048 x9 : ffff8000081603fc [ 306.407133] x8 : 000000000000003c x7 : 000000000000003c x6 : 0000000000000001 [
306.414284] x5 : ffff0000d2950980 x4 : 0000000000000000 x3 : 0000000000000000 [ 306.421432] x2 : 0000000000000001 x1 : ffff0000d2951308 x0 : ffff0000d2951308 [ 306.428585] Call trace: [ 306.431035] timerqueue_del+0x68/0x70 [ 306.434706] __remove_hrtimer+0x5c/0xa0 [ 306.438549] hrtimer_start_range_ns+0x2bc/0x370 [ 306.443089] stmmac_xdp_xmit+0x174/0x1b0 [ 306.447021] bq_xmit_all+0x194/0x4b0 [ 306.450612] __dev_flush+0x4c/0x98 [ 306.454024] xdp_do_flush+0x18/0x38 [ 306.457522] fec_enet_rx_napi+0x6c8/0xc68 [ 306.461539] __napi_poll+0x40/0x220 [ 306.465038] net_rx_action+0xf8/0x240 [ 306.468707] __do_softirq+0x128/0x3a8 [ 306.472378] run_ksoftirqd+0x40/0x58 [ 306.475961] smpboot_thread_fn+0x1c4/0x288 [ 306.480068] kthread+0x124/0x138 [ 306.483305] ret_from_fork+0x10/0x20 [ 306.486889] ---[ end trace 0000000000000000 ]--- Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features") Signed-off-by: Wei Fang Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230524125714.357337-1-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +-- drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c | 6 ++++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 0fca81507a77..52cab9de05f2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7233,8 +7233,7 @@ int stmmac_dvr_probe(struct device *device, ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_XSK_ZEROCOPY | - NETDEV_XDP_ACT_NDO_XMIT; + NETDEV_XDP_ACT_XSK_ZEROCOPY; ret = stmmac_tc_init(priv, priv); if (!ret) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c index 
9d4d8c3dad0a..aa6f16d3df64 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_xdp.c @@ -117,6 +117,9 @@ int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog, return -EOPNOTSUPP; } + if (!prog) + xdp_features_clear_redirect_target(dev); + need_update = !!priv->xdp_prog != !!prog; if (if_running && need_update) stmmac_xdp_release(dev); @@ -131,5 +134,8 @@ int stmmac_xdp_set_prog(struct stmmac_priv *priv, struct bpf_prog *prog, if (if_running && need_update) stmmac_xdp_open(dev); + if (prog) + xdp_features_set_redirect_target(dev, false); + return 0; } -- cgit v1.2.3 From ca7d05007d0a95615a51cb5a624775db8c450f43 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Wed, 24 May 2023 10:36:38 +0100 Subject: sfc: handle VI shortage on ef100 by readjusting the channels When fewer VIs are allocated than what is allowed we can readjust the channels by calling efx_mcdi_alloc_vis() again. Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: Martin Habets Reviewed-by: Simon Horman Reviewed-by: Edward Cree Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/ef100_netdev.c | 51 +++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c index be395cd8770b..274f3a2562ad 100644 --- a/drivers/net/ethernet/sfc/ef100_netdev.c +++ b/drivers/net/ethernet/sfc/ef100_netdev.c @@ -40,19 +40,26 @@ static int ef100_alloc_vis(struct efx_nic *efx, unsigned int *allocated_vis) unsigned int tx_vis = efx->n_tx_channels + efx->n_extra_tx_channels; unsigned int rx_vis = efx->n_rx_channels; unsigned int min_vis, max_vis; + int rc; EFX_WARN_ON_PARANOID(efx->tx_queues_per_channel != 1); tx_vis += efx->n_xdp_channels * efx->xdp_tx_per_channel; max_vis = max(rx_vis, tx_vis); - /* Currently don't handle resource starvation and only accept - * our maximum needs and no less. + /* We require at least a single complete TX channel worth of queues. */ + min_vis = efx->tx_queues_per_channel; + + rc = efx_mcdi_alloc_vis(efx, min_vis, max_vis, + NULL, allocated_vis); + + /* We retry allocating VIs by reallocating channels when we have not + * been able to allocate the maximum VIs. */ - min_vis = max_vis; + if (!rc && *allocated_vis < max_vis) + rc = -EAGAIN; - return efx_mcdi_alloc_vis(efx, min_vis, max_vis, - NULL, allocated_vis); + return rc; } static int ef100_remap_bar(struct efx_nic *efx, int max_vis) @@ -133,9 +140,41 @@ static int ef100_net_open(struct net_device *net_dev) goto fail; rc = ef100_alloc_vis(efx, &allocated_vis); - if (rc) + if (rc && rc != -EAGAIN) goto fail; + /* Try one more time but with the maximum number of channels + * equal to the allocated VIs, which would more likely succeed. 
+ */ + if (rc == -EAGAIN) { + rc = efx_mcdi_free_vis(efx); + if (rc) + goto fail; + + efx_remove_interrupts(efx); + efx->max_channels = allocated_vis; + + rc = efx_probe_interrupts(efx); + if (rc) + goto fail; + + rc = efx_set_channels(efx); + if (rc) + goto fail; + + rc = ef100_alloc_vis(efx, &allocated_vis); + if (rc && rc != -EAGAIN) + goto fail; + + /* It should be very unlikely that we failed here again, but in + * such a case we return ENOSPC. + */ + if (rc == -EAGAIN) { + rc = -ENOSPC; + goto fail; + } + } + rc = efx_probe_channels(efx); if (rc) return rc; -- cgit v1.2.3 From dc362e20cd6ab7a93d1b09669730c406f0910c35 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Thu, 25 May 2023 23:56:12 +0530 Subject: amd-xgbe: fix the false linkup in xgbe_phy_status In the event of a change in XGBE mode, the current auto-negotiation needs to be reset and the AN cycle needs to be re-triggered. However, the current code ignores the return value of xgbe_set_mode(), leading to false information as the link is declared without checking the status register. Fix this by propagating the mode switch status information to xgbe_phy_status(). Fixes: e57f7a3feaef ("amd-xgbe: Prepare for working with more than one type of phy") Co-developed-by: Sudheesh Mavila Signed-off-by: Sudheesh Mavila Reviewed-by: Simon Horman Acked-by: Shyam Sundar S K Signed-off-by: Raju Rangoju Signed-off-by: David S.
Miller --- drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 33a9574e9e04..32d2c6fac652 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1329,7 +1329,7 @@ static enum xgbe_mode xgbe_phy_status_aneg(struct xgbe_prv_data *pdata) return pdata->phy_if.phy_impl.an_outcome(pdata); } -static void xgbe_phy_status_result(struct xgbe_prv_data *pdata) +static bool xgbe_phy_status_result(struct xgbe_prv_data *pdata) { struct ethtool_link_ksettings *lks = &pdata->phy.lks; enum xgbe_mode mode; @@ -1367,8 +1367,13 @@ static void xgbe_phy_status_result(struct xgbe_prv_data *pdata) pdata->phy.duplex = DUPLEX_FULL; - if (xgbe_set_mode(pdata, mode) && pdata->an_again) + if (!xgbe_set_mode(pdata, mode)) + return false; + + if (pdata->an_again) xgbe_phy_reconfig_aneg(pdata); + + return true; } static void xgbe_phy_status(struct xgbe_prv_data *pdata) @@ -1398,7 +1403,8 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata) return; } - xgbe_phy_status_result(pdata); + if (xgbe_phy_status_result(pdata)) + return; if (test_bit(XGBE_LINK_INIT, &pdata->dev_state)) clear_bit(XGBE_LINK_INIT, &pdata->dev_state); -- cgit v1.2.3 From d67790ddf0219aa0ad3e13b53ae0a7619b3425a2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 22 May 2023 14:18:13 -0700 Subject: overflow: Add struct_size_t() helper While struct_size() is normally used in situations where the structure type already has a pointer instance, there are places where no variable is available. In the past, this has been worked around by using a typed NULL first argument, but this is a bit ugly. Add a helper to do this, and replace the handful of instances of the code pattern with it. 
Instances were found with this Coccinelle script: @struct_size_t@ identifier STRUCT, MEMBER; expression COUNT; @@ - struct_size((struct STRUCT *)\(0\|NULL\), + struct_size_t(struct STRUCT, MEMBER, COUNT) Suggested-by: Christoph Hellwig Cc: Jesse Brandeburg Cc: Tony Nguyen Cc: "David S. Miller" Cc: Eric Dumazet Cc: Paolo Abeni Cc: James Smart Cc: Keith Busch Cc: Jens Axboe Cc: Sagi Grimberg Cc: HighPoint Linux Team Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: Kashyap Desai Cc: Sumit Saxena Cc: Shivasharan S Cc: Don Brace Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: Guo Xuenan Cc: Gwan-gyeong Mun Cc: Nick Desaulniers Cc: Daniel Latypov Cc: kernel test robot Cc: intel-wired-lan@lists.osuosl.org Cc: netdev@vger.kernel.org Cc: linux-nvme@lists.infradead.org Cc: linux-scsi@vger.kernel.org Cc: megaraidlinux.pdl@broadcom.com Cc: storagedev@microchip.com Cc: linux-xfs@vger.kernel.org Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook Acked-by: Martin K. Petersen Reviewed-by: Darrick J. Wong Reviewed-by: Gustavo A. R. 
Silva Reviewed-by: Christoph Hellwig Acked-by: Jakub Kicinski Reviewed-by: Alexander Lobakin Link: https://lore.kernel.org/r/20230522211810.never.421-kees@kernel.org --- drivers/net/ethernet/intel/ice/ice_ddp.h | 9 ++++----- drivers/nvme/host/fc.c | 8 ++++---- drivers/scsi/hptiop.c | 4 ++-- drivers/scsi/megaraid/megaraid_sas_base.c | 12 ++++++------ drivers/scsi/megaraid/megaraid_sas_fp.c | 6 +++--- drivers/scsi/smartpqi/smartpqi_init.c | 2 +- fs/xfs/libxfs/xfs_btree.h | 2 +- fs/xfs/scrub/btree.h | 2 +- include/linux/overflow.h | 18 +++++++++++++++++- lib/overflow_kunit.c | 2 +- 10 files changed, 40 insertions(+), 25 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.h b/drivers/net/ethernet/intel/ice/ice_ddp.h index 37eadb3d27a8..41acfe26df1c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.h +++ b/drivers/net/ethernet/intel/ice/ice_ddp.h @@ -185,7 +185,7 @@ struct ice_buf_hdr { #define ICE_MAX_ENTRIES_IN_BUF(hd_sz, ent_sz) \ ((ICE_PKG_BUF_SIZE - \ - struct_size((struct ice_buf_hdr *)0, section_entry, 1) - (hd_sz)) / \ + struct_size_t(struct ice_buf_hdr, section_entry, 1) - (hd_sz)) / \ (ent_sz)) /* ice package section IDs */ @@ -297,7 +297,7 @@ struct ice_label_section { }; #define ICE_MAX_LABELS_IN_BUF \ - ICE_MAX_ENTRIES_IN_BUF(struct_size((struct ice_label_section *)0, \ + ICE_MAX_ENTRIES_IN_BUF(struct_size_t(struct ice_label_section, \ label, 1) - \ sizeof(struct ice_label), \ sizeof(struct ice_label)) @@ -352,7 +352,7 @@ struct ice_boost_tcam_section { }; #define ICE_MAX_BST_TCAMS_IN_BUF \ - ICE_MAX_ENTRIES_IN_BUF(struct_size((struct ice_boost_tcam_section *)0, \ + ICE_MAX_ENTRIES_IN_BUF(struct_size_t(struct ice_boost_tcam_section, \ tcam, 1) - \ sizeof(struct ice_boost_tcam_entry), \ sizeof(struct ice_boost_tcam_entry)) @@ -372,8 +372,7 @@ struct ice_marker_ptype_tcam_section { }; #define ICE_MAX_MARKER_PTYPE_TCAMS_IN_BUF \ - ICE_MAX_ENTRIES_IN_BUF( \ - struct_size((struct ice_marker_ptype_tcam_section 
*)0, tcam, \ + ICE_MAX_ENTRIES_IN_BUF(struct_size_t(struct ice_marker_ptype_tcam_section, tcam, \ 1) - \ sizeof(struct ice_marker_ptype_tcam_entry), \ sizeof(struct ice_marker_ptype_tcam_entry)) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 2ed75923507d..691f2df574ce 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2917,8 +2917,8 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set, &nvme_fc_mq_ops, 1, - struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, - ctrl->lport->ops->fcprqst_priv_sz)); + struct_size_t(struct nvme_fcp_op_w_sgl, priv, + ctrl->lport->ops->fcprqst_priv_sz)); if (ret) return ret; @@ -3536,8 +3536,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, &nvme_fc_admin_mq_ops, - struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, - ctrl->lport->ops->fcprqst_priv_sz)); + struct_size_t(struct nvme_fcp_op_w_sgl, priv, + ctrl->lport->ops->fcprqst_priv_sz)); if (ret) goto fail_ctrl; diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c index 06ccb51bf6a9..f5334ccbf2ca 100644 --- a/drivers/scsi/hptiop.c +++ b/drivers/scsi/hptiop.c @@ -1394,8 +1394,8 @@ static int hptiop_probe(struct pci_dev *pcidev, const struct pci_device_id *id) host->cmd_per_lun = le32_to_cpu(iop_config.max_requests); host->max_cmd_len = 16; - req_size = struct_size((struct hpt_iop_request_scsi_command *)0, - sg_list, hba->max_sg_descriptors); + req_size = struct_size_t(struct hpt_iop_request_scsi_command, + sg_list, hba->max_sg_descriptors); if ((req_size & 0x1f) != 0) req_size = (req_size + 0x1f) & ~0x1f; diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 317c944c68e3..050eed8e2684 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5153,8 +5153,8 @@ static void 
megasas_update_ext_vd_details(struct megasas_instance *instance) fusion->max_map_sz = ventura_map_sz; } else { fusion->old_map_sz = - struct_size((struct MR_FW_RAID_MAP *)0, ldSpanMap, - instance->fw_supported_vd_count); + struct_size_t(struct MR_FW_RAID_MAP, ldSpanMap, + instance->fw_supported_vd_count); fusion->new_map_sz = sizeof(struct MR_FW_RAID_MAP_EXT); fusion->max_map_sz = @@ -5789,8 +5789,8 @@ megasas_setup_jbod_map(struct megasas_instance *instance) struct fusion_context *fusion = instance->ctrl_context; size_t pd_seq_map_sz; - pd_seq_map_sz = struct_size((struct MR_PD_CFG_SEQ_NUM_SYNC *)0, seq, - MAX_PHYSICAL_DEVICES); + pd_seq_map_sz = struct_size_t(struct MR_PD_CFG_SEQ_NUM_SYNC, seq, + MAX_PHYSICAL_DEVICES); instance->use_seqnum_jbod_fp = instance->support_seqnum_jbod_fp; @@ -8033,8 +8033,8 @@ skip_firing_dcmds: if (instance->adapter_type != MFI_SERIES) { megasas_release_fusion(instance); pd_seq_map_sz = - struct_size((struct MR_PD_CFG_SEQ_NUM_SYNC *)0, - seq, MAX_PHYSICAL_DEVICES); + struct_size_t(struct MR_PD_CFG_SEQ_NUM_SYNC, + seq, MAX_PHYSICAL_DEVICES); for (i = 0; i < 2 ; i++) { if (fusion->ld_map[i]) dma_free_coherent(&instance->pdev->dev, diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c index 4463a538102a..b8b388a4e28f 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fp.c +++ b/drivers/scsi/megaraid/megaraid_sas_fp.c @@ -326,9 +326,9 @@ u8 MR_ValidateMapInfo(struct megasas_instance *instance, u64 map_id) else if (instance->supportmax256vd) expected_size = sizeof(struct MR_FW_RAID_MAP_EXT); else - expected_size = struct_size((struct MR_FW_RAID_MAP *)0, - ldSpanMap, - le16_to_cpu(pDrvRaidMap->ldCount)); + expected_size = struct_size_t(struct MR_FW_RAID_MAP, + ldSpanMap, + le16_to_cpu(pDrvRaidMap->ldCount)); if (le32_to_cpu(pDrvRaidMap->totalSize) != expected_size) { dev_dbg(&instance->pdev->dev, "megasas: map info structure size 0x%x", diff --git a/drivers/scsi/smartpqi/smartpqi_init.c 
b/drivers/scsi/smartpqi/smartpqi_init.c index 03de97cd72c2..f4e0aa262164 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -5015,7 +5015,7 @@ static int pqi_create_queues(struct pqi_ctrl_info *ctrl_info) } #define PQI_REPORT_EVENT_CONFIG_BUFFER_LENGTH \ - struct_size((struct pqi_event_config *)0, descriptors, PQI_MAX_EVENT_DESCRIPTORS) + struct_size_t(struct pqi_event_config, descriptors, PQI_MAX_EVENT_DESCRIPTORS) static int pqi_configure_events(struct pqi_ctrl_info *ctrl_info, bool enable_events) diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index a2aa36b23e25..4d68a58be160 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -301,7 +301,7 @@ struct xfs_btree_cur static inline size_t xfs_btree_cur_sizeof(unsigned int nlevels) { - return struct_size((struct xfs_btree_cur *)NULL, bc_levels, nlevels); + return struct_size_t(struct xfs_btree_cur, bc_levels, nlevels); } /* cursor flags */ diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h index 9d7b9ee8bef4..c32b5fad6174 100644 --- a/fs/xfs/scrub/btree.h +++ b/fs/xfs/scrub/btree.h @@ -60,7 +60,7 @@ struct xchk_btree { static inline size_t xchk_btree_sizeof(unsigned int nlevels) { - return struct_size((struct xchk_btree *)NULL, lastkey, nlevels - 1); + return struct_size_t(struct xchk_btree, lastkey, nlevels - 1); } int xchk_btree(struct xfs_scrub *sc, struct xfs_btree_cur *cur, diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 0e33b5cbdb9f..f9b60313eaea 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -283,7 +283,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * @member: Name of the array member. * @count: Number of elements in the array. * - * Calculates size of memory needed for structure @p followed by an + * Calculates size of memory needed for structure of @p followed by an * array of @count number of @member elements. 
* * Return: number of bytes needed or SIZE_MAX on overflow. @@ -293,4 +293,20 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) sizeof(*(p)) + flex_array_size(p, member, count), \ size_add(sizeof(*(p)), flex_array_size(p, member, count))) +/** + * struct_size_t() - Calculate size of structure with trailing flexible array + * @type: structure type name. + * @member: Name of the array member. + * @count: Number of elements in the array. + * + * Calculates size of memory needed for structure @type followed by an + * array of @count number of @member elements. Prefer using struct_size() + * when possible instead, to keep calculations associated with a specific + * instance variable of type @type. + * + * Return: number of bytes needed or SIZE_MAX on overflow. + */ +#define struct_size_t(type, member, count) \ + struct_size((type *)NULL, member, count) + #endif /* __LINUX_OVERFLOW_H */ diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index dcd3ba102db6..34db0b3aa502 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -649,7 +649,7 @@ struct __test_flex_array { static void overflow_size_helpers_test(struct kunit *test) { /* Make sure struct_size() can be used in a constant expression. */ - u8 ce_array[struct_size((struct __test_flex_array *)0, data, 55)]; + u8 ce_array[struct_size_t(struct __test_flex_array, data, 55)]; struct __test_flex_array *obj; int count = 0; int var; -- cgit v1.2.3 From 727e373f897d06214ffc59f820a356a5bc458789 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 26 May 2023 11:14:34 +0100 Subject: net: stmmac: use xpcs_create_mdiodev() Use the new xpcs_create_mdiodev() creator, which simplifies the creation and destruction of the mdio device associated with xpcs. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 6807c4c1a0a2..3db1cb0fd160 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -491,7 +491,6 @@ int stmmac_mdio_reset(struct mii_bus *bus) int stmmac_xpcs_setup(struct mii_bus *bus) { struct net_device *ndev = bus->priv; - struct mdio_device *mdiodev; struct stmmac_priv *priv; struct dw_xpcs *xpcs; int mode, addr; @@ -501,16 +500,10 @@ int stmmac_xpcs_setup(struct mii_bus *bus) /* Try to probe the XPCS by scanning all addresses. */ for (addr = 0; addr < PHY_MAX_ADDR; addr++) { - mdiodev = mdio_device_create(bus, addr); - if (IS_ERR(mdiodev)) + xpcs = xpcs_create_mdiodev(bus, addr, mode); + if (IS_ERR(xpcs)) continue; - xpcs = xpcs_create(mdiodev, mode); - if (IS_ERR_OR_NULL(xpcs)) { - mdio_device_free(mdiodev); - continue; - } - priv->hw->xpcs = xpcs; break; } @@ -669,10 +662,8 @@ int stmmac_mdio_unregister(struct net_device *ndev) if (!priv->mii) return 0; - if (priv->hw->xpcs) { - mdio_device_free(priv->hw->xpcs->mdiodev); + if (priv->hw->xpcs) xpcs_destroy(priv->hw->xpcs); - } mdiobus_unregister(priv->mii); priv->mii->priv = NULL; -- cgit v1.2.3 From b7d5d0438e01c7c61db5fc87d92ad8cb1166f217 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 26 May 2023 11:14:50 +0100 Subject: net: enetc: use lynx_pcs_create_mdiodev() Use the newly introduced lynx_pcs_create_mdiodev() which simplifies the creation and destruction of the lynx PCS. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Tested-by: Vladimir Oltean Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_pf.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 7cd22d370caa..1416262d4296 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -863,7 +863,6 @@ static int enetc_imdio_create(struct enetc_pf *pf) struct device *dev = &pf->si->pdev->dev; struct enetc_mdio_priv *mdio_priv; struct phylink_pcs *phylink_pcs; - struct mdio_device *mdio_device; struct mii_bus *bus; int err; @@ -889,17 +888,9 @@ static int enetc_imdio_create(struct enetc_pf *pf) goto free_mdio_bus; } - mdio_device = mdio_device_create(bus, 0); - if (IS_ERR(mdio_device)) { - err = PTR_ERR(mdio_device); - dev_err(dev, "cannot create mdio device (%d)\n", err); - goto unregister_mdiobus; - } - - phylink_pcs = lynx_pcs_create(mdio_device); - if (!phylink_pcs) { - mdio_device_free(mdio_device); - err = -ENOMEM; + phylink_pcs = lynx_pcs_create_mdiodev(bus, 0); + if (IS_ERR(phylink_pcs)) { + err = PTR_ERR(phylink_pcs); dev_err(dev, "cannot create lynx pcs (%d)\n", err); goto unregister_mdiobus; } @@ -918,13 +909,8 @@ free_mdio_bus: static void enetc_imdio_remove(struct enetc_pf *pf) { - struct mdio_device *mdio_device; - - if (pf->pcs) { - mdio_device = lynx_get_mdio_device(pf->pcs); - mdio_device_free(mdio_device); + if (pf->pcs) lynx_pcs_destroy(pf->pcs); - } if (pf->imdio) { mdiobus_unregister(pf->imdio); mdiobus_free(pf->imdio); -- cgit v1.2.3 From 404621fab27310c231bab9a3999eab858390cb45 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 26 May 2023 12:44:43 +0100 Subject: net: dpaa2-mac: use correct interface to free mdiodev Rather than using 
put_device(&mdiodev->dev), use the proper interface provided to dispose of the mdiodev - that being mdio_device_free(). Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Reviewed-by: Ioana Ciornei Tested-by: Ioana Ciornei Link: https://lore.kernel.org/r/E1q2VsB-008QlZ-El@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index b1871e6c4006..cb70855e2b9a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -273,7 +273,7 @@ static int dpaa2_pcs_create(struct dpaa2_mac *mac, mac->pcs = lynx_pcs_create(mdiodev); if (!mac->pcs) { netdev_err(mac->net_dev, "lynx_pcs_create() failed\n"); - put_device(&mdiodev->dev); + mdio_device_free(mdiodev); return -ENOMEM; } @@ -286,10 +286,9 @@ static void dpaa2_pcs_destroy(struct dpaa2_mac *mac) if (phylink_pcs) { struct mdio_device *mdio = lynx_get_mdio_device(phylink_pcs); - struct device *dev = &mdio->dev; lynx_pcs_destroy(phylink_pcs); - put_device(dev); + mdio_device_free(mdio); mac->pcs = NULL; } } -- cgit v1.2.3 From 1919b39fc6eabb9a6f9a51706ff6d03865f5df29 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 26 May 2023 08:38:57 -0700 Subject: net: mana: Fix perf regression: remove rx_cqes, tx_cqes counters The apc->eth_stats.rx_cqes is one per NIC (vport), and it's on the frequent and parallel code path of all queues. So, r/w into this single shared variable by many threads on different CPUs creates a lot caching and memory overhead, hence perf regression. And, it's not accurate due to the high volume concurrent r/w. For example, a workload is iperf with 128 threads, and with RPS enabled. We saw perf regression of 25% with the previous patch adding the counters. 
And this patch eliminates the regression. Since the error path of mana_poll_rx_cq() already has warnings, so keeping the counter and convert it to a per-queue variable is not necessary. So, just remove this counter from this high frequency code path. Also, remove the tx_cqes counter for the same reason. We have warnings & other counters for errors on that path, and don't need to count every normal cqe processing. Cc: stable@vger.kernel.org Fixes: bd7fc6e1957c ("net: mana: Add new MANA VF performance counters for easier troubleshooting") Signed-off-by: Haiyang Zhang Reviewed-by: Horatiu Vultur Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/1685115537-31675-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microsoft/mana/mana_en.c | 10 ---------- drivers/net/ethernet/microsoft/mana/mana_ethtool.c | 2 -- include/net/mana/mana.h | 2 -- 3 files changed, 14 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 06d6292e09b3..d907727c7b7a 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1279,8 +1279,6 @@ static void mana_poll_tx_cq(struct mana_cq *cq) if (comp_read < 1) return; - apc->eth_stats.tx_cqes = comp_read; - for (i = 0; i < comp_read; i++) { struct mana_tx_comp_oob *cqe_oob; @@ -1363,8 +1361,6 @@ static void mana_poll_tx_cq(struct mana_cq *cq) WARN_ON_ONCE(1); cq->work_done = pkt_transmitted; - - apc->eth_stats.tx_cqes -= pkt_transmitted; } static void mana_post_pkt_rxq(struct mana_rxq *rxq) @@ -1626,15 +1622,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq) { struct gdma_comp *comp = cq->gdma_comp_buf; struct mana_rxq *rxq = cq->rxq; - struct mana_port_context *apc; int comp_read, i; - apc = netdev_priv(rxq->ndev); - comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER); WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER); - 
apc->eth_stats.rx_cqes = comp_read; rxq->xdp_flush = false; for (i = 0; i < comp_read; i++) { @@ -1646,8 +1638,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq) return; mana_process_rx_cqe(rxq, cq, &comp[i]); - - apc->eth_stats.rx_cqes--; } if (rxq->xdp_flush) diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index a64c81410dc1..0dc78679f620 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -13,11 +13,9 @@ static const struct { } mana_eth_stats[] = { {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)}, {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)}, - {"tx_cqes", offsetof(struct mana_ethtool_stats, tx_cqes)}, {"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)}, {"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats, tx_cqe_unknown_type)}, - {"rx_cqes", offsetof(struct mana_ethtool_stats, rx_cqes)}, {"rx_coalesced_err", offsetof(struct mana_ethtool_stats, rx_coalesced_err)}, {"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats, diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index cd386aa7c7cc..9eef19972845 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -347,10 +347,8 @@ struct mana_tx_qp { struct mana_ethtool_stats { u64 stop_queue; u64 wake_queue; - u64 tx_cqes; u64 tx_cqe_err; u64 tx_cqe_unknown_type; - u64 rx_cqes; u64 rx_coalesced_err; u64 rx_cqe_unknown_type; }; -- cgit v1.2.3 From bb269633f3da56ec65e4e5aa9d9fca0ef8b3d373 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 27 May 2023 21:40:08 +0200 Subject: liquidio: Use vzalloc() Use vzalloc() instead of hand writing it with vmalloc()+memset(). This is less verbose. 
Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/93b010824d9d92376e8d49b9eb396a0fa0c0ac80.1685216322.git.christophe.jaillet@wanadoo.fr Signed-off-by: Paolo Abeni --- drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c | 4 +--- drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index 9ed3d1ab2ca5..285d3825cad3 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -719,12 +719,10 @@ static int cn23xx_setup_pf_mbox(struct octeon_device *oct) for (i = 0; i < oct->sriov_info.max_vfs; i++) { q_no = i * oct->sriov_info.rings_per_vf; - mbox = vmalloc(sizeof(*mbox)); + mbox = vzalloc(sizeof(*mbox)); if (!mbox) goto free_mbox; - memset(mbox, 0, sizeof(struct octeon_mbox)); - spin_lock_init(&mbox->lock); mbox->oct_dev = oct; diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c index fda49404968c..b3bd2767d3dd 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c @@ -279,12 +279,10 @@ static int cn23xx_setup_vf_mbox(struct octeon_device *oct) { struct octeon_mbox *mbox = NULL; - mbox = vmalloc(sizeof(*mbox)); + mbox = vzalloc(sizeof(*mbox)); if (!mbox) return 1; - memset(mbox, 0, sizeof(struct octeon_mbox)); - spin_lock_init(&mbox->lock); mbox->oct_dev = oct; -- cgit v1.2.3 From bc590b47549225a03c6b36bbc1aede75c917767b Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 28 May 2023 19:35:12 +0200 Subject: r8169: check for PCI read error in probe Check whether first PCI read returns 0xffffffff. 
Currently, if this is the case, the user sees the following misleading message: unknown chip XID fcf, contact r8169 maintainers (see MAINTAINERS file) Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/75b54d23-fefe-2bf4-7e80-c9d3bc91af11@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/realtek/r8169_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 4b19803a7dd0..5e6308d574ba 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5164,6 +5164,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) int jumbo_max, region, rc; enum mac_version chipset; struct net_device *dev; + u32 txconfig; u16 xid; dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp)); @@ -5218,7 +5219,13 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->mmio_addr = pcim_iomap_table(pdev)[region]; - xid = (RTL_R32(tp, TxConfig) >> 20) & 0xfcf; + txconfig = RTL_R32(tp, TxConfig); + if (txconfig == ~0U) { + dev_err(&pdev->dev, "PCI read failed\n"); + return -EIO; + } + + xid = (txconfig >> 20) & 0xfcf; /* Identify chip attached to board */ chipset = rtl8169_get_mac_version(xid, tp->supports_gmii); -- cgit v1.2.3 From bc638eabfed90fdc798fd5765e67e41abea76152 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Mon, 29 May 2023 10:26:15 +0800 Subject: net: fec: remove last_bdp from fec_enet_txq_xmit_frame() The last_bdp is initialized to bdp, and both last_bdp and bdp are not changed. That is to say that last_bdp and bdp are always equal. So bdp can be used directly. 
Signed-off-by: Wei Fang Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230529022615.669589-1-wei.fang@nxp.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/freescale/fec_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 632bb4d589d7..4d37a811ae15 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3770,7 +3770,7 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, struct xdp_frame *frame) { unsigned int index, status, estatus; - struct bufdesc *bdp, *last_bdp; + struct bufdesc *bdp; dma_addr_t dma_addr; int entries_free; @@ -3782,7 +3782,6 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, /* Fill in a Tx ring entry */ bdp = txq->bd.cur; - last_bdp = bdp; status = fec16_to_cpu(bdp->cbd_sc); status &= ~BD_ENET_TX_STATS; @@ -3810,7 +3809,6 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, ebdp->cbd_esc = cpu_to_fec32(estatus); } - index = fec_enet_get_bd_index(last_bdp, &txq->bd); txq->tx_skbuff[index] = NULL; /* Make sure the updates to rest of the descriptor are performed before @@ -3825,7 +3823,7 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, bdp->cbd_sc = cpu_to_fec16(status); /* If this was the last BD in the ring, start at the beginning again. */ - bdp = fec_enet_get_nextdesc(last_bdp, &txq->bd); + bdp = fec_enet_get_nextdesc(bdp, &txq->bd); /* Make sure the update to bdp are performed before txq->bd.cur. */ dma_wmb(); -- cgit v1.2.3 From b2857685372bb0ac5c8b2d5079cd8126aeef1e6b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 May 2023 12:28:28 +0200 Subject: ice: register devlink port for PF with ops Use the newly introduced devlink port registration function variant and register the devlink port, passing ops.
Signed-off-by: Jiri Pirko Reviewed-by: Jesse Brandeburg Reviewed-by: Michal Wilczynski Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_devlink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index bc44cc220818..6661d12772a3 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -1512,6 +1512,9 @@ ice_devlink_set_port_split_options(struct ice_pf *pf, ice_active_port_option = active_idx; } +static const struct devlink_port_ops ice_devlink_port_ops = { +}; + /** * ice_devlink_create_pf_port - Create a devlink port for this PF * @pf: the PF to create a devlink port for @@ -1551,7 +1554,8 @@ int ice_devlink_create_pf_port(struct ice_pf *pf) devlink_port_attrs_set(devlink_port, &attrs); devlink = priv_to_devlink(pf); - err = devlink_port_register(devlink, devlink_port, vsi->idx); + err = devlink_port_register_with_ops(devlink, devlink_port, vsi->idx, + &ice_devlink_port_ops); if (err) { dev_err(dev, "Failed to create devlink port for PF %d, error %d\n", pf->hw.pf_id, err); -- cgit v1.2.3 From 865a1a1b97b6779f1e775b075dd534a43de64cfd Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 May 2023 12:28:29 +0200 Subject: mlxsw_core: register devlink port with ops Use the newly introduced devlink port registration function variant and register the devlink port, passing ops.
Signed-off-by: Jiri Pirko Reviewed-by: Petr Machata Tested-by: Petr Machata Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 22db0bb15c45..605881b17ccc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -3116,6 +3116,9 @@ u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_res_get); +static const struct devlink_port_ops mlxsw_devlink_port_ops = { +}; + static int __mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u16 local_port, enum devlink_port_flavour flavour, u8 slot_index, u32 port_number, bool split, @@ -3150,7 +3153,8 @@ static int __mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u16 local_port, devlink_port_linecard_set(devlink_port, linecard->devlink_linecard); } - err = devl_port_register(devlink, devlink_port, local_port); + err = devl_port_register_with_ops(devlink, devlink_port, local_port, + &mlxsw_devlink_port_ops); if (err) memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port)); return err; -- cgit v1.2.3 From ab8ccc6c134779a26af3e613578f5b2ac820a649 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 May 2023 12:28:30 +0200 Subject: nfp: devlink: register devlink port with ops Use the newly introduced devlink port registration function variant and register the devlink port, passing ops.
Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_devlink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index bf6bae557158..4e4296ecae7c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -321,6 +321,9 @@ const struct devlink_ops nfp_devlink_ops = { .flash_update = nfp_devlink_flash_update, }; +static const struct devlink_port_ops nfp_devlink_port_ops = { +}; + int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) { struct devlink_port_attrs attrs = {}; @@ -351,7 +354,8 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) devlink = priv_to_devlink(app->pf); - return devl_port_register(devlink, &port->dl_port, port->eth_id); + return devl_port_register_with_ops(devlink, &port->dl_port, + port->eth_id, &nfp_devlink_port_ops); } void nfp_devlink_port_unregister(struct nfp_port *port) -- cgit v1.2.3 From f58a3e4dfe241393ce383363583190903f140da5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 May 2023 12:28:31 +0200 Subject: devlink: move port_split/unsplit() ops into devlink_port_ops Move port_split/unsplit() from devlink_ops into newly introduced devlink_port_ops. 
Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_devlink.c | 4 ++-- drivers/net/ethernet/mellanox/mlxsw/core.c | 4 ++-- drivers/net/ethernet/netronome/nfp/nfp_devlink.c | 4 ++-- include/net/devlink.h | 11 +++++++---- net/devlink/leftover.c | 10 +++++----- 5 files changed, 18 insertions(+), 15 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 6661d12772a3..80dc5445b50d 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -1256,8 +1256,6 @@ static const struct devlink_ops ice_devlink_ops = { BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), .reload_down = ice_devlink_reload_down, .reload_up = ice_devlink_reload_up, - .port_split = ice_devlink_port_split, - .port_unsplit = ice_devlink_port_unsplit, .eswitch_mode_get = ice_eswitch_mode_get, .eswitch_mode_set = ice_eswitch_mode_set, .info_get = ice_devlink_info_get, @@ -1513,6 +1511,8 @@ ice_devlink_set_port_split_options(struct ice_pf *pf, } static const struct devlink_port_ops ice_devlink_port_ops = { + .port_split = ice_devlink_port_split, + .port_unsplit = ice_devlink_port_unsplit, }; /** diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 605881b17ccc..1ccf3b73ed72 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1723,8 +1723,6 @@ static const struct devlink_ops mlxsw_devlink_ops = { BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), .reload_down = mlxsw_devlink_core_bus_device_reload_down, .reload_up = mlxsw_devlink_core_bus_device_reload_up, - .port_split = mlxsw_devlink_port_split, - .port_unsplit = mlxsw_devlink_port_unsplit, .sb_pool_get = mlxsw_devlink_sb_pool_get, .sb_pool_set = mlxsw_devlink_sb_pool_set, .sb_port_pool_get = mlxsw_devlink_sb_port_pool_get, @@ -3117,6 +3115,8 @@ u64 
mlxsw_core_res_get(struct mlxsw_core *mlxsw_core, EXPORT_SYMBOL(mlxsw_core_res_get); static const struct devlink_port_ops mlxsw_devlink_port_ops = { + .port_split = mlxsw_devlink_port_split, + .port_unsplit = mlxsw_devlink_port_unsplit, }; static int __mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u16 local_port, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index 4e4296ecae7c..8c6954c58a88 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -311,8 +311,6 @@ nfp_devlink_flash_update(struct devlink *devlink, } const struct devlink_ops nfp_devlink_ops = { - .port_split = nfp_devlink_port_split, - .port_unsplit = nfp_devlink_port_unsplit, .sb_pool_get = nfp_devlink_sb_pool_get, .sb_pool_set = nfp_devlink_sb_pool_set, .eswitch_mode_get = nfp_devlink_eswitch_mode_get, @@ -322,6 +320,8 @@ const struct devlink_ops nfp_devlink_ops = { }; static const struct devlink_port_ops nfp_devlink_port_ops = { + .port_split = nfp_devlink_port_split, + .port_unsplit = nfp_devlink_port_unsplit, }; int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) diff --git a/include/net/devlink.h b/include/net/devlink.h index a1e230d24f05..fdcb2c55f1b5 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1276,10 +1276,6 @@ struct devlink_ops { struct netlink_ext_ack *extack); int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); - int (*port_split)(struct devlink *devlink, struct devlink_port *port, - unsigned int count, struct netlink_ext_ack *extack); - int (*port_unsplit)(struct devlink *devlink, struct devlink_port *port, - struct netlink_ext_ack *extack); int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index, u16 pool_index, struct devlink_sb_pool_info *pool_info); @@ -1653,8 +1649,15 @@ void devlink_free(struct devlink *devlink); /** * struct devlink_port_ops - Port 
operations + * @port_split: Callback used to split the port into multiple ones. + * @port_unsplit: Callback used to unsplit the port group back into + * a single port. */ struct devlink_port_ops { + int (*port_split)(struct devlink *devlink, struct devlink_port *port, + unsigned int count, struct netlink_ext_ack *extack); + int (*port_unsplit)(struct devlink *devlink, struct devlink_port *port, + struct netlink_ext_ack *extack); }; void devlink_port_init(struct devlink *devlink, diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c index 2295fa542dd8..1ed95c76cf67 100644 --- a/net/devlink/leftover.c +++ b/net/devlink/leftover.c @@ -1320,7 +1320,7 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_SPLIT_COUNT)) return -EINVAL; - if (!devlink->ops->port_split) + if (!devlink_port->ops->port_split) return -EOPNOTSUPP; count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]); @@ -1339,8 +1339,8 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb, return -EINVAL; } - return devlink->ops->port_split(devlink, devlink_port, count, - info->extack); + return devlink_port->ops->port_split(devlink, devlink_port, count, + info->extack); } static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, @@ -1349,9 +1349,9 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb, struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; - if (!devlink->ops->port_unsplit) + if (!devlink_port->ops->port_unsplit) return -EOPNOTSUPP; - return devlink->ops->port_unsplit(devlink, devlink_port, info->extack); + return devlink_port->ops->port_unsplit(devlink, devlink_port, info->extack); } static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, -- cgit v1.2.3 From 8a756d91d26c1fe941d6839b41d385a4f84ac453 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 May 2023 12:28:32 +0200 Subject: mlx4: register devlink port with ops Use newly 
introduced devlink port registration function variant and register the devlink port, passing ops. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx4/main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 28c435ce98d8..369642478fab 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3024,13 +3024,17 @@ no_msi: } } +static const struct devlink_port_ops mlx4_devlink_port_ops = { +}; + static int mlx4_init_port_info(struct mlx4_dev *dev, int port) { struct devlink *devlink = priv_to_devlink(mlx4_priv(dev)); struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; int err; - err = devl_port_register(devlink, &info->devlink_port, port); + err = devl_port_register_with_ops(devlink, &info->devlink_port, port, + &mlx4_devlink_port_ops); if (err) return err; -- cgit v1.2.3 From 65a4c44bf9375a5d13287ee1e389b512e83f37eb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 May 2023 12:28:33 +0200 Subject: devlink: move port_type_set() op into devlink_port_ops Move port_type_set() from devlink_ops into newly introduced devlink_port_ops.
Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx4/main.c | 52 +++++++++++++++---------------- include/net/devlink.h | 5 +-- net/devlink/leftover.c | 5 ++- 3 files changed, 31 insertions(+), 31 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 369642478fab..61286b0d9b0c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3024,7 +3024,33 @@ no_msi: } } +static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port, + enum devlink_port_type port_type) +{ + struct mlx4_port_info *info = container_of(devlink_port, + struct mlx4_port_info, + devlink_port); + enum mlx4_port_type mlx4_port_type; + + switch (port_type) { + case DEVLINK_PORT_TYPE_AUTO: + mlx4_port_type = MLX4_PORT_TYPE_AUTO; + break; + case DEVLINK_PORT_TYPE_ETH: + mlx4_port_type = MLX4_PORT_TYPE_ETH; + break; + case DEVLINK_PORT_TYPE_IB: + mlx4_port_type = MLX4_PORT_TYPE_IB; + break; + default: + return -EOPNOTSUPP; + } + + return __set_port_type(info, mlx4_port_type); +} + static const struct devlink_port_ops mlx4_devlink_port_ops = { + .port_type_set = mlx4_devlink_port_type_set, }; static int mlx4_init_port_info(struct mlx4_dev *dev, int port) @@ -3878,31 +3904,6 @@ err_disable_pdev: return err; } -static int mlx4_devlink_port_type_set(struct devlink_port *devlink_port, - enum devlink_port_type port_type) -{ - struct mlx4_port_info *info = container_of(devlink_port, - struct mlx4_port_info, - devlink_port); - enum mlx4_port_type mlx4_port_type; - - switch (port_type) { - case DEVLINK_PORT_TYPE_AUTO: - mlx4_port_type = MLX4_PORT_TYPE_AUTO; - break; - case DEVLINK_PORT_TYPE_ETH: - mlx4_port_type = MLX4_PORT_TYPE_ETH; - break; - case DEVLINK_PORT_TYPE_IB: - mlx4_port_type = MLX4_PORT_TYPE_IB; - break; - default: - return -EOPNOTSUPP; - } - - return __set_port_type(info, mlx4_port_type); -} - 
static void mlx4_devlink_param_load_driverinit_values(struct devlink *devlink) { struct mlx4_priv *priv = devlink_priv(devlink); @@ -3987,7 +3988,6 @@ static int mlx4_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a } static const struct devlink_ops mlx4_devlink_ops = { - .port_type_set = mlx4_devlink_port_type_set, .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), .reload_down = mlx4_devlink_reload_down, .reload_up = mlx4_devlink_reload_up, diff --git a/include/net/devlink.h b/include/net/devlink.h index fdcb2c55f1b5..0dfadc234b9e 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1274,8 +1274,6 @@ struct devlink_ops { int (*reload_up)(struct devlink *devlink, enum devlink_reload_action action, enum devlink_reload_limit limit, u32 *actions_performed, struct netlink_ext_ack *extack); - int (*port_type_set)(struct devlink_port *devlink_port, - enum devlink_port_type port_type); int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index, u16 pool_index, struct devlink_sb_pool_info *pool_info); @@ -1652,12 +1650,15 @@ void devlink_free(struct devlink *devlink); * @port_split: Callback used to split the port into multiple ones. * @port_unsplit: Callback used to unsplit the port group back into * a single port. + * @port_type_set: Callback used to set a type of a port. 
*/ struct devlink_port_ops { int (*port_split)(struct devlink *devlink, struct devlink_port *port, unsigned int count, struct netlink_ext_ack *extack); int (*port_unsplit)(struct devlink *devlink, struct devlink_port *port, struct netlink_ext_ack *extack); + int (*port_type_set)(struct devlink_port *devlink_port, + enum devlink_port_type port_type); }; void devlink_port_init(struct devlink *devlink, diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c index 1ed95c76cf67..d69e278ae15a 100644 --- a/net/devlink/leftover.c +++ b/net/devlink/leftover.c @@ -1137,14 +1137,13 @@ static int devlink_port_type_set(struct devlink_port *devlink_port, { int err; - if (!devlink_port->devlink->ops->port_type_set) + if (!devlink_port->ops->port_type_set) return -EOPNOTSUPP; if (port_type == devlink_port->type) return 0; - err = devlink_port->devlink->ops->port_type_set(devlink_port, - port_type); + err = devlink_port->ops->port_type_set(devlink_port, port_type); if (err) return err; -- cgit v1.2.3 From 7bfb3d0a83b66567ccf2b19110bbb787c56089aa Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 May 2023 12:28:34 +0200 Subject: sfc: register devlink port with ops Use newly introduced devlink port registration function variant and register devlink port passing ops. 
Signed-off-by: Jiri Pirko Acked-by: Martin Habets Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/efx_devlink.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/efx_devlink.c b/drivers/net/ethernet/sfc/efx_devlink.c index ef9971cbb695..e74f74037405 100644 --- a/drivers/net/ethernet/sfc/efx_devlink.c +++ b/drivers/net/ethernet/sfc/efx_devlink.c @@ -25,6 +25,10 @@ struct efx_devlink { }; #ifdef CONFIG_SFC_SRIOV + +static const struct devlink_port_ops sfc_devlink_port_ops = { +}; + static void efx_devlink_del_port(struct devlink_port *dl_port) { if (!dl_port) @@ -57,7 +61,9 @@ static int efx_devlink_add_port(struct efx_nic *efx, mport->dl_port.index = mport->mport_id; - return devl_port_register(efx->devlink, &mport->dl_port, mport->mport_id); + return devl_port_register_with_ops(efx->devlink, &mport->dl_port, + mport->mport_id, + &sfc_devlink_port_ops); } static int efx_devlink_port_addr_get(struct devlink_port *port, u8 *hw_addr, -- cgit v1.2.3 From aa3aff8264f2a6b463a24a5453aff7afa3483425 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 May 2023 12:28:35 +0200 Subject: mlx5: register devlink ports with ops Use newly introduced devlink port registration function variant and register devlink port passing ops. 
Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 084a910bb4e7..d9c17481b972 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -65,6 +65,9 @@ static void mlx5_esw_dl_port_free(struct devlink_port *dl_port) kfree(dl_port); } +static const struct devlink_port_ops mlx5_esw_dl_port_ops = { +}; + int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num) { struct mlx5_core_dev *dev = esw->dev; @@ -87,7 +90,8 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_ devlink = priv_to_devlink(dev); dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); - err = devl_port_register(devlink, dl_port, dl_port_index); + err = devl_port_register_with_ops(devlink, dl_port, dl_port_index, + &mlx5_esw_dl_port_ops); if (err) goto reg_err; @@ -134,6 +138,9 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1 return IS_ERR(vport) ? 
ERR_CAST(vport) : vport->dl_port; } +static const struct devlink_port_ops mlx5_esw_dl_sf_port_ops = { +}; + int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, u16 vport_num, u32 controller, u32 sfnum) { @@ -156,7 +163,8 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p devlink_port_attrs_pci_sf_set(dl_port, controller, pfnum, sfnum, !!controller); devlink = priv_to_devlink(dev); dl_port_index = mlx5_esw_vport_to_devlink_port_index(dev, vport_num); - err = devl_port_register(devlink, dl_port, dl_port_index); + err = devl_port_register_with_ops(devlink, dl_port, dl_port_index, + &mlx5_esw_dl_sf_port_ops); if (err) return err; -- cgit v1.2.3 From 71c93e37cf3d0528e5d17ecc0b1b07db2086db67 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 May 2023 12:28:36 +0200 Subject: devlink: move port_fn_hw_addr_get/set() to devlink_port_ops Move port_fn_hw_addr_get/set() from devlink_ops into newly introduced devlink_port_ops. 
Signed-off-by: Jiri Pirko Acked-by: Martin Habets Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 - .../ethernet/mellanox/mlx5/core/esw/devlink_port.c | 4 + drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 12 +-- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 12 +-- drivers/net/ethernet/sfc/efx_devlink.c | 86 +++++++++++----------- include/net/devlink.h | 38 ++++------ net/devlink/leftover.c | 15 ++-- 7 files changed, 80 insertions(+), 89 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index bfaec67abf0d..1e96f32bd1b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -310,8 +310,6 @@ static const struct devlink_ops mlx5_devlink_ops = { .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, - .port_function_hw_addr_get = mlx5_devlink_port_function_hw_addr_get, - .port_function_hw_addr_set = mlx5_devlink_port_function_hw_addr_set, .rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set, .rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set, .rate_node_tx_share_set = mlx5_esw_devlink_rate_node_tx_share_set, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index d9c17481b972..78d12c377900 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -66,6 +66,8 @@ static void mlx5_esw_dl_port_free(struct devlink_port *dl_port) } static const struct devlink_port_ops mlx5_esw_dl_port_ops = { + .port_fn_hw_addr_get = mlx5_devlink_port_fn_hw_addr_get, + .port_fn_hw_addr_set = mlx5_devlink_port_fn_hw_addr_set, }; int 
mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num) @@ -139,6 +141,8 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1 } static const struct devlink_port_ops mlx5_esw_dl_sf_port_ops = { + .port_fn_hw_addr_get = mlx5_devlink_port_fn_hw_addr_get, + .port_fn_hw_addr_set = mlx5_devlink_port_fn_hw_addr_set, }; int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 280dc71b032c..f70124ad71cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -506,12 +506,12 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, enum devlink_eswitch_encap_mode *encap); -int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, - u8 *hw_addr, int *hw_addr_len, - struct netlink_ext_ack *extack); -int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, - const u8 *hw_addr, int hw_addr_len, - struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_hw_addr_get(struct devlink_port *port, + u8 *hw_addr, int *hw_addr_len, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_hw_addr_set(struct devlink_port *port, + const u8 *hw_addr, int hw_addr_len, + struct netlink_ext_ack *extack); int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled, struct netlink_ext_ack *extack); int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7a65dcf01dba..1b2f5e273525 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
@@ -3957,9 +3957,9 @@ is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num) mlx5_esw_is_sf_vport(esw, vport_num); } -int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, - u8 *hw_addr, int *hw_addr_len, - struct netlink_ext_ack *extack) +int mlx5_devlink_port_fn_hw_addr_get(struct devlink_port *port, + u8 *hw_addr, int *hw_addr_len, + struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw; struct mlx5_vport *vport; @@ -3986,9 +3986,9 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, return 0; } -int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, - const u8 *hw_addr, int hw_addr_len, - struct netlink_ext_ack *extack) +int mlx5_devlink_port_fn_hw_addr_set(struct devlink_port *port, + const u8 *hw_addr, int hw_addr_len, + struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw; u16 vport_num; diff --git a/drivers/net/ethernet/sfc/efx_devlink.c b/drivers/net/ethernet/sfc/efx_devlink.c index e74f74037405..b82dad50a5b1 100644 --- a/drivers/net/ethernet/sfc/efx_devlink.c +++ b/drivers/net/ethernet/sfc/efx_devlink.c @@ -26,46 +26,6 @@ struct efx_devlink { #ifdef CONFIG_SFC_SRIOV -static const struct devlink_port_ops sfc_devlink_port_ops = { -}; - -static void efx_devlink_del_port(struct devlink_port *dl_port) -{ - if (!dl_port) - return; - devl_port_unregister(dl_port); -} - -static int efx_devlink_add_port(struct efx_nic *efx, - struct mae_mport_desc *mport) -{ - bool external = false; - - if (!ef100_mport_on_local_intf(efx, mport)) - external = true; - - switch (mport->mport_type) { - case MAE_MPORT_DESC_MPORT_TYPE_VNIC: - if (mport->vf_idx != MAE_MPORT_DESC_VF_IDX_NULL) - devlink_port_attrs_pci_vf_set(&mport->dl_port, 0, mport->pf_idx, - mport->vf_idx, - external); - else - devlink_port_attrs_pci_pf_set(&mport->dl_port, 0, mport->pf_idx, - external); - break; - default: - /* MAE_MPORT_DESC_MPORT_ALIAS and UNDEFINED */ - return 0; - } - - mport->dl_port.index = mport->mport_id; - - return 
devl_port_register_with_ops(efx->devlink, &mport->dl_port, - mport->mport_id, - &sfc_devlink_port_ops); -} - static int efx_devlink_port_addr_get(struct devlink_port *port, u8 *hw_addr, int *hw_addr_len, struct netlink_ext_ack *extack) @@ -164,6 +124,48 @@ static int efx_devlink_port_addr_set(struct devlink_port *port, return rc; } +static const struct devlink_port_ops sfc_devlink_port_ops = { + .port_fn_hw_addr_get = efx_devlink_port_addr_get, + .port_fn_hw_addr_set = efx_devlink_port_addr_set, +}; + +static void efx_devlink_del_port(struct devlink_port *dl_port) +{ + if (!dl_port) + return; + devl_port_unregister(dl_port); +} + +static int efx_devlink_add_port(struct efx_nic *efx, + struct mae_mport_desc *mport) +{ + bool external = false; + + if (!ef100_mport_on_local_intf(efx, mport)) + external = true; + + switch (mport->mport_type) { + case MAE_MPORT_DESC_MPORT_TYPE_VNIC: + if (mport->vf_idx != MAE_MPORT_DESC_VF_IDX_NULL) + devlink_port_attrs_pci_vf_set(&mport->dl_port, 0, mport->pf_idx, + mport->vf_idx, + external); + else + devlink_port_attrs_pci_pf_set(&mport->dl_port, 0, mport->pf_idx, + external); + break; + default: + /* MAE_MPORT_DESC_MPORT_ALIAS and UNDEFINED */ + return 0; + } + + mport->dl_port.index = mport->mport_id; + + return devl_port_register_with_ops(efx->devlink, &mport->dl_port, + mport->mport_id, + &sfc_devlink_port_ops); +} + #endif static int efx_devlink_info_nvram_partition(struct efx_nic *efx, @@ -615,10 +617,6 @@ static int efx_devlink_info_get(struct devlink *devlink, static const struct devlink_ops sfc_devlink_ops = { .info_get = efx_devlink_info_get, -#ifdef CONFIG_SFC_SRIOV - .port_function_hw_addr_get = efx_devlink_port_addr_get, - .port_function_hw_addr_set = efx_devlink_port_addr_set, -#endif }; #ifdef CONFIG_SFC_SRIOV diff --git a/include/net/devlink.h b/include/net/devlink.h index 0dfadc234b9e..c580b154cfe4 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1429,28 +1429,6 @@ struct devlink_ops { int 
(*trap_policer_counter_get)(struct devlink *devlink, const struct devlink_trap_policer *policer, u64 *p_drops); - /** - * @port_function_hw_addr_get: Port function's hardware address get function. - * - * Should be used by device drivers to report the hardware address of a function managed - * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port - * function handling for a particular port. - * - * Note: @extack can be NULL when port notifier queries the port function. - */ - int (*port_function_hw_addr_get)(struct devlink_port *port, u8 *hw_addr, - int *hw_addr_len, - struct netlink_ext_ack *extack); - /** - * @port_function_hw_addr_set: Port function's hardware address set function. - * - * Should be used by device drivers to set the hardware address of a function managed - * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port - * function handling for a particular port. - */ - int (*port_function_hw_addr_set)(struct devlink_port *port, - const u8 *hw_addr, int hw_addr_len, - struct netlink_ext_ack *extack); /** * @port_fn_roce_get: Port function's roce get function. * @@ -1651,6 +1629,16 @@ void devlink_free(struct devlink *devlink); * @port_unsplit: Callback used to unsplit the port group back into * a single port. * @port_type_set: Callback used to set a type of a port. + * @port_fn_hw_addr_get: Callback used to get port function's hardware address. + * Should be used by device drivers to report + * the hardware address of a function managed + * by the devlink port. + * @port_fn_hw_addr_set: Callback used to set port function's hardware address. + * Should be used by device drivers to set the hardware + * address of a function managed by the devlink port. + * + * Note: Driver should return -EOPNOTSUPP if it doesn't support + * port function (@port_fn_*) handling for a particular port. 
*/ struct devlink_port_ops { int (*port_split)(struct devlink *devlink, struct devlink_port *port, @@ -1659,6 +1647,12 @@ struct devlink_port_ops { struct netlink_ext_ack *extack); int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); + int (*port_fn_hw_addr_get)(struct devlink_port *port, u8 *hw_addr, + int *hw_addr_len, + struct netlink_ext_ack *extack); + int (*port_fn_hw_addr_set)(struct devlink_port *port, + const u8 *hw_addr, int hw_addr_len, + struct netlink_ext_ack *extack); }; void devlink_port_init(struct devlink *devlink, diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c index d69e278ae15a..ac171ea984cc 100644 --- a/net/devlink/leftover.c +++ b/net/devlink/leftover.c @@ -691,8 +691,7 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, return 0; } -static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops, - struct devlink_port *port, +static int devlink_port_fn_hw_addr_fill(struct devlink_port *port, struct sk_buff *msg, struct netlink_ext_ack *extack, bool *msg_updated) @@ -701,10 +700,10 @@ static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops, int hw_addr_len; int err; - if (!ops->port_function_hw_addr_get) + if (!port->ops->port_fn_hw_addr_get) return 0; - err = ops->port_function_hw_addr_get(port, hw_addr, &hw_addr_len, + err = port->ops->port_fn_hw_addr_get(port, hw_addr, &hw_addr_len, extack); if (err) { if (err == -EOPNOTSUPP) @@ -884,8 +883,7 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por return -EMSGSIZE; ops = port->devlink->ops; - err = devlink_port_fn_hw_addr_fill(ops, port, msg, extack, - &msg_updated); + err = devlink_port_fn_hw_addr_fill(port, msg, extack, &msg_updated); if (err) goto out; err = devlink_port_fn_caps_fill(ops, port, msg, extack, @@ -1156,7 +1154,6 @@ static int devlink_port_function_hw_addr_set(struct devlink_port *port, const struct nlattr *attr, struct netlink_ext_ack *extack) { - const struct 
devlink_ops *ops = port->devlink->ops; const u8 *hw_addr; int hw_addr_len; @@ -1177,7 +1174,7 @@ static int devlink_port_function_hw_addr_set(struct devlink_port *port, } } - return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len, + return port->ops->port_fn_hw_addr_set(port, hw_addr, hw_addr_len, extack); } @@ -1201,7 +1198,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port, struct nlattr *attr; if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] && - !ops->port_function_hw_addr_set) { + !devlink_port->ops->port_fn_hw_addr_set) { NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR], "Port doesn't support function attributes"); return -EOPNOTSUPP; -- cgit v1.2.3 From 933c13275c4933ad68f107ed93ae9b0658b19ad0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 May 2023 12:28:37 +0200 Subject: devlink: move port_fn_roce_get/set() to devlink_port_ops Move port_fn_roce_get/set() from devlink_ops into newly introduced devlink_port_ops. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 -- .../ethernet/mellanox/mlx5/core/esw/devlink_port.c | 4 +++ include/net/devlink.h | 31 +++++++++------------- net/devlink/leftover.c | 17 ++++++------ 4 files changed, 25 insertions(+), 29 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 1e96f32bd1b5..d63ec466dcd6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -317,8 +317,6 @@ static const struct devlink_ops mlx5_devlink_ops = { .rate_node_new = mlx5_esw_devlink_rate_node_new, .rate_node_del = mlx5_esw_devlink_rate_node_del, .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set, - .port_fn_roce_get = mlx5_devlink_port_fn_roce_get, - .port_fn_roce_set = mlx5_devlink_port_fn_roce_set, .port_fn_migratable_get = 
mlx5_devlink_port_fn_migratable_get, .port_fn_migratable_set = mlx5_devlink_port_fn_migratable_set, #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 78d12c377900..9011619e1fdd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -68,6 +68,8 @@ static void mlx5_esw_dl_port_free(struct devlink_port *dl_port) static const struct devlink_port_ops mlx5_esw_dl_port_ops = { .port_fn_hw_addr_get = mlx5_devlink_port_fn_hw_addr_get, .port_fn_hw_addr_set = mlx5_devlink_port_fn_hw_addr_set, + .port_fn_roce_get = mlx5_devlink_port_fn_roce_get, + .port_fn_roce_set = mlx5_devlink_port_fn_roce_set, }; int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num) @@ -143,6 +145,8 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1 static const struct devlink_port_ops mlx5_esw_dl_sf_port_ops = { .port_fn_hw_addr_get = mlx5_devlink_port_fn_hw_addr_get, .port_fn_hw_addr_set = mlx5_devlink_port_fn_hw_addr_set, + .port_fn_roce_get = mlx5_devlink_port_fn_roce_get, + .port_fn_roce_set = mlx5_devlink_port_fn_roce_set, }; int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, diff --git a/include/net/devlink.h b/include/net/devlink.h index c580b154cfe4..b8e8ea850562 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1429,24 +1429,6 @@ struct devlink_ops { int (*trap_policer_counter_get)(struct devlink *devlink, const struct devlink_trap_policer *policer, u64 *p_drops); - /** - * @port_fn_roce_get: Port function's roce get function. - * - * Query RoCE state of a function managed by the devlink port. - * Return -EOPNOTSUPP if port function RoCE handling is not supported. 
- */ - int (*port_fn_roce_get)(struct devlink_port *devlink_port, - bool *is_enable, - struct netlink_ext_ack *extack); - /** - * @port_fn_roce_set: Port function's roce set function. - * - * Enable/Disable the RoCE state of a function managed by the devlink - * port. - * Return -EOPNOTSUPP if port function RoCE handling is not supported. - */ - int (*port_fn_roce_set)(struct devlink_port *devlink_port, - bool enable, struct netlink_ext_ack *extack); /** * @port_fn_migratable_get: Port function's migratable get function. * @@ -1636,6 +1618,14 @@ void devlink_free(struct devlink *devlink); * @port_fn_hw_addr_set: Callback used to set port function's hardware address. * Should be used by device drivers to set the hardware * address of a function managed by the devlink port. + * @port_fn_roce_get: Callback used to get port function's RoCE capability. + * Should be used by device drivers to report + * the current state of RoCE capability of a function + * managed by the devlink port. + * @port_fn_roce_set: Callback used to set port function's RoCE capability. + * Should be used by device drivers to enable/disable + * RoCE capability of a function managed + * by the devlink port. * * Note: Driver should return -EOPNOTSUPP if it doesn't support * port function (@port_fn_*) handling for a particular port. 
@@ -1653,6 +1643,11 @@ struct devlink_port_ops { int (*port_fn_hw_addr_set)(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); + int (*port_fn_roce_get)(struct devlink_port *devlink_port, + bool *is_enable, + struct netlink_ext_ack *extack); + int (*port_fn_roce_set)(struct devlink_port *devlink_port, + bool enable, struct netlink_ext_ack *extack); }; void devlink_port_init(struct devlink *devlink, diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c index ac171ea984cc..e87ca3933a50 100644 --- a/net/devlink/leftover.c +++ b/net/devlink/leftover.c @@ -447,18 +447,18 @@ static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps, caps->value |= cap; } -static int devlink_port_fn_roce_fill(const struct devlink_ops *ops, - struct devlink_port *devlink_port, +static int devlink_port_fn_roce_fill(struct devlink_port *devlink_port, struct nla_bitfield32 *caps, struct netlink_ext_ack *extack) { bool is_enable; int err; - if (!ops->port_fn_roce_get) + if (!devlink_port->ops->port_fn_roce_get) return 0; - err = ops->port_fn_roce_get(devlink_port, &is_enable, extack); + err = devlink_port->ops->port_fn_roce_get(devlink_port, &is_enable, + extack); if (err) { if (err == -EOPNOTSUPP) return 0; @@ -501,7 +501,7 @@ static int devlink_port_fn_caps_fill(const struct devlink_ops *ops, struct nla_bitfield32 caps = {}; int err; - err = devlink_port_fn_roce_fill(ops, devlink_port, &caps, extack); + err = devlink_port_fn_roce_fill(devlink_port, &caps, extack); if (err) return err; @@ -837,9 +837,8 @@ static int devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable, struct netlink_ext_ack *extack) { - const struct devlink_ops *ops = devlink_port->devlink->ops; - - return ops->port_fn_roce_set(devlink_port, enable, extack); + return devlink_port->ops->port_fn_roce_set(devlink_port, enable, + extack); } static int devlink_port_fn_caps_set(struct devlink_port *devlink_port, @@ -1214,7 +1213,7 @@ static int 
devlink_port_function_validate(struct devlink_port *devlink_port, caps = nla_get_bitfield32(attr); if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE && - !ops->port_fn_roce_set) { + !devlink_port->ops->port_fn_roce_set) { NL_SET_ERR_MSG_ATTR(extack, attr, "Port doesn't support RoCE function attribute"); return -EOPNOTSUPP; -- cgit v1.2.3 From 4a490d7154b3d84c7e451502306a53b6607b6566 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 May 2023 12:28:38 +0200 Subject: devlink: move port_fn_migratable_get/set() to devlink_port_ops Move port_fn_migratable_get/set() from devlink_ops into newly introduced devlink_port_ops. Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 -- .../ethernet/mellanox/mlx5/core/esw/devlink_port.c | 2 ++ include/net/devlink.h | 35 +++++++++------------- net/devlink/leftover.c | 23 +++++++------- 4 files changed, 26 insertions(+), 36 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index d63ec466dcd6..678bae618769 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -317,8 +317,6 @@ static const struct devlink_ops mlx5_devlink_ops = { .rate_node_new = mlx5_esw_devlink_rate_node_new, .rate_node_del = mlx5_esw_devlink_rate_node_del, .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set, - .port_fn_migratable_get = mlx5_devlink_port_fn_migratable_get, - .port_fn_migratable_set = mlx5_devlink_port_fn_migratable_set, #endif #ifdef CONFIG_MLX5_SF_MANAGER .port_new = mlx5_devlink_sf_port_new, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 9011619e1fdd..2ececd2b86c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -70,6 +70,8 
@@ static const struct devlink_port_ops mlx5_esw_dl_port_ops = { .port_fn_hw_addr_set = mlx5_devlink_port_fn_hw_addr_set, .port_fn_roce_get = mlx5_devlink_port_fn_roce_get, .port_fn_roce_set = mlx5_devlink_port_fn_roce_set, + .port_fn_migratable_get = mlx5_devlink_port_fn_migratable_get, + .port_fn_migratable_set = mlx5_devlink_port_fn_migratable_set, }; int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_num) diff --git a/include/net/devlink.h b/include/net/devlink.h index b8e8ea850562..25fa952a46a6 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1429,27 +1429,6 @@ struct devlink_ops { int (*trap_policer_counter_get)(struct devlink *devlink, const struct devlink_trap_policer *policer, u64 *p_drops); - /** - * @port_fn_migratable_get: Port function's migratable get function. - * - * Query migratable state of a function managed by the devlink port. - * Return -EOPNOTSUPP if port function migratable handling is not - * supported. - */ - int (*port_fn_migratable_get)(struct devlink_port *devlink_port, - bool *is_enable, - struct netlink_ext_ack *extack); - /** - * @port_fn_migratable_set: Port function's migratable set function. - * - * Enable/Disable migratable state of a function managed by the devlink - * port. - * Return -EOPNOTSUPP if port function migratable handling is not - * supported. - */ - int (*port_fn_migratable_set)(struct devlink_port *devlink_port, - bool enable, - struct netlink_ext_ack *extack); /** * port_new() - Add a new port function of a specified flavor * @devlink: Devlink instance @@ -1626,6 +1605,14 @@ void devlink_free(struct devlink *devlink); * Should be used by device drivers to enable/disable * RoCE capability of a function managed * by the devlink port. + * @port_fn_migratable_get: Callback used to get port function's migratable + * capability. Should be used by device drivers + * to report the current state of migratable capability + * of a function managed by the devlink port. 
+ * @port_fn_migratable_set: Callback used to set port function's migratable + * capability. Should be used by device drivers + * to enable/disable migratable capability of + * a function managed by the devlink port. * * Note: Driver should return -EOPNOTSUPP if it doesn't support * port function (@port_fn_*) handling for a particular port. @@ -1648,6 +1635,12 @@ struct devlink_port_ops { struct netlink_ext_ack *extack); int (*port_fn_roce_set)(struct devlink_port *devlink_port, bool enable, struct netlink_ext_ack *extack); + int (*port_fn_migratable_get)(struct devlink_port *devlink_port, + bool *is_enable, + struct netlink_ext_ack *extack); + int (*port_fn_migratable_set)(struct devlink_port *devlink_port, + bool enable, + struct netlink_ext_ack *extack); }; void devlink_port_init(struct devlink *devlink, diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c index e87ca3933a50..c16451ca744d 100644 --- a/net/devlink/leftover.c +++ b/net/devlink/leftover.c @@ -469,19 +469,19 @@ static int devlink_port_fn_roce_fill(struct devlink_port *devlink_port, return 0; } -static int devlink_port_fn_migratable_fill(const struct devlink_ops *ops, - struct devlink_port *devlink_port, +static int devlink_port_fn_migratable_fill(struct devlink_port *devlink_port, struct nla_bitfield32 *caps, struct netlink_ext_ack *extack) { bool is_enable; int err; - if (!ops->port_fn_migratable_get || + if (!devlink_port->ops->port_fn_migratable_get || devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) return 0; - err = ops->port_fn_migratable_get(devlink_port, &is_enable, extack); + err = devlink_port->ops->port_fn_migratable_get(devlink_port, + &is_enable, extack); if (err) { if (err == -EOPNOTSUPP) return 0; @@ -492,8 +492,7 @@ static int devlink_port_fn_migratable_fill(const struct devlink_ops *ops, return 0; } -static int devlink_port_fn_caps_fill(const struct devlink_ops *ops, - struct devlink_port *devlink_port, +static int devlink_port_fn_caps_fill(struct devlink_port 
*devlink_port, struct sk_buff *msg, struct netlink_ext_ack *extack, bool *msg_updated) @@ -505,7 +504,7 @@ static int devlink_port_fn_caps_fill(const struct devlink_ops *ops, if (err) return err; - err = devlink_port_fn_migratable_fill(ops, devlink_port, &caps, extack); + err = devlink_port_fn_migratable_fill(devlink_port, &caps, extack); if (err) return err; @@ -828,9 +827,8 @@ static int devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable, struct netlink_ext_ack *extack) { - const struct devlink_ops *ops = devlink_port->devlink->ops; - - return ops->port_fn_migratable_set(devlink_port, enable, extack); + return devlink_port->ops->port_fn_migratable_set(devlink_port, enable, + extack); } static int @@ -885,8 +883,7 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por err = devlink_port_fn_hw_addr_fill(port, msg, extack, &msg_updated); if (err) goto out; - err = devlink_port_fn_caps_fill(ops, port, msg, extack, - &msg_updated); + err = devlink_port_fn_caps_fill(port, msg, extack, &msg_updated); if (err) goto out; err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated); @@ -1219,7 +1216,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port, return -EOPNOTSUPP; } if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) { - if (!ops->port_fn_migratable_set) { + if (!devlink_port->ops->port_fn_migratable_set) { NL_SET_ERR_MSG_ATTR(extack, attr, "Port doesn't support migratable function attribute"); return -EOPNOTSUPP; -- cgit v1.2.3 From 216aa67f3e981a0cfb0a7c3b0d4c107823ef6c56 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 May 2023 12:28:39 +0200 Subject: devlink: move port_fn_state_get/set() to devlink_port_ops Move port_fn_state_get/set() from devlink_ops into newly introduced devlink_port_ops. 
Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 - .../ethernet/mellanox/mlx5/core/esw/devlink_port.c | 4 ++ include/net/devlink.h | 45 ++++++++-------------- net/devlink/leftover.c | 19 ++++----- 4 files changed, 26 insertions(+), 44 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 678bae618769..e39fd85ea2f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -321,8 +321,6 @@ static const struct devlink_ops mlx5_devlink_ops = { #ifdef CONFIG_MLX5_SF_MANAGER .port_new = mlx5_devlink_sf_port_new, .port_del = mlx5_devlink_sf_port_del, - .port_fn_state_get = mlx5_devlink_sf_port_fn_state_get, - .port_fn_state_set = mlx5_devlink_sf_port_fn_state_set, #endif .flash_update = mlx5_devlink_flash_update, .info_get = mlx5_devlink_info_get, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 2ececd2b86c8..76c5d6e9d47f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -149,6 +149,10 @@ static const struct devlink_port_ops mlx5_esw_dl_sf_port_ops = { .port_fn_hw_addr_set = mlx5_devlink_port_fn_hw_addr_set, .port_fn_roce_get = mlx5_devlink_port_fn_roce_get, .port_fn_roce_set = mlx5_devlink_port_fn_roce_set, +#ifdef CONFIG_MLX5_SF_MANAGER + .port_fn_state_get = mlx5_devlink_sf_port_fn_state_get, + .port_fn_state_set = mlx5_devlink_sf_port_fn_state_set, +#endif }; int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port, diff --git a/include/net/devlink.h b/include/net/devlink.h index 25fa952a46a6..835989c10395 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1464,36 +1464,6 @@ struct devlink_ops { */ 
int (*port_del)(struct devlink *devlink, struct devlink_port *port, struct netlink_ext_ack *extack); - /** - * port_fn_state_get() - Get the state of a port function - * @devlink: Devlink instance - * @port: The devlink port - * @state: Admin configured state - * @opstate: Current operational state - * @extack: extack for reporting error messages - * - * Reports the admin and operational state of a devlink port function - * - * Return: 0 on success, negative value otherwise. - */ - int (*port_fn_state_get)(struct devlink_port *port, - enum devlink_port_fn_state *state, - enum devlink_port_fn_opstate *opstate, - struct netlink_ext_ack *extack); - /** - * port_fn_state_set() - Set the admin state of a port function - * @devlink: Devlink instance - * @port: The devlink port - * @state: Admin state - * @extack: extack for reporting error messages - * - * Set the admin state of a devlink port function - * - * Return: 0 on success, negative value otherwise. - */ - int (*port_fn_state_set)(struct devlink_port *port, - enum devlink_port_fn_state state, - struct netlink_ext_ack *extack); /** * Rate control callbacks. @@ -1613,6 +1583,14 @@ void devlink_free(struct devlink *devlink); * capability. Should be used by device drivers * to enable/disable migratable capability of * a function managed by the devlink port. + * @port_fn_state_get: Callback used to get port function's state. + * Should be used by device drivers to report + * the current admin and operational state of a + * function managed by the devlink port. + * @port_fn_state_set: Callback used to set port function's state. + * Should be used by device drivers to set + * the admin state of a function managed + * by the devlink port. * * Note: Driver should return -EOPNOTSUPP if it doesn't support * port function (@port_fn_*) handling for a particular port. 
@@ -1641,6 +1619,13 @@ struct devlink_port_ops { int (*port_fn_migratable_set)(struct devlink_port *devlink_port, bool enable, struct netlink_ext_ack *extack); + int (*port_fn_state_get)(struct devlink_port *port, + enum devlink_port_fn_state *state, + enum devlink_port_fn_opstate *opstate, + struct netlink_ext_ack *extack); + int (*port_fn_state_set)(struct devlink_port *port, + enum devlink_port_fn_state state, + struct netlink_ext_ack *extack); }; void devlink_port_init(struct devlink *devlink, diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c index c16451ca744d..36acaa6a6f18 100644 --- a/net/devlink/leftover.c +++ b/net/devlink/leftover.c @@ -787,8 +787,7 @@ devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate) opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED; } -static int devlink_port_fn_state_fill(const struct devlink_ops *ops, - struct devlink_port *port, +static int devlink_port_fn_state_fill(struct devlink_port *port, struct sk_buff *msg, struct netlink_ext_ack *extack, bool *msg_updated) @@ -797,10 +796,10 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops, enum devlink_port_fn_state state; int err; - if (!ops->port_fn_state_get) + if (!port->ops->port_fn_state_get) return 0; - err = ops->port_fn_state_get(port, &state, &opstate, extack); + err = port->ops->port_fn_state_get(port, &state, &opstate, extack); if (err) { if (err == -EOPNOTSUPP) return 0; @@ -870,7 +869,6 @@ static int devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, struct netlink_ext_ack *extack) { - const struct devlink_ops *ops; struct nlattr *function_attr; bool msg_updated = false; int err; @@ -879,14 +877,13 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por if (!function_attr) return -EMSGSIZE; - ops = port->devlink->ops; err = devlink_port_fn_hw_addr_fill(port, msg, extack, &msg_updated); if (err) goto out; err = devlink_port_fn_caps_fill(port, msg, extack, &msg_updated); if 
(err) goto out; - err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated); + err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated); out: if (err || !msg_updated) nla_nest_cancel(msg, function_attr); @@ -1179,18 +1176,15 @@ static int devlink_port_fn_state_set(struct devlink_port *port, struct netlink_ext_ack *extack) { enum devlink_port_fn_state state; - const struct devlink_ops *ops; state = nla_get_u8(attr); - ops = port->devlink->ops; - return ops->port_fn_state_set(port, state, extack); + return port->ops->port_fn_state_set(port, state, extack); } static int devlink_port_function_validate(struct devlink_port *devlink_port, struct nlattr **tb, struct netlink_ext_ack *extack) { - const struct devlink_ops *ops = devlink_port->devlink->ops; struct nlattr *attr; if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] && @@ -1199,7 +1193,8 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port, "Port doesn't support function attributes"); return -EOPNOTSUPP; } - if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) { + if (tb[DEVLINK_PORT_FN_ATTR_STATE] && + !devlink_port->ops->port_fn_state_set) { NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR], "Function does not support state setting"); return -EOPNOTSUPP; -- cgit v1.2.3 From 216ba9f4adc8f2e452edb9a58d2dfbfc11608c00 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 26 May 2023 12:28:40 +0200 Subject: devlink: move port_del() to devlink_port_ops Move port_del() from devlink_ops into newly introduced devlink_port_ops. 
Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 1 - .../ethernet/mellanox/mlx5/core/esw/devlink_port.c | 3 +++ include/net/devlink.h | 22 +++++----------------- net/devlink/leftover.c | 6 +++--- 4 files changed, 11 insertions(+), 21 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index e39fd85ea2f9..63635cc44479 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -320,7 +320,6 @@ static const struct devlink_ops mlx5_devlink_ops = { #endif #ifdef CONFIG_MLX5_SF_MANAGER .port_new = mlx5_devlink_sf_port_new, - .port_del = mlx5_devlink_sf_port_del, #endif .flash_update = mlx5_devlink_flash_update, .info_get = mlx5_devlink_info_get, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 76c5d6e9d47f..f370f67d9e33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -145,6 +145,9 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1 } static const struct devlink_port_ops mlx5_esw_dl_sf_port_ops = { +#ifdef CONFIG_MLX5_SF_MANAGER + .port_del = mlx5_devlink_sf_port_del, +#endif .port_fn_hw_addr_get = mlx5_devlink_port_fn_hw_addr_get, .port_fn_hw_addr_set = mlx5_devlink_port_fn_hw_addr_set, .port_fn_roce_get = mlx5_devlink_port_fn_roce_get, diff --git a/include/net/devlink.h b/include/net/devlink.h index 835989c10395..fe42ad46cf3b 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1447,23 +1447,6 @@ struct devlink_ops { int (*port_new)(struct devlink *devlink, const struct devlink_port_new_attrs *attrs, struct netlink_ext_ack *extack); - /** - * port_del() - Delete a port function - * @devlink: Devlink instance - * 
@port: The devlink port - * @extack: extack for reporting error messages - * - * Devlink core will call this device driver function upon user request - * to delete a previously created port function - * - * Notes: - * - On success, drivers must unregister the corresponding devlink - * port - * - * Return: 0 on success, negative value otherwise. - */ - int (*port_del)(struct devlink *devlink, struct devlink_port *port, - struct netlink_ext_ack *extack); /** * Rate control callbacks. @@ -1560,6 +1543,9 @@ void devlink_free(struct devlink *devlink); * @port_unsplit: Callback used to unsplit the port group back into * a single port. * @port_type_set: Callback used to set a type of a port. + * @port_del: Callback used to delete selected port along with related function. + * Devlink core calls this upon user request to delete + * a port previously created by devlink_ops->port_new(). * @port_fn_hw_addr_get: Callback used to set port function's hardware address. * Should be used by device drivers to report * the hardware address of a function managed @@ -1602,6 +1588,8 @@ struct devlink_port_ops { struct netlink_ext_ack *extack); int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); + int (*port_del)(struct devlink *devlink, struct devlink_port *port, + struct netlink_ext_ack *extack); int (*port_fn_hw_addr_get)(struct devlink_port *port, u8 *hw_addr, int *hw_addr_len, struct netlink_ext_ack *extack); diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c index 36acaa6a6f18..404313128cfb 100644 --- a/net/devlink/leftover.c +++ b/net/devlink/leftover.c @@ -1348,7 +1348,7 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct devlink_port_new_attrs new_attrs = {}; struct devlink *devlink = info->user_ptr[0]; - if (!devlink->ops->port_new || !devlink->ops->port_del) + if (!devlink->ops->port_new) return -EOPNOTSUPP; if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] || @@ -1387,10 +1387,10 @@ static int 
devlink_nl_cmd_port_del_doit(struct sk_buff *skb, struct netlink_ext_ack *extack = info->extack; struct devlink *devlink = info->user_ptr[0]; - if (!devlink->ops->port_del) + if (!devlink_port->ops->port_del) return -EOPNOTSUPP; - return devlink->ops->port_del(devlink, devlink_port, extack); + return devlink_port->ops->port_del(devlink, devlink_port, extack); } static int -- cgit v1.2.3 From 59272ad8d9e8ea6398a96f8c6d62da284bf2ae6e Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 10 Mar 2023 23:41:28 +0100 Subject: bus: fsl-mc: Make remove function return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The value returned by an fsl-mc driver's remove function is mostly ignored. (Only an error message is printed if the value is non-zero and then device removal continues unconditionally.) So change the prototype of the remove function to return no value. This way driver authors are not tempted to assume that passing an error to the upper layer is a good idea. All drivers are adapted accordingly. There is no intended change of behaviour, all callbacks were prepared to return 0 before. 
Signed-off-by: Uwe Kleine-König Reviewed-by: Ioana Ciornei Tested-by: Ioana Ciornei # sanity checks Reviewed-by: Laurentiu Tudor Tested-by: Laurentiu Tudor Signed-off-by: Li Yang --- drivers/bus/fsl-mc/dprc-driver.c | 5 ++--- drivers/bus/fsl-mc/fsl-mc-allocator.c | 5 ++--- drivers/bus/fsl-mc/fsl-mc-bus.c | 5 +---- drivers/crypto/caam/caamalg_qi2.c | 4 +--- drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c | 4 +--- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 4 +--- drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c | 4 +--- drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c | 4 +--- drivers/soc/fsl/dpio/dpio-driver.c | 4 +--- drivers/vfio/fsl-mc/vfio_fsl_mc.c | 3 +-- include/linux/fsl/mc.h | 2 +- 11 files changed, 13 insertions(+), 31 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/bus/fsl-mc/dprc-driver.c b/drivers/bus/fsl-mc/dprc-driver.c index ef4f43f67b80..595d4cecd041 100644 --- a/drivers/bus/fsl-mc/dprc-driver.c +++ b/drivers/bus/fsl-mc/dprc-driver.c @@ -835,13 +835,13 @@ EXPORT_SYMBOL_GPL(dprc_cleanup); * It tears down the interrupts that were configured for the DPRC device. * It destroys the interrupt pool associated with this MC bus. 
*/ -static int dprc_remove(struct fsl_mc_device *mc_dev) +static void dprc_remove(struct fsl_mc_device *mc_dev) { struct fsl_mc_bus *mc_bus = to_fsl_mc_bus(mc_dev); if (!mc_bus->irq_resources) { dev_err(&mc_dev->dev, "No irq resources, so unbinding the device failed\n"); - return 0; + return; } if (dev_get_msi_domain(&mc_dev->dev)) @@ -852,7 +852,6 @@ static int dprc_remove(struct fsl_mc_device *mc_dev) dprc_cleanup(mc_dev); dev_info(&mc_dev->dev, "DPRC device unbound from driver"); - return 0; } static const struct fsl_mc_device_id match_id_table[] = { diff --git a/drivers/bus/fsl-mc/fsl-mc-allocator.c b/drivers/bus/fsl-mc/fsl-mc-allocator.c index 36f70e5e418b..0ad68099684e 100644 --- a/drivers/bus/fsl-mc/fsl-mc-allocator.c +++ b/drivers/bus/fsl-mc/fsl-mc-allocator.c @@ -614,19 +614,18 @@ static int fsl_mc_allocator_probe(struct fsl_mc_device *mc_dev) * fsl_mc_allocator_remove - callback invoked when an allocatable device is * being removed from the system */ -static int fsl_mc_allocator_remove(struct fsl_mc_device *mc_dev) +static void fsl_mc_allocator_remove(struct fsl_mc_device *mc_dev) { int error; if (mc_dev->resource) { error = fsl_mc_resource_pool_remove_device(mc_dev); if (error < 0) - return 0; + return; } dev_dbg(&mc_dev->dev, "Allocatable fsl-mc device unbound from fsl_mc_allocator driver"); - return 0; } static const struct fsl_mc_device_id match_id_table[] = { diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index 410693ff7335..4352745a923c 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -454,11 +454,8 @@ static int fsl_mc_driver_remove(struct device *dev) { struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver); struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev); - int error; - error = mc_drv->remove(mc_dev); - if (error < 0) - dev_err(dev, "%s failed: %d\n", __func__, error); + mc_drv->remove(mc_dev); return 0; } diff --git a/drivers/crypto/caam/caamalg_qi2.c 
b/drivers/crypto/caam/caamalg_qi2.c index 5c8d35edaa1c..9156bbe038b7 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -5402,7 +5402,7 @@ err_dma_mask: return err; } -static int __cold dpaa2_caam_remove(struct fsl_mc_device *ls_dev) +static void __cold dpaa2_caam_remove(struct fsl_mc_device *ls_dev) { struct device *dev; struct dpaa2_caam_priv *priv; @@ -5443,8 +5443,6 @@ static int __cold dpaa2_caam_remove(struct fsl_mc_device *ls_dev) free_percpu(priv->ppriv); fsl_mc_portal_free(priv->mc_io); kmem_cache_destroy(qi_cache); - - return 0; } int dpaa2_caam_enqueue(struct device *dev, struct caam_request *req) diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c index 8dd40d00a672..a42a37634881 100644 --- a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c +++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c @@ -765,7 +765,7 @@ err_mcportal: return err; } -static int dpaa2_qdma_remove(struct fsl_mc_device *ls_dev) +static void dpaa2_qdma_remove(struct fsl_mc_device *ls_dev) { struct dpaa2_qdma_engine *dpaa2_qdma; struct dpaa2_qdma_priv *priv; @@ -787,8 +787,6 @@ static int dpaa2_qdma_remove(struct fsl_mc_device *ls_dev) dma_async_device_unregister(&dpaa2_qdma->dma_dev); kfree(priv); kfree(dpaa2_qdma); - - return 0; } static void dpaa2_qdma_shutdown(struct fsl_mc_device *ls_dev) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index a62cffaf6ff1..a9676d0dece8 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -5025,7 +5025,7 @@ err_wq_alloc: return err; } -static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) +static void dpaa2_eth_remove(struct fsl_mc_device *ls_dev) { struct device *dev; struct net_device *net_dev; @@ -5073,8 +5073,6 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) dev_dbg(net_dev->dev.parent, "Removed interface %s\n", net_dev->name); 
free_netdev(net_dev); - - return 0; } static const struct fsl_mc_device_id dpaa2_eth_match_id_table[] = { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c index 90d23ab1ce9d..4497e3c0456d 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c @@ -219,7 +219,7 @@ err_exit: return err; } -static int dpaa2_ptp_remove(struct fsl_mc_device *mc_dev) +static void dpaa2_ptp_remove(struct fsl_mc_device *mc_dev) { struct device *dev = &mc_dev->dev; struct ptp_qoriq *ptp_qoriq; @@ -232,8 +232,6 @@ static int dpaa2_ptp_remove(struct fsl_mc_device *mc_dev) fsl_mc_free_irqs(mc_dev); dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle); fsl_mc_portal_free(mc_dev->mc_io); - - return 0; } static const struct fsl_mc_device_id dpaa2_ptp_match_id_table[] = { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index f4ae4289c41a..21cc4e52425a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -3221,7 +3221,7 @@ static void dpaa2_switch_teardown(struct fsl_mc_device *sw_dev) dev_warn(dev, "dpsw_close err %d\n", err); } -static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev) +static void dpaa2_switch_remove(struct fsl_mc_device *sw_dev) { struct ethsw_port_priv *port_priv; struct ethsw_core *ethsw; @@ -3252,8 +3252,6 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev) kfree(ethsw); dev_set_drvdata(dev, NULL); - - return 0; } static int dpaa2_switch_probe_port(struct ethsw_core *ethsw, diff --git a/drivers/soc/fsl/dpio/dpio-driver.c b/drivers/soc/fsl/dpio/dpio-driver.c index 09df5302d255..9e3fddd8f5a9 100644 --- a/drivers/soc/fsl/dpio/dpio-driver.c +++ b/drivers/soc/fsl/dpio/dpio-driver.c @@ -270,7 +270,7 @@ static void dpio_teardown_irqs(struct fsl_mc_device *dpio_dev) fsl_mc_free_irqs(dpio_dev); } -static int 
dpaa2_dpio_remove(struct fsl_mc_device *dpio_dev) +static void dpaa2_dpio_remove(struct fsl_mc_device *dpio_dev) { struct device *dev; struct dpio_priv *priv; @@ -299,8 +299,6 @@ static int dpaa2_dpio_remove(struct fsl_mc_device *dpio_dev) err_open: fsl_mc_portal_free(dpio_dev->mc_io); - - return 0; } static const struct fsl_mc_device_id dpaa2_dpio_match_id_table[] = { diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index c89a047a4cd8..f2140e94d41e 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -570,7 +570,7 @@ static void vfio_fsl_mc_release_dev(struct vfio_device *core_vdev) mutex_destroy(&vdev->igate); } -static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev) +static void vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev) { struct device *dev = &mc_dev->dev; struct vfio_fsl_mc_device *vdev = dev_get_drvdata(dev); @@ -578,7 +578,6 @@ static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev) vfio_unregister_group_dev(&vdev->vdev); dprc_remove_devices(mc_dev, NULL, 0); vfio_put_device(&vdev->vdev); - return 0; } static const struct vfio_device_ops vfio_fsl_mc_ops = { diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index a86115bc799c..a1b3de87a3d1 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -48,7 +48,7 @@ struct fsl_mc_driver { struct device_driver driver; const struct fsl_mc_device_id *match_id_table; int (*probe)(struct fsl_mc_device *dev); - int (*remove)(struct fsl_mc_device *dev); + void (*remove)(struct fsl_mc_device *dev); void (*shutdown)(struct fsl_mc_device *dev); int (*suspend)(struct fsl_mc_device *dev, pm_message_t state); int (*resume)(struct fsl_mc_device *dev); -- cgit v1.2.3 From f4356947f0297b0962fdd197672db7edf9f58be6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 29 May 2023 14:48:31 +0300 Subject: flow_offload: Reject matching on layer 2 miss Adjust drivers that support the 'FLOW_DISSECTOR_KEY_META' key to reject 
filters that try to match on the newly added layer 2 miss field. Add an extack message to clearly communicate the failure reason to user space. The following users were not patched: 1. mtk_flow_offload_replace(): Only checks that the key is present, but does not do anything with it. 2. mlx5_tc_ct_set_tuple_match(): Used as part of netfilter offload, which does not make use of the new field, unlike tc. 3. get_netdev_from_rule() in nfp: Likewise. Example: # tc filter add dev swp1 egress pref 1 proto all flower skip_sw l2_miss true action drop Error: mlxsw_spectrum: Can't match on "l2_miss". We have an error talking to the kernel Acked-by: Elad Nachman Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/prestera/prestera_flower.c | 6 ++++++ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++ drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 6 ++++++ drivers/net/ethernet/mscc/ocelot_flower.c | 10 ++++++++++ 4 files changed, 28 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/prestera/prestera_flower.c b/drivers/net/ethernet/marvell/prestera/prestera_flower.c index 91a478b75cbf..3e20e71b0f81 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_flower.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_flower.c @@ -148,6 +148,12 @@ static int prestera_flower_parse_meta(struct prestera_acl_rule *rule, __be16 key, mask; flow_rule_match_meta(f_rule, &match); + + if (match.mask->l2_miss) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\""); + return -EOPNOTSUPP; + } + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported ingress ifindex mask"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e95414ef1f04..1b0906cb57ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2587,6 +2587,12 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev, return 0; flow_rule_match_meta(rule, &match); + + if (match.mask->l2_miss) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\""); + return -EOPNOTSUPP; + } + if (!match.mask->ingress_ifindex) return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 594cdcb90b3d..6fec9223250b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -294,6 +294,12 @@ static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei, return 0; flow_rule_match_meta(rule, &match); + + if (match.mask->l2_miss) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\""); + return -EOPNOTSUPP; + } + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported ingress ifindex mask"); return -EINVAL; diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index ee052404eb55..e0916afcddfb 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -592,6 +592,16 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, return -EOPNOTSUPP; } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) { + struct flow_match_meta match; + + flow_rule_match_meta(rule, &match); + if (match.mask->l2_miss) { + NL_SET_ERR_MSG_MOD(extack, "Can't match on \"l2_miss\""); + return -EOPNOTSUPP; + } + } + /* For VCAP ES0 (egress rewriter) we can match on the ingress port */ if (!ingress) { ret = ocelot_flower_parse_indev(ocelot, port, f, filter); -- cgit v1.2.3 From d04e265096784b4cebeb627b21f0f27410d20dc4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 29 May 2023 14:48:32 +0300 Subject: mlxsw: spectrum_flower: Split iif parsing to a 
separate function Currently, mlxsw only supports the 'ingress_ifindex' field in the 'FLOW_DISSECTOR_KEY_META' key, but subsequent patches are going to add support for the 'l2_miss' field as well. Split the parsing of the 'ingress_ifindex' field to a separate function to avoid nesting. No functional changes intended. Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxsw/spectrum_flower.c | 54 +++++++++++++--------- 1 file changed, 33 insertions(+), 21 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 6fec9223250b..2b0bae847eb9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -281,45 +281,35 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei, - struct flow_cls_offload *f, - struct mlxsw_sp_flow_block *block) +static int +mlxsw_sp_flower_parse_meta_iif(struct mlxsw_sp_acl_rule_info *rulei, + const struct mlxsw_sp_flow_block *block, + const struct flow_match_meta *match, + struct netlink_ext_ack *extack) { - struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *ingress_dev; - struct flow_match_meta match; - - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) - return 0; - - flow_rule_match_meta(rule, &match); - if (match.mask->l2_miss) { - NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\""); - return -EOPNOTSUPP; - } - - if (match.mask->ingress_ifindex != 0xFFFFFFFF) { - NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported ingress ifindex mask"); + if (match->mask->ingress_ifindex != 0xFFFFFFFF) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); return -EINVAL; } ingress_dev = 
__dev_get_by_index(block->net, - match.key->ingress_ifindex); + match->key->ingress_ifindex); if (!ingress_dev) { - NL_SET_ERR_MSG_MOD(f->common.extack, "Can't find specified ingress port to match on"); + NL_SET_ERR_MSG_MOD(extack, "Can't find specified ingress port to match on"); return -EINVAL; } if (!mlxsw_sp_port_dev_check(ingress_dev)) { - NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on non-mlxsw ingress port"); + NL_SET_ERR_MSG_MOD(extack, "Can't match on non-mlxsw ingress port"); return -EINVAL; } mlxsw_sp_port = netdev_priv(ingress_dev); if (mlxsw_sp_port->mlxsw_sp != block->mlxsw_sp) { - NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on a port from different device"); + NL_SET_ERR_MSG_MOD(extack, "Can't match on a port from different device"); return -EINVAL; } @@ -327,9 +317,31 @@ static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei, MLXSW_AFK_ELEMENT_SRC_SYS_PORT, mlxsw_sp_port->local_port, 0xFFFFFFFF); + return 0; } +static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei, + struct flow_cls_offload *f, + struct mlxsw_sp_flow_block *block) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_match_meta match; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) + return 0; + + flow_rule_match_meta(rule, &match); + + if (match.mask->l2_miss) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\""); + return -EOPNOTSUPP; + } + + return mlxsw_sp_flower_parse_meta_iif(rulei, block, &match, + f->common.extack); +} + static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei, struct flow_cls_offload *f) { -- cgit v1.2.3 From 0b9cd74b8d1e07111a048e8eeb15f54f2ed9cbe2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 29 May 2023 14:48:33 +0300 Subject: mlxsw: spectrum_flower: Do not force matching on iif Currently, mlxsw only supports the 'ingress_ifindex' field in the 'FLOW_DISSECTOR_KEY_META' key, but subsequent patches are going to add support for 
the 'l2_miss' field as well. It is valid to only match on 'l2_miss' without 'ingress_ifindex', so do not force matching on it. Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 2b0bae847eb9..9c62c12e410b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -290,6 +290,9 @@ mlxsw_sp_flower_parse_meta_iif(struct mlxsw_sp_acl_rule_info *rulei, struct mlxsw_sp_port *mlxsw_sp_port; struct net_device *ingress_dev; + if (!match->mask->ingress_ifindex) + return 0; + if (match->mask->ingress_ifindex != 0xFFFFFFFF) { NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask"); return -EINVAL; -- cgit v1.2.3 From caa4c58ab5d9078097067cdd8a350ff6796df0ba Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 29 May 2023 14:48:34 +0300 Subject: mlxsw: spectrum_flower: Add ability to match on layer 2 miss Add the 'fdb_miss' key element to supported key blocks and make use of it to match on layer 2 miss. The key is only supported on Spectrum-{2,3,4}. An error is returned for Spectrum-1 since the key element is not present in any of its key blocks. 
Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c | 1 + drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h | 3 ++- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c | 2 ++ drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 6 ++---- 4 files changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c index bd1a51a0a540..f0b2963ebac3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c @@ -42,6 +42,7 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_64_95, 0x34, 4), MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_32_63, 0x38, 4), MLXSW_AFK_ELEMENT_INFO_BUF(DST_IP_0_31, 0x3C, 4), + MLXSW_AFK_ELEMENT_INFO_U32(FDB_MISS, 0x40, 0, 1), }; struct mlxsw_afk { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index 3a037fe47211..65a4abadc7db 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -35,6 +35,7 @@ enum mlxsw_afk_element { MLXSW_AFK_ELEMENT_IP_DSCP, MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB, MLXSW_AFK_ELEMENT_VIRT_ROUTER_LSB, + MLXSW_AFK_ELEMENT_FDB_MISS, MLXSW_AFK_ELEMENT_MAX, }; @@ -69,7 +70,7 @@ struct mlxsw_afk_element_info { MLXSW_AFK_ELEMENT_INFO(MLXSW_AFK_ELEMENT_TYPE_BUF, \ _element, _offset, 0, _size) -#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x40 +#define MLXSW_AFK_ELEMENT_STORAGE_SIZE 0x44 struct mlxsw_afk_element_inst { /* element instance in actual block */ enum mlxsw_afk_element element; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c index 00c32320f891..4dea39f2b304 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c @@ -123,10 +123,12 @@ const struct mlxsw_afk_ops mlxsw_sp1_afk_ops = { }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_0[] = { + MLXSW_AFK_ELEMENT_INST_U32(FDB_MISS, 0x00, 3, 1), MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x04, 4), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_1[] = { + MLXSW_AFK_ELEMENT_INST_U32(FDB_MISS, 0x00, 3, 1), MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x04, 4), }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 9c62c12e410b..72917f09e806 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -336,10 +336,8 @@ static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei, flow_rule_match_meta(rule, &match); - if (match.mask->l2_miss) { - NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on \"l2_miss\""); - return -EOPNOTSUPP; - } + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_FDB_MISS, + match.key->l2_miss, match.mask->l2_miss); return mlxsw_sp_flower_parse_meta_iif(rulei, block, &match, f->common.extack); -- cgit v1.2.3 From 2d800bc500fb3fb07a0fb42e2d0a1356fb9e1e8f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 30 May 2023 12:19:45 +0300 Subject: net/sched: taprio: replace tc_taprio_qopt_offload :: enable with a "cmd" enum Inspired from struct flow_cls_offload :: cmd, in order for taprio to be able to report statistics (which is future work), it seems that we need to drill one step further with the ndo_setup_tc(TC_SETUP_QDISC_TAPRIO) multiplexing, and pass the command as part of the common portion of the muxed structure. 
Since we already have an "enable" variable in tc_taprio_qopt_offload, refactor all drivers to check for "cmd" instead of "enable", and reject every other command except "replace" and "destroy" - to be future proof. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur # for lan966x Acked-by: Kurt Kanzenbach # hellcreek Reviewed-by: Muhammad Husaini Zulkifli Reviewed-by: Gerhard Engleder Signed-off-by: David S. Miller --- drivers/net/dsa/hirschmann/hellcreek.c | 14 +++++++++----- drivers/net/dsa/ocelot/felix_vsc9959.c | 4 +++- drivers/net/dsa/sja1105/sja1105_tas.c | 7 +++++-- drivers/net/ethernet/engleder/tsnep_selftests.c | 12 ++++++------ drivers/net/ethernet/engleder/tsnep_tc.c | 4 +++- drivers/net/ethernet/freescale/enetc/enetc_qos.c | 6 +++++- drivers/net/ethernet/intel/igc/igc_main.c | 13 +++++++++++-- drivers/net/ethernet/microchip/lan966x/lan966x_tc.c | 10 ++++++++-- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 7 +++++-- drivers/net/ethernet/ti/am65-cpsw-qos.c | 11 ++++++++--- include/net/pkt_sched.h | 7 ++++++- net/sched/sch_taprio.c | 4 ++-- 12 files changed, 71 insertions(+), 28 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 595a548bb0a8..af50001ccdd4 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -1885,13 +1885,17 @@ static int hellcreek_port_setup_tc(struct dsa_switch *ds, int port, case TC_SETUP_QDISC_TAPRIO: { struct tc_taprio_qopt_offload *taprio = type_data; - if (!hellcreek_validate_schedule(hellcreek, taprio)) - return -EOPNOTSUPP; + switch (taprio->cmd) { + case TAPRIO_CMD_REPLACE: + if (!hellcreek_validate_schedule(hellcreek, taprio)) + return -EOPNOTSUPP; - if (taprio->enable) return hellcreek_port_set_schedule(ds, port, taprio); - - return hellcreek_port_del_schedule(ds, port); + case TAPRIO_CMD_DESTROY: + return hellcreek_port_del_schedule(ds, port); + default: + return 
-EOPNOTSUPP; + } } default: return -EOPNOTSUPP; diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 030738fef60e..5de6a27052fc 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1411,7 +1411,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, mutex_lock(&ocelot->tas_lock); - if (!taprio->enable) { + if (taprio->cmd == TAPRIO_CMD_DESTROY) { ocelot_port_mqprio(ocelot, port, &taprio->mqprio); ocelot_rmw_rix(ocelot, 0, QSYS_TAG_CONFIG_ENABLE, QSYS_TAG_CONFIG, port); @@ -1423,6 +1423,8 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, mutex_unlock(&ocelot->tas_lock); return 0; + } else if (taprio->cmd != TAPRIO_CMD_REPLACE) { + return -EOPNOTSUPP; } ret = ocelot_port_mqprio(ocelot, port, &taprio->mqprio); diff --git a/drivers/net/dsa/sja1105/sja1105_tas.c b/drivers/net/dsa/sja1105/sja1105_tas.c index e6153848a950..d7818710bc02 100644 --- a/drivers/net/dsa/sja1105/sja1105_tas.c +++ b/drivers/net/dsa/sja1105/sja1105_tas.c @@ -516,10 +516,11 @@ int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port, /* Can't change an already configured port (must delete qdisc first). * Can't delete the qdisc from an unconfigured port. 
*/ - if (!!tas_data->offload[port] == admin->enable) + if ((!!tas_data->offload[port] && admin->cmd == TAPRIO_CMD_REPLACE) || + (!tas_data->offload[port] && admin->cmd == TAPRIO_CMD_DESTROY)) return -EINVAL; - if (!admin->enable) { + if (admin->cmd == TAPRIO_CMD_DESTROY) { taprio_offload_free(tas_data->offload[port]); tas_data->offload[port] = NULL; @@ -528,6 +529,8 @@ int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port, return rc; return sja1105_static_config_reload(priv, SJA1105_SCHEDULING); + } else if (admin->cmd != TAPRIO_CMD_REPLACE) { + return -EOPNOTSUPP; } /* The cycle time extension is the amount of time the last cycle from diff --git a/drivers/net/ethernet/engleder/tsnep_selftests.c b/drivers/net/ethernet/engleder/tsnep_selftests.c index 1581d6b22232..8a9145f93147 100644 --- a/drivers/net/ethernet/engleder/tsnep_selftests.c +++ b/drivers/net/ethernet/engleder/tsnep_selftests.c @@ -329,7 +329,7 @@ static bool disable_taprio(struct tsnep_adapter *adapter) int retval; memset(&qopt, 0, sizeof(qopt)); - qopt.enable = 0; + qopt.cmd = TAPRIO_CMD_DESTROY; retval = tsnep_tc_setup(adapter->netdev, TC_SETUP_QDISC_TAPRIO, &qopt); if (retval) return false; @@ -360,7 +360,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter) for (i = 0; i < 255; i++) qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES; - qopt->enable = 1; + qopt->cmd = TAPRIO_CMD_REPLACE; qopt->base_time = ktime_set(0, 0); qopt->cycle_time = 1500000; qopt->cycle_time_extension = 0; @@ -382,7 +382,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter) if (!run_taprio(adapter, qopt, 100)) goto failed; - qopt->enable = 1; + qopt->cmd = TAPRIO_CMD_REPLACE; qopt->base_time = ktime_set(0, 0); qopt->cycle_time = 411854; qopt->cycle_time_extension = 0; @@ -406,7 +406,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter) if (!run_taprio(adapter, qopt, 100)) goto failed; - qopt->enable = 1; + qopt->cmd = TAPRIO_CMD_REPLACE; qopt->base_time = ktime_set(0, 0); 
delay_base_time(adapter, qopt, 12); qopt->cycle_time = 125000; @@ -457,7 +457,7 @@ static bool tsnep_test_taprio_change(struct tsnep_adapter *adapter) for (i = 0; i < 255; i++) qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES; - qopt->enable = 1; + qopt->cmd = TAPRIO_CMD_REPLACE; qopt->base_time = ktime_set(0, 0); qopt->cycle_time = 100000; qopt->cycle_time_extension = 0; @@ -610,7 +610,7 @@ static bool tsnep_test_taprio_extension(struct tsnep_adapter *adapter) for (i = 0; i < 255; i++) qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES; - qopt->enable = 1; + qopt->cmd = TAPRIO_CMD_REPLACE; qopt->base_time = ktime_set(0, 0); qopt->cycle_time = 100000; qopt->cycle_time_extension = 50000; diff --git a/drivers/net/ethernet/engleder/tsnep_tc.c b/drivers/net/ethernet/engleder/tsnep_tc.c index d083e6684f12..745b191a5540 100644 --- a/drivers/net/ethernet/engleder/tsnep_tc.c +++ b/drivers/net/ethernet/engleder/tsnep_tc.c @@ -325,7 +325,7 @@ static int tsnep_taprio(struct tsnep_adapter *adapter, if (!adapter->gate_control) return -EOPNOTSUPP; - if (!qopt->enable) { + if (qopt->cmd == TAPRIO_CMD_DESTROY) { /* disable gate control if active */ mutex_lock(&adapter->gate_control_lock); @@ -337,6 +337,8 @@ static int tsnep_taprio(struct tsnep_adapter *adapter, mutex_unlock(&adapter->gate_control_lock); return 0; + } else if (qopt->cmd != TAPRIO_CMD_REPLACE) { + return -EOPNOTSUPP; } retval = tsnep_validate_gcl(qopt); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 83c27bbbc6ed..7aad824f4da7 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -65,7 +65,7 @@ static int enetc_setup_taprio(struct net_device *ndev, gcl_len = admin_conf->num_entries; tge = enetc_rd(hw, ENETC_PTGCR); - if (!admin_conf->enable) { + if (admin_conf->cmd == TAPRIO_CMD_DESTROY) { enetc_wr(hw, ENETC_PTGCR, tge & ~ENETC_PTGCR_TGE); enetc_reset_ptcmsdur(hw); @@ -138,6 
+138,10 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) struct enetc_ndev_priv *priv = netdev_priv(ndev); int err, i; + if (taprio->cmd != TAPRIO_CMD_REPLACE && + taprio->cmd != TAPRIO_CMD_DESTROY) + return -EOPNOTSUPP; + /* TSD and Qbv are mutually exclusive in hardware */ for (i = 0; i < priv->num_tx_rings; i++) if (priv->tx_ring[i]->tsd_enable) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index c5ef1edcf548..88145c30c919 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6113,9 +6113,18 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, size_t n; int i; - adapter->qbv_enable = qopt->enable; + switch (qopt->cmd) { + case TAPRIO_CMD_REPLACE: + adapter->qbv_enable = true; + break; + case TAPRIO_CMD_DESTROY: + adapter->qbv_enable = false; + break; + default: + return -EOPNOTSUPP; + } - if (!qopt->enable) + if (!adapter->qbv_enable) return igc_tsn_clear_schedule(adapter); if (qopt->base_time < 0) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c index cf0cc7562d04..ee652f2d2359 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c @@ -21,8 +21,14 @@ static int lan966x_tc_setup_qdisc_mqprio(struct lan966x_port *port, static int lan966x_tc_setup_qdisc_taprio(struct lan966x_port *port, struct tc_taprio_qopt_offload *taprio) { - return taprio->enable ? 
lan966x_taprio_add(port, taprio) : - lan966x_taprio_del(port); + switch (taprio->cmd) { + case TAPRIO_CMD_REPLACE: + return lan966x_taprio_add(port, taprio); + case TAPRIO_CMD_DESTROY: + return lan966x_taprio_del(port); + default: + return -EOPNOTSUPP; + } } static int lan966x_tc_setup_qdisc_tbf(struct lan966x_port *port, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 9d55226479b4..ac41ef4cbd2f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -966,8 +966,11 @@ static int tc_setup_taprio(struct stmmac_priv *priv, return -EOPNOTSUPP; } - if (!qopt->enable) + if (qopt->cmd == TAPRIO_CMD_DESTROY) goto disable; + else if (qopt->cmd != TAPRIO_CMD_REPLACE) + return -EOPNOTSUPP; + if (qopt->num_entries >= dep) return -EINVAL; if (!qopt->cycle_time) @@ -988,7 +991,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, mutex_lock(&priv->plat->est->lock); priv->plat->est->gcl_size = size; - priv->plat->est->enable = qopt->enable; + priv->plat->est->enable = qopt->cmd == TAPRIO_CMD_REPLACE; mutex_unlock(&priv->plat->est->lock); for (i = 0; i < size; i++) { diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c index 3a908db6e5b2..eced87fa261c 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-qos.c +++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c @@ -450,7 +450,7 @@ static int am65_cpsw_configure_taprio(struct net_device *ndev, am65_cpsw_est_update_state(ndev); - if (!est_new->taprio.enable) { + if (est_new->taprio.cmd == TAPRIO_CMD_DESTROY) { am65_cpsw_stop_est(ndev); return ret; } @@ -476,7 +476,7 @@ static int am65_cpsw_configure_taprio(struct net_device *ndev, am65_cpsw_est_set_sched_list(ndev, est_new); am65_cpsw_port_est_assign_buf_num(ndev, est_new->buf); - am65_cpsw_est_set(ndev, est_new->taprio.enable); + am65_cpsw_est_set(ndev, est_new->taprio.cmd == TAPRIO_CMD_REPLACE); if (tact == TACT_PROG) { 
ret = am65_cpsw_timer_set(ndev, est_new); @@ -520,7 +520,7 @@ static int am65_cpsw_set_taprio(struct net_device *ndev, void *type_data) am65_cpsw_cp_taprio(taprio, &est_new->taprio); ret = am65_cpsw_configure_taprio(ndev, est_new); if (!ret) { - if (taprio->enable) { + if (taprio->cmd == TAPRIO_CMD_REPLACE) { devm_kfree(&ndev->dev, port->qos.est_admin); port->qos.est_admin = est_new; @@ -564,8 +564,13 @@ purge_est: static int am65_cpsw_setup_taprio(struct net_device *ndev, void *type_data) { struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + struct tc_taprio_qopt_offload *taprio = type_data; struct am65_cpsw_common *common = port->common; + if (taprio->cmd != TAPRIO_CMD_REPLACE && + taprio->cmd != TAPRIO_CMD_DESTROY) + return -EOPNOTSUPP; + if (!IS_ENABLED(CONFIG_TI_AM65_CPSW_TAS)) return -ENODEV; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index f436688b6efc..f5fb11da357b 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -185,6 +185,11 @@ struct tc_taprio_caps { bool broken_mqprio:1; }; +enum tc_taprio_qopt_cmd { + TAPRIO_CMD_REPLACE, + TAPRIO_CMD_DESTROY, +}; + struct tc_taprio_sched_entry { u8 command; /* TC_TAPRIO_CMD_* */ @@ -196,7 +201,7 @@ struct tc_taprio_sched_entry { struct tc_taprio_qopt_offload { struct tc_mqprio_qopt_offload mqprio; struct netlink_ext_ack *extack; - u8 enable; + enum tc_taprio_qopt_cmd cmd; ktime_t base_time; u64 cycle_time; u64 cycle_time_extension; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index d29e6785854d..06bf4c6355a5 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1524,7 +1524,7 @@ static int taprio_enable_offload(struct net_device *dev, "Not enough memory for enabling offload mode"); return -ENOMEM; } - offload->enable = 1; + offload->cmd = TAPRIO_CMD_REPLACE; offload->extack = extack; mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt); offload->mqprio.extack = extack; @@ -1572,7 +1572,7 @@ static int taprio_disable_offload(struct net_device 
*dev, "Not enough memory to disable offload mode"); return -ENOMEM; } - offload->enable = 0; + offload->cmd = TAPRIO_CMD_DESTROY; err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); if (err < 0) { -- cgit v1.2.3 From 5353599aa74524acbf48c5e78683534f6bdd1ed3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 30 May 2023 12:19:47 +0300 Subject: net: enetc: refactor enetc_setup_tc_taprio() to have a switch/case for cmd Make enetc_setup_tc_taprio() more amenable to future extensions, like reporting statistics. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 3 +- drivers/net/ethernet/freescale/enetc/enetc.h | 1 + drivers/net/ethernet/freescale/enetc/enetc_qos.c | 79 +++++++++++++++--------- 3 files changed, 54 insertions(+), 29 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 63854294ac33..3aa31a760657 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -2624,7 +2624,7 @@ static void enetc_debug_tx_ring_prios(struct enetc_ndev_priv *priv) priv->tx_ring[i]->prio); } -static void enetc_reset_tc_mqprio(struct net_device *ndev) +void enetc_reset_tc_mqprio(struct net_device *ndev) { struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_hw *hw = &priv->si->hw; @@ -2649,6 +2649,7 @@ static void enetc_reset_tc_mqprio(struct net_device *ndev) enetc_change_preemptible_tcs(priv, 0); } +EXPORT_SYMBOL_GPL(enetc_reset_tc_mqprio); int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data) { diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index c97a8e3d7a7f..8577cf7699a0 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -429,6 +429,7 @@ struct net_device_stats *enetc_get_stats(struct net_device *ndev); 
void enetc_set_features(struct net_device *ndev, netdev_features_t features); int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd); int enetc_setup_tc_mqprio(struct net_device *ndev, void *type_data); +void enetc_reset_tc_mqprio(struct net_device *ndev); int enetc_setup_bpf(struct net_device *ndev, struct netdev_bpf *bpf); int enetc_xdp_xmit(struct net_device *ndev, int num_frames, struct xdp_frame **frames, u32 flags); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 7aad824f4da7..2b8fdfffd02d 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -43,10 +43,9 @@ void enetc_sched_speed_set(struct enetc_ndev_priv *priv, int speed) enetc_port_wr(hw, ENETC_PMR, (tmp & ~ENETC_PMR_PSPEED_MASK) | pspeed); } -static int enetc_setup_taprio(struct net_device *ndev, +static int enetc_setup_taprio(struct enetc_ndev_priv *priv, struct tc_taprio_qopt_offload *admin_conf) { - struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_hw *hw = &priv->si->hw; struct enetc_cbd cbd = {.cmd = 0}; struct tgs_gcl_conf *gcl_config; @@ -60,19 +59,13 @@ static int enetc_setup_taprio(struct net_device *ndev, int err; int i; + /* TSD and Qbv are mutually exclusive in hardware */ + for (i = 0; i < priv->num_tx_rings; i++) + if (priv->tx_ring[i]->tsd_enable) + return -EBUSY; + if (admin_conf->num_entries > enetc_get_max_gcl_len(hw)) return -EINVAL; - gcl_len = admin_conf->num_entries; - - tge = enetc_rd(hw, ENETC_PTGCR); - if (admin_conf->cmd == TAPRIO_CMD_DESTROY) { - enetc_wr(hw, ENETC_PTGCR, tge & ~ENETC_PTGCR_TGE); - enetc_reset_ptcmsdur(hw); - - priv->active_offloads &= ~ENETC_F_QBV; - - return 0; - } if (admin_conf->cycle_time > U32_MAX || admin_conf->cycle_time_extension > U32_MAX) @@ -82,6 +75,7 @@ static int enetc_setup_taprio(struct net_device *ndev, * control BD descriptor. 
*/ gcl_config = &cbd.gcl_conf; + gcl_len = admin_conf->num_entries; data_size = struct_size(gcl_data, entry, gcl_len); tmp = enetc_cbd_alloc_data_mem(priv->si, &cbd, data_size, @@ -115,6 +109,7 @@ static int enetc_setup_taprio(struct net_device *ndev, cbd.cls = BDCR_CMD_PORT_GCL; cbd.status_flags = 0; + tge = enetc_rd(hw, ENETC_PTGCR); enetc_wr(hw, ENETC_PTGCR, tge | ENETC_PTGCR_TGE); err = enetc_send_cmd(priv->si, &cbd); @@ -132,29 +127,57 @@ static int enetc_setup_taprio(struct net_device *ndev, return 0; } -int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) +static void enetc_reset_taprio(struct enetc_ndev_priv *priv) +{ + struct enetc_hw *hw = &priv->si->hw; + u32 val; + + val = enetc_rd(hw, ENETC_PTGCR); + enetc_wr(hw, ENETC_PTGCR, val & ~ENETC_PTGCR_TGE); + enetc_reset_ptcmsdur(hw); + + priv->active_offloads &= ~ENETC_F_QBV; +} + +static void enetc_taprio_destroy(struct net_device *ndev) { - struct tc_taprio_qopt_offload *taprio = type_data; struct enetc_ndev_priv *priv = netdev_priv(ndev); - int err, i; - if (taprio->cmd != TAPRIO_CMD_REPLACE && - taprio->cmd != TAPRIO_CMD_DESTROY) - return -EOPNOTSUPP; + enetc_reset_taprio(priv); + enetc_reset_tc_mqprio(ndev); +} - /* TSD and Qbv are mutually exclusive in hardware */ - for (i = 0; i < priv->num_tx_rings; i++) - if (priv->tx_ring[i]->tsd_enable) - return -EBUSY; +static int enetc_taprio_replace(struct net_device *ndev, + struct tc_taprio_qopt_offload *offload) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + int err; - err = enetc_setup_tc_mqprio(ndev, &taprio->mqprio); + err = enetc_setup_tc_mqprio(ndev, &offload->mqprio); if (err) return err; - err = enetc_setup_taprio(ndev, taprio); - if (err) { - taprio->mqprio.qopt.num_tc = 0; - enetc_setup_tc_mqprio(ndev, &taprio->mqprio); + err = enetc_setup_taprio(priv, offload); + if (err) + enetc_reset_tc_mqprio(ndev); + + return err; +} + +int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) +{ + struct 
tc_taprio_qopt_offload *offload = type_data; + int err = 0; + + switch (offload->cmd) { + case TAPRIO_CMD_REPLACE: + err = enetc_taprio_replace(ndev, offload); + break; + case TAPRIO_CMD_DESTROY: + enetc_taprio_destroy(ndev); + break; + default: + err = -EOPNOTSUPP; } return err; -- cgit v1.2.3 From 4802fca8d1af9687a0fd71b729d96726f05192ad Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 30 May 2023 12:19:48 +0300 Subject: net: enetc: report statistics counters for taprio Report the "win_drop" counter from the unstructured ethtool -S as TCA_TAPRIO_OFFLOAD_STATS_WINDOW_DROPS to the Qdisc layer. It is available both as a global counter as well as a per-TC one. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc_qos.c | 35 ++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 2b8fdfffd02d..71157eba1fbe 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -147,6 +147,35 @@ static void enetc_taprio_destroy(struct net_device *ndev) enetc_reset_tc_mqprio(ndev); } +static void enetc_taprio_stats(struct net_device *ndev, + struct tc_taprio_qopt_stats *stats) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + u64 window_drops = 0; + int i; + + for (i = 0; i < priv->num_tx_rings; i++) + window_drops += priv->tx_ring[i]->stats.win_drop; + + stats->window_drops = window_drops; +} + +static void enetc_taprio_tc_stats(struct net_device *ndev, + struct tc_taprio_qopt_tc_stats *tc_stats) +{ + struct tc_taprio_qopt_stats *stats = &tc_stats->stats; + struct enetc_ndev_priv *priv = netdev_priv(ndev); + int tc = tc_stats->tc; + u64 window_drops = 0; + int i; + + for (i = 0; i < priv->num_tx_rings; i++) + if (priv->tx_ring[i]->prio == tc) + window_drops += priv->tx_ring[i]->stats.win_drop; + + 
stats->window_drops = window_drops; +} + static int enetc_taprio_replace(struct net_device *ndev, struct tc_taprio_qopt_offload *offload) { @@ -176,6 +205,12 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) case TAPRIO_CMD_DESTROY: enetc_taprio_destroy(ndev); break; + case TAPRIO_CMD_STATS: + enetc_taprio_stats(ndev, &offload->stats); + break; + case TAPRIO_CMD_TC_STATS: + enetc_taprio_tc_stats(ndev, &offload->tc_stats); + break; default: err = -EOPNOTSUPP; } -- cgit v1.2.3 From 1c4c769cdf682c63d3b10cb241f4a96ebad2f215 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 30 May 2023 11:59:34 +0300 Subject: net/mlx5: Remove rmap also in case dynamic MSIX not supported mlx5 adds IRQs to rmap upon MSIX request, and mlx5 removes rmap from MSIX only if msi_map.index is populated. However, msi_map.index is populated only when dynamic MSIX is supported. This results in freeing IRQs without removing them from rmap, which triggers the below WARN_ON[1]. rmap is a feature which has no relation to dynamic MSIX. Hence, remove the check of msi_map.index when removing IRQ from rmap. 
[1] [ 200.307160 ] WARNING: CPU: 20 PID: 1702 at kernel/irq/manage.c:2034 free_irq+0x2ac/0x358 [ 200.316990 ] CPU: 20 PID: 1702 Comm: modprobe Not tainted 6.4.0-rc3_for_upstream_min_debug_2023_05_24_14_02 #1 [ 200.318939 ] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 [ 200.321659 ] pc : free_irq+0x2ac/0x358 [ 200.322400 ] lr : free_irq+0x20/0x358 [ 200.337865 ] Call trace: [ 200.338360 ] free_irq+0x2ac/0x358 [ 200.339029 ] irq_release+0x58/0xd0 [mlx5_core] [ 200.340093 ] mlx5_irqs_release_vectors+0x80/0xb0 [mlx5_core] [ 200.341344 ] destroy_comp_eqs+0x120/0x170 [mlx5_core] [ 200.342469 ] mlx5_eq_table_destroy+0x1c/0x38 [mlx5_core] [ 200.343645 ] mlx5_unload+0x8c/0xc8 [mlx5_core] [ 200.344652 ] mlx5_uninit_one+0x78/0x118 [mlx5_core] [ 200.345745 ] remove_one+0x80/0x108 [mlx5_core] [ 200.346752 ] pci_device_remove+0x40/0xd8 [ 200.347554 ] device_remove+0x50/0x88 [ 200.348272 ] device_release_driver_internal+0x1c4/0x228 [ 200.349312 ] driver_detach+0x54/0xa0 [ 200.350030 ] bus_remove_driver+0x74/0x100 [ 200.350833 ] driver_unregister+0x34/0x68 [ 200.351619 ] pci_unregister_driver+0x28/0xa0 [ 200.352476 ] mlx5_cleanup+0x14/0x2210 [mlx5_core] [ 200.353536 ] __arm64_sys_delete_module+0x190/0x2e8 [ 200.354495 ] el0_svc_common.constprop.0+0x6c/0x1d0 [ 200.355455 ] do_el0_svc+0x38/0x98 [ 200.356122 ] el0_svc+0x1c/0x80 [ 200.356739 ] el0t_64_sync_handler+0xb4/0x130 [ 200.357604 ] el0t_64_sync+0x174/0x178 [ 200.358345 ] ---[ end trace 0000000000000000 ]--- Fixes: 3354822cde5a ("net/mlx5: Use dynamic msix vectors allocation") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index db5687d9fec9..86ac4a85fd87 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -141,7 +141,7 @@ static void irq_release(struct mlx5_irq *irq) irq_update_affinity_hint(irq->map.virq, NULL); #ifdef CONFIG_RFS_ACCEL rmap = mlx5_eq_table_get_rmap(pool->dev); - if (rmap && irq->map.index) + if (rmap) irq_cpu_rmap_remove(rmap, irq->map.virq); #endif -- cgit v1.2.3 From 8764bd0fa5d402c51b136f6aeaba20fc16961ba1 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 31 May 2023 10:48:56 +0200 Subject: net/mlx5: Fix setting of irq->map.index for static IRQ case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When dynamic IRQ allocation is not supported all IRQs are allocated up front in mlx5_irq_table_create() instead of dynamically as part of mlx5_irq_alloc(). In the latter dynamic case irq->map.index is set via the mapping returned by pci_msix_alloc_irq_at(). In the static case and prior to commit 1da438c0ae02 ("net/mlx5: Fix indexing of mlx5_irq") irq->map.index was set in mlx5_irq_alloc() twice once initially to 0 and then to the requested index before storing in the xarray. After this commit it is only set to 0 which breaks all other IRQ mappings. Fix this by setting irq->map.index to the requested index together with irq->map.virq and improve the related comment to make it clearer which cases it deals with. 
Cc: Chuck Lever III Tested-by: Mark Brown Reviewed-by: Mark Brown Reviewed-by: Simon Horman Reviewed-by: Eli Cohen Fixes: 1da438c0ae02 ("net/mlx5: Fix indexing of mlx5_irq") Signed-off-by: Niklas Schnelle Tested-by: Cédric Le Goater Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 86ac4a85fd87..38edd485ba6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -232,12 +232,13 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, if (!irq) return ERR_PTR(-ENOMEM); if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) { - /* The vector at index 0 was already allocated. - * Just get the irq number. If dynamic irq is not supported - * vectors have also been allocated. + /* The vector at index 0 is always statically allocated. If + * dynamic irq is not supported all vectors are statically + * allocated. In both cases just get the irq number and set + * the index. 
*/ irq->map.virq = pci_irq_vector(dev->pdev, i); - irq->map.index = 0; + irq->map.index = i; } else { irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc); if (!irq->map.virq) { -- cgit v1.2.3 From 368591995d010e639ad8f28b27f1b721f0872342 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 31 May 2023 15:48:25 -0400 Subject: net/mlx5: Ensure af_desc.mask is properly initialized [ 9.837087] mlx5_core 0000:02:00.0: firmware version: 16.35.2000 [ 9.843126] mlx5_core 0000:02:00.0: 126.016 Gb/s available PCIe bandwidth (8.0 GT/s PCIe x16 link) [ 10.311515] mlx5_core 0000:02:00.0: Rate limit: 127 rates are supported, range: 0Mbps to 97656Mbps [ 10.321948] mlx5_core 0000:02:00.0: E-Switch: Total vports 2, per vport: max uc(128) max mc(2048) [ 10.344324] mlx5_core 0000:02:00.0: mlx5_pcie_event:301:(pid 88): PCIe slot advertised sufficient power (27W). [ 10.354339] BUG: unable to handle page fault for address: ffffffff8ff0ade0 [ 10.361206] #PF: supervisor read access in kernel mode [ 10.366335] #PF: error_code(0x0000) - not-present page [ 10.371467] PGD 81ec39067 P4D 81ec39067 PUD 81ec3a063 PMD 114b07063 PTE 800ffff7e10f5062 [ 10.379544] Oops: 0000 [#1] PREEMPT SMP PTI [ 10.383721] CPU: 0 PID: 117 Comm: kworker/0:6 Not tainted 6.3.0-13028-g7222f123c983 #1 [ 10.391625] Hardware name: Supermicro X10SRA-F/X10SRA-F, BIOS 2.0b 06/12/2017 [ 10.398750] Workqueue: events work_for_cpu_fn [ 10.403108] RIP: 0010:__bitmap_or+0x10/0x26 [ 10.407286] Code: 85 c0 0f 95 c0 c3 cc cc cc cc 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 89 c9 31 c0 48 83 c1 3f 48 c1 e9 06 39 c> [ 10.426024] RSP: 0000:ffffb45a0078f7b0 EFLAGS: 00010097 [ 10.431240] RAX: 0000000000000000 RBX: ffffffff8ff0adc0 RCX: 0000000000000004 [ 10.438365] RDX: ffff9156801967d0 RSI: ffffffff8ff0ade0 RDI: ffff9156801967b0 [ 10.445489] RBP: ffffb45a0078f7e8 R08: 0000000000000030 R09: 0000000000000000 [ 10.452613] R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000000ec [ 10.459737] R13: 
ffffffff8ff0ade0 R14: 0000000000000001 R15: 0000000000000020 [ 10.466862] FS: 0000000000000000(0000) GS:ffff9165bfc00000(0000) knlGS:0000000000000000 [ 10.474936] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 10.480674] CR2: ffffffff8ff0ade0 CR3: 00000001011ae003 CR4: 00000000003706f0 [ 10.487800] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 10.494922] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 10.502046] Call Trace: [ 10.504493] [ 10.506589] ? matrix_alloc_area.constprop.0+0x43/0x9a [ 10.511729] ? prepare_namespace+0x84/0x174 [ 10.515914] irq_matrix_reserve_managed+0x56/0x10c [ 10.520699] x86_vector_alloc_irqs+0x1d2/0x31e [ 10.525146] irq_domain_alloc_irqs_hierarchy+0x39/0x3f [ 10.530284] irq_domain_alloc_irqs_parent+0x1a/0x2a [ 10.535155] intel_irq_remapping_alloc+0x59/0x5e9 [ 10.539859] ? kmem_cache_debug_flags+0x11/0x26 [ 10.544383] ? __radix_tree_lookup+0x39/0xb9 [ 10.548649] irq_domain_alloc_irqs_hierarchy+0x39/0x3f [ 10.553779] irq_domain_alloc_irqs_parent+0x1a/0x2a [ 10.558650] msi_domain_alloc+0x8c/0x120 [ 10.567697] irq_domain_alloc_irqs_locked+0x11d/0x286 [ 10.572741] __irq_domain_alloc_irqs+0x72/0x93 [ 10.577179] __msi_domain_alloc_irqs+0x193/0x3f1 [ 10.581789] ? __xa_alloc+0xcf/0xe2 [ 10.585273] msi_domain_alloc_irq_at+0xa8/0xfe [ 10.589711] pci_msix_alloc_irq_at+0x47/0x5c The crash is due to matrix_alloc_area() attempting to access per-CPU memory for CPUs that are not present on the system. The CPU mask passed into reserve_managed_vector() via its @irqd parameter is corrupted because it contains uninitialized stack data. 
Fixes: bbac70c74183 ("net/mlx5: Use newer affinity descriptor") Reviewed-by: Thomas Gleixner Signed-off-by: Chuck Lever Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 38edd485ba6f..843da89a9035 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -571,11 +571,11 @@ int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, af_desc.is_managed = false; for (i = 0; i < nirqs; i++) { + cpumask_clear(&af_desc.mask); cpumask_set_cpu(cpus[i], &af_desc.mask); irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap); if (IS_ERR(irq)) break; - cpumask_clear(&af_desc.mask); irqs[i] = irq; } -- cgit v1.2.3 From b6193d7030e3c59f1d4c75648c9c8fa40cad2bcd Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sat, 27 May 2023 23:07:08 -0700 Subject: net/mlx5e: Fix error handling in mlx5e_refresh_tirs Allocation failure is outside the critical lock section and should return immediately rather than jumping to the unlock section. Also unlock as soon as required and remove the now redundant jump label. 
Fixes: 80a2a9026b24 ("net/mlx5e: Add a lock on tir list") Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 1f90594499c6..41c396e76457 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -150,10 +150,8 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = kvzalloc(inlen, GFP_KERNEL); - if (!in) { - err = -ENOMEM; - goto out; - } + if (!in) + return -ENOMEM; if (enable_uc_lb) lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; @@ -171,14 +169,13 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, tirn = tir->tirn; err = mlx5_core_modify_tir(mdev, tirn, in); if (err) - goto out; + break; } + mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock); -out: kvfree(in); if (err) netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); - mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock); return err; } -- cgit v1.2.3 From bbfa4b58997e3d38ba629c9f6fc0bd1c163aaf43 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Fri, 28 Apr 2023 13:48:13 +0300 Subject: net/mlx5: Read embedded cpu after init bit cleared During driver load, the driver reads the embedded_cpu bit from the initialization segment, but the initialization segment is readable only after the initialization bit is cleared. Move the call to mlx5_read_embedded_cpu() to right after the initialization bit is cleared. 
Signed-off-by: Moshe Shemesh Fixes: 591905ba9679 ("net/mlx5: Introduce Mellanox SmartNIC and modify page management logic") Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 2132a6510639..d6ee016deae1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -923,7 +923,6 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, } mlx5_pci_vsc_init(dev); - dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); return 0; err_clr_master: @@ -1155,6 +1154,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout goto err_cmd_cleanup; } + dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP); mlx5_start_health_poll(dev); -- cgit v1.2.3 From 622ab656344a288acf4fb03d628c3bb5dd241f34 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 30 May 2023 21:25:27 +0100 Subject: sfc: fix error unwinds in TC offload Failure ladders weren't exactly unwinding what the function had done up to that point; most seriously, when we encountered an already offloaded rule, the failure path tried to remove the new rule from the hashtable, which would in fact remove the already-present 'old' rule (since it has the same key) from the table, and leak its resources. 
Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202305200745.xmIlkqjH-lkp@intel.com/ Fixes: d902e1a737d4 ("sfc: bare bones TC offload on EF100") Fixes: 17654d84b47c ("sfc: add offloading of 'foreign' TC (decap) rules") Signed-off-by: Edward Cree Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230530202527.53115-1-edward.cree@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/tc.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 0327639a628a..c004443c1d58 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -624,13 +624,12 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, if (!found) { /* We don't care. */ netif_dbg(efx, drv, efx->net_dev, "Ignoring foreign filter that doesn't egdev us\n"); - rc = -EOPNOTSUPP; - goto release; + return -EOPNOTSUPP; } rc = efx_mae_match_check_caps(efx, &match.mask, NULL); if (rc) - goto release; + return rc; if (efx_tc_match_is_encap(&match.mask)) { enum efx_encap_type type; @@ -639,8 +638,7 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, if (type == EFX_ENCAP_TYPE_NONE) { NL_SET_ERR_MSG_MOD(extack, "Egress encap match on unsupported tunnel device"); - rc = -EOPNOTSUPP; - goto release; + return -EOPNOTSUPP; } rc = efx_mae_check_encap_type_supported(efx, type); @@ -648,25 +646,24 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, NL_SET_ERR_MSG_FMT_MOD(extack, "Firmware reports no support for %s encap match", efx_tc_encap_type_name(type)); - goto release; + return rc; } rc = efx_tc_flower_record_encap_match(efx, &match, type, extack); if (rc) - goto release; + return rc; } else { /* This is not a tunnel decap rule, ignore it */ netif_dbg(efx, drv, efx->net_dev, "Ignoring foreign filter without encap match\n"); - rc = -EOPNOTSUPP; - goto 
release; + return -EOPNOTSUPP; } rule = kzalloc(sizeof(*rule), GFP_USER); if (!rule) { rc = -ENOMEM; - goto release; + goto out_free; } INIT_LIST_HEAD(&rule->acts.list); rule->cookie = tc->cookie; @@ -678,7 +675,7 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, "Ignoring already-offloaded rule (cookie %lx)\n", tc->cookie); rc = -EEXIST; - goto release; + goto out_free; } act = kzalloc(sizeof(*act), GFP_USER); @@ -843,6 +840,7 @@ release: efx_tc_match_action_ht_params); efx_tc_free_action_set_list(efx, &rule->acts, false); } +out_free: kfree(rule); if (match.encap) efx_tc_flower_release_encap_match(efx, match.encap); @@ -899,8 +897,7 @@ static int efx_tc_flower_replace(struct efx_nic *efx, return rc; if (efx_tc_match_is_encap(&match.mask)) { NL_SET_ERR_MSG_MOD(extack, "Ingress enc_key matches not supported"); - rc = -EOPNOTSUPP; - goto release; + return -EOPNOTSUPP; } if (tc->common.chain_index) { @@ -924,9 +921,9 @@ static int efx_tc_flower_replace(struct efx_nic *efx, if (old) { netif_dbg(efx, drv, efx->net_dev, "Already offloaded rule (cookie %lx)\n", tc->cookie); - rc = -EEXIST; NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded"); - goto release; + kfree(rule); + return -EEXIST; } /* Parse actions */ -- cgit v1.2.3 From 3403960cdf86c967442dccc2bec981e0093f716e Mon Sep 17 00:00:00 2001 From: Mengyuan Lou Date: Tue, 30 May 2023 10:26:25 +0800 Subject: net: wangxun: libwx add tx offload functions Add tx offload functions for wx_xmit_frame_ring, which include wx_encode_tx_desc_ptype, wx_tso and wx_tx_csum, so that ngbe and txgbe can implement tx offload.
Signed-off-by: Mengyuan Lou Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 462 ++++++++++++++++++++++++++- drivers/net/ethernet/wangxun/libwx/wx_type.h | 87 ++++- 2 files changed, 541 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 1e8d8b7b0c62..34ac30e87b7c 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -2,9 +2,14 @@ /* Copyright (c) 2019 - 2022 Beijing WangXun Technology Co., Ltd. */ #include +#include #include +#include #include +#include #include +#include +#include #include "wx_type.h" #include "wx_lib.h" @@ -707,11 +712,50 @@ static int wx_maybe_stop_tx(struct wx_ring *tx_ring, u16 size) return 0; } +static u32 wx_tx_cmd_type(u32 tx_flags) +{ + /* set type for advanced descriptor with frame checksum insertion */ + u32 cmd_type = WX_TXD_DTYP_DATA | WX_TXD_IFCS; + + /* set HW vlan bit if vlan is present */ + cmd_type |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_HW_VLAN, WX_TXD_VLE); + /* set segmentation enable bits for TSO/FSO */ + cmd_type |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_TSO, WX_TXD_TSE); + /* set timestamp bit if present */ + cmd_type |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_TSTAMP, WX_TXD_MAC_TSTAMP); + cmd_type |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_LINKSEC, WX_TXD_LINKSEC); + + return cmd_type; +} + +static void wx_tx_olinfo_status(union wx_tx_desc *tx_desc, + u32 tx_flags, unsigned int paylen) +{ + u32 olinfo_status = paylen << WX_TXD_PAYLEN_SHIFT; + + /* enable L4 checksum for TSO and TX checksum offload */ + olinfo_status |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_CSUM, WX_TXD_L4CS); + /* enable IPv4 checksum for TSO */ + olinfo_status |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_IPV4, WX_TXD_IIPCS); + /* enable outer IPv4 checksum for TSO */ + olinfo_status |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_OUTER_IPV4, + WX_TXD_EIPCS); + /* Check Context must be set if 
Tx switch is enabled, which it + * always is for case where virtual functions are running + */ + olinfo_status |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_CC, WX_TXD_CC); + olinfo_status |= WX_SET_FLAG(tx_flags, WX_TX_FLAGS_IPSEC, + WX_TXD_IPSEC); + tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); +} + static void wx_tx_map(struct wx_ring *tx_ring, - struct wx_tx_buffer *first) + struct wx_tx_buffer *first, + const u8 hdr_len) { struct sk_buff *skb = first->skb; struct wx_tx_buffer *tx_buffer; + u32 tx_flags = first->tx_flags; u16 i = tx_ring->next_to_use; unsigned int data_len, size; union wx_tx_desc *tx_desc; @@ -719,10 +763,9 @@ static void wx_tx_map(struct wx_ring *tx_ring, dma_addr_t dma; u32 cmd_type; - cmd_type = WX_TXD_DTYP_DATA | WX_TXD_IFCS; + cmd_type = wx_tx_cmd_type(tx_flags); tx_desc = WX_TX_DESC(tx_ring, i); - - tx_desc->read.olinfo_status = cpu_to_le32(skb->len << WX_TXD_PAYLEN_SHIFT); + wx_tx_olinfo_status(tx_desc, tx_flags, skb->len - hdr_len); size = skb_headlen(skb); data_len = skb->data_len; @@ -838,12 +881,399 @@ dma_error: tx_ring->next_to_use = i; } +static void wx_tx_ctxtdesc(struct wx_ring *tx_ring, u32 vlan_macip_lens, + u32 fcoe_sof_eof, u32 type_tucmd, u32 mss_l4len_idx) +{ + struct wx_tx_context_desc *context_desc; + u16 i = tx_ring->next_to_use; + + context_desc = WX_TX_CTXTDESC(tx_ring, i); + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? 
i : 0; + + /* set bits to identify this as an advanced context descriptor */ + type_tucmd |= WX_TXD_DTYP_CTXT; + context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); + context_desc->seqnum_seed = cpu_to_le32(fcoe_sof_eof); + context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); + context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); +} + +static void wx_get_ipv6_proto(struct sk_buff *skb, int offset, u8 *nexthdr) +{ + struct ipv6hdr *hdr = (struct ipv6hdr *)(skb->data + offset); + + *nexthdr = hdr->nexthdr; + offset += sizeof(struct ipv6hdr); + while (ipv6_ext_hdr(*nexthdr)) { + struct ipv6_opt_hdr _hdr, *hp; + + if (*nexthdr == NEXTHDR_NONE) + return; + hp = skb_header_pointer(skb, offset, sizeof(_hdr), &_hdr); + if (!hp) + return; + if (*nexthdr == NEXTHDR_FRAGMENT) + break; + *nexthdr = hp->nexthdr; + } +} + +union network_header { + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + void *raw; +}; + +static u8 wx_encode_tx_desc_ptype(const struct wx_tx_buffer *first) +{ + u8 tun_prot = 0, l4_prot = 0, ptype = 0; + struct sk_buff *skb = first->skb; + + if (skb->encapsulation) { + union network_header hdr; + + switch (first->protocol) { + case htons(ETH_P_IP): + tun_prot = ip_hdr(skb)->protocol; + ptype = WX_PTYPE_TUN_IPV4; + break; + case htons(ETH_P_IPV6): + wx_get_ipv6_proto(skb, skb_network_offset(skb), &tun_prot); + ptype = WX_PTYPE_TUN_IPV6; + break; + default: + return ptype; + } + + if (tun_prot == IPPROTO_IPIP) { + hdr.raw = (void *)inner_ip_hdr(skb); + ptype |= WX_PTYPE_PKT_IPIP; + } else if (tun_prot == IPPROTO_UDP) { + hdr.raw = (void *)inner_ip_hdr(skb); + if (skb->inner_protocol_type != ENCAP_TYPE_ETHER || + skb->inner_protocol != htons(ETH_P_TEB)) { + ptype |= WX_PTYPE_PKT_IG; + } else { + if (((struct ethhdr *)skb_inner_mac_header(skb))->h_proto + == htons(ETH_P_8021Q)) + ptype |= WX_PTYPE_PKT_IGMV; + else + ptype |= WX_PTYPE_PKT_IGM; + } + + } else if (tun_prot == IPPROTO_GRE) { + hdr.raw = (void *)inner_ip_hdr(skb); + if 
(skb->inner_protocol == htons(ETH_P_IP) || + skb->inner_protocol == htons(ETH_P_IPV6)) { + ptype |= WX_PTYPE_PKT_IG; + } else { + if (((struct ethhdr *)skb_inner_mac_header(skb))->h_proto + == htons(ETH_P_8021Q)) + ptype |= WX_PTYPE_PKT_IGMV; + else + ptype |= WX_PTYPE_PKT_IGM; + } + } else { + return ptype; + } + + switch (hdr.ipv4->version) { + case IPVERSION: + l4_prot = hdr.ipv4->protocol; + break; + case 6: + wx_get_ipv6_proto(skb, skb_inner_network_offset(skb), &l4_prot); + ptype |= WX_PTYPE_PKT_IPV6; + break; + default: + return ptype; + } + } else { + switch (first->protocol) { + case htons(ETH_P_IP): + l4_prot = ip_hdr(skb)->protocol; + ptype = WX_PTYPE_PKT_IP; + break; + case htons(ETH_P_IPV6): + wx_get_ipv6_proto(skb, skb_network_offset(skb), &l4_prot); + ptype = WX_PTYPE_PKT_IP | WX_PTYPE_PKT_IPV6; + break; + default: + return WX_PTYPE_PKT_MAC | WX_PTYPE_TYP_MAC; + } + } + switch (l4_prot) { + case IPPROTO_TCP: + ptype |= WX_PTYPE_TYP_TCP; + break; + case IPPROTO_UDP: + ptype |= WX_PTYPE_TYP_UDP; + break; + case IPPROTO_SCTP: + ptype |= WX_PTYPE_TYP_SCTP; + break; + default: + ptype |= WX_PTYPE_TYP_IP; + break; + } + + return ptype; +} + +static int wx_tso(struct wx_ring *tx_ring, struct wx_tx_buffer *first, + u8 *hdr_len, u8 ptype) +{ + u32 vlan_macip_lens, type_tucmd, mss_l4len_idx; + struct net_device *netdev = tx_ring->netdev; + u32 l4len, tunhdr_eiplen_tunlen = 0; + struct sk_buff *skb = first->skb; + bool enc = skb->encapsulation; + struct ipv6hdr *ipv6h; + struct tcphdr *tcph; + struct iphdr *iph; + u8 tun_prot = 0; + int err; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (!skb_is_gso(skb)) + return 0; + + err = skb_cow_head(skb, 0); + if (err < 0) + return err; + + /* indicates the inner headers in the skbuff are valid. */ + iph = enc ? inner_ip_hdr(skb) : ip_hdr(skb); + if (iph->version == 4) { + tcph = enc ? 
inner_tcp_hdr(skb) : tcp_hdr(skb); + iph->tot_len = 0; + iph->check = 0; + tcph->check = ~csum_tcpudp_magic(iph->saddr, + iph->daddr, 0, + IPPROTO_TCP, 0); + first->tx_flags |= WX_TX_FLAGS_TSO | + WX_TX_FLAGS_CSUM | + WX_TX_FLAGS_IPV4 | + WX_TX_FLAGS_CC; + } else if (iph->version == 6 && skb_is_gso_v6(skb)) { + ipv6h = enc ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); + tcph = enc ? inner_tcp_hdr(skb) : tcp_hdr(skb); + ipv6h->payload_len = 0; + tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, + &ipv6h->daddr, 0, + IPPROTO_TCP, 0); + first->tx_flags |= WX_TX_FLAGS_TSO | + WX_TX_FLAGS_CSUM | + WX_TX_FLAGS_CC; + } + + /* compute header lengths */ + l4len = enc ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb); + *hdr_len = enc ? (skb_inner_transport_header(skb) - skb->data) : + skb_transport_offset(skb); + *hdr_len += l4len; + + /* update gso size and bytecount with header size */ + first->gso_segs = skb_shinfo(skb)->gso_segs; + first->bytecount += (first->gso_segs - 1) * *hdr_len; + + /* mss_l4len_id: use 0 as index for TSO */ + mss_l4len_idx = l4len << WX_TXD_L4LEN_SHIFT; + mss_l4len_idx |= skb_shinfo(skb)->gso_size << WX_TXD_MSS_SHIFT; + + /* vlan_macip_lens: HEADLEN, MACLEN, VLAN tag */ + if (enc) { + switch (first->protocol) { + case htons(ETH_P_IP): + tun_prot = ip_hdr(skb)->protocol; + first->tx_flags |= WX_TX_FLAGS_OUTER_IPV4; + break; + case htons(ETH_P_IPV6): + tun_prot = ipv6_hdr(skb)->nexthdr; + break; + default: + break; + } + switch (tun_prot) { + case IPPROTO_UDP: + tunhdr_eiplen_tunlen = WX_TXD_TUNNEL_UDP; + tunhdr_eiplen_tunlen |= ((skb_network_header_len(skb) >> 2) << + WX_TXD_OUTER_IPLEN_SHIFT) | + (((skb_inner_mac_header(skb) - + skb_transport_header(skb)) >> 1) << + WX_TXD_TUNNEL_LEN_SHIFT); + break; + case IPPROTO_GRE: + tunhdr_eiplen_tunlen = WX_TXD_TUNNEL_GRE; + tunhdr_eiplen_tunlen |= ((skb_network_header_len(skb) >> 2) << + WX_TXD_OUTER_IPLEN_SHIFT) | + (((skb_inner_mac_header(skb) - + skb_transport_header(skb)) >> 1) << + WX_TXD_TUNNEL_LEN_SHIFT); + break; + 
case IPPROTO_IPIP: + tunhdr_eiplen_tunlen = (((char *)inner_ip_hdr(skb) - + (char *)ip_hdr(skb)) >> 2) << + WX_TXD_OUTER_IPLEN_SHIFT; + break; + default: + break; + } + vlan_macip_lens = skb_inner_network_header_len(skb) >> 1; + } else { + vlan_macip_lens = skb_network_header_len(skb) >> 1; + } + + vlan_macip_lens |= skb_network_offset(skb) << WX_TXD_MACLEN_SHIFT; + vlan_macip_lens |= first->tx_flags & WX_TX_FLAGS_VLAN_MASK; + + type_tucmd = ptype << 24; + if (skb->vlan_proto == htons(ETH_P_8021AD) && + netdev->features & NETIF_F_HW_VLAN_STAG_TX) + type_tucmd |= WX_SET_FLAG(first->tx_flags, + WX_TX_FLAGS_HW_VLAN, + 0x1 << WX_TXD_TAG_TPID_SEL_SHIFT); + wx_tx_ctxtdesc(tx_ring, vlan_macip_lens, tunhdr_eiplen_tunlen, + type_tucmd, mss_l4len_idx); + + return 1; +} + +static void wx_tx_csum(struct wx_ring *tx_ring, struct wx_tx_buffer *first, + u8 ptype) +{ + u32 tunhdr_eiplen_tunlen = 0, vlan_macip_lens = 0; + struct net_device *netdev = tx_ring->netdev; + u32 mss_l4len_idx = 0, type_tucmd; + struct sk_buff *skb = first->skb; + u8 tun_prot = 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + if (!(first->tx_flags & WX_TX_FLAGS_HW_VLAN) && + !(first->tx_flags & WX_TX_FLAGS_CC)) + return; + vlan_macip_lens = skb_network_offset(skb) << + WX_TXD_MACLEN_SHIFT; + } else { + u8 l4_prot = 0; + union { + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + u8 *raw; + } network_hdr; + union { + struct tcphdr *tcphdr; + u8 *raw; + } transport_hdr; + + if (skb->encapsulation) { + network_hdr.raw = skb_inner_network_header(skb); + transport_hdr.raw = skb_inner_transport_header(skb); + vlan_macip_lens = skb_network_offset(skb) << + WX_TXD_MACLEN_SHIFT; + switch (first->protocol) { + case htons(ETH_P_IP): + tun_prot = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + tun_prot = ipv6_hdr(skb)->nexthdr; + break; + default: + return; + } + switch (tun_prot) { + case IPPROTO_UDP: + tunhdr_eiplen_tunlen = WX_TXD_TUNNEL_UDP; + tunhdr_eiplen_tunlen |= + ((skb_network_header_len(skb) >> 2) 
<< + WX_TXD_OUTER_IPLEN_SHIFT) | + (((skb_inner_mac_header(skb) - + skb_transport_header(skb)) >> 1) << + WX_TXD_TUNNEL_LEN_SHIFT); + break; + case IPPROTO_GRE: + tunhdr_eiplen_tunlen = WX_TXD_TUNNEL_GRE; + tunhdr_eiplen_tunlen |= ((skb_network_header_len(skb) >> 2) << + WX_TXD_OUTER_IPLEN_SHIFT) | + (((skb_inner_mac_header(skb) - + skb_transport_header(skb)) >> 1) << + WX_TXD_TUNNEL_LEN_SHIFT); + break; + case IPPROTO_IPIP: + tunhdr_eiplen_tunlen = (((char *)inner_ip_hdr(skb) - + (char *)ip_hdr(skb)) >> 2) << + WX_TXD_OUTER_IPLEN_SHIFT; + break; + default: + break; + } + + } else { + network_hdr.raw = skb_network_header(skb); + transport_hdr.raw = skb_transport_header(skb); + vlan_macip_lens = skb_network_offset(skb) << + WX_TXD_MACLEN_SHIFT; + } + + switch (network_hdr.ipv4->version) { + case IPVERSION: + vlan_macip_lens |= (transport_hdr.raw - network_hdr.raw) >> 1; + l4_prot = network_hdr.ipv4->protocol; + break; + case 6: + vlan_macip_lens |= (transport_hdr.raw - network_hdr.raw) >> 1; + l4_prot = network_hdr.ipv6->nexthdr; + break; + default: + break; + } + + switch (l4_prot) { + case IPPROTO_TCP: + mss_l4len_idx = (transport_hdr.tcphdr->doff * 4) << + WX_TXD_L4LEN_SHIFT; + break; + case IPPROTO_SCTP: + mss_l4len_idx = sizeof(struct sctphdr) << + WX_TXD_L4LEN_SHIFT; + break; + case IPPROTO_UDP: + mss_l4len_idx = sizeof(struct udphdr) << + WX_TXD_L4LEN_SHIFT; + break; + default: + break; + } + + /* update TX checksum flag */ + first->tx_flags |= WX_TX_FLAGS_CSUM; + } + first->tx_flags |= WX_TX_FLAGS_CC; + /* vlan_macip_lens: MACLEN, VLAN tag */ + vlan_macip_lens |= first->tx_flags & WX_TX_FLAGS_VLAN_MASK; + + type_tucmd = ptype << 24; + if (skb->vlan_proto == htons(ETH_P_8021AD) && + netdev->features & NETIF_F_HW_VLAN_STAG_TX) + type_tucmd |= WX_SET_FLAG(first->tx_flags, + WX_TX_FLAGS_HW_VLAN, + 0x1 << WX_TXD_TAG_TPID_SEL_SHIFT); + wx_tx_ctxtdesc(tx_ring, vlan_macip_lens, tunhdr_eiplen_tunlen, + type_tucmd, mss_l4len_idx); +} + static netdev_tx_t 
wx_xmit_frame_ring(struct sk_buff *skb, struct wx_ring *tx_ring) { u16 count = TXD_USE_COUNT(skb_headlen(skb)); struct wx_tx_buffer *first; + u8 hdr_len = 0, ptype; unsigned short f; + u32 tx_flags = 0; + int tso; /* need: 1 descriptor per page * PAGE_SIZE/WX_MAX_DATA_PER_TXD, * + 1 desc for skb_headlen/WX_MAX_DATA_PER_TXD, @@ -864,7 +1294,29 @@ static netdev_tx_t wx_xmit_frame_ring(struct sk_buff *skb, first->bytecount = skb->len; first->gso_segs = 1; - wx_tx_map(tx_ring, first); + /* if we have a HW VLAN tag being added default to the HW one */ + if (skb_vlan_tag_present(skb)) { + tx_flags |= skb_vlan_tag_get(skb) << WX_TX_FLAGS_VLAN_SHIFT; + tx_flags |= WX_TX_FLAGS_HW_VLAN; + } + + /* record initial flags and protocol */ + first->tx_flags = tx_flags; + first->protocol = vlan_get_protocol(skb); + + ptype = wx_encode_tx_desc_ptype(first); + + tso = wx_tso(tx_ring, first, &hdr_len, ptype); + if (tso < 0) + goto out_drop; + else if (!tso) + wx_tx_csum(tx_ring, first, ptype); + wx_tx_map(tx_ring, first, hdr_len); + + return NETDEV_TX_OK; +out_drop: + dev_kfree_skb_any(first->skb); + first->skb = NULL; return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index cbe7f184b50e..9d549dc49dfd 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -6,6 +6,7 @@ #include #include +#include #define WX_NCSI_SUP 0x8000 #define WX_NCSI_MASK 0x8000 @@ -315,9 +316,6 @@ #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), WX_MAX_DATA_PER_TXD) #define DESC_NEEDED (MAX_SKB_FRAGS + 4) -/* Ether Types */ -#define WX_ETH_P_CNM 0x22E7 - #define WX_CFG_PORT_ST 0x14404 /******************* Receive Descriptor bit definitions **********************/ @@ -326,6 +324,29 @@ #define WX_RXD_ERR_RXE BIT(29) /* Any MAC Error */ +/* TUN */ +#define WX_PTYPE_TUN_IPV4 0x80 +#define WX_PTYPE_TUN_IPV6 0xC0 + +/* PKT for TUN */ +#define WX_PTYPE_PKT_IPIP 0x00 /* IP+IP */ +#define 
WX_PTYPE_PKT_IG 0x10 /* IP+GRE */ +#define WX_PTYPE_PKT_IGM 0x20 /* IP+GRE+MAC */ +#define WX_PTYPE_PKT_IGMV 0x30 /* IP+GRE+MAC+VLAN */ +/* PKT for !TUN */ +#define WX_PTYPE_PKT_MAC 0x10 +#define WX_PTYPE_PKT_IP 0x20 + +/* TYP for PKT=mac */ +#define WX_PTYPE_TYP_MAC 0x01 +/* TYP for PKT=ip */ +#define WX_PTYPE_PKT_IPV6 0x08 +#define WX_PTYPE_TYP_IPFRAG 0x01 +#define WX_PTYPE_TYP_IP 0x02 +#define WX_PTYPE_TYP_UDP 0x03 +#define WX_PTYPE_TYP_TCP 0x04 +#define WX_PTYPE_TYP_SCTP 0x05 + /*********************** Transmit Descriptor Config Masks ****************/ #define WX_TXD_STAT_DD BIT(0) /* Descriptor Done */ #define WX_TXD_DTYP_DATA 0 /* Adv Data Descriptor */ @@ -334,6 +355,49 @@ #define WX_TXD_IFCS BIT(25) /* Insert FCS */ #define WX_TXD_RS BIT(27) /* Report Status */ +/*********************** Adv Transmit Descriptor Config Masks ****************/ +#define WX_TXD_MAC_TSTAMP BIT(19) /* IEEE1588 time stamp */ +#define WX_TXD_DTYP_CTXT BIT(20) /* Adv Context Desc */ +#define WX_TXD_LINKSEC BIT(26) /* enable linksec */ +#define WX_TXD_VLE BIT(30) /* VLAN pkt enable */ +#define WX_TXD_TSE BIT(31) /* TCP Seg enable */ +#define WX_TXD_CC BIT(7) /* Check Context */ +#define WX_TXD_IPSEC BIT(8) /* enable ipsec esp */ +#define WX_TXD_L4CS BIT(9) +#define WX_TXD_IIPCS BIT(10) +#define WX_TXD_EIPCS BIT(11) +#define WX_TXD_PAYLEN_SHIFT 13 /* Adv desc PAYLEN shift */ +#define WX_TXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ +#define WX_TXD_TAG_TPID_SEL_SHIFT 11 + +#define WX_TXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ +#define WX_TXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ + +#define WX_TXD_OUTER_IPLEN_SHIFT 12 /* Adv ctxt OUTERIPLEN shift */ +#define WX_TXD_TUNNEL_LEN_SHIFT 21 /* Adv ctxt TUNNELLEN shift */ +#define WX_TXD_TUNNEL_TYPE_SHIFT 11 /* Adv Tx Desc Tunnel Type shift */ +#define WX_TXD_TUNNEL_UDP FIELD_PREP(BIT(WX_TXD_TUNNEL_TYPE_SHIFT), 0) +#define WX_TXD_TUNNEL_GRE FIELD_PREP(BIT(WX_TXD_TUNNEL_TYPE_SHIFT), 1) + +enum wx_tx_flags { + /* cmd_type flags */ + 
WX_TX_FLAGS_HW_VLAN = 0x01, + WX_TX_FLAGS_TSO = 0x02, + WX_TX_FLAGS_TSTAMP = 0x04, + + /* olinfo flags */ + WX_TX_FLAGS_CC = 0x08, + WX_TX_FLAGS_IPV4 = 0x10, + WX_TX_FLAGS_CSUM = 0x20, + WX_TX_FLAGS_OUTER_IPV4 = 0x100, + WX_TX_FLAGS_LINKSEC = 0x200, + WX_TX_FLAGS_IPSEC = 0x400, +}; + +/* VLAN info */ +#define WX_TX_FLAGS_VLAN_MASK GENMASK(31, 16) +#define WX_TX_FLAGS_VLAN_SHIFT 16 + /* Host Interface Command Structures */ struct wx_hic_hdr { u8 cmd; @@ -508,10 +572,25 @@ union wx_rx_desc { } wb; /* writeback */ }; +struct wx_tx_context_desc { + __le32 vlan_macip_lens; + __le32 seqnum_seed; + __le32 type_tucmd_mlhl; + __le32 mss_l4len_idx; +}; + +/* if _flag is in _input, return _result */ +#define WX_SET_FLAG(_input, _flag, _result) \ + (((_flag) <= (_result)) ? \ + ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \ + ((u32)((_input) & (_flag)) / ((_flag) / (_result)))) + #define WX_RX_DESC(R, i) \ (&(((union wx_rx_desc *)((R)->desc))[i])) #define WX_TX_DESC(R, i) \ (&(((union wx_tx_desc *)((R)->desc))[i])) +#define WX_TX_CTXTDESC(R, i) \ + (&(((struct wx_tx_context_desc *)((R)->desc))[i])) /* wrapper around a pointer to a socket buffer, * so a DMA handle can be stored along with the buffer @@ -523,6 +602,8 @@ struct wx_tx_buffer { unsigned short gso_segs; DEFINE_DMA_UNMAP_ADDR(dma); DEFINE_DMA_UNMAP_LEN(len); + __be16 protocol; + u32 tx_flags; }; struct wx_rx_buffer { -- cgit v1.2.3 From ef4f3c19f912820c15a6de0aedcc0fda67c7dd3a Mon Sep 17 00:00:00 2001 From: Mengyuan Lou Date: Tue, 30 May 2023 10:26:26 +0800 Subject: net: wangxun: libwx add rx offload functions Add rx offload functions for wx_clean_rx_irq, so that ngbe and txgbe can implement rx offload.
Signed-off-by: Mengyuan Lou Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 234 ++++++++++++++++++++++++++- drivers/net/ethernet/wangxun/libwx/wx_type.h | 100 +++++++++++- 2 files changed, 331 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 34ac30e87b7c..84107208401e 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -15,6 +15,144 @@ #include "wx_lib.h" #include "wx_hw.h" +/* Lookup table mapping the HW PTYPE to the bit field for decoding */ +static struct wx_dec_ptype wx_ptype_lookup[256] = { + /* L2: mac */ + [0x11] = WX_PTT(L2, NONE, NONE, NONE, NONE, PAY2), + [0x12] = WX_PTT(L2, NONE, NONE, NONE, TS, PAY2), + [0x13] = WX_PTT(L2, NONE, NONE, NONE, NONE, PAY2), + [0x14] = WX_PTT(L2, NONE, NONE, NONE, NONE, PAY2), + [0x15] = WX_PTT(L2, NONE, NONE, NONE, NONE, NONE), + [0x16] = WX_PTT(L2, NONE, NONE, NONE, NONE, PAY2), + [0x17] = WX_PTT(L2, NONE, NONE, NONE, NONE, NONE), + + /* L2: ethertype filter */ + [0x18 ... 0x1F] = WX_PTT(L2, NONE, NONE, NONE, NONE, NONE), + + /* L3: ip non-tunnel */ + [0x21] = WX_PTT(IP, FGV4, NONE, NONE, NONE, PAY3), + [0x22] = WX_PTT(IP, IPV4, NONE, NONE, NONE, PAY3), + [0x23] = WX_PTT(IP, IPV4, NONE, NONE, UDP, PAY4), + [0x24] = WX_PTT(IP, IPV4, NONE, NONE, TCP, PAY4), + [0x25] = WX_PTT(IP, IPV4, NONE, NONE, SCTP, PAY4), + [0x29] = WX_PTT(IP, FGV6, NONE, NONE, NONE, PAY3), + [0x2A] = WX_PTT(IP, IPV6, NONE, NONE, NONE, PAY3), + [0x2B] = WX_PTT(IP, IPV6, NONE, NONE, UDP, PAY3), + [0x2C] = WX_PTT(IP, IPV6, NONE, NONE, TCP, PAY4), + [0x2D] = WX_PTT(IP, IPV6, NONE, NONE, SCTP, PAY4), + + /* L2: fcoe */ + [0x30 ... 0x34] = WX_PTT(FCOE, NONE, NONE, NONE, NONE, PAY3), + [0x38 ... 
0x3C] = WX_PTT(FCOE, NONE, NONE, NONE, NONE, PAY3), + + /* IPv4 --> IPv4/IPv6 */ + [0x81] = WX_PTT(IP, IPV4, IPIP, FGV4, NONE, PAY3), + [0x82] = WX_PTT(IP, IPV4, IPIP, IPV4, NONE, PAY3), + [0x83] = WX_PTT(IP, IPV4, IPIP, IPV4, UDP, PAY4), + [0x84] = WX_PTT(IP, IPV4, IPIP, IPV4, TCP, PAY4), + [0x85] = WX_PTT(IP, IPV4, IPIP, IPV4, SCTP, PAY4), + [0x89] = WX_PTT(IP, IPV4, IPIP, FGV6, NONE, PAY3), + [0x8A] = WX_PTT(IP, IPV4, IPIP, IPV6, NONE, PAY3), + [0x8B] = WX_PTT(IP, IPV4, IPIP, IPV6, UDP, PAY4), + [0x8C] = WX_PTT(IP, IPV4, IPIP, IPV6, TCP, PAY4), + [0x8D] = WX_PTT(IP, IPV4, IPIP, IPV6, SCTP, PAY4), + + /* IPv4 --> GRE/NAT --> NONE/IPv4/IPv6 */ + [0x90] = WX_PTT(IP, IPV4, IG, NONE, NONE, PAY3), + [0x91] = WX_PTT(IP, IPV4, IG, FGV4, NONE, PAY3), + [0x92] = WX_PTT(IP, IPV4, IG, IPV4, NONE, PAY3), + [0x93] = WX_PTT(IP, IPV4, IG, IPV4, UDP, PAY4), + [0x94] = WX_PTT(IP, IPV4, IG, IPV4, TCP, PAY4), + [0x95] = WX_PTT(IP, IPV4, IG, IPV4, SCTP, PAY4), + [0x99] = WX_PTT(IP, IPV4, IG, FGV6, NONE, PAY3), + [0x9A] = WX_PTT(IP, IPV4, IG, IPV6, NONE, PAY3), + [0x9B] = WX_PTT(IP, IPV4, IG, IPV6, UDP, PAY4), + [0x9C] = WX_PTT(IP, IPV4, IG, IPV6, TCP, PAY4), + [0x9D] = WX_PTT(IP, IPV4, IG, IPV6, SCTP, PAY4), + + /* IPv4 --> GRE/NAT --> MAC --> NONE/IPv4/IPv6 */ + [0xA0] = WX_PTT(IP, IPV4, IGM, NONE, NONE, PAY3), + [0xA1] = WX_PTT(IP, IPV4, IGM, FGV4, NONE, PAY3), + [0xA2] = WX_PTT(IP, IPV4, IGM, IPV4, NONE, PAY3), + [0xA3] = WX_PTT(IP, IPV4, IGM, IPV4, UDP, PAY4), + [0xA4] = WX_PTT(IP, IPV4, IGM, IPV4, TCP, PAY4), + [0xA5] = WX_PTT(IP, IPV4, IGM, IPV4, SCTP, PAY4), + [0xA9] = WX_PTT(IP, IPV4, IGM, FGV6, NONE, PAY3), + [0xAA] = WX_PTT(IP, IPV4, IGM, IPV6, NONE, PAY3), + [0xAB] = WX_PTT(IP, IPV4, IGM, IPV6, UDP, PAY4), + [0xAC] = WX_PTT(IP, IPV4, IGM, IPV6, TCP, PAY4), + [0xAD] = WX_PTT(IP, IPV4, IGM, IPV6, SCTP, PAY4), + + /* IPv4 --> GRE/NAT --> MAC+VLAN --> NONE/IPv4/IPv6 */ + [0xB0] = WX_PTT(IP, IPV4, IGMV, NONE, NONE, PAY3), + [0xB1] = WX_PTT(IP, IPV4, IGMV, FGV4, NONE, PAY3), + 
[0xB2] = WX_PTT(IP, IPV4, IGMV, IPV4, NONE, PAY3), + [0xB3] = WX_PTT(IP, IPV4, IGMV, IPV4, UDP, PAY4), + [0xB4] = WX_PTT(IP, IPV4, IGMV, IPV4, TCP, PAY4), + [0xB5] = WX_PTT(IP, IPV4, IGMV, IPV4, SCTP, PAY4), + [0xB9] = WX_PTT(IP, IPV4, IGMV, FGV6, NONE, PAY3), + [0xBA] = WX_PTT(IP, IPV4, IGMV, IPV6, NONE, PAY3), + [0xBB] = WX_PTT(IP, IPV4, IGMV, IPV6, UDP, PAY4), + [0xBC] = WX_PTT(IP, IPV4, IGMV, IPV6, TCP, PAY4), + [0xBD] = WX_PTT(IP, IPV4, IGMV, IPV6, SCTP, PAY4), + + /* IPv6 --> IPv4/IPv6 */ + [0xC1] = WX_PTT(IP, IPV6, IPIP, FGV4, NONE, PAY3), + [0xC2] = WX_PTT(IP, IPV6, IPIP, IPV4, NONE, PAY3), + [0xC3] = WX_PTT(IP, IPV6, IPIP, IPV4, UDP, PAY4), + [0xC4] = WX_PTT(IP, IPV6, IPIP, IPV4, TCP, PAY4), + [0xC5] = WX_PTT(IP, IPV6, IPIP, IPV4, SCTP, PAY4), + [0xC9] = WX_PTT(IP, IPV6, IPIP, FGV6, NONE, PAY3), + [0xCA] = WX_PTT(IP, IPV6, IPIP, IPV6, NONE, PAY3), + [0xCB] = WX_PTT(IP, IPV6, IPIP, IPV6, UDP, PAY4), + [0xCC] = WX_PTT(IP, IPV6, IPIP, IPV6, TCP, PAY4), + [0xCD] = WX_PTT(IP, IPV6, IPIP, IPV6, SCTP, PAY4), + + /* IPv6 --> GRE/NAT -> NONE/IPv4/IPv6 */ + [0xD0] = WX_PTT(IP, IPV6, IG, NONE, NONE, PAY3), + [0xD1] = WX_PTT(IP, IPV6, IG, FGV4, NONE, PAY3), + [0xD2] = WX_PTT(IP, IPV6, IG, IPV4, NONE, PAY3), + [0xD3] = WX_PTT(IP, IPV6, IG, IPV4, UDP, PAY4), + [0xD4] = WX_PTT(IP, IPV6, IG, IPV4, TCP, PAY4), + [0xD5] = WX_PTT(IP, IPV6, IG, IPV4, SCTP, PAY4), + [0xD9] = WX_PTT(IP, IPV6, IG, FGV6, NONE, PAY3), + [0xDA] = WX_PTT(IP, IPV6, IG, IPV6, NONE, PAY3), + [0xDB] = WX_PTT(IP, IPV6, IG, IPV6, UDP, PAY4), + [0xDC] = WX_PTT(IP, IPV6, IG, IPV6, TCP, PAY4), + [0xDD] = WX_PTT(IP, IPV6, IG, IPV6, SCTP, PAY4), + + /* IPv6 --> GRE/NAT -> MAC -> NONE/IPv4/IPv6 */ + [0xE0] = WX_PTT(IP, IPV6, IGM, NONE, NONE, PAY3), + [0xE1] = WX_PTT(IP, IPV6, IGM, FGV4, NONE, PAY3), + [0xE2] = WX_PTT(IP, IPV6, IGM, IPV4, NONE, PAY3), + [0xE3] = WX_PTT(IP, IPV6, IGM, IPV4, UDP, PAY4), + [0xE4] = WX_PTT(IP, IPV6, IGM, IPV4, TCP, PAY4), + [0xE5] = WX_PTT(IP, IPV6, IGM, IPV4, SCTP, PAY4), + [0xE9] 
= WX_PTT(IP, IPV6, IGM, FGV6, NONE, PAY3), + [0xEA] = WX_PTT(IP, IPV6, IGM, IPV6, NONE, PAY3), + [0xEB] = WX_PTT(IP, IPV6, IGM, IPV6, UDP, PAY4), + [0xEC] = WX_PTT(IP, IPV6, IGM, IPV6, TCP, PAY4), + [0xED] = WX_PTT(IP, IPV6, IGM, IPV6, SCTP, PAY4), + + /* IPv6 --> GRE/NAT -> MAC--> NONE/IPv */ + [0xF0] = WX_PTT(IP, IPV6, IGMV, NONE, NONE, PAY3), + [0xF1] = WX_PTT(IP, IPV6, IGMV, FGV4, NONE, PAY3), + [0xF2] = WX_PTT(IP, IPV6, IGMV, IPV4, NONE, PAY3), + [0xF3] = WX_PTT(IP, IPV6, IGMV, IPV4, UDP, PAY4), + [0xF4] = WX_PTT(IP, IPV6, IGMV, IPV4, TCP, PAY4), + [0xF5] = WX_PTT(IP, IPV6, IGMV, IPV4, SCTP, PAY4), + [0xF9] = WX_PTT(IP, IPV6, IGMV, FGV6, NONE, PAY3), + [0xFA] = WX_PTT(IP, IPV6, IGMV, IPV6, NONE, PAY3), + [0xFB] = WX_PTT(IP, IPV6, IGMV, IPV6, UDP, PAY4), + [0xFC] = WX_PTT(IP, IPV6, IGMV, IPV6, TCP, PAY4), + [0xFD] = WX_PTT(IP, IPV6, IGMV, IPV6, SCTP, PAY4), +}; + +static struct wx_dec_ptype wx_decode_ptype(const u8 ptype) +{ + return wx_ptype_lookup[ptype]; +} + /* wx_test_staterr - tests bits in Rx descriptor status and error fields */ static __le32 wx_test_staterr(union wx_rx_desc *rx_desc, const u32 stat_err_bits) @@ -424,6 +562,98 @@ static bool wx_cleanup_headers(struct wx_ring *rx_ring, return false; } +static void wx_rx_hash(struct wx_ring *ring, + union wx_rx_desc *rx_desc, + struct sk_buff *skb) +{ + u16 rss_type; + + if (!(ring->netdev->features & NETIF_F_RXHASH)) + return; + + rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) & + WX_RXD_RSSTYPE_MASK; + + if (!rss_type) + return; + + skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), + (WX_RSS_L4_TYPES_MASK & (1ul << rss_type)) ? 
+ PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); +} + +/** + * wx_rx_checksum - indicate in skb if hw indicated a good cksum + * @ring: structure containing ring specific data + * @rx_desc: current Rx descriptor being processed + * @skb: skb currently being received and modified + **/ +static void wx_rx_checksum(struct wx_ring *ring, + union wx_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct wx_dec_ptype dptype = wx_decode_ptype(WX_RXD_PKTTYPE(rx_desc)); + + skb_checksum_none_assert(skb); + /* Rx csum disabled */ + if (!(ring->netdev->features & NETIF_F_RXCSUM)) + return; + + /* if IPv4 header checksum error */ + if ((wx_test_staterr(rx_desc, WX_RXD_STAT_IPCS) && + wx_test_staterr(rx_desc, WX_RXD_ERR_IPE)) || + (wx_test_staterr(rx_desc, WX_RXD_STAT_OUTERIPCS) && + wx_test_staterr(rx_desc, WX_RXD_ERR_OUTERIPER))) { + ring->rx_stats.csum_err++; + return; + } + + /* L4 checksum offload flag must set for the below code to work */ + if (!wx_test_staterr(rx_desc, WX_RXD_STAT_L4CS)) + return; + + /* Hardware can't guarantee csum if IPv6 Dest Header found */ + if (dptype.prot != WX_DEC_PTYPE_PROT_SCTP && WX_RXD_IPV6EX(rx_desc)) + return; + + /* if L4 checksum error */ + if (wx_test_staterr(rx_desc, WX_RXD_ERR_TCPE)) { + ring->rx_stats.csum_err++; + return; + } + + /* It must be a TCP or UDP or SCTP packet with a valid checksum */ + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* If there is an outer header present that might contain a checksum + * we need to bump the checksum level by 1 to reflect the fact that + * we are indicating we validated the inner checksum. 
+ */ + if (dptype.etype >= WX_DEC_PTYPE_ETYPE_IG) + __skb_incr_checksum_unnecessary(skb); + ring->rx_stats.csum_good_cnt++; +} + +/** + * wx_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, protocol, and + * other fields within the skb. + **/ +static void wx_process_skb_fields(struct wx_ring *rx_ring, + union wx_rx_desc *rx_desc, + struct sk_buff *skb) +{ + wx_rx_hash(rx_ring, rx_desc, skb); + wx_rx_checksum(rx_ring, rx_desc, skb); + skb_record_rx_queue(skb, rx_ring->queue_index); + skb->protocol = eth_type_trans(skb, rx_ring->netdev); +} + /** * wx_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @q_vector: structure containing interrupt and ring information @@ -491,8 +721,8 @@ static int wx_clean_rx_irq(struct wx_q_vector *q_vector, /* probably a little skewed due to removing CRC */ total_rx_bytes += skb->len; - skb_record_rx_queue(skb, rx_ring->queue_index); - skb->protocol = eth_type_trans(skb, rx_ring->netdev); + /* populate checksum, timestamp, VLAN, and protocol */ + wx_process_skb_fields(rx_ring, rx_desc, skb); napi_gro_receive(&q_vector->napi, skb); /* update budget accounting */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 9d549dc49dfd..2c1fd0f1025d 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -321,9 +321,31 @@ /******************* Receive Descriptor bit definitions **********************/ #define WX_RXD_STAT_DD BIT(0) /* Done */ #define WX_RXD_STAT_EOP BIT(1) /* End of Packet */ +#define WX_RXD_STAT_L4CS BIT(7) /* L4 xsum calculated */ +#define WX_RXD_STAT_IPCS BIT(8) /* IP xsum calculated */ +#define 
WX_RXD_STAT_OUTERIPCS BIT(10) /* Cloud IP xsum calculated*/ +#define WX_RXD_ERR_OUTERIPER BIT(26) /* CRC IP Header error */ #define WX_RXD_ERR_RXE BIT(29) /* Any MAC Error */ - +#define WX_RXD_ERR_TCPE BIT(30) /* TCP/UDP Checksum Error */ +#define WX_RXD_ERR_IPE BIT(31) /* IP Checksum Error */ + +/* RSS Hash results */ +#define WX_RXD_RSSTYPE_MASK GENMASK(3, 0) +#define WX_RXD_RSSTYPE_IPV4_TCP 0x00000001U +#define WX_RXD_RSSTYPE_IPV6_TCP 0x00000003U +#define WX_RXD_RSSTYPE_IPV4_SCTP 0x00000004U +#define WX_RXD_RSSTYPE_IPV6_SCTP 0x00000006U +#define WX_RXD_RSSTYPE_IPV4_UDP 0x00000007U +#define WX_RXD_RSSTYPE_IPV6_UDP 0x00000008U + +#define WX_RSS_L4_TYPES_MASK \ + ((1ul << WX_RXD_RSSTYPE_IPV4_TCP) | \ + (1ul << WX_RXD_RSSTYPE_IPV4_UDP) | \ + (1ul << WX_RXD_RSSTYPE_IPV4_SCTP) | \ + (1ul << WX_RXD_RSSTYPE_IPV6_TCP) | \ + (1ul << WX_RXD_RSSTYPE_IPV6_UDP) | \ + (1ul << WX_RXD_RSSTYPE_IPV6_SCTP)) /* TUN */ #define WX_PTYPE_TUN_IPV4 0x80 #define WX_PTYPE_TUN_IPV6 0xC0 @@ -347,6 +369,10 @@ #define WX_PTYPE_TYP_TCP 0x04 #define WX_PTYPE_TYP_SCTP 0x05 +#define WX_RXD_PKTTYPE(_rxd) \ + ((le32_to_cpu((_rxd)->wb.lower.lo_dword.data) >> 9) & 0xFF) +#define WX_RXD_IPV6EX(_rxd) \ + ((le32_to_cpu((_rxd)->wb.lower.lo_dword.data) >> 6) & 0x1) /*********************** Transmit Descriptor Config Masks ****************/ #define WX_TXD_STAT_DD BIT(0) /* Descriptor Done */ #define WX_TXD_DTYP_DATA 0 /* Adv Data Descriptor */ @@ -398,6 +424,70 @@ enum wx_tx_flags { #define WX_TX_FLAGS_VLAN_MASK GENMASK(31, 16) #define WX_TX_FLAGS_VLAN_SHIFT 16 +/* wx_dec_ptype.mac: outer mac */ +enum wx_dec_ptype_mac { + WX_DEC_PTYPE_MAC_IP = 0, + WX_DEC_PTYPE_MAC_L2 = 2, + WX_DEC_PTYPE_MAC_FCOE = 3, +}; + +/* wx_dec_ptype.[e]ip: outer&encaped ip */ +#define WX_DEC_PTYPE_IP_FRAG 0x4 +enum wx_dec_ptype_ip { + WX_DEC_PTYPE_IP_NONE = 0, + WX_DEC_PTYPE_IP_IPV4 = 1, + WX_DEC_PTYPE_IP_IPV6 = 2, + WX_DEC_PTYPE_IP_FGV4 = WX_DEC_PTYPE_IP_FRAG | WX_DEC_PTYPE_IP_IPV4, + WX_DEC_PTYPE_IP_FGV6 = WX_DEC_PTYPE_IP_FRAG | 
WX_DEC_PTYPE_IP_IPV6, +}; + +/* wx_dec_ptype.etype: encaped type */ +enum wx_dec_ptype_etype { + WX_DEC_PTYPE_ETYPE_NONE = 0, + WX_DEC_PTYPE_ETYPE_IPIP = 1, /* IP+IP */ + WX_DEC_PTYPE_ETYPE_IG = 2, /* IP+GRE */ + WX_DEC_PTYPE_ETYPE_IGM = 3, /* IP+GRE+MAC */ + WX_DEC_PTYPE_ETYPE_IGMV = 4, /* IP+GRE+MAC+VLAN */ +}; + +/* wx_dec_ptype.proto: payload proto */ +enum wx_dec_ptype_prot { + WX_DEC_PTYPE_PROT_NONE = 0, + WX_DEC_PTYPE_PROT_UDP = 1, + WX_DEC_PTYPE_PROT_TCP = 2, + WX_DEC_PTYPE_PROT_SCTP = 3, + WX_DEC_PTYPE_PROT_ICMP = 4, + WX_DEC_PTYPE_PROT_TS = 5, /* time sync */ +}; + +/* wx_dec_ptype.layer: payload layer */ +enum wx_dec_ptype_layer { + WX_DEC_PTYPE_LAYER_NONE = 0, + WX_DEC_PTYPE_LAYER_PAY2 = 1, + WX_DEC_PTYPE_LAYER_PAY3 = 2, + WX_DEC_PTYPE_LAYER_PAY4 = 3, +}; + +struct wx_dec_ptype { + u32 known:1; + u32 mac:2; /* outer mac */ + u32 ip:3; /* outer ip*/ + u32 etype:3; /* encaped type */ + u32 eip:3; /* encaped ip */ + u32 prot:4; /* payload proto */ + u32 layer:3; /* payload layer */ +}; + +/* macro to make the table lines short */ +#define WX_PTT(mac, ip, etype, eip, proto, layer)\ + {1, \ + WX_DEC_PTYPE_MAC_##mac, /* mac */\ + WX_DEC_PTYPE_IP_##ip, /* ip */ \ + WX_DEC_PTYPE_ETYPE_##etype, /* etype */\ + WX_DEC_PTYPE_IP_##eip, /* eip */\ + WX_DEC_PTYPE_PROT_##proto, /* proto */\ + WX_DEC_PTYPE_LAYER_##layer /* layer */} + /* Host Interface Command Structures */ struct wx_hic_hdr { u8 cmd; @@ -620,6 +710,11 @@ struct wx_queue_stats { u64 bytes; }; +struct wx_rx_queue_stats { + u64 csum_good_cnt; + u64 csum_err; +}; + /* iterator for handling rings in ring container */ #define wx_for_each_ring(posm, headm) \ for (posm = (headm).ring; posm; posm = posm->next) @@ -661,6 +756,9 @@ struct wx_ring { struct wx_queue_stats stats; struct u64_stats_sync syncp; + union { + struct wx_rx_queue_stats rx_stats; + }; } ____cacheline_internodealigned_in_smp; struct wx_q_vector { -- cgit v1.2.3 From f3b03c655f67834cb25174ac6f2b099c9e68c74d Mon Sep 17 00:00:00 2001 From: 
Mengyuan Lou Date: Tue, 30 May 2023 10:26:27 +0800 Subject: net: wangxun: Implement vlan add and kill functions Implement vlan add/kill functions which add and remove vlan id in hardware. Signed-off-by: Mengyuan Lou Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 272 ++++++++++++++++++++++++++- drivers/net/ethernet/wangxun/libwx/wx_hw.h | 3 + drivers/net/ethernet/wangxun/libwx/wx_lib.c | 18 ++ drivers/net/ethernet/wangxun/libwx/wx_type.h | 25 ++- 4 files changed, 316 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index ca409b4054d0..39a9aeee7aab 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -1182,12 +1182,28 @@ static void wx_enable_sec_rx_path(struct wx *wx) WX_WRITE_FLUSH(wx); } +static void wx_vlan_strip_control(struct wx *wx, bool enable) +{ + int i, j; + + for (i = 0; i < wx->num_rx_queues; i++) { + struct wx_ring *ring = wx->rx_ring[i]; + + j = ring->reg_idx; + wr32m(wx, WX_PX_RR_CFG(j), WX_PX_RR_CFG_VLAN, + enable ? 
WX_PX_RR_CFG_VLAN : 0); + } +} + void wx_set_rx_mode(struct net_device *netdev) { struct wx *wx = netdev_priv(netdev); + netdev_features_t features; u32 fctrl, vmolr, vlnctrl; int count; + features = netdev->features; + /* Check for Promiscuous and All Multicast modes */ fctrl = rd32(wx, WX_PSR_CTL); fctrl &= ~(WX_PSR_CTL_UPE | WX_PSR_CTL_MPE); @@ -1254,6 +1270,13 @@ void wx_set_rx_mode(struct net_device *netdev) wr32(wx, WX_PSR_VLAN_CTL, vlnctrl); wr32(wx, WX_PSR_CTL, fctrl); wr32(wx, WX_PSR_VM_L2CTL(0), vmolr); + + if ((features & NETIF_F_HW_VLAN_CTAG_RX) && + (features & NETIF_F_HW_VLAN_STAG_RX)) + wx_vlan_strip_control(wx, true); + else + wx_vlan_strip_control(wx, false); + } EXPORT_SYMBOL(wx_set_rx_mode); @@ -1462,6 +1485,16 @@ static void wx_configure_tx(struct wx *wx) WX_MAC_TX_CFG_TE, WX_MAC_TX_CFG_TE); } +static void wx_restore_vlan(struct wx *wx) +{ + u16 vid = 1; + + wx_vlan_rx_add_vid(wx->netdev, htons(ETH_P_8021Q), 0); + + for_each_set_bit_from(vid, wx->active_vlans, VLAN_N_VID) + wx_vlan_rx_add_vid(wx->netdev, htons(ETH_P_8021Q), vid); +} + /** * wx_configure_rx - Configure Receive Unit after Reset * @wx: pointer to private structure @@ -1527,7 +1560,7 @@ void wx_configure(struct wx *wx) wx_configure_port(wx); wx_set_rx_mode(wx->netdev); - + wx_restore_vlan(wx); wx_enable_sec_rx_path(wx); wx_configure_tx(wx); @@ -1727,4 +1760,241 @@ int wx_sw_init(struct wx *wx) } EXPORT_SYMBOL(wx_sw_init); +/** + * wx_find_vlvf_slot - find the vlanid or the first empty slot + * @wx: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * + * return the VLVF index where this VLAN id should be placed + * + **/ +static int wx_find_vlvf_slot(struct wx *wx, u32 vlan) +{ + u32 bits = 0, first_empty_slot = 0; + int regindex; + + /* short cut the special case */ + if (vlan == 0) + return 0; + + /* Search for the vlan id in the VLVF entries. 
Save off the first empty + * slot found along the way + */ + for (regindex = 1; regindex < WX_PSR_VLAN_SWC_ENTRIES; regindex++) { + wr32(wx, WX_PSR_VLAN_SWC_IDX, regindex); + bits = rd32(wx, WX_PSR_VLAN_SWC); + if (!bits && !(first_empty_slot)) + first_empty_slot = regindex; + else if ((bits & 0x0FFF) == vlan) + break; + } + + if (regindex >= WX_PSR_VLAN_SWC_ENTRIES) { + if (first_empty_slot) + regindex = first_empty_slot; + else + regindex = -ENOMEM; + } + + return regindex; +} + +/** + * wx_set_vlvf - Set VLAN Pool Filter + * @wx: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vind: VMDq output index that maps queue to VLAN id in VFVFB + * @vlan_on: boolean flag to turn on/off VLAN in VFVF + * @vfta_changed: pointer to boolean flag which indicates whether VFTA + * should be changed + * + * Turn on/off specified bit in VLVF table. + **/ +static int wx_set_vlvf(struct wx *wx, u32 vlan, u32 vind, bool vlan_on, + bool *vfta_changed) +{ + int vlvf_index; + u32 vt, bits; + + /* If VT Mode is set + * Either vlan_on + * make sure the vlan is in VLVF + * set the vind bit in the matching VLVFB + * Or !vlan_on + * clear the pool bit and possibly the vind + */ + vt = rd32(wx, WX_CFG_PORT_CTL); + if (!(vt & WX_CFG_PORT_CTL_NUM_VT_MASK)) + return 0; + + vlvf_index = wx_find_vlvf_slot(wx, vlan); + if (vlvf_index < 0) + return vlvf_index; + + wr32(wx, WX_PSR_VLAN_SWC_IDX, vlvf_index); + if (vlan_on) { + /* set the pool bit */ + if (vind < 32) { + bits = rd32(wx, WX_PSR_VLAN_SWC_VM_L); + bits |= (1 << vind); + wr32(wx, WX_PSR_VLAN_SWC_VM_L, bits); + } else { + bits = rd32(wx, WX_PSR_VLAN_SWC_VM_H); + bits |= (1 << (vind - 32)); + wr32(wx, WX_PSR_VLAN_SWC_VM_H, bits); + } + } else { + /* clear the pool bit */ + if (vind < 32) { + bits = rd32(wx, WX_PSR_VLAN_SWC_VM_L); + bits &= ~(1 << vind); + wr32(wx, WX_PSR_VLAN_SWC_VM_L, bits); + bits |= rd32(wx, WX_PSR_VLAN_SWC_VM_H); + } else { + bits = rd32(wx, WX_PSR_VLAN_SWC_VM_H); + bits &= ~(1 << (vind - 
32)); + wr32(wx, WX_PSR_VLAN_SWC_VM_H, bits); + bits |= rd32(wx, WX_PSR_VLAN_SWC_VM_L); + } + } + + if (bits) { + wr32(wx, WX_PSR_VLAN_SWC, (WX_PSR_VLAN_SWC_VIEN | vlan)); + if (!vlan_on && vfta_changed) + *vfta_changed = false; + } else { + wr32(wx, WX_PSR_VLAN_SWC, 0); + } + + return 0; +} + +/** + * wx_set_vfta - Set VLAN filter table + * @wx: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vind: VMDq output index that maps queue to VLAN id in VFVFB + * @vlan_on: boolean flag to turn on/off VLAN in VFVF + * + * Turn on/off specified VLAN in the VLAN filter table. + **/ +static int wx_set_vfta(struct wx *wx, u32 vlan, u32 vind, bool vlan_on) +{ + u32 bitindex, vfta, targetbit; + bool vfta_changed = false; + int regindex, ret; + + /* this is a 2 part operation - first the VFTA, then the + * VLVF and VLVFB if VT Mode is set + * We don't write the VFTA until we know the VLVF part succeeded. + */ + + /* Part 1 + * The VFTA is a bitstring made up of 128 32-bit registers + * that enable the particular VLAN id, much like the MTA: + * bits[11-5]: which register + * bits[4-0]: which bit in the register + */ + regindex = (vlan >> 5) & 0x7F; + bitindex = vlan & 0x1F; + targetbit = (1 << bitindex); + /* errata 5 */ + vfta = wx->mac.vft_shadow[regindex]; + if (vlan_on) { + if (!(vfta & targetbit)) { + vfta |= targetbit; + vfta_changed = true; + } + } else { + if ((vfta & targetbit)) { + vfta &= ~targetbit; + vfta_changed = true; + } + } + /* Part 2 + * Call wx_set_vlvf to set VLVFB and VLVF + */ + ret = wx_set_vlvf(wx, vlan, vind, vlan_on, &vfta_changed); + if (ret != 0) + return ret; + + if (vfta_changed) + wr32(wx, WX_PSR_VLAN_TBL(regindex), vfta); + wx->mac.vft_shadow[regindex] = vfta; + + return 0; +} + +/** + * wx_clear_vfta - Clear VLAN filter table + * @wx: pointer to hardware structure + * + * Clears the VLAN filer table, and the VMDq index associated with the filter + **/ +static void wx_clear_vfta(struct wx *wx) +{ + u32 offset; + + for 
(offset = 0; offset < wx->mac.vft_size; offset++) { + wr32(wx, WX_PSR_VLAN_TBL(offset), 0); + wx->mac.vft_shadow[offset] = 0; + } + + for (offset = 0; offset < WX_PSR_VLAN_SWC_ENTRIES; offset++) { + wr32(wx, WX_PSR_VLAN_SWC_IDX, offset); + wr32(wx, WX_PSR_VLAN_SWC, 0); + wr32(wx, WX_PSR_VLAN_SWC_VM_L, 0); + wr32(wx, WX_PSR_VLAN_SWC_VM_H, 0); + } +} + +int wx_vlan_rx_add_vid(struct net_device *netdev, + __be16 proto, u16 vid) +{ + struct wx *wx = netdev_priv(netdev); + + /* add VID to filter table */ + wx_set_vfta(wx, vid, VMDQ_P(0), true); + set_bit(vid, wx->active_vlans); + + return 0; +} +EXPORT_SYMBOL(wx_vlan_rx_add_vid); + +int wx_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) +{ + struct wx *wx = netdev_priv(netdev); + + /* remove VID from filter table */ + if (vid) + wx_set_vfta(wx, vid, VMDQ_P(0), false); + clear_bit(vid, wx->active_vlans); + + return 0; +} +EXPORT_SYMBOL(wx_vlan_rx_kill_vid); + +/** + * wx_start_hw - Prepare hardware for Tx/Rx + * @wx: pointer to hardware structure + * + * Starts the hardware using the generic start_hw function + * and the generation start_hw function. + * Then performs revision-specific operations, if any. 
+ **/ +void wx_start_hw(struct wx *wx) +{ + int i; + + /* Clear the VLAN filter table */ + wx_clear_vfta(wx); + WX_WRITE_FLUSH(wx); + /* Clear the rate limiters */ + for (i = 0; i < wx->mac.max_tx_queues; i++) { + wr32(wx, WX_TDM_RP_IDX, i); + wr32(wx, WX_TDM_RP_RATE, 0); + } +} +EXPORT_SYMBOL(wx_start_hw); + MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h index c173c56f0ab5..1f93ca32c921 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h @@ -26,10 +26,13 @@ void wx_set_rx_mode(struct net_device *netdev); int wx_change_mtu(struct net_device *netdev, int new_mtu); void wx_disable_rx_queue(struct wx *wx, struct wx_ring *ring); void wx_configure(struct wx *wx); +void wx_start_hw(struct wx *wx); int wx_disable_pcie_master(struct wx *wx); int wx_stop_adapter(struct wx *wx); void wx_reset_misc(struct wx *wx); int wx_get_pcie_msix_counts(struct wx *wx, u16 *msix_count, u16 max_msix_count); int wx_sw_init(struct wx *wx); +int wx_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid); +int wx_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid); #endif /* _WX_HW_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 84107208401e..680f1ad36240 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -634,6 +634,23 @@ static void wx_rx_checksum(struct wx_ring *ring, ring->rx_stats.csum_good_cnt++; } +static void wx_rx_vlan(struct wx_ring *ring, union wx_rx_desc *rx_desc, + struct sk_buff *skb) +{ + u16 ethertype; + u8 idx = 0; + + if ((ring->netdev->features & + (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX)) && + wx_test_staterr(rx_desc, WX_RXD_STAT_VP)) { + idx = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) & + 0x1c0) >> 6; + ethertype = ring->q_vector->wx->tpid[idx]; + 
__vlan_hwaccel_put_tag(skb, htons(ethertype), + le16_to_cpu(rx_desc->wb.upper.vlan)); + } +} + /** * wx_process_skb_fields - Populate skb header fields from Rx descriptor * @rx_ring: rx descriptor ring packet is being transacted on @@ -650,6 +667,7 @@ static void wx_process_skb_fields(struct wx_ring *rx_ring, { wx_rx_hash(rx_ring, rx_desc, skb); wx_rx_checksum(rx_ring, rx_desc, skb); + wx_rx_vlan(rx_ring, rx_desc, skb); skb_record_rx_queue(skb, rx_ring->queue_index); skb->protocol = eth_type_trans(skb, rx_ring->netdev); } diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 2c1fd0f1025d..91b2e4bfa206 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -6,6 +6,7 @@ #include #include +#include #include #define WX_NCSI_SUP 0x8000 @@ -65,6 +66,8 @@ #define WX_CFG_PORT_CTL_QINQ BIT(2) #define WX_CFG_PORT_CTL_D_VLAN BIT(0) /* double vlan*/ #define WX_CFG_TAG_TPID(_i) (0x14430 + ((_i) * 4)) +#define WX_CFG_PORT_CTL_NUM_VT_MASK GENMASK(13, 12) /* number of TVs */ + /* GPIO Registers */ #define WX_GPIO_DR 0x14800 @@ -88,6 +91,8 @@ /* TDM CTL BIT */ #define WX_TDM_CTL_TE BIT(0) /* Transmit Enable */ #define WX_TDM_PB_THRE(_i) (0x18020 + ((_i) * 4)) +#define WX_TDM_RP_IDX 0x1820C +#define WX_TDM_RP_RATE 0x18404 /***************************** RDB registers *********************************/ /* receive packet buffer */ @@ -151,6 +156,9 @@ #define WX_PSR_LAN_FLEX_DW_H(_i) (0x15C04 + ((_i) * 16)) #define WX_PSR_LAN_FLEX_MSK(_i) (0x15C08 + ((_i) * 16)) +/* vlan tbl */ +#define WX_PSR_VLAN_TBL(_i) (0x16000 + ((_i) * 4)) + /* mac switcher */ #define WX_PSR_MAC_SWC_AD_L 0x16200 #define WX_PSR_MAC_SWC_AD_H 0x16204 @@ -162,6 +170,15 @@ #define WX_PSR_MAC_SWC_IDX 0x16210 #define WX_CLEAR_VMDQ_ALL 0xFFFFFFFFU +/* vlan switch */ +#define WX_PSR_VLAN_SWC 0x16220 +#define WX_PSR_VLAN_SWC_VM_L 0x16224 +#define WX_PSR_VLAN_SWC_VM_H 0x16228 +#define WX_PSR_VLAN_SWC_IDX 0x16230 
/* 64 vlan entries */ +/* VLAN pool filtering masks */ +#define WX_PSR_VLAN_SWC_VIEN BIT(31) /* filter is valid */ +#define WX_PSR_VLAN_SWC_ENTRIES 64 + /********************************* RSEC **************************************/ /* general rsec */ #define WX_RSC_CTL 0x17000 @@ -256,6 +273,7 @@ #define WX_PX_RR_RP(_i) (0x0100C + ((_i) * 0x40)) #define WX_PX_RR_CFG(_i) (0x01010 + ((_i) * 0x40)) /* PX_RR_CFG bit definitions */ +#define WX_PX_RR_CFG_VLAN BIT(31) #define WX_PX_RR_CFG_SPLIT_MODE BIT(26) #define WX_PX_RR_CFG_RR_THER_SHIFT 16 #define WX_PX_RR_CFG_RR_HDR_SZ GENMASK(15, 12) @@ -297,6 +315,7 @@ #define WX_MAX_TXD 8192 #define WX_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */ +#define VMDQ_P(p) p /* Supported Rx Buffer Sizes */ #define WX_RXBUFFER_256 256 /* Used for skb receive header */ @@ -321,6 +340,7 @@ /******************* Receive Descriptor bit definitions **********************/ #define WX_RXD_STAT_DD BIT(0) /* Done */ #define WX_RXD_STAT_EOP BIT(1) /* End of Packet */ +#define WX_RXD_STAT_VP BIT(5) /* IEEE VLAN Pkt */ #define WX_RXD_STAT_L4CS BIT(7) /* L4 xsum calculated */ #define WX_RXD_STAT_IPCS BIT(8) /* IP xsum calculated */ #define WX_RXD_STAT_OUTERIPCS BIT(10) /* Cloud IP xsum calculated*/ @@ -566,6 +586,8 @@ struct wx_mac_info { u32 mta_shadow[128]; s32 mc_filter_type; u32 mcft_size; + u32 vft_shadow[128]; + u32 vft_size; u32 num_rar_entries; u32 rx_pb_size; u32 tx_pb_size; @@ -726,7 +748,6 @@ struct wx_ring_container { u8 count; /* total number of rings in vector */ u8 itr; /* current ITR setting for ring */ }; - struct wx_ring { struct wx_ring *next; /* pointer to next ring in q_vector */ struct wx_q_vector *q_vector; /* backpointer to host q_vector */ @@ -789,6 +810,8 @@ enum wx_isb_idx { }; struct wx { + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + u8 __iomem *hw_addr; struct pci_dev *pdev; struct net_device *netdev; -- cgit v1.2.3 From 6dbedcffcf543afe1297f86fd6620327482a3a98 Mon Sep 17 00:00:00 2001 From: Mengyuan Lou 
Date: Tue, 30 May 2023 10:26:28 +0800 Subject: net: libwx: Implement xx_set_features ops Implement wx_set_features function which to support ndo_set_features. Signed-off-by: Mengyuan Lou Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_lib.c | 20 ++++++++++++++++++++ drivers/net/ethernet/wangxun/libwx/wx_lib.h | 1 + drivers/net/ethernet/wangxun/libwx/wx_type.h | 2 ++ 3 files changed, 23 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 680f1ad36240..3dd328d33fcc 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -2704,4 +2704,24 @@ void wx_get_stats64(struct net_device *netdev, } EXPORT_SYMBOL(wx_get_stats64); +int wx_set_features(struct net_device *netdev, netdev_features_t features) +{ + netdev_features_t changed = netdev->features ^ features; + struct wx *wx = netdev_priv(netdev); + + if (changed & NETIF_F_RXHASH) + wr32m(wx, WX_RDB_RA_CTL, WX_RDB_RA_CTL_RSS_EN, + WX_RDB_RA_CTL_RSS_EN); + else + wr32m(wx, WX_RDB_RA_CTL, WX_RDB_RA_CTL_RSS_EN, 0); + + if (changed & + (NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_RX)) + wx_set_rx_mode(netdev); + + return 1; +} +EXPORT_SYMBOL(wx_set_features); + MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_lib.h index 50ee41f1fa10..df1f4a5951f0 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.h @@ -28,5 +28,6 @@ void wx_free_resources(struct wx *wx); int wx_setup_resources(struct wx *wx); void wx_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats); +int wx_set_features(struct net_device *netdev, netdev_features_t features); #endif /* _NGBE_LIB_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 91b2e4bfa206..5063846e1b52 100644 
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -111,6 +111,8 @@ #define WX_RDB_PL_CFG_L2HDR BIT(3) #define WX_RDB_PL_CFG_TUN_TUNHDR BIT(4) #define WX_RDB_PL_CFG_TUN_OUTL2HDR BIT(5) +#define WX_RDB_RA_CTL 0x194F4 +#define WX_RDB_RA_CTL_RSS_EN BIT(2) /* RSS Enable */ /******************************* PSR Registers *******************************/ /* psr control */ -- cgit v1.2.3 From 50a908a0bd8b9e589fcdcc26f2acd57a253eca8d Mon Sep 17 00:00:00 2001 From: Mengyuan Lou Date: Tue, 30 May 2023 10:26:29 +0800 Subject: net: ngbe: Add netdev features support Add features and hw_features that ngbe can support. Signed-off-by: Mengyuan Lou Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index df6b870aa871..f234c9c4b942 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -473,6 +473,7 @@ static const struct net_device_ops ngbe_netdev_ops = { .ndo_change_mtu = wx_change_mtu, .ndo_start_xmit = wx_xmit_frame, .ndo_set_rx_mode = wx_set_rx_mode, + .ndo_set_features = wx_set_features, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = wx_set_mac, .ndo_get_stats64 = wx_get_stats64, @@ -551,12 +552,18 @@ static int ngbe_probe(struct pci_dev *pdev, ngbe_set_ethtool_ops(netdev); netdev->netdev_ops = &ngbe_netdev_ops; - netdev->features |= NETIF_F_HIGHDMA; - netdev->features = NETIF_F_SG; - + netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM | + NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_RXHASH | NETIF_F_RXCSUM; + netdev->features |= NETIF_F_SCTP_CRC | NETIF_F_TSO_MANGLEID; + netdev->vlan_features |= netdev->features; + netdev->features |= NETIF_F_IPV6_CSUM | NETIF_F_VLAN_FEATURES; /* copy netdev features into list of user 
selectable features */ - netdev->hw_features |= netdev->features | - NETIF_F_RXALL; + netdev->hw_features |= netdev->features | NETIF_F_RXALL; + netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; + netdev->features |= NETIF_F_HIGHDMA; + netdev->hw_features |= NETIF_F_GRO; + netdev->features |= NETIF_F_GRO; netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; -- cgit v1.2.3 From 361bf4f47cee800b9740d8e1f8ba73ccc248a934 Mon Sep 17 00:00:00 2001 From: Mengyuan Lou Date: Tue, 30 May 2023 10:26:30 +0800 Subject: net: ngbe: Implement vlan add and remove ops ngbe add ndo_vlan_rx_add_vid and ndo_vlan_rx_kill_vid. Signed-off-by: Mengyuan Lou Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 3 +++ drivers/net/ethernet/wangxun/ngbe/ngbe_type.h | 1 + 2 files changed, 4 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index f234c9c4b942..c99a5d3de72e 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -115,6 +115,7 @@ static int ngbe_sw_init(struct wx *wx) wx->mac.max_rx_queues = NGBE_MAX_RX_QUEUES; wx->mac.max_tx_queues = NGBE_MAX_TX_QUEUES; wx->mac.mcft_size = NGBE_MC_TBL_SIZE; + wx->mac.vft_size = NGBE_SP_VFT_TBL_SIZE; wx->mac.rx_pb_size = NGBE_RX_PB_SIZE; wx->mac.tx_pb_size = NGBE_TDB_PB_SZ; @@ -477,6 +478,8 @@ static const struct net_device_ops ngbe_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = wx_set_mac, .ndo_get_stats64 = wx_get_stats64, + .ndo_vlan_rx_add_vid = wx_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = wx_vlan_rx_kill_vid, }; /** diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h index 373d5af628cd..b70eca397b67 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h @@ -136,6 +136,7 @@ enum 
NGBE_MSCA_CMD_value { #define NGBE_RAR_ENTRIES 32 #define NGBE_RX_PB_SIZE 42 #define NGBE_MC_TBL_SIZE 128 +#define NGBE_SP_VFT_TBL_SIZE 128 #define NGBE_TDB_PB_SZ (20 * 1024) /* 160KB Packet Buffer */ /* TX/RX descriptor defines */ -- cgit v1.2.3 From 6670f1ece2c8c069428ed00c8344b8dbbdcf9748 Mon Sep 17 00:00:00 2001 From: Mengyuan Lou Date: Tue, 30 May 2023 10:26:31 +0800 Subject: net: txgbe: Add netdev features support Add features and hw_features that ngbe can support. Signed-off-by: Mengyuan Lou Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 5b8a121fb496..bcc9c2959177 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -491,6 +491,7 @@ static const struct net_device_ops txgbe_netdev_ops = { .ndo_change_mtu = wx_change_mtu, .ndo_start_xmit = wx_xmit_frame, .ndo_set_rx_mode = wx_set_rx_mode, + .ndo_set_features = wx_set_features, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = wx_set_mac, .ndo_get_stats64 = wx_get_stats64, @@ -596,11 +597,25 @@ static int txgbe_probe(struct pci_dev *pdev, goto err_free_mac_table; } - netdev->features |= NETIF_F_HIGHDMA; - netdev->features = NETIF_F_SG; - + netdev->features = NETIF_F_SG | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_RXHASH | + NETIF_F_RXCSUM | + NETIF_F_HW_CSUM; + + netdev->gso_partial_features = NETIF_F_GSO_ENCAP_ALL; + netdev->features |= netdev->gso_partial_features; + netdev->features |= NETIF_F_SCTP_CRC; + netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID; + netdev->hw_enc_features |= netdev->vlan_features; + netdev->features |= NETIF_F_VLAN_FEATURES; /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features | 
NETIF_F_RXALL; + netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC; + netdev->features |= NETIF_F_HIGHDMA; + netdev->hw_features |= NETIF_F_GRO; + netdev->features |= NETIF_F_GRO; netdev->priv_flags |= IFF_UNICAST_FLT; netdev->priv_flags |= IFF_SUPP_NOFCS; -- cgit v1.2.3 From 7df4af51deb3cf10a23ad6f6ec3079f5af3c049c Mon Sep 17 00:00:00 2001 From: Mengyuan Lou Date: Tue, 30 May 2023 10:26:32 +0800 Subject: net: txgbe: Implement vlan add and remove ops txgbe add ndo_vlan_rx_add_vid and ndo_vlan_rx_kill_vid. Signed-off-by: Mengyuan Lou Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 4 ++++ drivers/net/ethernet/wangxun/txgbe/txgbe_type.h | 1 + 2 files changed, 5 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index bcc9c2959177..0f0d9fa1cde1 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -258,6 +258,7 @@ static void txgbe_reset(struct wx *wx) if (err != 0) wx_err(wx, "Hardware Error: %d\n", err); + wx_start_hw(wx); /* do not flush user set addresses */ memcpy(old_addr, &wx->mac_table[0].addr, netdev->addr_len); wx_flush_sw_mac_table(wx); @@ -330,6 +331,7 @@ static int txgbe_sw_init(struct wx *wx) wx->mac.max_tx_queues = TXGBE_SP_MAX_TX_QUEUES; wx->mac.max_rx_queues = TXGBE_SP_MAX_RX_QUEUES; wx->mac.mcft_size = TXGBE_SP_MC_TBL_SIZE; + wx->mac.vft_size = TXGBE_SP_VFT_TBL_SIZE; wx->mac.rx_pb_size = TXGBE_SP_RX_PB_SIZE; wx->mac.tx_pb_size = TXGBE_SP_TDB_PB_SZ; @@ -495,6 +497,8 @@ static const struct net_device_ops txgbe_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = wx_set_mac, .ndo_get_stats64 = wx_get_stats64, + .ndo_vlan_rx_add_vid = wx_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = wx_vlan_rx_kill_vid, }; /** diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h 
index 63a1c733718d..032972369965 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -77,6 +77,7 @@ #define TXGBE_SP_MAX_RX_QUEUES 128 #define TXGBE_SP_RAR_ENTRIES 128 #define TXGBE_SP_MC_TBL_SIZE 128 +#define TXGBE_SP_VFT_TBL_SIZE 128 #define TXGBE_SP_RX_PB_SIZE 512 #define TXGBE_SP_TDB_PB_SZ (160 * 1024) /* 160KB Packet Buffer */ -- cgit v1.2.3 From 116f7b361ebbb6095257c27da327e27000488214 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 31 May 2023 12:00:07 +0100 Subject: chelsio: Support MSG_SPLICE_PAGES Make Chelsio's TLS offload sendmsg() support MSG_SPLICE_PAGES, splicing in pages from the source iterator if possible and copying the data in otherwise. This allows ->sendpage() to be replaced by something that can handle multiple multipage folios in a single transaction. Signed-off-by: David Howells cc: Ayush Sawal cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: Jens Axboe cc: Matthew Wilcox cc: netdev@vger.kernel.org Signed-off-by: Paolo Abeni --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index ae6b17b96bf1..1d08386ac916 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -1092,7 +1092,17 @@ new_buf: if (copy > size) copy = size; - if (skb_tailroom(skb) > 0) { + if (msg->msg_flags & MSG_SPLICE_PAGES) { + err = skb_splice_from_iter(skb, &msg->msg_iter, copy, + sk->sk_allocation); + if (err < 0) { + if (err == -EMSGSIZE) + goto new_buf; + goto do_fault; + } + copy = err; + sk_wmem_queued_add(sk, copy); + } else if (skb_tailroom(skb) > 0) { copy = min(copy, skb_tailroom(skb)); if (is_tls_tx(csk)) copy = min_t(int, copy, 
csk->tlshws.txleft); -- cgit v1.2.3 From 26acc982c1c5c2835b0c6981d896329efa3557c3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 31 May 2023 12:00:08 +0100 Subject: chelsio: Convert chtls_sendpage() to use MSG_SPLICE_PAGES Convert chtls_sendpage() to use sendmsg() with MSG_SPLICE_PAGES rather than directly splicing in the pages itself. This allows ->sendpage() to be replaced by something that can handle multiple multipage folios in a single transaction. Signed-off-by: David Howells cc: Ayush Sawal cc: "David S. Miller" cc: Eric Dumazet cc: Jakub Kicinski cc: Paolo Abeni cc: Jens Axboe cc: Matthew Wilcox cc: netdev@vger.kernel.org Signed-off-by: Paolo Abeni --- .../chelsio/inline_crypto/chtls/chtls_io.c | 109 ++------------------- 1 file changed, 7 insertions(+), 102 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index 1d08386ac916..5724bbbb6ee0 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -1240,110 +1240,15 @@ out_err: int chtls_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { - struct chtls_sock *csk; - struct chtls_dev *cdev; - int mss, err, copied; - struct tcp_sock *tp; - long timeo; - - tp = tcp_sk(sk); - copied = 0; - csk = rcu_dereference_sk_user_data(sk); - cdev = csk->cdev; - lock_sock(sk); - timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, }; + struct bio_vec bvec; - err = sk_stream_wait_connect(sk, &timeo); - if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && - err != 0) - goto out_err; - - mss = csk->mss; - csk_set_flag(csk, CSK_TX_MORE_DATA); - - while (size > 0) { - struct sk_buff *skb = skb_peek_tail(&csk->txq); - int copy, i; - - if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || - (copy = mss - 
skb->len) <= 0) { -new_buf: - if (!csk_mem_free(cdev, sk)) - goto wait_for_sndbuf; + if (flags & MSG_SENDPAGE_NOTLAST) + msg.msg_flags |= MSG_MORE; - if (is_tls_tx(csk)) { - skb = get_record_skb(sk, - select_size(sk, size, - flags, - TX_TLSHDR_LEN), - true); - } else { - skb = get_tx_skb(sk, 0); - } - if (!skb) - goto wait_for_memory; - copy = mss; - } - if (copy > size) - copy = size; - - i = skb_shinfo(skb)->nr_frags; - if (skb_can_coalesce(skb, i, page, offset)) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); - } else if (i < MAX_SKB_FRAGS) { - get_page(page); - skb_fill_page_desc(skb, i, page, offset, copy); - } else { - tx_skb_finalize(skb); - push_frames_if_head(sk); - goto new_buf; - } - - skb->len += copy; - if (skb->len == mss) - tx_skb_finalize(skb); - skb->data_len += copy; - skb->truesize += copy; - sk->sk_wmem_queued += copy; - tp->write_seq += copy; - copied += copy; - offset += copy; - size -= copy; - - if (corked(tp, flags) && - (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))) - ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND; - - if (!size) - break; - - if (unlikely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)) - push_frames_if_head(sk); - continue; -wait_for_sndbuf: - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); -wait_for_memory: - err = csk_wait_memory(cdev, sk, &timeo); - if (err) - goto do_error; - } -out: - csk_reset_flag(csk, CSK_TX_MORE_DATA); - if (copied) - chtls_tcp_push(sk, flags); -done: - release_sock(sk); - return copied; - -do_error: - if (copied) - goto out; - -out_err: - if (csk_conn_inline(csk)) - csk_reset_flag(csk, CSK_TX_MORE_DATA); - copied = sk_stream_error(sk, flags, err); - goto done; + bvec_set_page(&bvec, page, size, offset); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); + return chtls_sendmsg(sk, &msg, size); } static void chtls_select_window(struct sock *sk) -- cgit v1.2.3 From 2145328515c8fa9b8a9f7889250bc6c032f2a0e6 Mon Sep 17 00:00:00 2001 From: Long Li Date: Sat, 13 May 2023 23:18:15 
-0700 Subject: RDMA/mana_ib: Use v2 version of cfg_rx_steer_req to enable RX coalescing With RX coalescing, one CQE entry can be used to indicate multiple packets on the receive queue. This saves processing time and PCI bandwidth over the CQ. The MANA Ethernet driver also uses the v2 version of the protocol. It doesn't use RX coalescing and its behavior is not changed. Link: https://lore.kernel.org/r/1684045095-31228-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Long Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mana/qp.c | 5 ++++- drivers/net/ethernet/microsoft/mana/mana_en.c | 5 ++++- include/net/mana/mana.h | 4 +++- 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 54b61930a7fd..4b3b5b274e84 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -13,7 +13,7 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, u8 *rx_hash_key) { struct mana_port_context *mpc = netdev_priv(ndev); - struct mana_cfg_rx_steer_req *req = NULL; + struct mana_cfg_rx_steer_req_v2 *req; struct mana_cfg_rx_steer_resp resp = {}; mana_handle_t *req_indir_tab; struct gdma_context *gc; @@ -33,6 +33,8 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size, sizeof(resp)); + req->hdr.req.msg_version = GDMA_MESSAGE_V2; + req->vport = mpc->port_handle; req->rx_enable = 1; req->update_default_rxobj = 1; @@ -46,6 +48,7 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE; req->indir_tab_offset = sizeof(*req); req->update_indir_tab = true; + req->cqe_coalescing_enable = 1; req_indir_tab = (mana_handle_t *)(req + 1); /* The ind table passed to the hardware must have diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 
06d6292e09b3..b3fcb767b9ab 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -972,7 +972,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, bool update_tab) { u16 num_entries = MANA_INDIRECT_TABLE_SIZE; - struct mana_cfg_rx_steer_req *req = NULL; + struct mana_cfg_rx_steer_req_v2 *req; struct mana_cfg_rx_steer_resp resp = {}; struct net_device *ndev = apc->ndev; mana_handle_t *req_indir_tab; @@ -987,6 +987,8 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size, sizeof(resp)); + req->hdr.req.msg_version = GDMA_MESSAGE_V2; + req->vport = apc->port_handle; req->num_indir_entries = num_entries; req->indir_tab_offset = sizeof(*req); @@ -996,6 +998,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, req->update_hashkey = update_key; req->update_indir_tab = update_tab; req->default_rxobj = apc->default_rxobj; + req->cqe_coalescing_enable = 0; if (update_key) memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE); diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index cd386aa7c7cc..1512bd48df81 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -581,7 +581,7 @@ struct mana_fence_rq_resp { }; /* HW DATA */ /* Configure vPort Rx Steering */ -struct mana_cfg_rx_steer_req { +struct mana_cfg_rx_steer_req_v2 { struct gdma_req_hdr hdr; mana_handle_t vport; u16 num_indir_entries; @@ -594,6 +594,8 @@ struct mana_cfg_rx_steer_req { u8 reserved; mana_handle_t default_rxobj; u8 hashkey[MANA_HASH_KEY_SIZE]; + u8 cqe_coalescing_enable; + u8 reserved2[7]; }; /* HW DATA */ struct mana_cfg_rx_steer_resp { -- cgit v1.2.3 From a60caf039e96d806b1ced893242bae82ba3ccf0d Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Mon, 29 May 2023 16:38:17 +0900 Subject: net: renesas: rswitch: Fix return value in error path of xmit Fix return value in the error path of 
rswitch_start_xmit(). If TX queues are full, this function should return NETDEV_TX_BUSY. Fixes: 3590918b5d07 ("net: ethernet: renesas: Add support for "Ethernet Switch"") Signed-off-by: Yoshihiro Shimoda Link: https://lore.kernel.org/r/20230529073817.1145208-1-yoshihiro.shimoda.uh@renesas.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 29afaddb598d..aace87139cea 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1485,7 +1485,7 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd if (rswitch_get_num_cur_queues(gq) >= gq->ring_size - 1) { netif_stop_subqueue(ndev, 0); - return ret; + return NETDEV_TX_BUSY; } if (skb_put_padto(skb, ETH_ZLEN)) -- cgit v1.2.3 From abaf8d51b0cedb16af51fb6b2189370d7515977c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 31 May 2023 08:44:57 -0700 Subject: ice: recycle/free all of the fragments from multi-buffer frame The ice driver caches next_to_clean value at the beginning of ice_clean_rx_irq() in order to remember the first buffer that has to be freed/recycled after main Rx processing loop. The end boundary is indicated by first descriptor of frame that Rx processing loop has ended its duties. Note that if mentioned loop ended in the middle of gathering multi-buffer frame, next_to_clean would be pointing to the descriptor in the middle of the frame BUT freeing/recycling stage will stop at the first descriptor. This means that next iteration of ice_clean_rx_irq() will miss the (first_desc, next_to_clean - 1) entries. 
When running various 9K MTU workloads, such splats were observed: [ 540.780716] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 540.787787] #PF: supervisor read access in kernel mode [ 540.793002] #PF: error_code(0x0000) - not-present page [ 540.798218] PGD 0 P4D 0 [ 540.800801] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 540.805231] CPU: 18 PID: 3984 Comm: xskxceiver Tainted: G W 6.3.0-rc7+ #96 [ 540.813619] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019 [ 540.824209] RIP: 0010:ice_clean_rx_irq+0x2b6/0xf00 [ice] [ 540.829678] Code: 74 24 10 e9 aa 00 00 00 8b 55 78 41 31 57 10 41 09 c4 4d 85 ff 0f 84 83 00 00 00 49 8b 57 08 41 8b 4f 1c 65 8b 35 1a fa 4b 3f <48> 8b 02 48 c1 e8 3a 39 c6 0f 85 a2 00 00 00 f6 42 08 02 0f 85 98 [ 540.848717] RSP: 0018:ffffc9000f42fc50 EFLAGS: 00010282 [ 540.854029] RAX: 0000000000000004 RBX: 0000000000000002 RCX: 000000000000fffe [ 540.861272] RDX: 0000000000000000 RSI: 0000000000000001 RDI: 00000000ffffffff [ 540.868519] RBP: ffff88984a05ac00 R08: 0000000000000000 R09: dead000000000100 [ 540.875760] R10: ffff88983fffcd00 R11: 000000000010f2b8 R12: 0000000000000004 [ 540.883008] R13: 0000000000000003 R14: 0000000000000800 R15: ffff889847a10040 [ 540.890253] FS: 00007f6ddf7fe640(0000) GS:ffff88afdf800000(0000) knlGS:0000000000000000 [ 540.898465] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 540.904299] CR2: 0000000000000000 CR3: 000000010d3da001 CR4: 00000000007706e0 [ 540.911542] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 540.918789] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 540.926032] PKRU: 55555554 [ 540.928790] Call Trace: [ 540.931276] [ 540.933418] ice_napi_poll+0x4ca/0x6d0 [ice] [ 540.937804] ? __pfx_ice_napi_poll+0x10/0x10 [ice] [ 540.942716] napi_busy_loop+0xd7/0x320 [ 540.946537] xsk_recvmsg+0x143/0x170 [ 540.950178] sock_recvmsg+0x99/0xa0 [ 540.953729] __sys_recvfrom+0xa8/0x120 [ 540.957543] ? 
do_futex+0xbd/0x1d0 [ 540.961008] ? __x64_sys_futex+0x73/0x1d0 [ 540.965083] __x64_sys_recvfrom+0x20/0x30 [ 540.969155] do_syscall_64+0x38/0x90 [ 540.972796] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 540.977934] RIP: 0033:0x7f6de5f27934 To fix this, set cached_ntc to first_desc so that at the end, when freeing/recycling buffers, descriptors from first to ntc are not missed. Fixes: 2fba7dc5157b ("ice: Add support for XDP multi-buffer on Rx side") Signed-off-by: Maciej Fijalkowski Reviewed-by: Simon Horman Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20230531154457.3216621-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 059bd911c51d..52d0a126eb61 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -1152,11 +1152,11 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_pkts = 0; unsigned int offset = rx_ring->rx_offset; struct xdp_buff *xdp = &rx_ring->xdp; + u32 cached_ntc = rx_ring->first_desc; struct ice_tx_ring *xdp_ring = NULL; struct bpf_prog *xdp_prog = NULL; u32 ntc = rx_ring->next_to_clean; u32 cnt = rx_ring->count; - u32 cached_ntc = ntc; u32 xdp_xmit = 0; u32 cached_ntu; bool failure; -- cgit v1.2.3 From 5ff9424ea03a1fce2298b271eec1dad5ff4df1be Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 31 May 2023 16:20:25 +0200 Subject: devlink: bring port new reply back In the offending fixes commit I mistakenly removed the reply message of the port new command. I was under impression it is a new port notification, partly due to the "notify" in the name of the helper function. 
Bring the code sending reply with new port message back, this time putting it directly to devlink_nl_cmd_port_new_doit() Fixes: c496daeb8630 ("devlink: remove duplicate port notification") Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230531142025.2605001-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/sf/devlink.c | 9 ++++--- drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h | 3 ++- include/net/devlink.h | 4 +++- net/devlink/leftover.c | 28 +++++++++++++++++++++- 4 files changed, 38 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index c7d4691cb65a..9c02e5ea797c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -282,7 +282,8 @@ out: static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, const struct devlink_port_new_attrs *new_attr, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + struct devlink_port **dl_port) { struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5_sf *sf; @@ -296,6 +297,7 @@ static int mlx5_sf_add(struct mlx5_core_dev *dev, struct mlx5_sf_table *table, new_attr->controller, new_attr->sfnum); if (err) goto esw_err; + *dl_port = &sf->dl_port; trace_mlx5_sf_add(dev, sf->port_index, sf->controller, sf->hw_fn_id, new_attr->sfnum); return 0; @@ -336,7 +338,8 @@ mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_ int mlx5_devlink_sf_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *new_attr, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + struct devlink_port **dl_port) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_sf_table *table; @@ -352,7 +355,7 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink, "Port add is only 
supported in eswitch switchdev mode or SF ports are disabled."); return -EOPNOTSUPP; } - err = mlx5_sf_add(dev, table, new_attr, extack); + err = mlx5_sf_add(dev, table, new_attr, extack, dl_port); mlx5_sf_table_put(table); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h index c5430b8dcdf6..860f9ddb7107 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -20,7 +20,8 @@ void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev); int mlx5_devlink_sf_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *add_attr, - struct netlink_ext_ack *extack); + struct netlink_ext_ack *extack, + struct devlink_port **dl_port); int mlx5_devlink_sf_port_del(struct devlink *devlink, struct devlink_port *dl_port, struct netlink_ext_ack *extack); diff --git a/include/net/devlink.h b/include/net/devlink.h index fe42ad46cf3b..9a3c51aa6e81 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1434,6 +1434,7 @@ struct devlink_ops { * @devlink: Devlink instance * @attrs: attributes of the new port * @extack: extack for reporting error messages + * @devlink_port: pointer to store new devlink port pointer * * Devlink core will call this device driver function upon user request * to create a new port function of a specified flavor and optional @@ -1446,7 +1447,8 @@ struct devlink_ops { */ int (*port_new)(struct devlink *devlink, const struct devlink_port_new_attrs *attrs, - struct netlink_ext_ack *extack); + struct netlink_ext_ack *extack, + struct devlink_port **devlink_port); /** * Rate control callbacks. 
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c index d5ca9fbe2d40..649a9701eb6a 100644 --- a/net/devlink/leftover.c +++ b/net/devlink/leftover.c @@ -1347,6 +1347,9 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, struct netlink_ext_ack *extack = info->extack; struct devlink_port_new_attrs new_attrs = {}; struct devlink *devlink = info->user_ptr[0]; + struct devlink_port *devlink_port; + struct sk_buff *msg; + int err; if (!devlink->ops->port_new) return -EOPNOTSUPP; @@ -1377,7 +1380,30 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb, new_attrs.sfnum_valid = true; } - return devlink->ops->port_new(devlink, &new_attrs, extack); + err = devlink->ops->port_new(devlink, &new_attrs, + extack, &devlink_port); + if (err) + return err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto err_out_port_del; + } + err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW, + info->snd_portid, info->snd_seq, 0, NULL); + if (WARN_ON_ONCE(err)) + goto err_out_msg_free; + err = genlmsg_reply(msg, info); + if (err) + goto err_out_port_del; + return 0; + +err_out_msg_free: + nlmsg_free(msg); +err_out_port_del: + devlink_port->ops->port_del(devlink, devlink_port, NULL); + return err; } static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb, -- cgit v1.2.3 From 733b3e27650b1bbce3c21fcfdb4fca22063efd66 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 31 May 2023 22:41:32 +0200 Subject: r8169: use dev_err_probe in all appropriate places in rtl_init_one() In addition to properly handling probe deferrals dev_err_probe() conveniently combines printing an error message with returning the errno. So let's use it for every error path in rtl_init_one() to simplify the code. 
Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/f0596a19-d517-e301-b649-304f9247b75a@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 40 ++++++++++++------------------- 1 file changed, 15 insertions(+), 25 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 5e6308d574ba..9445f04f8d48 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5196,44 +5196,35 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* enable device (incl. PCI PM wakeup and hotplug setup) */ rc = pcim_enable_device(pdev); - if (rc < 0) { - dev_err(&pdev->dev, "enable failure\n"); - return rc; - } + if (rc < 0) + return dev_err_probe(&pdev->dev, rc, "enable failure\n"); if (pcim_set_mwi(pdev) < 0) dev_info(&pdev->dev, "Mem-Wr-Inval unavailable\n"); /* use first MMIO region */ region = ffs(pci_select_bars(pdev, IORESOURCE_MEM)) - 1; - if (region < 0) { - dev_err(&pdev->dev, "no MMIO resource found\n"); - return -ENODEV; - } + if (region < 0) + return dev_err_probe(&pdev->dev, -ENODEV, "no MMIO resource found\n"); rc = pcim_iomap_regions(pdev, BIT(region), KBUILD_MODNAME); - if (rc < 0) { - dev_err(&pdev->dev, "cannot remap MMIO, aborting\n"); - return rc; - } + if (rc < 0) + return dev_err_probe(&pdev->dev, rc, "cannot remap MMIO, aborting\n"); tp->mmio_addr = pcim_iomap_table(pdev)[region]; txconfig = RTL_R32(tp, TxConfig); - if (txconfig == ~0U) { - dev_err(&pdev->dev, "PCI read failed\n"); - return -EIO; - } + if (txconfig == ~0U) + return dev_err_probe(&pdev->dev, -EIO, "PCI read failed\n"); xid = (txconfig >> 20) & 0xfcf; /* Identify chip attached to board */ chipset = rtl8169_get_mac_version(xid, tp->supports_gmii); - if (chipset == RTL_GIGA_MAC_NONE) { - dev_err(&pdev->dev, "unknown chip XID %03x, contact r8169 maintainers (see 
MAINTAINERS file)\n", xid); - return -ENODEV; - } - + if (chipset == RTL_GIGA_MAC_NONE) + return dev_err_probe(&pdev->dev, -ENODEV, + "unknown chip XID %03x, contact r8169 maintainers (see MAINTAINERS file)\n", + xid); tp->mac_version = chipset; tp->dash_type = rtl_check_dash(tp); @@ -5253,10 +5244,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rtl_hw_reset(tp); rc = rtl_alloc_irq(tp); - if (rc < 0) { - dev_err(&pdev->dev, "Can't allocate interrupt\n"); - return rc; - } + if (rc < 0) + return dev_err_probe(&pdev->dev, rc, "Can't allocate interrupt\n"); + tp->irq = pci_irq_vector(pdev, 0); INIT_WORK(&tp->wk.work, rtl_task); -- cgit v1.2.3 From f93b30e50a81a16cefa262b68bde5c4096ea9235 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Thu, 1 Jun 2023 11:30:02 +0800 Subject: net: systemport: Replace platform_get_irq with platform_get_irq_optional Replace platform_get_irq with platform_get_irq_optional because wol_irq is optional. Signed-off-by: Jiasheng Jiang Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 38d0cdaf22a5..bf1611cce974 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2531,9 +2531,9 @@ static int bcm_sysport_probe(struct platform_device *pdev) priv->irq0 = platform_get_irq(pdev, 0); if (!priv->is_lite) { priv->irq1 = platform_get_irq(pdev, 1); - priv->wol_irq = platform_get_irq(pdev, 2); + priv->wol_irq = platform_get_irq_optional(pdev, 2); } else { - priv->wol_irq = platform_get_irq(pdev, 1); + priv->wol_irq = platform_get_irq_optional(pdev, 1); } if (priv->irq0 <= 0 || (priv->irq1 <= 0 && !priv->is_lite)) { ret = -EINVAL; -- cgit v1.2.3 From 0f0f5868689ecbf643b723fae1a353c5a11a8e46 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Fri, 2 Jun 2023 00:04:14 +0000 Subject: net: lan743x: Remove extraneous gotos The gotos for cleanup aren't required, the function might as well just return the actual error code. Signed-off-by: Moritz Fischer Reviewed-by: Siddharth Vadapalli Signed-off-by: David S. 
Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 957d96a91a8a..f1bded993edc 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -160,16 +160,13 @@ static int lan743x_csr_init(struct lan743x_adapter *adapter) { struct lan743x_csr *csr = &adapter->csr; resource_size_t bar_start, bar_length; - int result; bar_start = pci_resource_start(adapter->pdev, 0); bar_length = pci_resource_len(adapter->pdev, 0); csr->csr_address = devm_ioremap(&adapter->pdev->dev, bar_start, bar_length); - if (!csr->csr_address) { - result = -ENOMEM; - goto clean_up; - } + if (!csr->csr_address) + return -ENOMEM; csr->id_rev = lan743x_csr_read(adapter, ID_REV); csr->fpga_rev = lan743x_csr_read(adapter, FPGA_REV); @@ -177,10 +174,8 @@ static int lan743x_csr_init(struct lan743x_adapter *adapter) "ID_REV = 0x%08X, FPGA_REV = %d.%d\n", csr->id_rev, FPGA_REV_GET_MAJOR_(csr->fpga_rev), FPGA_REV_GET_MINOR_(csr->fpga_rev)); - if (!ID_REV_IS_VALID_CHIP_ID_(csr->id_rev)) { - result = -ENODEV; - goto clean_up; - } + if (!ID_REV_IS_VALID_CHIP_ID_(csr->id_rev)) + return -ENODEV; csr->flags = LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR; switch (csr->id_rev & ID_REV_CHIP_REV_MASK_) { @@ -193,12 +188,7 @@ static int lan743x_csr_init(struct lan743x_adapter *adapter) break; } - result = lan743x_csr_light_reset(adapter); - if (result) - goto clean_up; - return 0; -clean_up: - return result; + return lan743x_csr_light_reset(adapter); } static void lan743x_intr_software_isr(struct lan743x_adapter *adapter) -- cgit v1.2.3 From 953bb24ddc118a5a3021a90a8cab8eae946238e7 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Tue, 29 Mar 2022 14:35:22 +0000 Subject: net/mlx5e: en_tc, Extend peer flows to a list Currently, mlx5e_flow is 
holding a pointer to a peer_flow, in case one was created, i.e. there is an assumption that mlx5e_flow can have only one peer. In order to support more than one peer, refactor mlx5e_flow to hold a list of peer flows. Signed-off-by: Mark Bloch Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/tc_priv.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 43 ++++++++++++++-------- 2 files changed, 28 insertions(+), 17 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index ba2b1f24ff14..8a500a966f06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -94,13 +94,13 @@ struct mlx5e_tc_flow { * destinations. */ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; - struct mlx5e_tc_flow *peer_flow; struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */ struct list_head hairpin; /* flows sharing the same hairpin */ struct list_head peer; /* flows with peer flow */ struct list_head unready; /* flows not ready to be offloaded (e.g * due to missing route) */ + struct list_head peer_flows; /* flows on peer */ struct net_device *orig_dev; /* netdev adding flow first */ int tmp_entry_index; struct list_head tmp_list; /* temporary flow list used by neigh update */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fd9f928e25c7..9c9c7024772f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1989,6 +1989,8 @@ void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list) static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *peer_flow; + struct mlx5e_tc_flow *tmp; if 
(!flow_flag_test(flow, ESWITCH) || !flow_flag_test(flow, DUP)) @@ -2000,12 +2002,13 @@ static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) flow_flag_clear(flow, DUP); - if (refcount_dec_and_test(&flow->peer_flow->refcnt)) { - mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); - kfree(flow->peer_flow); + list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) { + if (refcount_dec_and_test(&peer_flow->refcnt)) { + mlx5e_tc_del_fdb_flow(peer_flow->priv, peer_flow); + list_del(&peer_flow->peer_flows); + kfree(peer_flow); + } } - - flow->peer_flow = NULL; } static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) @@ -4295,6 +4298,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, INIT_LIST_HEAD(&flow->hairpin); INIT_LIST_HEAD(&flow->l3_to_l2_reformat); INIT_LIST_HEAD(&flow->attrs); + INIT_LIST_HEAD(&flow->peer_flows); refcount_set(&flow->refcnt, 1); init_completion(&flow->init_done); init_completion(&flow->del_hw_done); @@ -4443,7 +4447,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, goto out; } - flow->peer_flow = peer_flow; + list_add_tail(&peer_flow->peer_flows, &flow->peer_flows); flow_flag_set(flow, DUP); mutex_lock(&esw->offloads.peer_mutex); list_add_tail(&flow->peer, &esw->offloads.peer_flows); @@ -4741,19 +4745,26 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, if (!peer_esw) goto out; - if (flow_flag_test(flow, DUP) && - flow_flag_test(flow->peer_flow, OFFLOADED)) { - u64 bytes2; - u64 packets2; - u64 lastuse2; + if (flow_flag_test(flow, DUP)) { + struct mlx5e_tc_flow *peer_flow; - if (flow_flag_test(flow, USE_ACT_STATS)) { - f->use_act_stats = true; - } else { - counter = mlx5e_tc_get_counter(flow->peer_flow); + list_for_each_entry(peer_flow, &flow->peer_flows, peer_flows) { + u64 packets2; + u64 lastuse2; + u64 bytes2; + + if (!flow_flag_test(peer_flow, OFFLOADED)) + continue; + if (flow_flag_test(flow, USE_ACT_STATS)) { + f->use_act_stats = 
true; + break; + } + + counter = mlx5e_tc_get_counter(peer_flow); if (!counter) goto no_peer_counter; - mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2); + mlx5_fc_query_cached(counter, &bytes2, &packets2, + &lastuse2); bytes += bytes2; packets += packets2; -- cgit v1.2.3 From b1661efa4dbbd7d4055543f036cae6c28257d292 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 29 Mar 2022 14:47:20 +0000 Subject: net/mlx5e: tc, Refactor peer add/del flow Move peer_eswitch outside mlx5e_tc_add_fdb_peer_flow() so downstream patch can call mlx5e_tc_add_fdb_peer_flow() with multiple peers. Move peer_eswitch in the remove flow as well in order to keep symmetry. Signed-off-by: Mark Bloch Signed-off-by: Shay Drory Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 65 +++++++++++++------------ 1 file changed, 34 insertions(+), 31 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9c9c7024772f..6f9adb940588 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1986,7 +1986,7 @@ void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list) mlx5e_flow_put(priv, flow); } -static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) +static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; struct mlx5e_tc_flow *peer_flow; @@ -2011,25 +2011,20 @@ static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) } } -static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) -{ - struct mlx5_core_dev *dev = flow->priv->mdev; - struct mlx5_devcom *devcom = dev->priv.devcom; - struct mlx5_eswitch *peer_esw; - - peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - if (!peer_esw) - return; - - 
__mlx5e_tc_del_fdb_peer_flow(flow); - mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); -} - static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { if (mlx5e_is_eswitch_flow(flow)) { + struct mlx5_devcom *devcom = flow->priv->mdev->priv.devcom; + struct mlx5_eswitch *peer_esw; + + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) { + mlx5e_tc_del_fdb_flow(priv, flow); + return; + } mlx5e_tc_del_fdb_peer_flow(flow); + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); mlx5e_tc_del_fdb_flow(priv, flow); } else { mlx5e_tc_del_nic_flow(priv, flow); @@ -4407,22 +4402,18 @@ out: static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, struct mlx5e_tc_flow *flow, - unsigned long flow_flags) + unsigned long flow_flags, + struct mlx5_eswitch *peer_esw) { struct mlx5e_priv *priv = flow->priv, *peer_priv; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; - struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_rep_priv *peer_urpriv; struct mlx5e_tc_flow *peer_flow; struct mlx5_core_dev *in_mdev; int err = 0; - peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - if (!peer_esw) - return -ENODEV; - peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH); peer_priv = netdev_priv(peer_urpriv->netdev); @@ -4454,7 +4445,6 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, mutex_unlock(&esw->offloads.peer_mutex); out: - mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); return err; } @@ -4465,9 +4455,11 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { + struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep 
*in_rep = rpriv->rep; struct mlx5_core_dev *in_mdev = priv->mdev; + struct mlx5_eswitch *peer_esw; struct mlx5e_tc_flow *flow; int err; @@ -4476,19 +4468,30 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (IS_ERR(flow)) return PTR_ERR(flow); - if (is_peer_flow_needed(flow)) { - err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags); - if (err) { - mlx5e_tc_del_fdb_flow(priv, flow); - goto out; - } + if (!is_peer_flow_needed(flow)) { + *__flow = flow; + return 0; } + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) { + err = -ENODEV; + goto clean_flow; + } + + err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags, peer_esw); + if (err) + goto peer_clean; + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + *__flow = flow; return 0; -out: +peer_clean: + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); +clean_flow: + mlx5e_tc_del_fdb_flow(priv, flow); return err; } @@ -5293,7 +5296,7 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw) struct mlx5e_tc_flow *flow, *tmp; list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer) - __mlx5e_tc_del_fdb_peer_flow(flow); + mlx5e_tc_del_fdb_peer_flow(flow); } void mlx5e_tc_reoffload_flows_work(struct work_struct *work) -- cgit v1.2.3 From ed7a8fe71836fda7d669d4a3afbd5f4dba742c18 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 30 Mar 2022 07:16:23 +0000 Subject: net/mlx5e: rep, store send to vport rules per peer Each representor, for each send queue, is holding a send_to_vport rule for the peer eswitch. In order to support more than one peer, and to map between the peer rules and peer eswitches, refactor representor to hold both the peer rules and pointer to the peer eswitches. This enables mlx5 to store send_to_vport rules per peer, where each peer have dedicate index via mlx5_get_dev_index(). 
Signed-off-by: Mark Bloch Signed-off-by: Shay Drory Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 99 ++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_rep.h | 7 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 18 ++-- 3 files changed, 98 insertions(+), 26 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3e7041bd5705..3fbb454f7228 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -374,7 +374,9 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { struct mlx5e_rep_sq *rep_sq, *tmp; + struct mlx5e_rep_sq_peer *sq_peer; struct mlx5e_rep_priv *rpriv; + unsigned long i; if (esw->mode != MLX5_ESWITCH_OFFLOADS) return; @@ -382,8 +384,15 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, rpriv = mlx5e_rep_to_rep_priv(rep); list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) { mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); - if (rep_sq->send_to_vport_rule_peer) - mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer); + xa_for_each(&rep_sq->sq_peer, i, sq_peer) { + if (sq_peer->rule) + mlx5_eswitch_del_send_to_vport_rule(sq_peer->rule); + + xa_erase(&rep_sq->sq_peer, i); + kfree(sq_peer); + } + + xa_destroy(&rep_sq->sq_peer); list_del(&rep_sq->list); kfree(rep_sq); } @@ -395,6 +404,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, { struct mlx5_eswitch *peer_esw = NULL; struct mlx5_flow_handle *flow_rule; + struct mlx5e_rep_sq_peer *sq_peer; struct mlx5e_rep_priv *rpriv; struct mlx5e_rep_sq *rep_sq; int err; @@ -414,6 +424,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, err = -ENOMEM; goto out_err; } + xa_init(&rep_sq->sq_peer); /* Add re-inject rule to the PF/representor sqs */ 
flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, @@ -427,15 +438,26 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, rep_sq->sqn = sqns_array[i]; if (peer_esw) { + int peer_rule_idx = mlx5_get_dev_index(peer_esw->dev); + + sq_peer = kzalloc(sizeof(*sq_peer), GFP_KERNEL); + if (!sq_peer) { + err = -ENOMEM; + goto out_sq_peer_err; + } + flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, sqns_array[i]); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); - mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); - kfree(rep_sq); - goto out_err; + goto out_flow_rule_err; } - rep_sq->send_to_vport_rule_peer = flow_rule; + + sq_peer->rule = flow_rule; + sq_peer->peer = peer_esw; + err = xa_insert(&rep_sq->sq_peer, peer_rule_idx, sq_peer, GFP_KERNEL); + if (err) + goto out_xa_err; } list_add(&rep_sq->list, &rpriv->vport_sqs_list); @@ -446,6 +468,14 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, return 0; +out_xa_err: + mlx5_eswitch_del_send_to_vport_rule(flow_rule); +out_flow_rule_err: + kfree(sq_peer); +out_sq_peer_err: + mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); + xa_destroy(&rep_sq->sq_peer); + kfree(rep_sq); out_err: mlx5e_sqs2vport_stop(esw, rep); @@ -1530,17 +1560,24 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) return rpriv->netdev; } -static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep) +static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep, + struct mlx5_eswitch *peer_esw) { + int i = mlx5_get_dev_index(peer_esw->dev); struct mlx5e_rep_priv *rpriv; struct mlx5e_rep_sq *rep_sq; + WARN_ON_ONCE(!peer_esw); rpriv = mlx5e_rep_to_rep_priv(rep); list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) { - if (!rep_sq->send_to_vport_rule_peer) + struct mlx5e_rep_sq_peer *sq_peer = xa_load(&rep_sq->sq_peer, i); + + if (!sq_peer || sq_peer->peer != peer_esw) continue; - 
mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer); - rep_sq->send_to_vport_rule_peer = NULL; + + mlx5_eswitch_del_send_to_vport_rule(sq_peer->rule); + xa_erase(&rep_sq->sq_peer, i); + kfree(sq_peer); } } @@ -1548,24 +1585,52 @@ static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, struct mlx5_eswitch *peer_esw) { + int i = mlx5_get_dev_index(peer_esw->dev); struct mlx5_flow_handle *flow_rule; + struct mlx5e_rep_sq_peer *sq_peer; struct mlx5e_rep_priv *rpriv; struct mlx5e_rep_sq *rep_sq; + int err; rpriv = mlx5e_rep_to_rep_priv(rep); list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) { - if (rep_sq->send_to_vport_rule_peer) + sq_peer = xa_load(&rep_sq->sq_peer, i); + + if (sq_peer && sq_peer->peer) continue; - flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn); - if (IS_ERR(flow_rule)) + + flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, + rep_sq->sqn); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); goto err_out; - rep_sq->send_to_vport_rule_peer = flow_rule; + } + + if (sq_peer) { + sq_peer->rule = flow_rule; + sq_peer->peer = peer_esw; + continue; + } + sq_peer = kzalloc(sizeof(*sq_peer), GFP_KERNEL); + if (!sq_peer) { + err = -ENOMEM; + goto err_sq_alloc; + } + err = xa_insert(&rep_sq->sq_peer, i, sq_peer, GFP_KERNEL); + if (err) + goto err_xa; + sq_peer->rule = flow_rule; + sq_peer->peer = peer_esw; } return 0; +err_xa: + kfree(sq_peer); +err_sq_alloc: + mlx5_eswitch_del_send_to_vport_rule(flow_rule); err_out: - mlx5e_vport_rep_event_unpair(rep); - return PTR_ERR(flow_rule); + mlx5e_vport_rep_event_unpair(rep, peer_esw); + return err; } static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw, @@ -1578,7 +1643,7 @@ static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw, if (event == MLX5_SWITCHDEV_EVENT_PAIR) err = mlx5e_vport_rep_event_pair(esw, rep, data); else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR) - 
mlx5e_vport_rep_event_unpair(rep); + mlx5e_vport_rep_event_unpair(rep, data); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 80b7f5079a5a..70640fa1ad7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -225,9 +225,14 @@ struct mlx5e_encap_entry { struct rcu_head rcu; }; +struct mlx5e_rep_sq_peer { + struct mlx5_flow_handle *rule; + void *peer; +}; + struct mlx5e_rep_sq { struct mlx5_flow_handle *send_to_vport_rule; - struct mlx5_flow_handle *send_to_vport_rule_peer; + struct xarray sq_peer; u32 sqn; struct list_head list; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1b2f5e273525..9526382f1573 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2673,7 +2673,8 @@ void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, #define ESW_OFFLOADS_DEVCOM_PAIR (0) #define ESW_OFFLOADS_DEVCOM_UNPAIR (1) -static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw) +static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw) { const struct mlx5_eswitch_rep_ops *ops; struct mlx5_eswitch_rep *rep; @@ -2686,17 +2687,18 @@ static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw) ops = esw->offloads.rep_ops[rep_type]; if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && ops->event) - ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL); + ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, peer_esw); } } } -static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) +static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw) { #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) mlx5e_tc_clean_fdb_peer_flows(esw); 
#endif - mlx5_esw_offloads_rep_event_unpair(esw); + mlx5_esw_offloads_rep_event_unpair(esw, peer_esw); esw_del_fdb_peer_miss_rules(esw); } @@ -2728,7 +2730,7 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, return 0; err_out: - mlx5_esw_offloads_unpair(esw); + mlx5_esw_offloads_unpair(esw, peer_esw); return err; } @@ -2802,8 +2804,8 @@ static int mlx5_esw_offloads_devcom_event(int event, mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); esw->paired[mlx5_get_dev_index(peer_esw->dev)] = false; peer_esw->paired[mlx5_get_dev_index(esw->dev)] = false; - mlx5_esw_offloads_unpair(peer_esw); - mlx5_esw_offloads_unpair(esw); + mlx5_esw_offloads_unpair(peer_esw, esw); + mlx5_esw_offloads_unpair(esw, peer_esw); mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); break; } @@ -2811,7 +2813,7 @@ static int mlx5_esw_offloads_devcom_event(int event, return 0; err_pair: - mlx5_esw_offloads_unpair(esw); + mlx5_esw_offloads_unpair(esw, peer_esw); err_peer: mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); err_out: -- cgit v1.2.3 From 0af3613ddc915d136e9c56f645f80c4b1cb828ff Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 30 Mar 2022 08:51:16 +0000 Subject: net/mlx5e: en_tc, re-factor query route port query for peer esw outside of if scope. This is preparation for query route port over multiple peers. 
Signed-off-by: Mark Bloch Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 32 ++++++++++--------------- 1 file changed, 13 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 6f9adb940588..a096005fd163 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1666,8 +1666,10 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro { struct mlx5e_priv *out_priv, *route_priv; struct mlx5_core_dev *route_mdev; + struct mlx5_devcom *devcom; struct mlx5_eswitch *esw; u16 vhca_id; + int err; out_priv = netdev_priv(out_dev); esw = out_priv->mdev->priv.eswitch; @@ -1675,28 +1677,20 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro route_mdev = route_priv->mdev; vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id); - if (mlx5_lag_is_active(out_priv->mdev)) { - struct mlx5_devcom *devcom; - int err; - - /* In lag case we may get devices from different eswitch instances. - * If we failed to get vport num, it means, mostly, that we on the wrong - * eswitch. - */ - err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); - if (err != -ENOENT) - return err; - - rcu_read_lock(); - devcom = out_priv->mdev->priv.devcom; - esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV; - rcu_read_unlock(); + err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); + if (!err) + return err; + if (!mlx5_lag_is_active(out_priv->mdev)) return err; - } - return mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); + rcu_read_lock(); + devcom = out_priv->mdev->priv.devcom; + esw = mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + err = esw ? 
mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV; + rcu_read_unlock(); + + return err; } static int -- cgit v1.2.3 From 9be6c21fdcf8a7ec48262bb76f78c17ac2761ac6 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 6 Feb 2023 10:12:34 +0200 Subject: net/mlx5e: Handle offloads flows per peer Currently, the E-switch offloads table has a list of all flows that create a peer_flow over the peer eswitch. In order to support more than one peer, extend the E-switch offloads table peer_flow to hold an array of lists, where each peer has a dedicated index via mlx5_get_dev_index(). Thereafter, extend the original flow to hold an array of peers as well. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/tc_priv.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 37 +++++++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 ++- 4 files changed, 34 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index 8a500a966f06..6cc23af66b5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -96,7 +96,7 @@ struct mlx5e_tc_flow { struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */ struct list_head hairpin; /* flows sharing the same hairpin */ - struct list_head peer; /* flows with peer flow */ + struct list_head peer[MLX5_MAX_PORTS]; /* flows with peer flow */ struct list_head unready; /* flows not ready to be offloaded (e.g * due to missing route) */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index a096005fd163..c7797e3de093 100644 ---
a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1980,7 +1980,8 @@ void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list) mlx5e_flow_put(priv, flow); } -static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) +static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow, + int peer_index) { struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; struct mlx5e_tc_flow *peer_flow; @@ -1991,18 +1992,32 @@ static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) return; mutex_lock(&esw->offloads.peer_mutex); - list_del(&flow->peer); + list_del(&flow->peer[peer_index]); mutex_unlock(&esw->offloads.peer_mutex); - flow_flag_clear(flow, DUP); - list_for_each_entry_safe(peer_flow, tmp, &flow->peer_flows, peer_flows) { + if (peer_index != mlx5_get_dev_index(peer_flow->priv->mdev)) + continue; if (refcount_dec_and_test(&peer_flow->refcnt)) { mlx5e_tc_del_fdb_flow(peer_flow->priv, peer_flow); list_del(&peer_flow->peer_flows); kfree(peer_flow); } } + + if (list_empty(&flow->peer_flows)) + flow_flag_clear(flow, DUP); +} + +static void mlx5e_tc_del_fdb_peers_flow(struct mlx5e_tc_flow *flow) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) { + if (i == mlx5_get_dev_index(flow->priv->mdev)) + continue; + mlx5e_tc_del_fdb_peer_flow(flow, i); + } } static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, @@ -2017,7 +2032,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, mlx5e_tc_del_fdb_flow(priv, flow); return; } - mlx5e_tc_del_fdb_peer_flow(flow); + mlx5e_tc_del_fdb_peers_flow(flow); mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); mlx5e_tc_del_fdb_flow(priv, flow); } else { @@ -4403,6 +4418,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; + int i = 
mlx5_get_dev_index(peer_esw->dev); struct mlx5e_rep_priv *peer_urpriv; struct mlx5e_tc_flow *peer_flow; struct mlx5_core_dev *in_mdev; @@ -4435,7 +4451,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, list_add_tail(&peer_flow->peer_flows, &flow->peer_flows); flow_flag_set(flow, DUP); mutex_lock(&esw->offloads.peer_mutex); - list_add_tail(&flow->peer, &esw->offloads.peer_flows); + list_add_tail(&flow->peer[i], &esw->offloads.peer_flows[i]); mutex_unlock(&esw->offloads.peer_mutex); out: @@ -5288,9 +5304,14 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw) { struct mlx5e_tc_flow *flow, *tmp; + int i; - list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer) - mlx5e_tc_del_fdb_peer_flow(flow); + for (i = 0; i < MLX5_MAX_PORTS; i++) { + if (i == mlx5_get_dev_index(esw->dev)) + continue; + list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows[i], peer[i]) + mlx5e_tc_del_fdb_peers_flow(flow); + } } void mlx5e_tc_reoffload_flows_work(struct work_struct *work) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index f70124ad71cf..eadc39542e5e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -249,7 +249,7 @@ struct mlx5_esw_offload { struct mlx5_flow_group *vport_rx_drop_group; struct mlx5_flow_handle *vport_rx_drop_rule; struct xarray vport_reps; - struct list_head peer_flows; + struct list_head peer_flows[MLX5_MAX_PORTS]; struct mutex peer_mutex; struct mutex encap_tbl_lock; /* protects encap_tbl */ DECLARE_HASHTABLE(encap_tbl, 8); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9526382f1573..a767f3d52c76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2825,8 +2825,10 @@ err_out: void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) { struct mlx5_devcom *devcom = esw->dev->priv.devcom; + int i; - INIT_LIST_HEAD(&esw->offloads.peer_flows); + for (i = 0; i < MLX5_MAX_PORTS; i++) + INIT_LIST_HEAD(&esw->offloads.peer_flows[i]); mutex_init(&esw->offloads.peer_mutex); if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) -- cgit v1.2.3 From 18e31d42267556fd98590d91dda161f2a39a1def Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 5 Feb 2023 14:44:40 +0200 Subject: net/mlx5: E-switch, enlarge peer miss group table There is an implicit assumption that the peer miss group table is required to handle only a single peer. Also, there is an assumption that total_vports of the master is greater than or equal to the total_vports of each peer. Change the code to support the peer miss group for more than one peer. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a767f3d52c76..ca69ed487413 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1573,6 +1573,7 @@ esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw, u32 *flow_group_in, int *ix) { + int max_peer_ports = (esw->total_vports - 1) * (MLX5_MAX_PORTS - 1); int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *g; void *match_criteria; @@ -1599,8 +1600,8 @@ esw_create_peer_esw_miss_group(struct mlx5_eswitch *esw, MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, *ix); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, - *ix + esw->total_vports -
1); - *ix += esw->total_vports; + *ix + max_peer_ports); + *ix += max_peer_ports + 1; g = mlx5_create_flow_group(fdb, flow_group_in); if (IS_ERR(g)) { @@ -1702,7 +1703,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) * total vports of the peer (currently is also uses esw->total_vports). */ table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) + - esw->total_vports * 2 + MLX5_ESW_MISS_FLOWS; + esw->total_vports * MLX5_MAX_PORTS + MLX5_ESW_MISS_FLOWS; /* create the slow path fdb with encap set, so further table instances * can be created at run time while VFs are probed if the FW allows that. -- cgit v1.2.3 From 9bee385a6e3981d22d75873a059aa94d276ede32 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 5 Feb 2023 17:18:19 +0200 Subject: net/mlx5: E-switch, refactor FDB miss rule add/remove Currently, the E-switch FDB has a single peer miss rule. In order to support more than one peer, refactor the E-switch FDB to have a peer miss rule per peer, and change the code to add/remove a rule for a specific peer.
Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index eadc39542e5e..2a941e1cc686 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -218,7 +218,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *send_to_vport_meta_grp; struct mlx5_flow_group *peer_miss_grp; - struct mlx5_flow_handle **peer_miss_rules; + struct mlx5_flow_handle **peer_miss_rules[MLX5_MAX_PORTS]; struct mlx5_flow_group *miss_grp; struct mlx5_flow_handle **send_to_vport_meta_rules; struct mlx5_flow_handle *miss_rule_uni; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index ca69ed487413..a7f352777d9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1132,7 +1132,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows[vport->index] = flow; } - esw->fdb_table.offloads.peer_miss_rules = flows; + esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)] = flows; kvfree(spec); return 0; @@ -1160,13 +1160,14 @@ alloc_flows_err: return err; } -static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw) +static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev) { struct mlx5_flow_handle **flows; struct mlx5_vport *vport; unsigned long i; - flows = esw->fdb_table.offloads.peer_miss_rules; + flows = 
esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)]; mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) mlx5_del_flow_rules(flows[vport->index]); @@ -2700,7 +2701,7 @@ static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw, mlx5e_tc_clean_fdb_peer_flows(esw); #endif mlx5_esw_offloads_rep_event_unpair(esw, peer_esw); - esw_del_fdb_peer_miss_rules(esw); + esw_del_fdb_peer_miss_rules(esw, peer_esw->dev); } static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, -- cgit v1.2.3 From 5e0202eb49ed02b9b9ec423684dd840e0edd8695 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 22 Feb 2023 11:54:19 +0200 Subject: net/mlx5: E-switch, Handle multiple master egress rules Currently, whenever a shared FDB is created, the slave eswitch is creating master egress rule to the master eswitch. In order to support more than two ports, which means there will be more than one slave eswitch, enlarge bounce_rule, which is used to create master egress rule, to an xarray. 
Signed-off-by: Shay Drory Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/esw/acl/egress_ofld.c | 15 ++-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 8 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 92 +++++++++++++++------- 3 files changed, 79 insertions(+), 36 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c index 2e504c7461c6..ae815a8392c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c @@ -15,13 +15,15 @@ static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport) vport->egress.offloads.fwd_rule = NULL; } -static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport) +static void esw_acl_egress_ofld_bounce_rules_destroy(struct mlx5_vport *vport) { - if (!vport->egress.offloads.bounce_rule) - return; + struct mlx5_flow_handle *bounce_rule; + unsigned long i; - mlx5_del_flow_rules(vport->egress.offloads.bounce_rule); - vport->egress.offloads.bounce_rule = NULL; + xa_for_each(&vport->egress.offloads.bounce_rules, i, bounce_rule) { + mlx5_del_flow_rules(bounce_rule); + xa_erase(&vport->egress.offloads.bounce_rules, i); + } } static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw, @@ -96,7 +98,7 @@ static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport) { esw_acl_egress_vlan_destroy(vport); esw_acl_egress_ofld_fwd2vport_destroy(vport); - esw_acl_egress_ofld_bounce_rule_destroy(vport); + esw_acl_egress_ofld_bounce_rules_destroy(vport); } static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw, @@ -194,6 +196,7 @@ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport vport->egress.acl = NULL; return err; } + vport->egress.type = VPORT_EGRESS_ACL_TYPE_DEFAULT; err = 
esw_acl_egress_ofld_groups_create(esw, vport); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 2a941e1cc686..05ae1c3a6e68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -123,8 +123,14 @@ struct vport_ingress { } offloads; }; +enum vport_egress_acl_type { + VPORT_EGRESS_ACL_TYPE_DEFAULT, + VPORT_EGRESS_ACL_TYPE_SHARED_FDB, +}; + struct vport_egress { struct mlx5_flow_table *acl; + enum vport_egress_acl_type type; struct mlx5_flow_handle *allowed_vlan; struct mlx5_flow_group *vlan_grp; union { @@ -136,7 +142,7 @@ struct vport_egress { struct { struct mlx5_flow_group *fwd_grp; struct mlx5_flow_handle *fwd_rule; - struct mlx5_flow_handle *bounce_rule; + struct xarray bounce_rules; struct mlx5_flow_group *bounce_grp; } offloads; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index a7f352777d9e..ce70320b89b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2512,6 +2512,7 @@ static int __esw_set_master_egress_rule(struct mlx5_core_dev *master, struct mlx5_vport *vport, struct mlx5_flow_table *acl) { + u16 slave_index = MLX5_CAP_GEN(slave, vhca_id); struct mlx5_flow_handle *flow_rule = NULL; struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {}; @@ -2527,8 +2528,7 @@ static int __esw_set_master_egress_rule(struct mlx5_core_dev *master, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK); - MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(slave, vhca_id)); + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, slave_index); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, 
misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); @@ -2543,44 +2543,35 @@ static int __esw_set_master_egress_rule(struct mlx5_core_dev *master, flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act, &dest, 1); - if (IS_ERR(flow_rule)) + if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); - else - vport->egress.offloads.bounce_rule = flow_rule; + } else { + err = xa_insert(&vport->egress.offloads.bounce_rules, + slave_index, flow_rule, GFP_KERNEL); + if (err) + mlx5_del_flow_rules(flow_rule); + } kvfree(spec); return err; } -static int esw_set_master_egress_rule(struct mlx5_core_dev *master, - struct mlx5_core_dev *slave) +static int esw_master_egress_create_resources(struct mlx5_flow_namespace *egress_ns, + struct mlx5_vport *vport) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_eswitch *esw = master->priv.eswitch; struct mlx5_flow_table_attr ft_attr = { - .max_fte = 1, .prio = 0, .level = 0, + .max_fte = MLX5_MAX_PORTS, .prio = 0, .level = 0, .flags = MLX5_FLOW_TABLE_OTHER_VPORT, }; - struct mlx5_flow_namespace *egress_ns; struct mlx5_flow_table *acl; struct mlx5_flow_group *g; - struct mlx5_vport *vport; void *match_criteria; u32 *flow_group_in; int err; - vport = mlx5_eswitch_get_vport(esw, esw->manager_vport); - if (IS_ERR(vport)) - return PTR_ERR(vport); - - egress_ns = mlx5_get_flow_vport_acl_namespace(master, - MLX5_FLOW_NAMESPACE_ESW_EGRESS, - vport->index); - if (!egress_ns) - return -EINVAL; - if (vport->egress.acl) - return -EINVAL; + return 0; flow_group_in = kvzalloc(inlen, GFP_KERNEL); if (!flow_group_in) @@ -2604,7 +2595,7 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master, MLX5_SET(create_flow_group_in, flow_group_in, source_eswitch_owner_vhca_id_valid, 1); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, MLX5_MAX_PORTS); g = 
mlx5_create_flow_group(acl, flow_group_in); if (IS_ERR(g)) { @@ -2612,19 +2603,15 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master, goto err_group; } - err = __esw_set_master_egress_rule(master, slave, vport, acl); - if (err) - goto err_rule; - vport->egress.acl = acl; vport->egress.offloads.bounce_grp = g; + vport->egress.type = VPORT_EGRESS_ACL_TYPE_SHARED_FDB; + xa_init_flags(&vport->egress.offloads.bounce_rules, XA_FLAGS_ALLOC); kvfree(flow_group_in); return 0; -err_rule: - mlx5_destroy_flow_group(g); err_group: mlx5_destroy_flow_table(acl); out: @@ -2632,6 +2619,52 @@ out: return err; } +static void esw_master_egress_destroy_resources(struct mlx5_vport *vport) +{ + mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp); + mlx5_destroy_flow_table(vport->egress.acl); +} + +static int esw_set_master_egress_rule(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + struct mlx5_eswitch *esw = master->priv.eswitch; + u16 slave_index = MLX5_CAP_GEN(slave, vhca_id); + struct mlx5_flow_namespace *egress_ns; + struct mlx5_vport *vport; + int err; + + vport = mlx5_eswitch_get_vport(esw, esw->manager_vport); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + egress_ns = mlx5_get_flow_vport_acl_namespace(master, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + vport->index); + if (!egress_ns) + return -EINVAL; + + if (vport->egress.acl && vport->egress.type != VPORT_EGRESS_ACL_TYPE_SHARED_FDB) + return 0; + + err = esw_master_egress_create_resources(egress_ns, vport); + if (err) + return err; + + if (xa_load(&vport->egress.offloads.bounce_rules, slave_index)) + return -EINVAL; + + err = __esw_set_master_egress_rule(master, slave, vport, vport->egress.acl); + if (err) + goto err_rule; + + return 0; + +err_rule: + esw_master_egress_destroy_resources(vport); + return err; +} + static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev) { struct mlx5_vport *vport; @@ -2640,6 +2673,7 @@ static void esw_unset_master_egress_rule(struct mlx5_core_dev 
*dev) dev->priv.eswitch->manager_vport); esw_acl_egress_ofld_cleanup(vport); + xa_destroy(&vport->egress.offloads.bounce_rules); } int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, -- cgit v1.2.3 From 014e4d48eaa36f1678642f9d9125ac5b4526bd3e Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 2 Feb 2023 10:27:45 +0200 Subject: net/mlx5: E-switch, generalize shared FDB creation Shared FDB creation is hard coded for only two eswitches. Generalize shared FDB creation so that any number of eswitches could create shared FDB. Signed-off-by: Shay Drory Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/esw/acl/egress_ofld.c | 12 ++++++++ .../net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 12 ++++---- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 32 +++++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 35 ++++++++++++++++++---- 5 files changed, 66 insertions(+), 26 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c index ae815a8392c6..24b1ca4e4ff8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c @@ -15,6 +15,18 @@ static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport) vport->egress.offloads.fwd_rule = NULL; } +void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport, int rule_index) +{ + struct mlx5_flow_handle *bounce_rule = + xa_load(&vport->egress.offloads.bounce_rules, rule_index); + + if (!bounce_rule) + return; + + mlx5_del_flow_rules(bounce_rule); + xa_erase(&vport->egress.offloads.bounce_rules, rule_index); +} + static void esw_acl_egress_ofld_bounce_rules_destroy(struct mlx5_vport *vport) { struct mlx5_flow_handle *bounce_rule; diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h index c9f8469e9a47..536b04e83618 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h @@ -10,6 +10,7 @@ /* Eswitch acl egress external APIs */ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport); +void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport, int rule_index); int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num, u16 passive_vport_num); int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 05ae1c3a6e68..9833d1a587cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -754,9 +754,9 @@ void esw_vport_change_handle_locked(struct mlx5_vport *vport); bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller); -int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, - struct mlx5_eswitch *slave_esw); -void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, +int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw, int max_slaves); +void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw); int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); @@ -808,14 +808,14 @@ mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev, } static inline int -mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, - struct mlx5_eswitch *slave_esw) +mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch 
*slave_esw, int max_slaves) { return 0; } static inline void -mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, +mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw) {} static inline int diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index ce70320b89b3..98d75a33a624 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2557,11 +2557,11 @@ static int __esw_set_master_egress_rule(struct mlx5_core_dev *master, } static int esw_master_egress_create_resources(struct mlx5_flow_namespace *egress_ns, - struct mlx5_vport *vport) + struct mlx5_vport *vport, size_t count) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table_attr ft_attr = { - .max_fte = MLX5_MAX_PORTS, .prio = 0, .level = 0, + .max_fte = count, .prio = 0, .level = 0, .flags = MLX5_FLOW_TABLE_OTHER_VPORT, }; struct mlx5_flow_table *acl; @@ -2595,7 +2595,7 @@ static int esw_master_egress_create_resources(struct mlx5_flow_namespace *egress MLX5_SET(create_flow_group_in, flow_group_in, source_eswitch_owner_vhca_id_valid, 1); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, MLX5_MAX_PORTS); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, count); g = mlx5_create_flow_group(acl, flow_group_in); if (IS_ERR(g)) { @@ -2626,7 +2626,7 @@ static void esw_master_egress_destroy_resources(struct mlx5_vport *vport) } static int esw_set_master_egress_rule(struct mlx5_core_dev *master, - struct mlx5_core_dev *slave) + struct mlx5_core_dev *slave, size_t count) { struct mlx5_eswitch *esw = master->priv.eswitch; u16 slave_index = MLX5_CAP_GEN(slave, vhca_id); @@ -2647,7 +2647,7 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master, if 
(vport->egress.acl && vport->egress.type != VPORT_EGRESS_ACL_TYPE_SHARED_FDB) return 0; - err = esw_master_egress_create_resources(egress_ns, vport); + err = esw_master_egress_create_resources(egress_ns, vport, count); if (err) return err; @@ -2665,19 +2665,24 @@ err_rule: return err; } -static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev) +static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev, + struct mlx5_core_dev *slave_dev) { struct mlx5_vport *vport; vport = mlx5_eswitch_get_vport(dev->priv.eswitch, dev->priv.eswitch->manager_vport); - esw_acl_egress_ofld_cleanup(vport); - xa_destroy(&vport->egress.offloads.bounce_rules); + esw_acl_egress_ofld_bounce_rule_destroy(vport, MLX5_CAP_GEN(slave_dev, vhca_id)); + + if (xa_empty(&vport->egress.offloads.bounce_rules)) { + esw_acl_egress_ofld_cleanup(vport); + xa_destroy(&vport->egress.offloads.bounce_rules); + } } -int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, - struct mlx5_eswitch *slave_esw) +int mlx5_eswitch_offloads_single_fdb_add_one(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw, int max_slaves) { int err; @@ -2687,7 +2692,7 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, return err; err = esw_set_master_egress_rule(master_esw->dev, - slave_esw->dev); + slave_esw->dev, max_slaves); if (err) goto err_acl; @@ -2695,15 +2700,14 @@ int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, err_acl: esw_set_slave_root_fdb(NULL, slave_esw->dev); - return err; } -void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, +void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw) { - esw_unset_master_egress_rule(master_esw->dev); esw_set_slave_root_fdb(NULL, slave_esw->dev); + esw_unset_master_egress_rule(master_esw->dev, slave_esw->dev); } #define ESW_OFFLOADS_DEVCOM_PAIR (0) diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 5d331b940f4d..9bc2822881ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -550,6 +550,29 @@ char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) } } +static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_eswitch *master_esw = dev0->priv.eswitch; + int err; + int i; + + for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) { + struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch; + + err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw, + slave_esw, ldev->ports); + if (err) + goto err; + } + return 0; +err: + for (; i > MLX5_LAG_P1; i--) + mlx5_eswitch_offloads_single_fdb_del_one(master_esw, + ldev->pf[i].dev->priv.eswitch); + return err; +} + static int mlx5_create_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker, enum mlx5_lag_mode mode, @@ -557,7 +580,6 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, { bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; int err; @@ -575,8 +597,7 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, } if (shared_fdb) { - err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch, - dev1->priv.eswitch); + err = mlx5_lag_create_single_fdb(ldev); if (err) mlx5_core_err(dev0, "Can't enable single FDB mode\n"); else @@ -647,19 +668,21 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, int mlx5_deactivate_lag(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + struct mlx5_eswitch *master_esw = dev0->priv.eswitch; u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; bool roce_lag 
= __mlx5_lag_is_roce(ldev); unsigned long flags = ldev->mode_flags; int err; + int i; ldev->mode = MLX5_LAG_MODE_NONE; ldev->mode_flags = 0; mlx5_lag_mp_reset(ldev); if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) { - mlx5_eswitch_offloads_destroy_single_fdb(dev0->priv.eswitch, - dev1->priv.eswitch); + for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) + mlx5_eswitch_offloads_single_fdb_del_one(master_esw, + ldev->pf[i].dev->priv.eswitch); clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); } -- cgit v1.2.3 From 6d5b7321d8af0d4f5ec81d8e739c7ed2a93cf12a Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 21 Feb 2023 10:17:06 +0200 Subject: net/mlx5: DR, handle more than one peer domain Currently, DR domain is using the assumption that each domain can only have a single peer. In order to support VF LAG of more than two ports, expand peer domain to use an array of peers, and align the code accordingly. Signed-off-by: Shay Drory Reviewed-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 12 +++++++----- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 5 +++-- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 3 ++- .../net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 5 +++-- .../net/ethernet/mellanox/mlx5/core/steering/dr_domain.c | 13 +++++++------ .../net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c | 9 +++++---- .../net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c | 9 +++++---- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c | 5 +++-- drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h | 3 ++- 12 files changed, 42 insertions(+), 30 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 98d75a33a624..761278e1af5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2778,7 +2778,9 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw, struct mlx5_eswitch *peer_esw, bool pair) { + u8 peer_idx = mlx5_get_dev_index(peer_esw->dev); struct mlx5_flow_root_namespace *peer_ns; + u8 idx = mlx5_get_dev_index(esw->dev); struct mlx5_flow_root_namespace *ns; int err; @@ -2786,18 +2788,18 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw, ns = esw->dev->priv.steering->fdb_root_ns; if (pair) { - err = mlx5_flow_namespace_set_peer(ns, peer_ns); + err = mlx5_flow_namespace_set_peer(ns, peer_ns, peer_idx); if (err) return err; - err = mlx5_flow_namespace_set_peer(peer_ns, ns); + err = mlx5_flow_namespace_set_peer(peer_ns, ns, idx); if (err) { - mlx5_flow_namespace_set_peer(ns, NULL); + mlx5_flow_namespace_set_peer(ns, NULL, peer_idx); return err; } } else { - mlx5_flow_namespace_set_peer(ns, NULL); - mlx5_flow_namespace_set_peer(peer_ns, NULL); + mlx5_flow_namespace_set_peer(ns, NULL, peer_idx); + mlx5_flow_namespace_set_peer(peer_ns, NULL, idx); } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 144e59480686..11374c3744c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -139,7 +139,8 @@ static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace } static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_root_namespace *peer_ns) + struct mlx5_flow_root_namespace *peer_ns, + u8 peer_idx) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 8ef4254b9ea1..b6b9a5a20591 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -93,7 +93,8 @@ struct mlx5_flow_cmds { struct mlx5_modify_hdr *modify_hdr); int (*set_peer)(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_root_namespace *peer_ns); + struct mlx5_flow_root_namespace *peer_ns, + u8 peer_idx); int (*create_ns)(struct mlx5_flow_root_namespace *ns); int (*destroy_ns)(struct mlx5_flow_root_namespace *ns); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 19da02c41616..4ef04aa28771 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -3620,7 +3620,8 @@ void mlx5_destroy_match_definer(struct mlx5_core_dev *dev, } int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_root_namespace *peer_ns) + struct mlx5_flow_root_namespace *peer_ns, + u8 peer_idx) { if (peer_ns && ns->mode != peer_ns->mode) { mlx5_core_err(ns->dev, @@ -3628,7 +3629,7 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, return -EINVAL; } - return ns->cmds->set_peer(ns, peer_ns); + return ns->cmds->set_peer(ns, peer_ns, peer_idx); } /* This function should be called only at init stage of the namespace. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index f137a0611b77..200ec946409c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -295,7 +295,8 @@ void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void); int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_root_namespace *peer_ns); + struct mlx5_flow_root_namespace *peer_ns, + u8 peer_idx); int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns, enum mlx5_flow_steering_mode mode); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 0eb9a8d7f282..4e9bc1897a88 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -2071,8 +2071,9 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn, struct mlx5dr_action *action; u8 peer_vport; - peer_vport = vhca_id_valid && (vhca_id != dmn->info.caps.gvmi); - vport_dmn = peer_vport ? dmn->peer_dmn : dmn; + peer_vport = vhca_id_valid && mlx5_core_is_pf(dmn->mdev) && + (vhca_id != dmn->info.caps.gvmi); + vport_dmn = peer_vport ? 
dmn->peer_dmn[vhca_id] : dmn; if (!vport_dmn) { mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n"); return NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 9a2dfe6ebe31..75dc85dc24ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -555,17 +555,18 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) } void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, - struct mlx5dr_domain *peer_dmn) + struct mlx5dr_domain *peer_dmn, + u8 peer_idx) { mlx5dr_domain_lock(dmn); - if (dmn->peer_dmn) - refcount_dec(&dmn->peer_dmn->refcount); + if (dmn->peer_dmn[peer_idx]) + refcount_dec(&dmn->peer_dmn[peer_idx]->refcount); - dmn->peer_dmn = peer_dmn; + dmn->peer_dmn[peer_idx] = peer_dmn; - if (dmn->peer_dmn) - refcount_inc(&dmn->peer_dmn->refcount); + if (dmn->peer_dmn[peer_idx]) + refcount_inc(&dmn->peer_dmn[peer_idx]->refcount); mlx5dr_domain_unlock(dmn); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c index 2010d4ac6519..69d7a8f3c402 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -1647,6 +1647,7 @@ dr_ste_v0_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, u8 *tag) { struct mlx5dr_match_misc *misc = &value->misc; + int id = misc->source_eswitch_owner_vhca_id; struct mlx5dr_cmd_vport_cap *vport_cap; struct mlx5dr_domain *dmn = sb->dmn; struct mlx5dr_domain *vport_dmn; @@ -1657,11 +1658,11 @@ dr_ste_v0_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, if (sb->vhca_id_valid) { /* Find port GVMI based on the eswitch_owner_vhca_id */ - if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi) + if (id == dmn->info.caps.gvmi) vport_dmn = dmn; - else if (dmn->peer_dmn && 
(misc->source_eswitch_owner_vhca_id == - dmn->peer_dmn->info.caps.gvmi)) - vport_dmn = dmn->peer_dmn; + else if (id < MLX5_MAX_PORTS && dmn->peer_dmn[id] && + (id == dmn->peer_dmn[id]->info.caps.gvmi)) + vport_dmn = dmn->peer_dmn[id]; else return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index 4c0704ad166b..f4ef0b22b991 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -1979,6 +1979,7 @@ static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, u8 *tag) { struct mlx5dr_match_misc *misc = &value->misc; + int id = misc->source_eswitch_owner_vhca_id; struct mlx5dr_cmd_vport_cap *vport_cap; struct mlx5dr_domain *dmn = sb->dmn; struct mlx5dr_domain *vport_dmn; @@ -1988,11 +1989,11 @@ static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, if (sb->vhca_id_valid) { /* Find port GVMI based on the eswitch_owner_vhca_id */ - if (misc->source_eswitch_owner_vhca_id == dmn->info.caps.gvmi) + if (id == dmn->info.caps.gvmi) vport_dmn = dmn; - else if (dmn->peer_dmn && (misc->source_eswitch_owner_vhca_id == - dmn->peer_dmn->info.caps.gvmi)) - vport_dmn = dmn->peer_dmn; + else if (id < MLX5_MAX_PORTS && dmn->peer_dmn[id] && + (id == dmn->peer_dmn[id]->info.caps.gvmi)) + vport_dmn = dmn->peer_dmn[id]; else return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 678a993ab053..1622dbbe6b97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -935,7 +935,7 @@ struct mlx5dr_domain_info { }; struct mlx5dr_domain { - struct mlx5dr_domain *peer_dmn; + struct mlx5dr_domain *peer_dmn[MLX5_MAX_PORTS]; struct mlx5_core_dev *mdev; u32 pdn; struct mlx5_uars_page *uar; diff 
--git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 984653756779..c6fda1cbfcff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -770,14 +770,15 @@ restore_fte: } static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns, - struct mlx5_flow_root_namespace *peer_ns) + struct mlx5_flow_root_namespace *peer_ns, + u8 peer_idx) { struct mlx5dr_domain *peer_domain = NULL; if (peer_ns) peer_domain = peer_ns->fs_dr_domain.dr_domain; mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain, - peer_domain); + peer_domain, peer_idx); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index 9afd268a2573..5ba88f2ecb3f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -48,7 +48,8 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *domain); int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags); void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, - struct mlx5dr_domain *peer_dmn); + struct mlx5dr_domain *peer_dmn, + u8 peer_idx); struct mlx5dr_table * mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags, -- cgit v1.2.3 From e67f928a5204cc577ad35dc8c3ebe60ef64bade8 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 7 Feb 2023 16:08:33 +0200 Subject: net/mlx5: Devcom, Rename paired to ready In downstream patch devcom will provide support for more than two devices. The term 'paired' will be renamed as 'ready' to convey a more accurate meaning. 
Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++-- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c | 20 ++++++++++---------- drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h | 10 +++++----- 6 files changed, 22 insertions(+), 22 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3fbb454f7228..51e147c0dd71 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -414,7 +414,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, return 0; rpriv = mlx5e_rep_to_rep_priv(rep); - if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS)) + if (mlx5_devcom_comp_is_ready(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS)) peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index c7797e3de093..9739a61026d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4210,8 +4210,8 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) flow_flag_test(flow, INGRESS); bool act_is_encap = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); - bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom, - MLX5_DEVCOM_ESW_OFFLOADS); + bool esw_paired = mlx5_devcom_comp_is_ready(esw_attr->in_mdev->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS); if (!esw_paired) return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 761278e1af5c..aeb15b10048e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2836,14 +2836,14 @@ static int mlx5_esw_offloads_devcom_event(int event, esw->paired[mlx5_get_dev_index(peer_esw->dev)] = true; peer_esw->paired[mlx5_get_dev_index(esw->dev)] = true; - mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true); + mlx5_devcom_comp_set_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true); break; case ESW_OFFLOADS_DEVCOM_UNPAIR: if (!esw->paired[mlx5_get_dev_index(peer_esw->dev)]) break; - mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); + mlx5_devcom_comp_set_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); esw->paired[mlx5_get_dev_index(peer_esw->dev)] = false; peer_esw->paired[mlx5_get_dev_index(esw->dev)] = false; mlx5_esw_offloads_unpair(peer_esw, esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 9bc2822881ca..c820f7d266de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -824,8 +824,8 @@ bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) is_mdev_switchdev_mode(dev1) && mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) && mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) && - mlx5_devcom_is_paired(dev0->priv.devcom, - MLX5_DEVCOM_ESW_OFFLOADS) && + mlx5_devcom_comp_is_ready(dev0->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS) && MLX5_CAP_GEN(dev1, lag_native_fdb_selection) && MLX5_CAP_ESW(dev1, root_ft_on_other_esw) && MLX5_CAP_ESW(dev0, esw_shared_ingress_acl)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index b7d779d08d83..7446900a589e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -19,7 +19,7 @@ struct mlx5_devcom_component { mlx5_devcom_event_handler_t handler; struct rw_semaphore sem; - bool paired; + bool ready; }; struct mlx5_devcom_list { @@ -218,25 +218,25 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom, return err; } -void mlx5_devcom_set_paired(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id, - bool paired) +void mlx5_devcom_comp_set_ready(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + bool ready) { struct mlx5_devcom_component *comp; comp = &devcom->priv->components[id]; WARN_ON(!rwsem_is_locked(&comp->sem)); - WRITE_ONCE(comp->paired, paired); + WRITE_ONCE(comp->ready, ready); } -bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id) +bool mlx5_devcom_comp_is_ready(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) { if (IS_ERR_OR_NULL(devcom)) return false; - return READ_ONCE(devcom->priv->components[id].paired); + return READ_ONCE(devcom->priv->components[id].ready); } void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, @@ -250,7 +250,7 @@ void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, comp = &devcom->priv->components[id]; down_read(&comp->sem); - if (!READ_ONCE(comp->paired)) { + if (!READ_ONCE(comp->ready)) { up_read(&comp->sem); return NULL; } @@ -278,7 +278,7 @@ void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom /* This can change concurrently, however 'data' pointer will remain * valid for the duration of RCU read section. 
*/ - if (!READ_ONCE(comp->paired)) + if (!READ_ONCE(comp->ready)) return NULL; return rcu_dereference(comp->device[i].data); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index 9a496f4722da..d465de8459b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -33,11 +33,11 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom, int event, void *event_data); -void mlx5_devcom_set_paired(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id, - bool paired); -bool mlx5_devcom_is_paired(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id); +void mlx5_devcom_comp_set_ready(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + bool ready); +bool mlx5_devcom_comp_is_ready(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, enum mlx5_devcom_components id); -- cgit v1.2.3 From 8611df722030171e31535da569d3da488d2cd3b6 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 2 Feb 2023 10:27:45 +0200 Subject: net/mlx5: E-switch, mark devcom as not ready when all eswitches are unpaired Whenever an eswitch is unpaired with another, the driver marks devcom as not ready. While this is correct in case we are pairing only two eswitches, in order to support pairing of more than two eswitches, the driver needs to mark devcom as not ready only when all eswitches are unpaired.
Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 9833d1a587cc..d6e4ca436f39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -343,6 +343,7 @@ struct mlx5_eswitch { int mode; u16 manager_vport; u16 first_host_vport; + u8 num_peers; struct mlx5_esw_functions esw_funcs; struct { u32 large_group_num; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index aeb15b10048e..09367a320741 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2836,6 +2836,8 @@ static int mlx5_esw_offloads_devcom_event(int event, esw->paired[mlx5_get_dev_index(peer_esw->dev)] = true; peer_esw->paired[mlx5_get_dev_index(esw->dev)] = true; + esw->num_peers++; + peer_esw->num_peers++; mlx5_devcom_comp_set_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true); break; @@ -2843,7 +2845,10 @@ static int mlx5_esw_offloads_devcom_event(int event, if (!esw->paired[mlx5_get_dev_index(peer_esw->dev)]) break; - mlx5_devcom_comp_set_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); + peer_esw->num_peers--; + esw->num_peers--; + if (!esw->num_peers && !peer_esw->num_peers) + mlx5_devcom_comp_set_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); esw->paired[mlx5_get_dev_index(peer_esw->dev)] = false; peer_esw->paired[mlx5_get_dev_index(esw->dev)] = false; mlx5_esw_offloads_unpair(peer_esw, esw); @@ -2884,6 +2889,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) 
mlx5_esw_offloads_devcom_event, esw); + esw->num_peers = 0; mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS, ESW_OFFLOADS_DEVCOM_PAIR, esw); -- cgit v1.2.3 From 90ca127c62e9963e8efd032409f4f4e70308de37 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 2 Jun 2023 11:51:22 -0700 Subject: net/mlx5: Devcom, introduce devcom_for_each_peer_entry Introduce generic APIs which will retrieve all peers. These APIs replace mlx5_devcom_get/release_peer_data, which retrieve only a single peer. Signed-off-by: Mark Bloch Signed-off-by: Shay Drory Reviewed-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 94 +++++++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 44 +++++----- .../net/ethernet/mellanox/mlx5/core/esw/bridge.c | 30 +++++-- .../ethernet/mellanox/mlx5/core/esw/bridge_mcast.c | 21 ++++- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 7 ++ .../net/ethernet/mellanox/mlx5/core/lib/devcom.c | 89 ++++++++++++++------ .../net/ethernet/mellanox/mlx5/core/lib/devcom.h | 23 ++++-- 7 files changed, 209 insertions(+), 99 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 51e147c0dd71..965a8261c99b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -398,25 +398,64 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, } } +static int mlx5e_sqs2vport_add_peers_rules(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, + struct mlx5_devcom *devcom, + struct mlx5e_rep_sq *rep_sq, int i) +{ + struct mlx5_eswitch *peer_esw = NULL; + struct mlx5_flow_handle *flow_rule; + int tmp; + + mlx5_devcom_for_each_peer_entry(devcom, MLX5_DEVCOM_ESW_OFFLOADS, + peer_esw, tmp) { + int peer_rule_idx = mlx5_get_dev_index(peer_esw->dev); + struct mlx5e_rep_sq_peer *sq_peer; + int err; + + sq_peer =
kzalloc(sizeof(*sq_peer), GFP_KERNEL); + if (!sq_peer) + return -ENOMEM; + + flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, + rep, rep_sq->sqn); + if (IS_ERR(flow_rule)) { + kfree(sq_peer); + return PTR_ERR(flow_rule); + } + + sq_peer->rule = flow_rule; + sq_peer->peer = peer_esw; + err = xa_insert(&rep_sq->sq_peer, peer_rule_idx, sq_peer, GFP_KERNEL); + if (err) { + kfree(sq_peer); + mlx5_eswitch_del_send_to_vport_rule(flow_rule); + return err; + } + } + + return 0; +} + static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u32 *sqns_array, int sqns_num) { - struct mlx5_eswitch *peer_esw = NULL; struct mlx5_flow_handle *flow_rule; - struct mlx5e_rep_sq_peer *sq_peer; struct mlx5e_rep_priv *rpriv; struct mlx5e_rep_sq *rep_sq; + struct mlx5_devcom *devcom; + bool devcom_locked = false; int err; int i; if (esw->mode != MLX5_ESWITCH_OFFLOADS) return 0; + devcom = esw->dev->priv.devcom; rpriv = mlx5e_rep_to_rep_priv(rep); - if (mlx5_devcom_comp_is_ready(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS)) - peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom, - MLX5_DEVCOM_ESW_OFFLOADS); + if (mlx5_devcom_comp_is_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS) && + mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) + devcom_locked = true; for (i = 0; i < sqns_num; i++) { rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL); @@ -424,7 +463,6 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, err = -ENOMEM; goto out_err; } - xa_init(&rep_sq->sq_peer); /* Add re-inject rule to the PF/representor sqs */ flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, @@ -437,50 +475,30 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, rep_sq->send_to_vport_rule = flow_rule; rep_sq->sqn = sqns_array[i]; - if (peer_esw) { - int peer_rule_idx = mlx5_get_dev_index(peer_esw->dev); - - sq_peer = kzalloc(sizeof(*sq_peer), GFP_KERNEL); - if (!sq_peer) { - err = -ENOMEM; - goto out_sq_peer_err; - } - - 
flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, - rep, sqns_array[i]); - if (IS_ERR(flow_rule)) { - err = PTR_ERR(flow_rule); - goto out_flow_rule_err; + xa_init(&rep_sq->sq_peer); + if (devcom_locked) { + err = mlx5e_sqs2vport_add_peers_rules(esw, rep, devcom, rep_sq, i); + if (err) { + mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); + xa_destroy(&rep_sq->sq_peer); + kfree(rep_sq); + goto out_err; } - - sq_peer->rule = flow_rule; - sq_peer->peer = peer_esw; - err = xa_insert(&rep_sq->sq_peer, peer_rule_idx, sq_peer, GFP_KERNEL); - if (err) - goto out_xa_err; } list_add(&rep_sq->list, &rpriv->vport_sqs_list); } - if (peer_esw) - mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (devcom_locked) + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); return 0; -out_xa_err: - mlx5_eswitch_del_send_to_vport_rule(flow_rule); -out_flow_rule_err: - kfree(sq_peer); -out_sq_peer_err: - mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); - xa_destroy(&rep_sq->sq_peer); - kfree(rep_sq); out_err: mlx5e_sqs2vport_stop(esw, rep); - if (peer_esw) - mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (devcom_locked) + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 9739a61026d8..88631fb9f966 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1670,6 +1670,7 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro struct mlx5_eswitch *esw; u16 vhca_id; int err; + int i; out_priv = netdev_priv(out_dev); esw = out_priv->mdev->priv.eswitch; @@ -1686,8 +1687,13 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro rcu_read_lock(); devcom = out_priv->mdev->priv.devcom; - esw = 
mlx5_devcom_get_peer_data_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - err = esw ? mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport) : -ENODEV; + err = -ENODEV; + mlx5_devcom_for_each_peer_entry_rcu(devcom, MLX5_DEVCOM_ESW_OFFLOADS, + esw, i) { + err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport); + if (!err) + break; + } rcu_read_unlock(); return err; @@ -2025,15 +2031,14 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, { if (mlx5e_is_eswitch_flow(flow)) { struct mlx5_devcom *devcom = flow->priv->mdev->priv.devcom; - struct mlx5_eswitch *peer_esw; - peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - if (!peer_esw) { + if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) { mlx5e_tc_del_fdb_flow(priv, flow); return; } + mlx5e_tc_del_fdb_peers_flow(flow); - mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); mlx5e_tc_del_fdb_flow(priv, flow); } else { mlx5e_tc_del_nic_flow(priv, flow); @@ -4472,6 +4477,7 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_eswitch *peer_esw; struct mlx5e_tc_flow *flow; int err; + int i; flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, in_mdev); @@ -4483,23 +4489,27 @@ mlx5e_add_fdb_flow(struct mlx5e_priv *priv, return 0; } - peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - if (!peer_esw) { + if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) { err = -ENODEV; goto clean_flow; } - err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags, peer_esw); - if (err) - goto peer_clean; - mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + mlx5_devcom_for_each_peer_entry(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + peer_esw, i) { + err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags, peer_esw); + if (err) + goto peer_clean; + } - *__flow = flow; + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + *__flow = flow; return 
0; peer_clean: - mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + mlx5e_tc_del_fdb_peers_flow(flow); + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); clean_flow: mlx5e_tc_del_fdb_flow(priv, flow); return err; @@ -4719,7 +4729,6 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, { struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct rhashtable *tc_ht = get_tc_ht(priv, flags); - struct mlx5_eswitch *peer_esw; struct mlx5e_tc_flow *flow; struct mlx5_fc *counter; u64 lastuse = 0; @@ -4754,8 +4763,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, /* Under multipath it's possible for one rule to be currently * un-offloaded while the other rule is offloaded. */ - peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - if (!peer_esw) + if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) goto out; if (flow_flag_test(flow, DUP)) { @@ -4786,7 +4794,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, } no_peer_counter: - mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); out: flow_stats_update(&f->stats, bytes, packets, 0, lastuse, FLOW_ACTION_HW_STATS_DELAYED); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index 1ba03e219111..bea7cc645461 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -647,22 +647,35 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, } static struct mlx5_flow_handle * -mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, const unsigned char *addr, +mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, u16 esw_owner_vhca_id, + const unsigned char *addr, struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, struct mlx5_esw_bridge *bridge) { 
struct mlx5_devcom *devcom = bridge->br_offloads->esw->dev->priv.devcom; + struct mlx5_eswitch *tmp, *peer_esw = NULL; static struct mlx5_flow_handle *handle; - struct mlx5_eswitch *peer_esw; + int i; - peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - if (!peer_esw) + if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) return ERR_PTR(-ENODEV); + mlx5_devcom_for_each_peer_entry(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + tmp, i) { + if (mlx5_esw_is_owner(tmp, vport_num, esw_owner_vhca_id)) { + peer_esw = tmp; + break; + } + } + if (!peer_esw) { + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return ERR_PTR(-ENODEV); + } + handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id, bridge, peer_esw); - - mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); return handle; } @@ -1369,8 +1382,9 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, u16 esw_ow entry->ingress_counter = counter; handle = peer ? 
- mlx5_esw_bridge_ingress_flow_peer_create(vport_num, addr, vlan, - mlx5_fc_id(counter), bridge) : + mlx5_esw_bridge_ingress_flow_peer_create(vport_num, esw_owner_vhca_id, + addr, vlan, mlx5_fc_id(counter), + bridge) : mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan, mlx5_fc_id(counter), bridge); if (IS_ERR(handle)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c index 2eae594a5e80..2455f8b93c1e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c @@ -540,16 +540,29 @@ static struct mlx5_flow_handle * mlx5_esw_bridge_mcast_filter_flow_peer_create(struct mlx5_esw_bridge_port *port) { struct mlx5_devcom *devcom = port->bridge->br_offloads->esw->dev->priv.devcom; + struct mlx5_eswitch *tmp, *peer_esw = NULL; static struct mlx5_flow_handle *handle; - struct mlx5_eswitch *peer_esw; + int i; - peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); - if (!peer_esw) + if (!mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS)) return ERR_PTR(-ENODEV); + mlx5_devcom_for_each_peer_entry(devcom, + MLX5_DEVCOM_ESW_OFFLOADS, + tmp, i) { + if (mlx5_esw_is_owner(tmp, port->vport_num, port->esw_owner_vhca_id)) { + peer_esw = tmp; + break; + } + } + if (!peer_esw) { + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return ERR_PTR(-ENODEV); + } + handle = mlx5_esw_bridge_mcast_flow_with_esw_create(port, peer_esw); - mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS); return handle; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index d6e4ca436f39..c42c16d9ccbc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -585,6 +585,13 @@ 
mlx5_esw_is_manager_vport(const struct mlx5_eswitch *esw, u16 vport_num) return esw->manager_vport == vport_num; } +static inline bool mlx5_esw_is_owner(struct mlx5_eswitch *esw, u16 vport_num, + u16 esw_owner_vhca_id) +{ + return esw_owner_vhca_id == MLX5_CAP_GEN(esw->dev, vhca_id) || + (vport_num == MLX5_VPORT_UPLINK && mlx5_lag_is_master(esw->dev)); +} + static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev) { return mlx5_core_is_ecpf_esw_manager(dev) ? diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index 7446900a589e..96a3b7b9a5cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -239,55 +239,92 @@ bool mlx5_devcom_comp_is_ready(struct mlx5_devcom *devcom, return READ_ONCE(devcom->priv->components[id].ready); } -void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id) +bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) { struct mlx5_devcom_component *comp; - int i; if (IS_ERR_OR_NULL(devcom)) - return NULL; + return false; comp = &devcom->priv->components[id]; down_read(&comp->sem); if (!READ_ONCE(comp->ready)) { up_read(&comp->sem); - return NULL; + return false; } - for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) - if (i != devcom->idx) - break; + return true; +} + +void mlx5_devcom_for_each_peer_end(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id) +{ + struct mlx5_devcom_component *comp = &devcom->priv->components[id]; - return rcu_dereference_protected(comp->device[i].data, lockdep_is_held(&comp->sem)); + up_read(&comp->sem); } -void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id) +void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + int *i) { struct mlx5_devcom_component *comp; - int i; + void 
*ret; + int idx; - if (IS_ERR_OR_NULL(devcom)) - return NULL; + comp = &devcom->priv->components[id]; - for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) - if (i != devcom->idx) - break; + if (*i == MLX5_DEVCOM_PORTS_SUPPORTED) + return NULL; + for (idx = *i; idx < MLX5_DEVCOM_PORTS_SUPPORTED; idx++) { + if (idx != devcom->idx) { + ret = rcu_dereference_protected(comp->device[idx].data, + lockdep_is_held(&comp->sem)); + if (ret) + break; + } + } - comp = &devcom->priv->components[id]; - /* This can change concurrently, however 'data' pointer will remain - * valid for the duration of RCU read section. - */ - if (!READ_ONCE(comp->ready)) + if (idx == MLX5_DEVCOM_PORTS_SUPPORTED) { + *i = idx; return NULL; + } + *i = idx + 1; - return rcu_dereference(comp->device[i].data); + return ret; } -void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id) +void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, + int *i) { - struct mlx5_devcom_component *comp = &devcom->priv->components[id]; + struct mlx5_devcom_component *comp; + void *ret; + int idx; - up_read(&comp->sem); + comp = &devcom->priv->components[id]; + + if (*i == MLX5_DEVCOM_PORTS_SUPPORTED) + return NULL; + for (idx = *i; idx < MLX5_DEVCOM_PORTS_SUPPORTED; idx++) { + if (idx != devcom->idx) { + /* This can change concurrently, however 'data' pointer will remain + * valid for the duration of RCU read section. 
+ */ + if (!READ_ONCE(comp->ready)) + return NULL; + ret = rcu_dereference(comp->device[idx].data); + if (ret) + break; + } + } + + if (idx == MLX5_DEVCOM_PORTS_SUPPORTED) { + *i = idx; + return NULL; + } + *i = idx + 1; + + return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index d465de8459b4..b7f72f1a5367 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -39,11 +39,24 @@ void mlx5_devcom_comp_set_ready(struct mlx5_devcom *devcom, bool mlx5_devcom_comp_is_ready(struct mlx5_devcom *devcom, enum mlx5_devcom_components id); -void *mlx5_devcom_get_peer_data(struct mlx5_devcom *devcom, - enum mlx5_devcom_components id); -void *mlx5_devcom_get_peer_data_rcu(struct mlx5_devcom *devcom, enum mlx5_devcom_components id); -void mlx5_devcom_release_peer_data(struct mlx5_devcom *devcom, +bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id); +void mlx5_devcom_for_each_peer_end(struct mlx5_devcom *devcom, enum mlx5_devcom_components id); +void *mlx5_devcom_get_next_peer_data(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, int *i); -#endif +#define mlx5_devcom_for_each_peer_entry(devcom, id, data, i) \ + for (i = 0, data = mlx5_devcom_get_next_peer_data(devcom, id, &i); \ + data; \ + data = mlx5_devcom_get_next_peer_data(devcom, id, &i)) + +void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom *devcom, + enum mlx5_devcom_components id, int *i); +#define mlx5_devcom_for_each_peer_entry_rcu(devcom, id, data, i) \ + for (i = 0, data = mlx5_devcom_get_next_peer_data_rcu(devcom, id, &i); \ + data; \ + data = mlx5_devcom_get_next_peer_data_rcu(devcom, id, &i)) + +#endif -- cgit v1.2.3 From e2a82bf8a428165a803c037228bdaa67cbe4764c Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Mon, 6 Feb 2023 13:50:13 +0200 Subject: net/mlx5: Devcom, extend 
mlx5_devcom_send_event to work with more than two devices mlx5_devcom_send_event is used to send an event from one eswitch to the other. In other words, only one event is sent, which means, no error mechanism is needed. However, in case devcom has more than two eswitches, a proper error mechanism is needed. Hence, in case of error, devcom will perform the error unwind, since devcom knows how many events were successful. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c | 17 +++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h | 2 +- 3 files changed, 19 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 09367a320741..29de4e759f4f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2892,7 +2892,8 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) esw->num_peers = 0; mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS, - ESW_OFFLOADS_DEVCOM_PAIR, esw); + ESW_OFFLOADS_DEVCOM_PAIR, + ESW_OFFLOADS_DEVCOM_UNPAIR, esw); } void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) @@ -2906,6 +2907,7 @@ void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) return; mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS, + ESW_OFFLOADS_DEVCOM_UNPAIR, ESW_OFFLOADS_DEVCOM_UNPAIR, esw); mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index 96a3b7b9a5cd..8472bbb3cd58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++
b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -193,7 +193,7 @@ void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, int mlx5_devcom_send_event(struct mlx5_devcom *devcom, enum mlx5_devcom_components id, - int event, + int event, int rollback_event, void *event_data) { struct mlx5_devcom_component *comp; @@ -210,10 +210,23 @@ int mlx5_devcom_send_event(struct mlx5_devcom *devcom, if (i != devcom->idx && data) { err = comp->handler(event, data, event_data); - break; + if (err) + goto rollback; } } + up_write(&comp->sem); + return 0; + +rollback: + while (i--) { + void *data = rcu_dereference_protected(comp->device[i].data, + lockdep_is_held(&comp->sem)); + + if (i != devcom->idx && data) + comp->handler(rollback_event, data, event_data); + } + up_write(&comp->sem); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index b7f72f1a5367..bb1970ba8730 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -30,7 +30,7 @@ void mlx5_devcom_unregister_component(struct mlx5_devcom *devcom, int mlx5_devcom_send_event(struct mlx5_devcom *devcom, enum mlx5_devcom_components id, - int event, + int event, int rollback_event, void *event_data); void mlx5_devcom_comp_set_ready(struct mlx5_devcom *devcom, -- cgit v1.2.3 From 7190d0ff0e17690a9b1279d84a06473600ba2060 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Fri, 2 Jun 2023 17:46:58 +0800 Subject: net: enetc: correct the statistics of rx bytes The rx_bytes of struct net_device_stats should count the length of ethernet frames excluding the FCS. However, there are two problems with the rx_bytes statistics of the current enetc driver. one is that the length of VLAN header is not counted if the VLAN extraction feature is enabled. 
The other is that the length of L2 header is not counted, because eth_type_trans() is invoked before updating rx_bytes which will subtract the length of L2 header from skb->len. BTW, the rx_bytes statistics of XDP path also have similar problem, I will fix it in another patch. Fixes: a800abd3ecb9 ("net: enetc: move skb creation into enetc_build_skb") Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 3c4fa26f0f9b..d6c0f3f46c2a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1229,7 +1229,13 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, if (!skb) break; - rx_byte_cnt += skb->len; + /* When set, the outer VLAN header is extracted and reported + * in the receive buffer descriptor. So rx_byte_cnt should + * add the length of the extracted VLAN header. + */ + if (bd_status & ENETC_RXBD_FLAG_VLAN) + rx_byte_cnt += VLAN_HLEN; + rx_byte_cnt += skb->len + ETH_HLEN; rx_frm_cnt++; napi_gro_receive(napi, skb); -- cgit v1.2.3 From fdebd850cc065495abf1d64756496050bb22db67 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Fri, 2 Jun 2023 17:46:59 +0800 Subject: net: enetc: correct rx_bytes statistics of XDP The rx_bytes statistics of XDP are always zero, because rx_byte_cnt is not updated after it is initialized to 0. So fix it. Fixes: d1b15102dd16 ("net: enetc: add support for XDP_DROP and XDP_PASS") Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/enetc/enetc.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index d6c0f3f46c2a..9e1b2536e9a9 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1571,6 +1571,14 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, enetc_build_xdp_buff(rx_ring, bd_status, &rxbd, &i, &cleaned_cnt, &xdp_buff); + /* When set, the outer VLAN header is extracted and reported + * in the receive buffer descriptor. So rx_byte_cnt should + * add the length of the extracted VLAN header. + */ + if (bd_status & ENETC_RXBD_FLAG_VLAN) + rx_byte_cnt += VLAN_HLEN; + rx_byte_cnt += xdp_get_buff_len(&xdp_buff); + xdp_act = bpf_prog_run_xdp(prog, &xdp_buff); switch (xdp_act) { -- cgit v1.2.3 From db48abbaa18e571106711b42affe68ca6f36ca5a Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 1 Jun 2023 16:14:52 +0200 Subject: net: ethernet: altera-tse: Convert to mdio-regmap and use PCS Lynx The newly introduced regmap-based MDIO driver allows for an easy mapping of an mdiodevice onto the memory-mapped TSE PCS, which is actually a Lynx PCS. Convert Altera TSE to use this PCS instead of the pcs-altera-tse, which is nothing more than a memory-mapped Lynx PCS. Signed-off-by: Maxime Chevallier Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/altera/Kconfig | 2 + drivers/net/ethernet/altera/altera_tse_main.c | 57 ++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 9 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/altera/Kconfig b/drivers/net/ethernet/altera/Kconfig index dd7fd41ccde5..93533ba03429 100644 --- a/drivers/net/ethernet/altera/Kconfig +++ b/drivers/net/ethernet/altera/Kconfig @@ -5,6 +5,8 @@ config ALTERA_TSE select PHYLIB select PHYLINK select PCS_ALTERA_TSE + select MDIO_REGMAP + select REGMAP_MMIO help This driver supports the Altera Triple-Speed (TSE) Ethernet MAC. diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 190ff1bcd94e..d866c0f1b503 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -27,14 +27,16 @@ #include #include #include +#include #include #include #include #include #include -#include +#include #include #include +#include #include #include @@ -1132,13 +1134,16 @@ static int request_and_map(struct platform_device *pdev, const char *name, static int altera_tse_probe(struct platform_device *pdev) { const struct of_device_id *of_id = NULL; + struct regmap_config pcs_regmap_cfg; struct altera_tse_private *priv; + struct mdio_regmap_config mrc; struct resource *control_port; + struct regmap *pcs_regmap; struct resource *dma_res; struct resource *pcs_res; + struct mii_bus *pcs_bus; struct net_device *ndev; void __iomem *descmap; - int pcs_reg_width = 2; int ret = -ENODEV; ndev = alloc_etherdev(sizeof(struct altera_tse_private)); @@ -1255,12 +1260,32 @@ static int altera_tse_probe(struct platform_device *pdev) * address space, but if it's not the case, we fallback to the mdiophy0 * from the MAC's address space */ - ret = request_and_map(pdev, "pcs", &pcs_res, - &priv->pcs_base); + ret = request_and_map(pdev, "pcs", &pcs_res, &priv->pcs_base); if (ret) { + /* If we can't find a 
dedicated resource for the PCS, fallback + * to the internal PCS, that has a different address stride + */ priv->pcs_base = priv->mac_dev + tse_csroffs(mdio_phy0); - pcs_reg_width = 4; + pcs_regmap_cfg.reg_bits = 32; + /* Values are MDIO-like values, on 16 bits */ + pcs_regmap_cfg.val_bits = 16; + pcs_regmap_cfg.reg_shift = REGMAP_UPSHIFT(2); + } else { + pcs_regmap_cfg.reg_bits = 16; + pcs_regmap_cfg.val_bits = 16; + pcs_regmap_cfg.reg_shift = REGMAP_UPSHIFT(1); + } + + /* Create a regmap for the PCS so that it can be used by the PCS driver */ + pcs_regmap = devm_regmap_init_mmio(&pdev->dev, priv->pcs_base, + &pcs_regmap_cfg); + if (IS_ERR(pcs_regmap)) { + ret = PTR_ERR(pcs_regmap); + goto err_free_netdev; } + mrc.regmap = pcs_regmap; + mrc.parent = &pdev->dev; + mrc.valid_addr = 0x0; /* Rx IRQ */ priv->rx_irq = platform_get_irq_byname(pdev, "rx_irq"); @@ -1384,7 +1409,18 @@ static int altera_tse_probe(struct platform_device *pdev) (unsigned long) control_port->start, priv->rx_irq, priv->tx_irq); - priv->pcs = alt_tse_pcs_create(ndev, priv->pcs_base, pcs_reg_width); + snprintf(mrc.name, MII_BUS_ID_SIZE, "%s-pcs-mii", ndev->name); + pcs_bus = devm_mdio_regmap_register(&pdev->dev, &mrc); + if (IS_ERR(pcs_bus)) { + ret = PTR_ERR(pcs_bus); + goto err_init_pcs; + } + + priv->pcs = lynx_pcs_create_mdiodev(pcs_bus, 0); + if (IS_ERR(priv->pcs)) { + ret = PTR_ERR(priv->pcs); + goto err_init_pcs; + } priv->phylink_config.dev = &ndev->dev; priv->phylink_config.type = PHYLINK_NETDEV; @@ -1407,12 +1443,13 @@ static int altera_tse_probe(struct platform_device *pdev) if (IS_ERR(priv->phylink)) { dev_err(&pdev->dev, "failed to create phylink\n"); ret = PTR_ERR(priv->phylink); - goto err_init_phy; + goto err_init_phylink; } return 0; - -err_init_phy: +err_init_phylink: + lynx_pcs_destroy(priv->pcs); +err_init_pcs: unregister_netdev(ndev); err_register_netdev: netif_napi_del(&priv->napi); @@ -1433,6 +1470,8 @@ static int altera_tse_remove(struct platform_device *pdev) 
altera_tse_mdio_destroy(ndev); unregister_netdev(ndev); phylink_destroy(priv->phylink); + lynx_pcs_destroy(priv->pcs); + free_netdev(ndev); return 0; -- cgit v1.2.3 From 5d1f3fe7d2d54d04b44aa5b9b62b305fdcf653ec Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 1 Jun 2023 16:14:54 +0200 Subject: net: stmmac: dwmac-sogfpga: use the lynx pcs driver dwmac_socfpga re-implements support for the TSE PCS, which is identical to the already existing TSE PCS, which in turn is the same as the Lynx PCS. Drop the existing TSE re-implementation and use the Lynx PCS instead, relying on the regmap-mdio driver to translate MDIO accesses into mmio accesses. Add a lynx_pcs reference in the stmmac's internal structure, and use .mac_select_pcs() to return the relevant PCS to be used. Signed-off-by: Maxime Chevallier Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 3 + drivers/net/ethernet/stmicro/stmmac/Makefile | 2 +- drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c | 257 --------------------- drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h | 29 --- drivers/net/ethernet/stmicro/stmmac/common.h | 2 + .../net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 91 ++++++-- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 +- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 3 + 8 files changed, 83 insertions(+), 316 deletions(-) delete mode 100644 drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c delete mode 100644 drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 5f5a997f21f3..5583f0b055ec 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -158,6 +158,9 @@ config DWMAC_SOCFPGA default ARCH_INTEL_SOCFPGA depends on OF && (ARCH_INTEL_SOCFPGA || COMPILE_TEST) select MFD_SYSCON + select MDIO_REGMAP + select REGMAP_MMIO +
select PCS_LYNX help Support for ethernet controller on Altera SOCFPGA diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 8738fdbb4b2d..7dd3d388068b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -35,7 +35,7 @@ obj-$(CONFIG_DWMAC_IMX8) += dwmac-imx.o obj-$(CONFIG_DWMAC_TEGRA) += dwmac-tegra.o obj-$(CONFIG_DWMAC_VISCONTI) += dwmac-visconti.o stmmac-platform-objs:= stmmac_platform.o -dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o +dwmac-altr-socfpga-objs := dwmac-socfpga.o obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o obj-$(CONFIG_DWMAC_INTEL) += dwmac-intel.o diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c deleted file mode 100644 index 00f6d347eaf7..000000000000 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c +++ /dev/null @@ -1,257 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright Altera Corporation (C) 2016. All rights reserved. 
- * - * Author: Tien Hock Loh - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "stmmac.h" -#include "stmmac_platform.h" -#include "altr_tse_pcs.h" - -#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0 -#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII BIT(1) -#define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII BIT(2) -#define SYSMGR_EMACGRP_CTRL_PHYSEL_WIDTH 2 -#define SYSMGR_EMACGRP_CTRL_PHYSEL_MASK GENMASK(1, 0) - -#define TSE_PCS_CONTROL_AN_EN_MASK BIT(12) -#define TSE_PCS_CONTROL_REG 0x00 -#define TSE_PCS_CONTROL_RESTART_AN_MASK BIT(9) -#define TSE_PCS_CTRL_AUTONEG_SGMII 0x1140 -#define TSE_PCS_IF_MODE_REG 0x28 -#define TSE_PCS_LINK_TIMER_0_REG 0x24 -#define TSE_PCS_LINK_TIMER_1_REG 0x26 -#define TSE_PCS_SIZE 0x40 -#define TSE_PCS_STATUS_AN_COMPLETED_MASK BIT(5) -#define TSE_PCS_STATUS_LINK_MASK 0x0004 -#define TSE_PCS_STATUS_REG 0x02 -#define TSE_PCS_SGMII_SPEED_1000 BIT(3) -#define TSE_PCS_SGMII_SPEED_100 BIT(2) -#define TSE_PCS_SGMII_SPEED_10 0x0 -#define TSE_PCS_SW_RST_MASK 0x8000 -#define TSE_PCS_PARTNER_ABILITY_REG 0x0A -#define TSE_PCS_PARTNER_DUPLEX_FULL 0x1000 -#define TSE_PCS_PARTNER_DUPLEX_HALF 0x0000 -#define TSE_PCS_PARTNER_DUPLEX_MASK 0x1000 -#define TSE_PCS_PARTNER_SPEED_MASK GENMASK(11, 10) -#define TSE_PCS_PARTNER_SPEED_1000 BIT(11) -#define TSE_PCS_PARTNER_SPEED_100 BIT(10) -#define TSE_PCS_PARTNER_SPEED_10 0x0000 -#define TSE_PCS_PARTNER_SPEED_1000 BIT(11) -#define TSE_PCS_PARTNER_SPEED_100 BIT(10) -#define TSE_PCS_PARTNER_SPEED_10 0x0000 -#define TSE_PCS_SGMII_SPEED_MASK GENMASK(3, 2) -#define TSE_PCS_SGMII_LINK_TIMER_0 0x0D40 -#define TSE_PCS_SGMII_LINK_TIMER_1 0x0003 -#define TSE_PCS_SW_RESET_TIMEOUT 100 -#define TSE_PCS_USE_SGMII_AN_MASK BIT(1) -#define TSE_PCS_USE_SGMII_ENA BIT(0) -#define TSE_PCS_IF_USE_SGMII 0x03 - -#define AUTONEGO_LINK_TIMER 20 - -static int tse_pcs_reset(void __iomem *base, struct tse_pcs *pcs) -{ - int counter = 0; - u16 val; - - val = readw(base + TSE_PCS_CONTROL_REG); - val 
|= TSE_PCS_SW_RST_MASK; - writew(val, base + TSE_PCS_CONTROL_REG); - - while (counter < TSE_PCS_SW_RESET_TIMEOUT) { - val = readw(base + TSE_PCS_CONTROL_REG); - val &= TSE_PCS_SW_RST_MASK; - if (val == 0) - break; - counter++; - udelay(1); - } - if (counter >= TSE_PCS_SW_RESET_TIMEOUT) { - dev_err(pcs->dev, "PCS could not get out of sw reset\n"); - return -ETIMEDOUT; - } - - return 0; -} - -int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs) -{ - int ret = 0; - - writew(TSE_PCS_IF_USE_SGMII, base + TSE_PCS_IF_MODE_REG); - - writew(TSE_PCS_CTRL_AUTONEG_SGMII, base + TSE_PCS_CONTROL_REG); - - writew(TSE_PCS_SGMII_LINK_TIMER_0, base + TSE_PCS_LINK_TIMER_0_REG); - writew(TSE_PCS_SGMII_LINK_TIMER_1, base + TSE_PCS_LINK_TIMER_1_REG); - - ret = tse_pcs_reset(base, pcs); - if (ret == 0) - writew(SGMII_ADAPTER_ENABLE, - pcs->sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); - - return ret; -} - -static void pcs_link_timer_callback(struct tse_pcs *pcs) -{ - u16 val = 0; - void __iomem *tse_pcs_base = pcs->tse_pcs_base; - void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base; - - val = readw(tse_pcs_base + TSE_PCS_STATUS_REG); - val &= TSE_PCS_STATUS_LINK_MASK; - - if (val != 0) { - dev_dbg(pcs->dev, "Adapter: Link is established\n"); - writew(SGMII_ADAPTER_ENABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); - } else { - mod_timer(&pcs->aneg_link_timer, jiffies + - msecs_to_jiffies(AUTONEGO_LINK_TIMER)); - } -} - -static void auto_nego_timer_callback(struct tse_pcs *pcs) -{ - u16 val = 0; - u16 speed = 0; - u16 duplex = 0; - void __iomem *tse_pcs_base = pcs->tse_pcs_base; - void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base; - - val = readw(tse_pcs_base + TSE_PCS_STATUS_REG); - val &= TSE_PCS_STATUS_AN_COMPLETED_MASK; - - if (val != 0) { - dev_dbg(pcs->dev, "Adapter: Auto Negotiation is completed\n"); - val = readw(tse_pcs_base + TSE_PCS_PARTNER_ABILITY_REG); - speed = val & TSE_PCS_PARTNER_SPEED_MASK; - duplex = val & TSE_PCS_PARTNER_DUPLEX_MASK; - - if 
(speed == TSE_PCS_PARTNER_SPEED_10 && - duplex == TSE_PCS_PARTNER_DUPLEX_FULL) - dev_dbg(pcs->dev, - "Adapter: Link Partner is Up - 10/Full\n"); - else if (speed == TSE_PCS_PARTNER_SPEED_100 && - duplex == TSE_PCS_PARTNER_DUPLEX_FULL) - dev_dbg(pcs->dev, - "Adapter: Link Partner is Up - 100/Full\n"); - else if (speed == TSE_PCS_PARTNER_SPEED_1000 && - duplex == TSE_PCS_PARTNER_DUPLEX_FULL) - dev_dbg(pcs->dev, - "Adapter: Link Partner is Up - 1000/Full\n"); - else if (speed == TSE_PCS_PARTNER_SPEED_10 && - duplex == TSE_PCS_PARTNER_DUPLEX_HALF) - dev_err(pcs->dev, - "Adapter does not support Half Duplex\n"); - else if (speed == TSE_PCS_PARTNER_SPEED_100 && - duplex == TSE_PCS_PARTNER_DUPLEX_HALF) - dev_err(pcs->dev, - "Adapter does not support Half Duplex\n"); - else if (speed == TSE_PCS_PARTNER_SPEED_1000 && - duplex == TSE_PCS_PARTNER_DUPLEX_HALF) - dev_err(pcs->dev, - "Adapter does not support Half Duplex\n"); - else - dev_err(pcs->dev, - "Adapter: Invalid Partner Speed and Duplex\n"); - - if (duplex == TSE_PCS_PARTNER_DUPLEX_FULL && - (speed == TSE_PCS_PARTNER_SPEED_10 || - speed == TSE_PCS_PARTNER_SPEED_100 || - speed == TSE_PCS_PARTNER_SPEED_1000)) - writew(SGMII_ADAPTER_ENABLE, - sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); - } else { - val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG); - val |= TSE_PCS_CONTROL_RESTART_AN_MASK; - writew(val, tse_pcs_base + TSE_PCS_CONTROL_REG); - - tse_pcs_reset(tse_pcs_base, pcs); - mod_timer(&pcs->aneg_link_timer, jiffies + - msecs_to_jiffies(AUTONEGO_LINK_TIMER)); - } -} - -static void aneg_link_timer_callback(struct timer_list *t) -{ - struct tse_pcs *pcs = from_timer(pcs, t, aneg_link_timer); - - if (pcs->autoneg == AUTONEG_ENABLE) - auto_nego_timer_callback(pcs); - else if (pcs->autoneg == AUTONEG_DISABLE) - pcs_link_timer_callback(pcs); -} - -void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, - unsigned int speed) -{ - void __iomem *tse_pcs_base = pcs->tse_pcs_base; - u32 val; - - pcs->autoneg = 
phy_dev->autoneg; - - if (phy_dev->autoneg == AUTONEG_ENABLE) { - val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG); - val |= TSE_PCS_CONTROL_AN_EN_MASK; - writew(val, tse_pcs_base + TSE_PCS_CONTROL_REG); - - val = readw(tse_pcs_base + TSE_PCS_IF_MODE_REG); - val |= TSE_PCS_USE_SGMII_AN_MASK; - writew(val, tse_pcs_base + TSE_PCS_IF_MODE_REG); - - val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG); - val |= TSE_PCS_CONTROL_RESTART_AN_MASK; - - tse_pcs_reset(tse_pcs_base, pcs); - - timer_setup(&pcs->aneg_link_timer, aneg_link_timer_callback, - 0); - mod_timer(&pcs->aneg_link_timer, jiffies + - msecs_to_jiffies(AUTONEGO_LINK_TIMER)); - } else if (phy_dev->autoneg == AUTONEG_DISABLE) { - val = readw(tse_pcs_base + TSE_PCS_CONTROL_REG); - val &= ~TSE_PCS_CONTROL_AN_EN_MASK; - writew(val, tse_pcs_base + TSE_PCS_CONTROL_REG); - - val = readw(tse_pcs_base + TSE_PCS_IF_MODE_REG); - val &= ~TSE_PCS_USE_SGMII_AN_MASK; - writew(val, tse_pcs_base + TSE_PCS_IF_MODE_REG); - - val = readw(tse_pcs_base + TSE_PCS_IF_MODE_REG); - val &= ~TSE_PCS_SGMII_SPEED_MASK; - - switch (speed) { - case 1000: - val |= TSE_PCS_SGMII_SPEED_1000; - break; - case 100: - val |= TSE_PCS_SGMII_SPEED_100; - break; - case 10: - val |= TSE_PCS_SGMII_SPEED_10; - break; - default: - return; - } - writew(val, tse_pcs_base + TSE_PCS_IF_MODE_REG); - - tse_pcs_reset(tse_pcs_base, pcs); - - timer_setup(&pcs->aneg_link_timer, aneg_link_timer_callback, - 0); - mod_timer(&pcs->aneg_link_timer, jiffies + - msecs_to_jiffies(AUTONEGO_LINK_TIMER)); - } -} diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h deleted file mode 100644 index 694ac25ef426..000000000000 --- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright Altera Corporation (C) 2016. All rights reserved. 
- * - * Author: Tien Hock Loh - */ - -#ifndef __TSE_PCS_H__ -#define __TSE_PCS_H__ - -#include -#include - -#define SGMII_ADAPTER_CTRL_REG 0x00 -#define SGMII_ADAPTER_ENABLE 0x0000 -#define SGMII_ADAPTER_DISABLE 0x0001 - -struct tse_pcs { - struct device *dev; - void __iomem *tse_pcs_base; - void __iomem *sgmii_adapter_base; - struct timer_list aneg_link_timer; - int autoneg; -}; - -int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs); -void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev, - unsigned int speed); - -#endif /* __TSE_PCS_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 4ad692c4116c..52c5ec553276 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #if IS_ENABLED(CONFIG_VLAN_8021Q) #define STMMAC_VLAN_TAG_USED @@ -519,6 +520,7 @@ struct mac_device_info { const struct stmmac_tc_ops *tc; const struct stmmac_mmc_ops *mmc; struct dw_xpcs *xpcs; + struct phylink_pcs *lynx_pcs; /* Lynx external PCS */ struct mii_regs mii; /* MII register Addresses */ struct mac_link link; void __iomem *pcsr; /* vpointer to device CSRs */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 6ee050300b31..e399fccbafe5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -10,14 +10,13 @@ #include #include #include +#include #include #include #include "stmmac.h" #include "stmmac_platform.h" -#include "altr_tse_pcs.h" - #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII 0x0 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII 0x1 #define SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII 0x2 @@ -37,6 +36,10 @@ #define EMAC_SPLITTER_CTRL_SPEED_100 0x3 #define EMAC_SPLITTER_CTRL_SPEED_1000 0x0 +#define SGMII_ADAPTER_CTRL_REG 0x00 +#define 
SGMII_ADAPTER_ENABLE 0x0000 +#define SGMII_ADAPTER_DISABLE 0x0001 + struct socfpga_dwmac; struct socfpga_dwmac_ops { int (*set_phy_mode)(struct socfpga_dwmac *dwmac_priv); @@ -50,16 +53,18 @@ struct socfpga_dwmac { struct reset_control *stmmac_rst; struct reset_control *stmmac_ocp_rst; void __iomem *splitter_base; + void __iomem *tse_pcs_base; + void __iomem *sgmii_adapter_base; bool f2h_ptp_ref_clk; - struct tse_pcs pcs; const struct socfpga_dwmac_ops *ops; + struct mdio_device *pcs_mdiodev; }; static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) { struct socfpga_dwmac *dwmac = (struct socfpga_dwmac *)priv; void __iomem *splitter_base = dwmac->splitter_base; - void __iomem *sgmii_adapter_base = dwmac->pcs.sgmii_adapter_base; + void __iomem *sgmii_adapter_base = dwmac->sgmii_adapter_base; struct device *dev = dwmac->dev; struct net_device *ndev = dev_get_drvdata(dev); struct phy_device *phy_dev = ndev->phydev; @@ -89,11 +94,9 @@ static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) writel(val, splitter_base + EMAC_SPLITTER_CTRL_REG); } - if (phy_dev && sgmii_adapter_base) { + if (phy_dev && sgmii_adapter_base) writew(SGMII_ADAPTER_ENABLE, sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); - tse_pcs_fix_mac_speed(&dwmac->pcs, phy_dev, speed); - } } static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *dev) @@ -183,11 +186,11 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device * goto err_node_put; } - dwmac->pcs.sgmii_adapter_base = + dwmac->sgmii_adapter_base = devm_ioremap_resource(dev, &res_sgmii_adapter); - if (IS_ERR(dwmac->pcs.sgmii_adapter_base)) { - ret = PTR_ERR(dwmac->pcs.sgmii_adapter_base); + if (IS_ERR(dwmac->sgmii_adapter_base)) { + ret = PTR_ERR(dwmac->sgmii_adapter_base); goto err_node_put; } } @@ -205,11 +208,11 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device * goto err_node_put; } - dwmac->pcs.tse_pcs_base = + dwmac->tse_pcs_base 
= devm_ioremap_resource(dev, &res_tse_pcs); - if (IS_ERR(dwmac->pcs.tse_pcs_base)) { - ret = PTR_ERR(dwmac->pcs.tse_pcs_base); + if (IS_ERR(dwmac->tse_pcs_base)) { + ret = PTR_ERR(dwmac->tse_pcs_base); goto err_node_put; } } @@ -235,6 +238,13 @@ static int socfpga_get_plat_phymode(struct socfpga_dwmac *dwmac) return priv->plat->interface; } +static void socfpga_sgmii_config(struct socfpga_dwmac *dwmac, bool enable) +{ + u16 val = enable ? SGMII_ADAPTER_ENABLE : SGMII_ADAPTER_DISABLE; + + writew(val, dwmac->sgmii_adapter_base + SGMII_ADAPTER_CTRL_REG); +} + static int socfpga_set_phy_mode_common(int phymode, u32 *val) { switch (phymode) { @@ -310,12 +320,8 @@ static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac) */ reset_control_deassert(dwmac->stmmac_ocp_rst); reset_control_deassert(dwmac->stmmac_rst); - if (phymode == PHY_INTERFACE_MODE_SGMII) { - if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) { - dev_err(dwmac->dev, "Unable to initialize TSE PCS"); - return -EINVAL; - } - } + if (phymode == PHY_INTERFACE_MODE_SGMII) + socfpga_sgmii_config(dwmac, true); return 0; } @@ -367,12 +373,8 @@ static int socfpga_gen10_set_phy_mode(struct socfpga_dwmac *dwmac) */ reset_control_deassert(dwmac->stmmac_ocp_rst); reset_control_deassert(dwmac->stmmac_rst); - if (phymode == PHY_INTERFACE_MODE_SGMII) { - if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) { - dev_err(dwmac->dev, "Unable to initialize TSE PCS"); - return -EINVAL; - } - } + if (phymode == PHY_INTERFACE_MODE_SGMII) + socfpga_sgmii_config(dwmac, true); return 0; } @@ -386,6 +388,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) struct net_device *ndev; struct stmmac_priv *stpriv; const struct socfpga_dwmac_ops *ops; + struct regmap_config pcs_regmap_cfg; ops = device_get_match_data(&pdev->dev); if (!ops) { @@ -443,6 +446,44 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) if (ret) goto err_dvr_remove; + memset(&pcs_regmap_cfg, 0, 
sizeof(pcs_regmap_cfg)); + pcs_regmap_cfg.reg_bits = 16; + pcs_regmap_cfg.val_bits = 16; + pcs_regmap_cfg.reg_shift = REGMAP_UPSHIFT(1); + + /* Create a regmap for the PCS so that it can be used by the PCS driver, + * if we have such a PCS + */ + if (dwmac->tse_pcs_base) { + struct mdio_regmap_config mrc; + struct regmap *pcs_regmap; + struct mii_bus *pcs_bus; + + pcs_regmap = devm_regmap_init_mmio(&pdev->dev, dwmac->tse_pcs_base, + &pcs_regmap_cfg); + if (IS_ERR(pcs_regmap)) { + ret = PTR_ERR(pcs_regmap); + goto err_dvr_remove; + } + + mrc.regmap = pcs_regmap; + mrc.parent = &pdev->dev; + mrc.valid_addr = 0x0; + + snprintf(mrc.name, MII_BUS_ID_SIZE, "%s-pcs-mii", ndev->name); + pcs_bus = devm_mdio_regmap_register(&pdev->dev, &mrc); + if (IS_ERR(pcs_bus)) { + ret = PTR_ERR(pcs_bus); + goto err_dvr_remove; + } + + stpriv->hw->lynx_pcs = lynx_pcs_create_mdiodev(pcs_bus, 0); + if (IS_ERR(stpriv->hw->lynx_pcs)) { + ret = PTR_ERR(stpriv->hw->lynx_pcs); + goto err_dvr_remove; + } + } + return 0; err_dvr_remove: diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 52cab9de05f2..fa07b0d50b46 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -937,10 +937,13 @@ static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config, { struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); - if (!priv->hw->xpcs) - return NULL; + if (priv->hw->xpcs) + return &priv->hw->xpcs->pcs; + + if (priv->hw->lynx_pcs) + return priv->hw->lynx_pcs; - return &priv->hw->xpcs->pcs; + return NULL; } static void stmmac_mac_config(struct phylink_config *config, unsigned int mode, @@ -3813,7 +3816,8 @@ static int __stmmac_open(struct net_device *dev, if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI && (!priv->hw->xpcs || - xpcs_get_an_mode(priv->hw->xpcs, mode) != DW_AN_C73)) { + xpcs_get_an_mode(priv->hw->xpcs, mode) 
!= DW_AN_C73) && + !priv->hw->lynx_pcs) { ret = stmmac_init_phy(dev); if (ret) { netdev_err(priv->dev, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 3db1cb0fd160..c784a6731f08 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -665,6 +665,9 @@ int stmmac_mdio_unregister(struct net_device *ndev) if (priv->hw->xpcs) xpcs_destroy(priv->hw->xpcs); + if (priv->hw->lynx_pcs) + lynx_pcs_destroy(priv->hw->lynx_pcs); + mdiobus_unregister(priv->mii); priv->mii->priv = NULL; mdiobus_free(priv->mii); -- cgit v1.2.3 From be35db17c8729aa07aafec02e1201c06c03f22b0 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 2 Jun 2023 18:20:05 +0200 Subject: mlxsw: spectrum_router: Clarify a comment "Reserved for X" usually means that only X is supposed to use a given object. Here, it is used in the sense that X should consider the object "reserved", as in "restricted". Replace the comment simply by "X", with the implication that that's where the field is used. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4a73e2fe95ef..c905c8f153b4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -96,8 +96,8 @@ struct mlxsw_sp_rif_subport { struct mlxsw_sp_rif_ipip_lb { struct mlxsw_sp_rif common; struct mlxsw_sp_rif_ipip_lb_config lb_config; - u16 ul_vr_id; /* Reserved for Spectrum-2. */ - u16 ul_rif_id; /* Reserved for Spectrum. */ + u16 ul_vr_id; /* Spectrum-1. */ + u16 ul_rif_id; /* Spectrum-2+. 
*/ }; struct mlxsw_sp_rif_params_ipip_lb { -- cgit v1.2.3 From 5afef6748c19032ef9953b0b97f75fd0593178f3 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 2 Jun 2023 18:20:06 +0200 Subject: mlxsw: spectrum_router: Use extack in mlxsw_sp~_rif_ipip_lb_configure() In commit 26029225d992 ("mlxsw: spectrum_router: Propagate extack further"), the mlxsw_sp_rif_ops.configure callback got a new argument, extack. However the callbacks that deal with tunnel configuration, mlxsw_sp1_rif_ipip_lb_configure() and mlxsw_sp2_rif_ipip_lb_configure(), were never updated to pass the parameter further. Do that now. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c905c8f153b4..20ece1b49175 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -9724,7 +9724,7 @@ mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif, struct mlxsw_sp_vr *ul_vr; int err; - ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL); + ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, extack); if (IS_ERR(ul_vr)) return PTR_ERR(ul_vr); @@ -9923,7 +9923,7 @@ mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif, struct mlxsw_sp_rif *ul_rif; int err; - ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, extack); if (IS_ERR(ul_rif)) return PTR_ERR(ul_rif); -- cgit v1.2.3 From 3903249ee1afb9aa06d77e2c39c4be2d3df25e0e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 2 Jun 2023 18:20:07 +0200 Subject: mlxsw: spectrum_router: Do not query MAX_RIFS on each iteration MLXSW_CORE_RES_GET involves a call to spectrum_core, a separate module. 
Instead of making the call on every iteration, cache it up front, and use the value. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 20ece1b49175..f88b0197a6ac 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -7699,9 +7699,10 @@ static struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) { + int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); int i; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) + for (i = 0; i < max_rifs; i++) if (mlxsw_sp->router->rifs[i] && mlxsw_sp->router->rifs[i]->dev == dev) return mlxsw_sp->router->rifs[i]; @@ -10041,11 +10042,12 @@ err_rifs_table_init: static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp) { + int max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); int i; WARN_ON_ONCE(atomic_read(&mlxsw_sp->router->rifs_count)); - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) + for (i = 0; i < max_rifs; i++) WARN_ON_ONCE(mlxsw_sp->router->rifs[i]); devl_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_RIFS); -- cgit v1.2.3 From 75426cc0b31616b11d635076bd1692f2ff2f4a5f Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 2 Jun 2023 18:20:08 +0200 Subject: mlxsw: spectrum_router: Do not query MAX_VRS on each iteration MLXSW_CORE_RES_GET involves a call to spectrum_core, a separate module. Instead of making the call on every iteration, cache it up front, and use the value. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index f88b0197a6ac..7304e8a29cf9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -748,10 +748,11 @@ static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr) static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) { + int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); struct mlxsw_sp_vr *vr; int i; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + for (i = 0; i < max_vrs; i++) { vr = &mlxsw_sp->router->vrs[i]; if (!mlxsw_sp_vr_is_used(vr)) return vr; @@ -792,12 +793,13 @@ static u32 mlxsw_sp_fix_tb_id(u32 tb_id) static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id) { + int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); struct mlxsw_sp_vr *vr; int i; tb_id = mlxsw_sp_fix_tb_id(tb_id); - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + for (i = 0; i < max_vrs; i++) { vr = &mlxsw_sp->router->vrs[i]; if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id) return vr; @@ -959,6 +961,7 @@ static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib, struct mlxsw_sp_lpm_tree *new_tree) { + int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); enum mlxsw_sp_l3proto proto = fib->proto; struct mlxsw_sp_lpm_tree *old_tree; u8 old_id, new_id = new_tree->id; @@ -968,7 +971,7 @@ static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, old_tree = mlxsw_sp->router->lpm.proto_trees[proto]; old_id = old_tree->id; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + for (i = 0; i < max_vrs; i++) { vr = &mlxsw_sp->router->vrs[i]; if 
(!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id)) continue; @@ -7298,9 +7301,10 @@ static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) { + int max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); int i, j; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + for (i = 0; i < max_vrs; i++) { struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i]; if (!mlxsw_sp_vr_is_used(vr)) -- cgit v1.2.3 From 9bc009734774549f8bb8d7e526ba10e70d751a7c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 2 Jun 2023 21:04:55 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: fix a regression on EMAC < 3 We must not assign plat_dat->dwmac4_addrs unconditionally as for structures which don't set them, this will result in the core driver using zeroes everywhere and breaking the driver for older HW. On EMAC < 2 the address should remain NULL. Fixes: b68376191c69 ("net: stmmac: dwmac-qcom-ethqos: Add EMAC3 support") Signed-off-by: Bartosz Golaszewski Reviewed-by: Andrew Halaney Reviewed-by: Siddharth Vadapalli Reviewed-by: Vinod Koul Signed-off-by: David S. 
Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 16a8c361283b..f07905f00f98 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -644,7 +644,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->fix_mac_speed = ethqos_fix_mac_speed; plat_dat->dump_debug_regs = rgmii_dump; plat_dat->has_gmac4 = 1; - plat_dat->dwmac4_addrs = &data->dwmac4_addrs; + if (ethqos->has_emac3) + plat_dat->dwmac4_addrs = &data->dwmac4_addrs; plat_dat->pmt = 1; plat_dat->tso_en = of_property_read_bool(np, "snps,tso"); if (of_device_is_compatible(np, "qcom,qcs404-ethqos")) -- cgit v1.2.3 From 42510dffd0e2c27046905f742172ed6662af5557 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 5 Jun 2023 16:56:00 +0530 Subject: qed/qede: Fix scheduling while atomic Statistics read through bond interface via sysfs causes below bug and traces as it triggers the bonding module to collect the slave device statistics while holding the spinlock, beneath that qede->qed driver statistics flow gets scheduled out due to usleep_range() used in PTT acquire logic [ 3673.988874] Hardware name: HPE ProLiant DL365 Gen10 Plus/ProLiant DL365 Gen10 Plus, BIOS A42 10/29/2021 [ 3673.988878] Call Trace: [ 3673.988891] dump_stack_lvl+0x34/0x44 [ 3673.988908] __schedule_bug.cold+0x47/0x53 [ 3673.988918] __schedule+0x3fb/0x560 [ 3673.988929] schedule+0x43/0xb0 [ 3673.988932] schedule_hrtimeout_range_clock+0xbf/0x1b0 [ 3673.988937] ? 
__hrtimer_init+0xc0/0xc0 [ 3673.988950] usleep_range+0x5e/0x80 [ 3673.988955] qed_ptt_acquire+0x2b/0xd0 [qed] [ 3673.988981] _qed_get_vport_stats+0x141/0x240 [qed] [ 3673.989001] qed_get_vport_stats+0x18/0x80 [qed] [ 3673.989016] qede_fill_by_demand_stats+0x37/0x400 [qede] [ 3673.989028] qede_get_stats64+0x19/0xe0 [qede] [ 3673.989034] dev_get_stats+0x5c/0xc0 [ 3673.989045] netstat_show.constprop.0+0x52/0xb0 [ 3673.989055] dev_attr_show+0x19/0x40 [ 3673.989065] sysfs_kf_seq_show+0x9b/0xf0 [ 3673.989076] seq_read_iter+0x120/0x4b0 [ 3673.989087] new_sync_read+0x118/0x1a0 [ 3673.989095] vfs_read+0xf3/0x180 [ 3673.989099] ksys_read+0x5f/0xe0 [ 3673.989102] do_syscall_64+0x3b/0x90 [ 3673.989109] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 3673.989115] RIP: 0033:0x7f8467d0b082 [ 3673.989119] Code: c0 e9 b2 fe ff ff 50 48 8d 3d ca 05 08 00 e8 35 e7 01 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24 [ 3673.989121] RSP: 002b:00007ffffb21fd08 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 3673.989127] RAX: ffffffffffffffda RBX: 000000000100eca0 RCX: 00007f8467d0b082 [ 3673.989128] RDX: 00000000000003ff RSI: 00007ffffb21fdc0 RDI: 0000000000000003 [ 3673.989130] RBP: 00007f8467b96028 R08: 0000000000000010 R09: 00007ffffb21ec00 [ 3673.989132] R10: 00007ffffb27b170 R11: 0000000000000246 R12: 00000000000000f0 [ 3673.989134] R13: 0000000000000003 R14: 00007f8467b92000 R15: 0000000000045a05 [ 3673.989139] CPU: 30 PID: 285188 Comm: read_all Kdump: loaded Tainted: G W OE Fix this by collecting the statistics asynchronously from a periodic delayed work scheduled at default stats coalescing interval and return the recent copy of statistics from .ndo_get_stats64(), also add ability to configure/retrieve stats coalescing interval using below commands - ethtool -C ethx stats-block-usecs ethtool -c ethx Fixes: 133fac0eedc3 ("qede: Add basic ethtool support") Cc: Sudarsana Kalluru Cc: David Miller
Signed-off-by: Manish Chopra Link: https://lore.kernel.org/r/20230605112600.48238-1-manishc@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 2 +- drivers/net/ethernet/qlogic/qede/qede.h | 4 +++ drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 24 +++++++++++++++-- drivers/net/ethernet/qlogic/qede/qede_main.c | 34 ++++++++++++++++++++++++- 4 files changed, 60 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 2edd6bf64a3c..7776d3bdd459 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1903,7 +1903,7 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats) { u32 i; - if (!cdev) { + if (!cdev || cdev->recov_in_prog) { memset(stats, 0, sizeof(*stats)); return; } diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index f9931ecb7baa..4d83ceebdc49 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -269,6 +269,10 @@ struct qede_dev { #define QEDE_ERR_WARN 3 struct qede_dump_info dump_info; + struct delayed_work periodic_task; + unsigned long stats_coal_ticks; + u32 stats_coal_usecs; + spinlock_t stats_lock; /* lock for vport stats access */ }; enum QEDE_STATE { diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 374a86b875a3..95820cf1cd6c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -429,6 +429,8 @@ static void qede_get_ethtool_stats(struct net_device *dev, } } + spin_lock(&edev->stats_lock); + for (i = 0; i < QEDE_NUM_STATS; i++) { if (qede_is_irrelevant_stat(edev, i)) continue; @@ -438,6 +440,8 @@ static void qede_get_ethtool_stats(struct net_device *dev, buf++; } + spin_unlock(&edev->stats_lock); + 
__qede_unlock(edev); } @@ -829,6 +833,7 @@ out: coal->rx_coalesce_usecs = rx_coal; coal->tx_coalesce_usecs = tx_coal; + coal->stats_block_coalesce_usecs = edev->stats_coal_usecs; return rc; } @@ -842,6 +847,19 @@ int qede_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal, int i, rc = 0; u16 rxc, txc; + if (edev->stats_coal_usecs != coal->stats_block_coalesce_usecs) { + edev->stats_coal_usecs = coal->stats_block_coalesce_usecs; + if (edev->stats_coal_usecs) { + edev->stats_coal_ticks = usecs_to_jiffies(edev->stats_coal_usecs); + schedule_delayed_work(&edev->periodic_task, 0); + + DP_INFO(edev, "Configured stats coal ticks=%lu jiffies\n", + edev->stats_coal_ticks); + } else { + cancel_delayed_work_sync(&edev->periodic_task); + } + } + if (!netif_running(dev)) { DP_INFO(edev, "Interface is down\n"); return -EINVAL; @@ -2252,7 +2270,8 @@ out: } static const struct ethtool_ops qede_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_USECS, + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_STATS_BLOCK_USECS, .get_link_ksettings = qede_get_link_ksettings, .set_link_ksettings = qede_set_link_ksettings, .get_drvinfo = qede_get_drvinfo, @@ -2303,7 +2322,8 @@ static const struct ethtool_ops qede_ethtool_ops = { }; static const struct ethtool_ops qede_vf_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_USECS, + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_STATS_BLOCK_USECS, .get_link_ksettings = qede_get_link_ksettings, .get_drvinfo = qede_get_drvinfo, .get_msglevel = qede_get_msglevel, diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 4c6c685820e3..4b004a728190 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -307,6 +307,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) edev->ops->get_vport_stats(edev->cdev, &stats); + spin_lock(&edev->stats_lock); + 
p_common->no_buff_discards = stats.common.no_buff_discards; p_common->packet_too_big_discard = stats.common.packet_too_big_discard; p_common->ttl0_discard = stats.common.ttl0_discard; @@ -404,6 +406,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) p_ah->tx_1519_to_max_byte_packets = stats.ah.tx_1519_to_max_byte_packets; } + + spin_unlock(&edev->stats_lock); } static void qede_get_stats64(struct net_device *dev, @@ -412,9 +416,10 @@ static void qede_get_stats64(struct net_device *dev, struct qede_dev *edev = netdev_priv(dev); struct qede_stats_common *p_common; - qede_fill_by_demand_stats(edev); p_common = &edev->stats.common; + spin_lock(&edev->stats_lock); + stats->rx_packets = p_common->rx_ucast_pkts + p_common->rx_mcast_pkts + p_common->rx_bcast_pkts; stats->tx_packets = p_common->tx_ucast_pkts + p_common->tx_mcast_pkts + @@ -434,6 +439,8 @@ static void qede_get_stats64(struct net_device *dev, stats->collisions = edev->stats.bb.tx_total_collisions; stats->rx_crc_errors = p_common->rx_crc_errors; stats->rx_frame_errors = p_common->rx_align_errors; + + spin_unlock(&edev->stats_lock); } #ifdef CONFIG_QED_SRIOV @@ -1063,6 +1070,23 @@ static void qede_unlock(struct qede_dev *edev) rtnl_unlock(); } +static void qede_periodic_task(struct work_struct *work) +{ + struct qede_dev *edev = container_of(work, struct qede_dev, + periodic_task.work); + + qede_fill_by_demand_stats(edev); + schedule_delayed_work(&edev->periodic_task, edev->stats_coal_ticks); +} + +static void qede_init_periodic_task(struct qede_dev *edev) +{ + INIT_DELAYED_WORK(&edev->periodic_task, qede_periodic_task); + spin_lock_init(&edev->stats_lock); + edev->stats_coal_usecs = USEC_PER_SEC; + edev->stats_coal_ticks = usecs_to_jiffies(USEC_PER_SEC); +} + static void qede_sp_task(struct work_struct *work) { struct qede_dev *edev = container_of(work, struct qede_dev, @@ -1082,6 +1106,7 @@ static void qede_sp_task(struct work_struct *work) */ if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) 
{ + cancel_delayed_work_sync(&edev->periodic_task); #ifdef CONFIG_QED_SRIOV /* SRIOV must be disabled outside the lock to avoid a deadlock. * The recovery of the active VFs is currently not supported. @@ -1272,6 +1297,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, */ INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task); mutex_init(&edev->qede_lock); + qede_init_periodic_task(edev); rc = register_netdev(edev->ndev); if (rc) { @@ -1296,6 +1322,11 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, edev->rx_copybreak = QEDE_RX_HDR_SIZE; qede_log_probe(edev); + + /* retain user config (for example - after recovery) */ + if (edev->stats_coal_usecs) + schedule_delayed_work(&edev->periodic_task, 0); + return 0; err4: @@ -1364,6 +1395,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) unregister_netdev(ndev); cancel_delayed_work_sync(&edev->sp_task); + cancel_delayed_work_sync(&edev->periodic_task); edev->ops->common->set_power_state(cdev, PCI_D0); -- cgit v1.2.3 From 4f48c30312b7af5365878ab191bb41e7b899e09b Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Mon, 5 Jun 2023 12:51:16 -0700 Subject: pds_core: Fix FW recovery detection Commit 523847df1b37 ("pds_core: add devcmd device interfaces") included initial support for FW recovery detection. Unfortunately, the ordering in pdsc_is_fw_good() was incorrect, which was causing FW recovery to be undetected by the driver. Fix this by making sure to update the cached fw_status by calling pdsc_is_fw_running() before setting the local FW gen. 
Fixes: 523847df1b37 ("pds_core: add devcmd device interfaces") Signed-off-by: Shannon Nelson Signed-off-by: Brett Creeley Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230605195116.49653-1-brett.creeley@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/dev.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index f7c597ea5daf..debe5216fe29 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -68,9 +68,15 @@ bool pdsc_is_fw_running(struct pdsc *pdsc) bool pdsc_is_fw_good(struct pdsc *pdsc) { - u8 gen = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION; + bool fw_running = pdsc_is_fw_running(pdsc); + u8 gen; - return pdsc_is_fw_running(pdsc) && gen == pdsc->fw_generation; + /* Make sure to update the cached fw_status by calling + * pdsc_is_fw_running() before getting the generation + */ + gen = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION; + + return fw_running && gen == pdsc->fw_generation; } static u8 pdsc_devcmd_status(struct pdsc *pdsc) -- cgit v1.2.3 From bf15bb38ec7f4ff522da5c20e1673dbda7159938 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Tue, 6 Jun 2023 10:12:53 -0700 Subject: ice: make writes to /dev/gnssX synchronous The current ice driver's GNSS write implementation buffers writes and works through them asynchronously in a kthread. That's bad because: - The GNSS write_raw operation is supposed to be synchronous[1][2]. - There is no upper bound on the number of pending writes. Userspace can submit writes much faster than the driver can process, consuming unlimited amounts of kernel memory. A patch that's currently on review[3] ("[v3,net] ice: Write all GNSS buffers instead of first one") would add one more problem: - The possibility of waiting for a very long time to flush the write work when doing rmmod, softlockups. 
To fix these issues, simplify the implementation: Drop the buffering, the write_work, and make the writes synchronous. I tested this with gpsd and ubxtool. [1] https://events19.linuxfoundation.org/wp-content/uploads/2017/12/The-GNSS-Subsystem-Johan-Hovold-Hovold-Consulting-AB.pdf "User interface" slide. [2] A comment in drivers/gnss/core.c:gnss_write(): /* Ignoring O_NONBLOCK, write_raw() is synchronous. */ [3] https://patchwork.ozlabs.org/project/intel-wired-lan/patch/20230217120541.16745-1-karol.kolacinski@intel.com/ Fixes: d6b98c8d242a ("ice: add write functionality for GNSS TTY") Signed-off-by: Michal Schmidt Reviewed-by: Simon Horman Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/ice/ice_common.c | 2 +- drivers/net/ethernet/intel/ice/ice_common.h | 2 +- drivers/net/ethernet/intel/ice/ice_gnss.c | 64 ++--------------------------- drivers/net/ethernet/intel/ice/ice_gnss.h | 10 ----- 4 files changed, 6 insertions(+), 72 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 0157f6e98d3e..eb2dc0983776 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -5160,7 +5160,7 @@ ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, */ int ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, - u16 bus_addr, __le16 addr, u8 params, u8 *data, + u16 bus_addr, __le16 addr, u8 params, const u8 *data, struct ice_sq_cd *cd) { struct ice_aq_desc desc = { 0 }; diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 8ba5f935a092..81961a7d6598 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -229,7 +229,7 @@ ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr 
topo_addr, struct ice_sq_cd *cd); int ice_aq_write_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr, - u16 bus_addr, __le16 addr, u8 params, u8 *data, + u16 bus_addr, __le16 addr, u8 params, const u8 *data, struct ice_sq_cd *cd); bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw); #endif /* _ICE_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index 2ea8a2b11bcd..bd0ed155e11b 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -16,8 +16,8 @@ * * number of bytes written - success * * negative - error code */ -static unsigned int -ice_gnss_do_write(struct ice_pf *pf, unsigned char *buf, unsigned int size) +static int +ice_gnss_do_write(struct ice_pf *pf, const unsigned char *buf, unsigned int size) { struct ice_aqc_link_topo_addr link_topo; struct ice_hw *hw = &pf->hw; @@ -72,39 +72,7 @@ err_out: dev_err(ice_pf_to_dev(pf), "GNSS failed to write, offset=%u, size=%u, err=%d\n", offset, size, err); - return offset; -} - -/** - * ice_gnss_write_pending - Write all pending data to internal GNSS - * @work: GNSS write work structure - */ -static void ice_gnss_write_pending(struct kthread_work *work) -{ - struct gnss_serial *gnss = container_of(work, struct gnss_serial, - write_work); - struct ice_pf *pf = gnss->back; - - if (!pf) - return; - - if (!test_bit(ICE_FLAG_GNSS, pf->flags)) - return; - - if (!list_empty(&gnss->queue)) { - struct gnss_write_buf *write_buf = NULL; - unsigned int bytes; - - write_buf = list_first_entry(&gnss->queue, - struct gnss_write_buf, queue); - - bytes = ice_gnss_do_write(pf, write_buf->buf, write_buf->size); - dev_dbg(ice_pf_to_dev(pf), "%u bytes written to GNSS\n", bytes); - - list_del(&write_buf->queue); - kfree(write_buf->buf); - kfree(write_buf); - } + return err; } /** @@ -220,8 +188,6 @@ static struct gnss_serial *ice_gnss_struct_init(struct ice_pf *pf) pf->gnss_serial = gnss; 
kthread_init_delayed_work(&gnss->read_work, ice_gnss_read); - INIT_LIST_HEAD(&gnss->queue); - kthread_init_work(&gnss->write_work, ice_gnss_write_pending); kworker = kthread_create_worker(0, "ice-gnss-%s", dev_name(dev)); if (IS_ERR(kworker)) { kfree(gnss); @@ -281,7 +247,6 @@ static void ice_gnss_close(struct gnss_device *gdev) if (!gnss) return; - kthread_cancel_work_sync(&gnss->write_work); kthread_cancel_delayed_work_sync(&gnss->read_work); } @@ -300,10 +265,7 @@ ice_gnss_write(struct gnss_device *gdev, const unsigned char *buf, size_t count) { struct ice_pf *pf = gnss_get_drvdata(gdev); - struct gnss_write_buf *write_buf; struct gnss_serial *gnss; - unsigned char *cmd_buf; - int err = count; /* We cannot write a single byte using our I2C implementation. */ if (count <= 1 || count > ICE_GNSS_TTY_WRITE_BUF) @@ -319,24 +281,7 @@ ice_gnss_write(struct gnss_device *gdev, const unsigned char *buf, if (!gnss) return -ENODEV; - cmd_buf = kcalloc(count, sizeof(*buf), GFP_KERNEL); - if (!cmd_buf) - return -ENOMEM; - - memcpy(cmd_buf, buf, count); - write_buf = kzalloc(sizeof(*write_buf), GFP_KERNEL); - if (!write_buf) { - kfree(cmd_buf); - return -ENOMEM; - } - - write_buf->buf = cmd_buf; - write_buf->size = count; - INIT_LIST_HEAD(&write_buf->queue); - list_add_tail(&write_buf->queue, &gnss->queue); - kthread_queue_work(gnss->kworker, &gnss->write_work); - - return err; + return ice_gnss_do_write(pf, buf, count); } static const struct gnss_operations ice_gnss_ops = { @@ -432,7 +377,6 @@ void ice_gnss_exit(struct ice_pf *pf) if (pf->gnss_serial) { struct gnss_serial *gnss = pf->gnss_serial; - kthread_cancel_work_sync(&gnss->write_work); kthread_cancel_delayed_work_sync(&gnss->read_work); kthread_destroy_worker(gnss->kworker); gnss->kworker = NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.h b/drivers/net/ethernet/intel/ice/ice_gnss.h index b8bb8b63d081..75e567ad7059 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.h +++ 
b/drivers/net/ethernet/intel/ice/ice_gnss.h @@ -22,26 +22,16 @@ */ #define ICE_GNSS_UBX_WRITE_BYTES (ICE_MAX_I2C_WRITE_BYTES + 1) -struct gnss_write_buf { - struct list_head queue; - unsigned int size; - unsigned char *buf; -}; - /** * struct gnss_serial - data used to initialize GNSS TTY port * @back: back pointer to PF * @kworker: kwork thread for handling periodic work * @read_work: read_work function for handling GNSS reads - * @write_work: write_work function for handling GNSS writes - * @queue: write buffers queue */ struct gnss_serial { struct ice_pf *back; struct kthread_worker *kworker; struct kthread_delayed_work read_work; - struct kthread_work write_work; - struct list_head queue; }; #if IS_ENABLED(CONFIG_GNSS) -- cgit v1.2.3 From f71be9d084c92e0ef36e248303f32f8e4cf623da Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 7 Jun 2023 02:18:49 +0900 Subject: net: liquidio: fix mixed module-builtin object With CONFIG_LIQUIDIO=m and CONFIG_LIQUIDIO_VF=y (or vice versa), $(common-objs) are linked to a module and also to vmlinux even though the expected CFLAGS are different between builtins and modules. This is the same situation as fixed by commit 637a642f5ca5 ("zstd: Fixing mixed module-builtin objects"). Introduce the new module, liquidio-core, to provide the common functions to liquidio and liquidio-vf. Signed-off-by: Masahiro Yamada Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cavium/Kconfig | 5 +++++ drivers/net/ethernet/cavium/liquidio/Makefile | 8 +++++--- .../ethernet/cavium/liquidio/cn23xx_pf_device.c | 4 ++++ .../ethernet/cavium/liquidio/cn23xx_vf_device.c | 3 +++ .../net/ethernet/cavium/liquidio/cn66xx_device.c | 1 + .../net/ethernet/cavium/liquidio/cn68xx_device.c | 1 + drivers/net/ethernet/cavium/liquidio/lio_core.c | 16 +++++++++++++++ drivers/net/ethernet/cavium/liquidio/lio_ethtool.c | 1 + .../net/ethernet/cavium/liquidio/octeon_device.c | 24 ++++++++++++++++++++++ drivers/net/ethernet/cavium/liquidio/octeon_droq.c | 4 ++++ .../net/ethernet/cavium/liquidio/octeon_mem_ops.c | 5 +++++ drivers/net/ethernet/cavium/liquidio/octeon_nic.c | 3 +++ .../net/ethernet/cavium/liquidio/request_manager.c | 14 +++++++++++++ .../ethernet/cavium/liquidio/response_manager.c | 3 +++ 14 files changed, 89 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index 1c76c95b0b27..ca742cc146d7 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -62,6 +62,9 @@ config CAVIUM_PTP Precision Time Protocol or other purposes. Timestamps can be used in BGX, TNS, GTI, and NIC blocks. +config LIQUIDIO_CORE + tristate + config LIQUIDIO tristate "Cavium LiquidIO support" depends on 64BIT && PCI @@ -69,6 +72,7 @@ config LIQUIDIO depends on PTP_1588_CLOCK_OPTIONAL select FW_LOADER select LIBCRC32C + select LIQUIDIO_CORE select NET_DEVLINK help This driver supports Cavium LiquidIO Intelligent Server Adapters @@ -92,6 +96,7 @@ config LIQUIDIO_VF tristate "Cavium LiquidIO VF support" depends on 64BIT && PCI_MSI depends on PTP_1588_CLOCK_OPTIONAL + select LIQUIDIO_CORE help This driver supports Cavium LiquidIO Intelligent Server Adapter based on CN23XX chips. 
diff --git a/drivers/net/ethernet/cavium/liquidio/Makefile b/drivers/net/ethernet/cavium/liquidio/Makefile index bc9937502043..4ee80af88e79 100644 --- a/drivers/net/ethernet/cavium/liquidio/Makefile +++ b/drivers/net/ethernet/cavium/liquidio/Makefile @@ -3,7 +3,9 @@ # Cavium Liquidio ethernet device driver # -common-objs := lio_ethtool.o \ +obj-$(CONFIG_LIQUIDIO_CORE) += liquidio-core.o +liquidio-core-y := \ + lio_ethtool.o \ lio_core.o \ request_manager.o \ response_manager.o \ @@ -18,7 +20,7 @@ common-objs := lio_ethtool.o \ octeon_nic.o obj-$(CONFIG_LIQUIDIO) += liquidio.o -liquidio-y := lio_main.o octeon_console.o lio_vf_rep.o $(common-objs) +liquidio-y := lio_main.o octeon_console.o lio_vf_rep.o obj-$(CONFIG_LIQUIDIO_VF) += liquidio_vf.o -liquidio_vf-y := lio_vf_main.o $(common-objs) +liquidio_vf-y := lio_vf_main.o diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c index 285d3825cad3..068ed52b66c9 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -1375,6 +1375,7 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(setup_cn23xx_octeon_pf_device); int validate_cn23xx_pf_config_info(struct octeon_device *oct, struct octeon_config *conf23xx) @@ -1433,6 +1434,7 @@ int cn23xx_fw_loaded(struct octeon_device *oct) val = octeon_read_csr64(oct, CN23XX_SLI_SCRATCH2); return (val >> SCR2_BIT_FW_LOADED) & 1ULL; } +EXPORT_SYMBOL_GPL(cn23xx_fw_loaded); void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx, u8 *mac) @@ -1454,6 +1456,7 @@ void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx, octeon_mbox_write(oct, &mbox_cmd); } } +EXPORT_SYMBOL_GPL(cn23xx_tell_vf_its_macaddr_changed); static void cn23xx_get_vf_stats_callback(struct octeon_device *oct, @@ -1508,3 +1511,4 @@ int cn23xx_get_vf_stats(struct octeon_device *oct, int 
vfidx, return 0; } +EXPORT_SYMBOL_GPL(cn23xx_get_vf_stats); diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c index b3bd2767d3dd..dd5d80fee24f 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c @@ -384,6 +384,7 @@ void cn23xx_vf_ask_pf_to_do_flr(struct octeon_device *oct) octeon_mbox_write(oct, &mbox_cmd); } +EXPORT_SYMBOL_GPL(cn23xx_vf_ask_pf_to_do_flr); static void octeon_pfvf_hs_callback(struct octeon_device *oct, struct octeon_mbox_cmd *cmd, @@ -466,6 +467,7 @@ int cn23xx_octeon_pfvf_handshake(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(cn23xx_octeon_pfvf_handshake); static void cn23xx_handle_vf_mbox_intr(struct octeon_ioq_vector *ioq_vector) { @@ -678,3 +680,4 @@ int cn23xx_setup_octeon_vf_device(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(cn23xx_setup_octeon_vf_device); diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c index 39643be8c30a..93fccfec288d 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c @@ -697,6 +697,7 @@ int lio_setup_cn66xx_octeon_device(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(lio_setup_cn66xx_octeon_device); int lio_validate_cn6xxx_config_info(struct octeon_device *oct, struct octeon_config *conf6xxx) diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c index 30254e4cf70f..b5103def3761 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c @@ -181,3 +181,4 @@ int lio_setup_cn68xx_octeon_device(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(lio_setup_cn68xx_octeon_device); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c 
b/drivers/net/ethernet/cavium/liquidio/lio_core.c index 882b2be06ea0..9cc6303c82ff 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -26,6 +26,9 @@ #include "octeon_main.h" #include "octeon_network.h" +MODULE_AUTHOR("Cavium Networks, "); +MODULE_LICENSE("GPL"); + /* OOM task polling interval */ #define LIO_OOM_POLL_INTERVAL_MS 250 @@ -71,6 +74,7 @@ void lio_delete_glists(struct lio *lio) kfree(lio->glist); lio->glist = NULL; } +EXPORT_SYMBOL_GPL(lio_delete_glists); /** * lio_setup_glists - Setup gather lists @@ -154,6 +158,7 @@ int lio_setup_glists(struct octeon_device *oct, struct lio *lio, int num_iqs) return 0; } +EXPORT_SYMBOL_GPL(lio_setup_glists); int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1) { @@ -180,6 +185,7 @@ int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1) } return ret; } +EXPORT_SYMBOL_GPL(liquidio_set_feature); void octeon_report_tx_completion_to_bql(void *txq, unsigned int pkts_compl, unsigned int bytes_compl) @@ -395,6 +401,7 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) nctrl->ncmd.s.cmd); } } +EXPORT_SYMBOL_GPL(liquidio_link_ctrl_cmd_completion); void octeon_pf_changed_vf_macaddr(struct octeon_device *oct, u8 *mac) { @@ -478,6 +485,7 @@ int setup_rx_oom_poll_fn(struct net_device *netdev) return 0; } +EXPORT_SYMBOL_GPL(setup_rx_oom_poll_fn); void cleanup_rx_oom_poll_fn(struct net_device *netdev) { @@ -495,6 +503,7 @@ void cleanup_rx_oom_poll_fn(struct net_device *netdev) } } } +EXPORT_SYMBOL_GPL(cleanup_rx_oom_poll_fn); /* Runs in interrupt context. 
*/ static void lio_update_txq_status(struct octeon_device *oct, int iq_num) @@ -899,6 +908,7 @@ int liquidio_setup_io_queues(struct octeon_device *octeon_dev, int ifidx, return 0; } +EXPORT_SYMBOL_GPL(liquidio_setup_io_queues); static int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret) @@ -1194,6 +1204,7 @@ int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs) } return 0; } +EXPORT_SYMBOL_GPL(octeon_setup_interrupt); /** * liquidio_change_mtu - Net device change_mtu @@ -1256,6 +1267,7 @@ int liquidio_change_mtu(struct net_device *netdev, int new_mtu) WRITE_ONCE(sc->caller_is_done, true); return 0; } +EXPORT_SYMBOL_GPL(liquidio_change_mtu); int lio_wait_for_clean_oq(struct octeon_device *oct) { @@ -1279,6 +1291,7 @@ int lio_wait_for_clean_oq(struct octeon_device *oct) return pending_pkts; } +EXPORT_SYMBOL_GPL(lio_wait_for_clean_oq); static void octnet_nic_stats_callback(struct octeon_device *oct_dev, @@ -1509,6 +1522,7 @@ lio_fetch_stats_exit: return; } +EXPORT_SYMBOL_GPL(lio_fetch_stats); int liquidio_set_speed(struct lio *lio, int speed) { @@ -1659,6 +1673,7 @@ int liquidio_get_speed(struct lio *lio) return retval; } +EXPORT_SYMBOL_GPL(liquidio_get_speed); int liquidio_set_fec(struct lio *lio, int on_off) { @@ -1812,3 +1827,4 @@ int liquidio_get_fec(struct lio *lio) return retval; } +EXPORT_SYMBOL_GPL(liquidio_get_fec); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index 2c10ae3f7fc1..9d56181a301f 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -3180,3 +3180,4 @@ void liquidio_set_ethtool_ops(struct net_device *netdev) else netdev->ethtool_ops = &lio_ethtool_ops; } +EXPORT_SYMBOL_GPL(liquidio_set_ethtool_ops); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index e159194d0aef..364f4f912dc2 100644 --- 
a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -564,6 +564,7 @@ void octeon_init_device_list(int conf_type) for (i = 0; i < MAX_OCTEON_DEVICES; i++) oct_set_config_info(i, conf_type); } +EXPORT_SYMBOL_GPL(octeon_init_device_list); static void *__retrieve_octeon_config_info(struct octeon_device *oct, u16 card_type) @@ -633,6 +634,7 @@ char *lio_get_state_string(atomic_t *state_ptr) return oct_dev_state_str[OCT_DEV_STATE_INVALID]; return oct_dev_state_str[istate]; } +EXPORT_SYMBOL_GPL(lio_get_state_string); static char *get_oct_app_string(u32 app_mode) { @@ -661,6 +663,7 @@ void octeon_free_device_mem(struct octeon_device *oct) octeon_device[i] = NULL; octeon_device_count--; } +EXPORT_SYMBOL_GPL(octeon_free_device_mem); static struct octeon_device *octeon_allocate_device_mem(u32 pci_id, u32 priv_size) @@ -747,6 +750,7 @@ struct octeon_device *octeon_allocate_device(u32 pci_id, return oct; } +EXPORT_SYMBOL_GPL(octeon_allocate_device); /** Register a device's bus location at initialization time. * @param octeon_dev - pointer to the octeon device structure. @@ -804,6 +808,7 @@ int octeon_register_device(struct octeon_device *oct, return refcount; } +EXPORT_SYMBOL_GPL(octeon_register_device); /** Deregister a device at de-initialization time. * @param octeon_dev - pointer to the octeon device structure. 
@@ -821,6 +826,7 @@ int octeon_deregister_device(struct octeon_device *oct) return refcount; } +EXPORT_SYMBOL_GPL(octeon_deregister_device); int octeon_allocate_ioq_vector(struct octeon_device *oct, u32 num_ioqs) @@ -853,12 +859,14 @@ octeon_allocate_ioq_vector(struct octeon_device *oct, u32 num_ioqs) return 0; } +EXPORT_SYMBOL_GPL(octeon_allocate_ioq_vector); void octeon_free_ioq_vector(struct octeon_device *oct) { vfree(oct->ioq_vector); } +EXPORT_SYMBOL_GPL(octeon_free_ioq_vector); /* this function is only for setting up the first queue */ int octeon_setup_instr_queues(struct octeon_device *oct) @@ -904,6 +912,7 @@ int octeon_setup_instr_queues(struct octeon_device *oct) oct->num_iqs++; return 0; } +EXPORT_SYMBOL_GPL(octeon_setup_instr_queues); int octeon_setup_output_queues(struct octeon_device *oct) { @@ -940,6 +949,7 @@ int octeon_setup_output_queues(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(octeon_setup_output_queues); int octeon_set_io_queues_off(struct octeon_device *oct) { @@ -989,6 +999,7 @@ int octeon_set_io_queues_off(struct octeon_device *oct) } return 0; } +EXPORT_SYMBOL_GPL(octeon_set_io_queues_off); void octeon_set_droq_pkt_op(struct octeon_device *oct, u32 q_no, @@ -1027,6 +1038,7 @@ int octeon_init_dispatch_list(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(octeon_init_dispatch_list); void octeon_delete_dispatch_list(struct octeon_device *oct) { @@ -1058,6 +1070,7 @@ void octeon_delete_dispatch_list(struct octeon_device *oct) kfree(temp); } } +EXPORT_SYMBOL_GPL(octeon_delete_dispatch_list); octeon_dispatch_fn_t octeon_get_dispatch(struct octeon_device *octeon_dev, u16 opcode, @@ -1180,6 +1193,7 @@ octeon_register_dispatch_fn(struct octeon_device *oct, return 0; } +EXPORT_SYMBOL_GPL(octeon_register_dispatch_fn); int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf) { @@ -1262,6 +1276,7 @@ core_drv_init_err: octeon_free_recv_info(recv_info); return 0; } +EXPORT_SYMBOL_GPL(octeon_core_drv_init); int 
octeon_get_tx_qsize(struct octeon_device *oct, u32 q_no) @@ -1272,6 +1287,7 @@ int octeon_get_tx_qsize(struct octeon_device *oct, u32 q_no) return -1; } +EXPORT_SYMBOL_GPL(octeon_get_tx_qsize); int octeon_get_rx_qsize(struct octeon_device *oct, u32 q_no) { @@ -1280,6 +1296,7 @@ int octeon_get_rx_qsize(struct octeon_device *oct, u32 q_no) return oct->droq[q_no]->max_count; return -1; } +EXPORT_SYMBOL_GPL(octeon_get_rx_qsize); /* Retruns the host firmware handshake OCTEON specific configuration */ struct octeon_config *octeon_get_conf(struct octeon_device *oct) @@ -1302,6 +1319,7 @@ struct octeon_config *octeon_get_conf(struct octeon_device *oct) } return default_oct_conf; } +EXPORT_SYMBOL_GPL(octeon_get_conf); /* scratch register address is same in all the OCT-II and CN70XX models */ #define CNXX_SLI_SCRATCH1 0x3C0 @@ -1318,6 +1336,7 @@ struct octeon_device *lio_get_device(u32 octeon_id) else return octeon_device[octeon_id]; } +EXPORT_SYMBOL_GPL(lio_get_device); u64 lio_pci_readq(struct octeon_device *oct, u64 addr) { @@ -1349,6 +1368,7 @@ u64 lio_pci_readq(struct octeon_device *oct, u64 addr) return val64; } +EXPORT_SYMBOL_GPL(lio_pci_readq); void lio_pci_writeq(struct octeon_device *oct, u64 val, @@ -1369,6 +1389,7 @@ void lio_pci_writeq(struct octeon_device *oct, spin_unlock_irqrestore(&oct->pci_win_lock, flags); } +EXPORT_SYMBOL_GPL(lio_pci_writeq); int octeon_mem_access_ok(struct octeon_device *oct) { @@ -1388,6 +1409,7 @@ int octeon_mem_access_ok(struct octeon_device *oct) return access_okay ? 0 : 1; } +EXPORT_SYMBOL_GPL(octeon_mem_access_ok); int octeon_wait_for_ddr_init(struct octeon_device *oct, u32 *timeout) { @@ -1408,6 +1430,7 @@ int octeon_wait_for_ddr_init(struct octeon_device *oct, u32 *timeout) return ret; } +EXPORT_SYMBOL_GPL(octeon_wait_for_ddr_init); /* Get the octeon id assigned to the octeon device passed as argument. * This function is exported to other modules. 
@@ -1462,3 +1485,4 @@ void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq) } } } +EXPORT_SYMBOL_GPL(lio_enable_irq); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index d4080bddcb6b..0d6ee30affb9 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -107,6 +107,7 @@ u32 octeon_droq_check_hw_for_pkts(struct octeon_droq *droq) return last_count; } +EXPORT_SYMBOL_GPL(octeon_droq_check_hw_for_pkts); static void octeon_droq_compute_max_packet_bufs(struct octeon_droq *droq) { @@ -216,6 +217,7 @@ int octeon_delete_droq(struct octeon_device *oct, u32 q_no) return 0; } +EXPORT_SYMBOL_GPL(octeon_delete_droq); int octeon_init_droq(struct octeon_device *oct, u32 q_no, @@ -773,6 +775,7 @@ octeon_droq_process_packets(struct octeon_device *oct, return 0; } +EXPORT_SYMBOL_GPL(octeon_droq_process_packets); /* * Utility function to poll for packets. 
check_hw_for_packets must be @@ -921,6 +924,7 @@ int octeon_unregister_droq_ops(struct octeon_device *oct, u32 q_no) return 0; } +EXPORT_SYMBOL_GPL(octeon_unregister_droq_ops); int octeon_create_droq(struct octeon_device *oct, u32 q_no, u32 num_descs, diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c index 7ccab36143c1..d70132437af3 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c @@ -164,6 +164,7 @@ octeon_pci_read_core_mem(struct octeon_device *oct, { __octeon_pci_rw_core_mem(oct, coreaddr, buf, len, 1); } +EXPORT_SYMBOL_GPL(octeon_pci_read_core_mem); void octeon_pci_write_core_mem(struct octeon_device *oct, @@ -173,6 +174,7 @@ octeon_pci_write_core_mem(struct octeon_device *oct, { __octeon_pci_rw_core_mem(oct, coreaddr, (u8 *)buf, len, 0); } +EXPORT_SYMBOL_GPL(octeon_pci_write_core_mem); u64 octeon_read_device_mem64(struct octeon_device *oct, u64 coreaddr) { @@ -182,6 +184,7 @@ u64 octeon_read_device_mem64(struct octeon_device *oct, u64 coreaddr) return be64_to_cpu(ret); } +EXPORT_SYMBOL_GPL(octeon_read_device_mem64); u32 octeon_read_device_mem32(struct octeon_device *oct, u64 coreaddr) { @@ -191,6 +194,7 @@ u32 octeon_read_device_mem32(struct octeon_device *oct, u64 coreaddr) return be32_to_cpu(ret); } +EXPORT_SYMBOL_GPL(octeon_read_device_mem32); void octeon_write_device_mem32(struct octeon_device *oct, u64 coreaddr, u32 val) @@ -199,3 +203,4 @@ void octeon_write_device_mem32(struct octeon_device *oct, u64 coreaddr, __octeon_pci_rw_core_mem(oct, coreaddr, (u8 *)&t, 4, 0); } +EXPORT_SYMBOL_GPL(octeon_write_device_mem32); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c index 1a706f81bbb0..dee56ea740e7 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c @@ -79,6 +79,7 @@ 
octeon_alloc_soft_command_resp(struct octeon_device *oct, return sc; } +EXPORT_SYMBOL_GPL(octeon_alloc_soft_command_resp); int octnet_send_nic_data_pkt(struct octeon_device *oct, struct octnic_data_pkt *ndata, @@ -90,6 +91,7 @@ int octnet_send_nic_data_pkt(struct octeon_device *oct, ndata->buf, ndata->datasize, ndata->reqtype); } +EXPORT_SYMBOL_GPL(octnet_send_nic_data_pkt); static inline struct octeon_soft_command *octnic_alloc_ctrl_pkt_sc(struct octeon_device *oct, @@ -196,3 +198,4 @@ octnet_send_nic_ctrl_pkt(struct octeon_device *oct, return retval; } +EXPORT_SYMBOL_GPL(octnet_send_nic_ctrl_pkt); diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index 32f854c0cd79..de8a6ce86ad7 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -185,6 +185,7 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no) } return 1; } +EXPORT_SYMBOL_GPL(octeon_delete_instr_queue); /* Return 0 on success, 1 on failure */ int octeon_setup_iq(struct octeon_device *oct, @@ -258,6 +259,7 @@ int lio_wait_for_instr_fetch(struct octeon_device *oct) return instr_cnt; } +EXPORT_SYMBOL_GPL(lio_wait_for_instr_fetch); static inline void ring_doorbell(struct octeon_device *oct, struct octeon_instr_queue *iq) @@ -282,6 +284,7 @@ octeon_ring_doorbell_locked(struct octeon_device *oct, u32 iq_no) ring_doorbell(oct, iq); spin_unlock(&iq->post_lock); } +EXPORT_SYMBOL_GPL(octeon_ring_doorbell_locked); static inline void __copy_cmd_into_iq(struct octeon_instr_queue *iq, u8 *cmd) @@ -345,6 +348,7 @@ octeon_register_reqtype_free_fn(struct octeon_device *oct, int reqtype, return 0; } +EXPORT_SYMBOL_GPL(octeon_register_reqtype_free_fn); static inline void __add_to_request_list(struct octeon_instr_queue *iq, @@ -430,6 +434,7 @@ lio_process_iq_request_list(struct octeon_device *oct, return inst_count; } 
+EXPORT_SYMBOL_GPL(lio_process_iq_request_list); /* Can only be called from process context */ int @@ -566,6 +571,7 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no, return st.status; } +EXPORT_SYMBOL_GPL(octeon_send_command); void octeon_prepare_soft_command(struct octeon_device *oct, @@ -673,6 +679,7 @@ octeon_prepare_soft_command(struct octeon_device *oct, } } } +EXPORT_SYMBOL_GPL(octeon_prepare_soft_command); int octeon_send_soft_command(struct octeon_device *oct, struct octeon_soft_command *sc) @@ -726,6 +733,7 @@ int octeon_send_soft_command(struct octeon_device *oct, return (octeon_send_command(oct, sc->iq_no, 1, &sc->cmd, sc, len, REQTYPE_SOFT_COMMAND)); } +EXPORT_SYMBOL_GPL(octeon_send_soft_command); int octeon_setup_sc_buffer_pool(struct octeon_device *oct) { @@ -755,6 +763,7 @@ int octeon_setup_sc_buffer_pool(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(octeon_setup_sc_buffer_pool); int octeon_free_sc_done_list(struct octeon_device *oct) { @@ -794,6 +803,7 @@ int octeon_free_sc_done_list(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(octeon_free_sc_done_list); int octeon_free_sc_zombie_list(struct octeon_device *oct) { @@ -818,6 +828,7 @@ int octeon_free_sc_zombie_list(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(octeon_free_sc_zombie_list); int octeon_free_sc_buffer_pool(struct octeon_device *oct) { @@ -842,6 +853,7 @@ int octeon_free_sc_buffer_pool(struct octeon_device *oct) return 0; } +EXPORT_SYMBOL_GPL(octeon_free_sc_buffer_pool); struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct, u32 datasize, @@ -913,6 +925,7 @@ struct octeon_soft_command *octeon_alloc_soft_command(struct octeon_device *oct, return sc; } +EXPORT_SYMBOL_GPL(octeon_alloc_soft_command); void octeon_free_soft_command(struct octeon_device *oct, struct octeon_soft_command *sc) @@ -925,3 +938,4 @@ void octeon_free_soft_command(struct octeon_device *oct, spin_unlock_bh(&oct->sc_buf_pool.lock); } 
+EXPORT_SYMBOL_GPL(octeon_free_soft_command); diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c index ac7747ccf56a..861050966e18 100644 --- a/drivers/net/ethernet/cavium/liquidio/response_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c @@ -52,12 +52,14 @@ int octeon_setup_response_list(struct octeon_device *oct) return ret; } +EXPORT_SYMBOL_GPL(octeon_setup_response_list); void octeon_delete_response_list(struct octeon_device *oct) { cancel_delayed_work_sync(&oct->dma_comp_wq.wk.work); destroy_workqueue(oct->dma_comp_wq.wq); } +EXPORT_SYMBOL_GPL(octeon_delete_response_list); int lio_process_ordered_list(struct octeon_device *octeon_dev, u32 force_quit) @@ -219,6 +221,7 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev, return 0; } +EXPORT_SYMBOL_GPL(lio_process_ordered_list); static void oct_poll_req_completion(struct work_struct *work) { -- cgit v1.2.3 From e7214663e023be5e518e8d0d8f2dca6848731652 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Tue, 6 Jun 2023 15:49:45 +0200 Subject: net: txgbe: Avoid passing uninitialised parameter to pci_wake_from_d3() txgbe_shutdown() relies on txgbe_dev_shutdown() to initialise wake by passing it by reference. However, txgbe_dev_shutdown() doesn't use this parameter at all. wake is then passed uninitialised by txgbe_shutdown() to pci_wake_from_d3(). Resolve this problem by: * Removing the unused parameter from txgbe_dev_shutdown() * Removing the uninitialised variable wake from txgbe_shutdown() * Passing false to pci_wake_from_d3() - this assumes that, although uninitialised, wake was in practice false (0). I'm not sure that this counts as a bug, as I'm not sure that it manifests in any unwanted behaviour. But in any case, the issue was introduced by: 3ce7547e5b71 ("net: txgbe: Add build support for txgbe") Flagged by Smatch as: .../txgbe_main.c:486 txgbe_shutdown() error: uninitialized symbol 'wake'. 
No functional change intended. Compile tested only. Signed-off-by: Simon Horman Reviewed-by: Jiawen Wu Reviewed-by: Maciej Fijalkowski Signed-off-by: David S. Miller --- drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 0f0d9fa1cde1..cfe47f3d2503 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -457,7 +457,7 @@ static int txgbe_close(struct net_device *netdev) return 0; } -static void txgbe_dev_shutdown(struct pci_dev *pdev, bool *enable_wake) +static void txgbe_dev_shutdown(struct pci_dev *pdev) { struct wx *wx = pci_get_drvdata(pdev); struct net_device *netdev; @@ -477,12 +477,10 @@ static void txgbe_dev_shutdown(struct pci_dev *pdev, bool *enable_wake) static void txgbe_shutdown(struct pci_dev *pdev) { - bool wake; - - txgbe_dev_shutdown(pdev, &wake); + txgbe_dev_shutdown(pdev); if (system_state == SYSTEM_POWER_OFF) { - pci_wake_from_d3(pdev, wake); + pci_wake_from_d3(pdev, false); pci_set_power_state(pdev, PCI_D3hot); } } -- cgit v1.2.3 From 2d830f7a41343302ab19e73d4f44f5ccb6940a25 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 7 Jun 2023 15:59:37 +0200 Subject: net: altera-tse: Initialize local structs before using it The regmap_config and mdio_regmap_config objects need to be zeroed before they are used. Leaving them uninitialized causes spurious errors at probe time, as config->pad_bits then contains random uninitialized data. 
Fixes: db48abbaa18e ("net: ethernet: altera-tse: Convert to mdio-regmap and use PCS Lynx") Signed-off-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/altera/altera_tse_main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index d866c0f1b503..215f9fb89c5b 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1255,6 +1255,8 @@ static int altera_tse_probe(struct platform_device *pdev) if (ret) goto err_free_netdev; + memset(&pcs_regmap_cfg, 0, sizeof(pcs_regmap_cfg)); + memset(&mrc, 0, sizeof(mrc)); /* SGMII PCS address space. The location can vary depending on how the * IP is integrated. We can have a resource dedicated to it at a specific * address space, but if it's not the case, we fallback to the mdiophy0 -- cgit v1.2.3 From fae555f5a56f1d10cc5dc6ec3ad4f07243f6ce3c Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 7 Jun 2023 15:59:38 +0200 Subject: net: altera_tse: Use the correct Kconfig option for the PCS_LYNX dependency Use the correct Kconfig dependency for altera_tse as PCS_ALTERA_TSE was replaced by PCS_LYNX. 
Fixes: db48abbaa18e ("net: ethernet: altera-tse: Convert to mdio-regmap and use PCS Lynx") Signed-off-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/altera/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/altera/Kconfig b/drivers/net/ethernet/altera/Kconfig index 93533ba03429..17985319088c 100644 --- a/drivers/net/ethernet/altera/Kconfig +++ b/drivers/net/ethernet/altera/Kconfig @@ -4,7 +4,7 @@ config ALTERA_TSE depends on HAS_DMA select PHYLIB select PHYLINK - select PCS_ALTERA_TSE + select PCS_LYNX select MDIO_REGMAP select REGMAP_MMIO help -- cgit v1.2.3 From a8dd7404c21447b46e792a483f4d73af66ccaf8d Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 7 Jun 2023 15:59:39 +0200 Subject: net: stmmac: make the pcs_lynx cleanup sequence specific to dwmac_socfpga So far, only the dwmac_socfpga variant of stmmac uses PCS Lynx. Use a dedicated cleanup sequence for dwmac_socfpga instead of using the generic stmmac one. 
Fixes: 5d1f3fe7d2d5 ("net: stmmac: dwmac-sogfpga: use the lynx pcs driver") Suggested-by: Russell King (Oracle) Signed-off-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 1 - drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 14 +++++++++++++- drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 3 --- 3 files changed, 13 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 52c5ec553276..16e67c18b6f7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -16,7 +16,6 @@ #include #include #include -#include #include #if IS_ENABLED(CONFIG_VLAN_8021Q) #define STMMAC_VLAN_TAG_USED diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index e399fccbafe5..1fb808be843b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -494,6 +495,17 @@ err_remove_config_dt: return ret; } +static void socfpga_dwmac_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct stmmac_priv *priv = netdev_priv(ndev); + struct phylink_pcs *pcs = priv->hw->lynx_pcs; + + stmmac_pltfr_remove(pdev); + + lynx_pcs_destroy(pcs); +} + #ifdef CONFIG_PM_SLEEP static int socfpga_dwmac_resume(struct device *dev) { @@ -565,7 +577,7 @@ MODULE_DEVICE_TABLE(of, socfpga_dwmac_match); static struct platform_driver socfpga_dwmac_driver = { .probe = socfpga_dwmac_probe, - .remove_new = stmmac_pltfr_remove, + .remove_new = socfpga_dwmac_remove, .driver = { .name = "socfpga-dwmac", .pm = &socfpga_dwmac_pm_ops, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index c784a6731f08..3db1cb0fd160 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -665,9 +665,6 @@ int stmmac_mdio_unregister(struct net_device *ndev) if (priv->hw->xpcs) xpcs_destroy(priv->hw->xpcs); - if (priv->hw->lynx_pcs) - lynx_pcs_destroy(priv->hw->lynx_pcs); - mdiobus_unregister(priv->mii); priv->mii->priv = NULL; mdiobus_free(priv->mii); -- cgit v1.2.3 From fa19a5d9dcffddae2cb5774df98b09ca3f2ea783 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 7 Jun 2023 15:59:40 +0200 Subject: net: altera_tse: explicitly disable autoscan on the regmap-mdio bus Set the .autoscan flag to false on the regmap-mdio bus, to avoid using a random uninitialized value. We don't want autoscan in this case as the mdio device is a PCS and not a PHY. Fixes: db48abbaa18e ("net: ethernet: altera-tse: Convert to mdio-regmap and use PCS Lynx") Suggested-by: Russell King (Oracle) Signed-off-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/altera/altera_tse_main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 215f9fb89c5b..2e15800e5310 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1288,6 +1288,7 @@ static int altera_tse_probe(struct platform_device *pdev) mrc.regmap = pcs_regmap; mrc.parent = &pdev->dev; mrc.valid_addr = 0x0; + mrc.autoscan = false; /* Rx IRQ */ priv->rx_irq = platform_get_irq_byname(pdev, "rx_irq"); -- cgit v1.2.3 From 06b9dede1e7d8b7a199f8014aca5a7d7137b41b0 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 7 Jun 2023 15:59:41 +0200 Subject: net: dwmac_socfpga: initialize local data for mdio regmap configuration Explicitly zero-ize the local mdio_regmap_config 
data, and explicitly set the .autoscan parameter, as we only have a PCS on this bus. Fixes: 5d1f3fe7d2d5 ("net: stmmac: dwmac-sogfpga: use the lynx pcs driver") Suggested-by: Russell King (Oracle) Suggested-by: Maciej Fijalkowski Signed-off-by: Maxime Chevallier Reviewed-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 1fb808be843b..6267bcb60206 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -389,7 +389,6 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) struct net_device *ndev; struct stmmac_priv *stpriv; const struct socfpga_dwmac_ops *ops; - struct regmap_config pcs_regmap_cfg; ops = device_get_match_data(&pdev->dev); if (!ops) { @@ -447,19 +446,22 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) if (ret) goto err_dvr_remove; - memset(&pcs_regmap_cfg, 0, sizeof(pcs_regmap_cfg)); - pcs_regmap_cfg.reg_bits = 16; - pcs_regmap_cfg.val_bits = 16; - pcs_regmap_cfg.reg_shift = REGMAP_UPSHIFT(1); - /* Create a regmap for the PCS so that it can be used by the PCS driver, * if we have such a PCS */ if (dwmac->tse_pcs_base) { + struct regmap_config pcs_regmap_cfg; struct mdio_regmap_config mrc; struct regmap *pcs_regmap; struct mii_bus *pcs_bus; + memset(&pcs_regmap_cfg, 0, sizeof(pcs_regmap_cfg)); + memset(&mrc, 0, sizeof(mrc)); + + pcs_regmap_cfg.reg_bits = 16; + pcs_regmap_cfg.val_bits = 16; + pcs_regmap_cfg.reg_shift = REGMAP_UPSHIFT(1); + pcs_regmap = devm_regmap_init_mmio(&pdev->dev, dwmac->tse_pcs_base, &pcs_regmap_cfg); if (IS_ERR(pcs_regmap)) { @@ -470,6 +472,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) mrc.regmap = pcs_regmap; mrc.parent = 
&pdev->dev; mrc.valid_addr = 0x0; + mrc.autoscan = false; snprintf(mrc.name, MII_BUS_ID_SIZE, "%s-pcs-mii", ndev->name); pcs_bus = devm_mdio_regmap_register(&pdev->dev, &mrc); -- cgit v1.2.3 From 222dd185833e464faad2d175c14bca584b6b6dad Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Jun 2023 00:12:06 -0700 Subject: {net/RDMA}/mlx5: introduce lag_for_each_peer Introduce a generic API to iterate over all the devices which are part of the LAG. This API replaces mlx5_lag_get_peer_mdev(), which retrieves only a single peer device from the LAG. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/ib_rep.c | 98 ++++++++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 24 +++--- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 21 +++-- include/linux/mlx5/driver.h | 8 +- 4 files changed, 100 insertions(+), 51 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index a4db22fe1883..c7a4ee896121 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -30,45 +30,65 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev); +static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports) +{ + struct mlx5_core_dev *peer_dev; + int i; + + mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) { + u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev); + + if (mlx5_lag_is_mpesw(peer_dev)) + *num_ports += peer_num_ports; + else + /* Only 1 ib port is the representor for all uplinks */ + *num_ports += peer_num_ports - 1; + } +} + static int mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { u32 num_ports = mlx5_eswitch_get_total_vports(dev); + struct mlx5_core_dev *lag_master = dev; const struct mlx5_ib_profile *profile; struct mlx5_core_dev *peer_dev; struct mlx5_ib_dev 
*ibdev; - int second_uplink = false; - u32 peer_num_ports; + int new_uplink = false; int vport_index; int ret; + int i; vport_index = rep->vport_index; if (mlx5_lag_is_shared_fdb(dev)) { - peer_dev = mlx5_lag_get_peer_mdev(dev); - peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev); if (mlx5_lag_is_master(dev)) { - if (mlx5_lag_is_mpesw(dev)) - num_ports += peer_num_ports; - else - num_ports += peer_num_ports - 1; - + mlx5_ib_num_ports_update(dev, &num_ports); } else { if (rep->vport == MLX5_VPORT_UPLINK) { if (!mlx5_lag_is_mpesw(dev)) return 0; - second_uplink = true; + new_uplink = true; } + mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) { + u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev); + + if (mlx5_lag_is_master(peer_dev)) + lag_master = peer_dev; + else if (!mlx5_lag_is_mpesw(dev)) + /* Only 1 ib port is the representor for all uplinks */ + peer_n_ports--; - vport_index += peer_num_ports; - dev = peer_dev; + if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev)) + vport_index += peer_n_ports; + } } } - if (rep->vport == MLX5_VPORT_UPLINK && !second_uplink) + if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink) profile = &raw_eth_profile; else - return mlx5_ib_set_vport_rep(dev, rep, vport_index); + return mlx5_ib_set_vport_rep(lag_master, rep, vport_index); ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!ibdev) @@ -85,8 +105,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) vport_index = rep->vport_index; ibdev->port[vport_index].rep = rep; ibdev->port[vport_index].roce.netdev = - mlx5_ib_get_rep_netdev(dev->priv.eswitch, rep->vport); - ibdev->mdev = dev; + mlx5_ib_get_rep_netdev(lag_master->priv.eswitch, rep->vport); + ibdev->mdev = lag_master; ibdev->num_ports = num_ports; ret = __mlx5_ib_add(ibdev, profile); @@ -94,8 +114,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto fail_add; rep->rep_data[REP_IB].priv = ibdev; - if (mlx5_lag_is_shared_fdb(dev)) - 
mlx5_ib_register_peer_vport_reps(dev); + if (mlx5_lag_is_shared_fdb(lag_master)) + mlx5_ib_register_peer_vport_reps(lag_master); return 0; @@ -118,23 +138,27 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep); int vport_index = rep->vport_index; struct mlx5_ib_port *port; + int i; if (WARN_ON(!mdev)) return; + if (!dev) + return; + if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev)) { - struct mlx5_core_dev *peer_mdev; - if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev)) return; - peer_mdev = mlx5_lag_get_peer_mdev(mdev); - vport_index += mlx5_eswitch_get_total_vports(peer_mdev); + for (i = 0; i < dev->num_ports; i++) { + if (dev->port[i].rep == rep) + break; + } + if (WARN_ON(i == dev->num_ports)) + return; + vport_index = i; } - if (!dev) - return; - port = &dev->port[vport_index]; write_lock(&port->roce.netdev_lock); port->roce.netdev = NULL; @@ -143,16 +167,18 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) port->rep = NULL; if (rep->vport == MLX5_VPORT_UPLINK) { - struct mlx5_core_dev *peer_mdev; - struct mlx5_eswitch *esw; if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev)) return; if (mlx5_lag_is_shared_fdb(mdev)) { - peer_mdev = mlx5_lag_get_peer_mdev(mdev); - esw = peer_mdev->priv.eswitch; - mlx5_eswitch_unregister_vport_reps(esw, REP_IB); + struct mlx5_core_dev *peer_mdev; + struct mlx5_eswitch *esw; + + mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) { + esw = peer_mdev->priv.eswitch; + mlx5_eswitch_unregister_vport_reps(esw, REP_IB); + } } __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); } @@ -166,14 +192,14 @@ static const struct mlx5_eswitch_rep_ops rep_ops = { static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev) { - struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev); + struct mlx5_core_dev *peer_mdev; struct mlx5_eswitch *esw; + int i; - if (!peer_mdev) - return; - - esw = peer_mdev->priv.eswitch; - 
mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB); + mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) { + esw = peer_mdev->priv.eswitch; + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB); + } } struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 11374c3744c5..8a10ed4d8cbb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -244,16 +244,22 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, ft->type == FS_FT_FDB && mlx5_lag_is_shared_fdb(dev) && mlx5_lag_is_master(dev)) { - err = mlx5_cmd_set_slave_root_fdb(dev, - mlx5_lag_get_peer_mdev(dev), - !disconnect, (!disconnect) ? - ft->id : 0); - if (err && !disconnect) { - MLX5_SET(set_flow_table_root_in, in, op_mod, 0); - MLX5_SET(set_flow_table_root_in, in, table_id, - ns->root_ft->id); - mlx5_cmd_exec_in(dev, set_flow_table_root, in); + struct mlx5_core_dev *peer_dev; + int i; + + mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) { + err = mlx5_cmd_set_slave_root_fdb(dev, peer_dev, !disconnect, + (!disconnect) ? 
ft->id : 0); + if (err && !disconnect) { + MLX5_SET(set_flow_table_root_in, in, op_mod, 0); + MLX5_SET(set_flow_table_root_in, in, table_id, + ns->root_ft->id); + mlx5_cmd_exec_in(dev, set_flow_table_root, in); + } + if (err) + break; } + } return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index c820f7d266de..c55e36e0571d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1519,26 +1519,37 @@ u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_get_num_ports); -struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev) +struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i) { struct mlx5_core_dev *peer_dev = NULL; struct mlx5_lag *ldev; unsigned long flags; + int idx; spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); if (!ldev) goto unlock; - peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ? 
- ldev->pf[MLX5_LAG_P2].dev : - ldev->pf[MLX5_LAG_P1].dev; + if (*i == ldev->ports) + goto unlock; + for (idx = *i; idx < ldev->ports; idx++) + if (ldev->pf[idx].dev != dev) + break; + + if (idx == ldev->ports) { + *i = idx; + goto unlock; + } + *i = idx + 1; + + peer_dev = ldev->pf[idx].dev; unlock: spin_unlock_irqrestore(&lag_lock, flags); return peer_dev; } -EXPORT_SYMBOL(mlx5_lag_get_peer_mdev); +EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev); int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, u64 *values, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 94d2be5848ae..9a744c48eec2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1174,7 +1174,13 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, u64 *values, int num_counters, size_t *offsets); -struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev); +struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i); + +#define mlx5_lag_for_each_peer_mdev(dev, peer, i) \ + for (i = 0, peer = mlx5_lag_get_next_peer_mdev(dev, &i); \ + peer; \ + peer = mlx5_lag_get_next_peer_mdev(dev, &i)) + u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev); struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); -- cgit v1.2.3 From 4c103aea4bedfb109e91bed2023178059947fc4c Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Jun 2023 00:12:07 -0700 Subject: net/mlx5: LAG, check if all eswitches are paired for shared FDB Shared FDB LAG can only work if all eswitches are paired. Also, whenever two eswitches are paired, devcom is marked as ready. Therefore, in case of device with two eswitches, checking devcom was sufficient. However, this is not correct for device with more than two eswitches, which will be introduced in downstream patch. Hence, check all eswitches are paired explicitly. 
Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 9 +++++++++ drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 4 +++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index c42c16d9ccbc..d3608f198e0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -779,6 +779,13 @@ static inline int mlx5_eswitch_num_vfs(struct mlx5_eswitch *esw) return 0; } +static inline int mlx5_eswitch_get_npeers(struct mlx5_eswitch *esw) +{ + if (mlx5_esw_allowed(esw)) + return esw->num_peers; + return 0; +} + static inline struct mlx5_flow_table * mlx5_eswitch_get_slow_fdb(struct mlx5_eswitch *esw) { @@ -826,6 +833,8 @@ static inline void mlx5_eswitch_offloads_single_fdb_del_one(struct mlx5_eswitch *master_esw, struct mlx5_eswitch *slave_esw) {} +static inline int mlx5_eswitch_get_npeers(struct mlx5_eswitch *esw) { return 0; } + static inline int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index c55e36e0571d..dd8a19d85617 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -828,7 +828,9 @@ bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) MLX5_DEVCOM_ESW_OFFLOADS) && MLX5_CAP_GEN(dev1, lag_native_fdb_selection) && MLX5_CAP_ESW(dev1, root_ft_on_other_esw) && - MLX5_CAP_ESW(dev0, esw_shared_ingress_acl)) + MLX5_CAP_ESW(dev0, esw_shared_ingress_acl) && + mlx5_eswitch_get_npeers(dev0->priv.eswitch) == MLX5_CAP_GEN(dev0, num_lag_ports) - 1 && + mlx5_eswitch_get_npeers(dev1->priv.eswitch) == MLX5_CAP_GEN(dev1, num_lag_ports) - 1) return true; return false; -- cgit v1.2.3 From 
86a12124dc0249d60bb5f01497b3a86e99efae6d Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Jun 2023 00:12:08 -0700 Subject: net/mlx5: LAG, generalize handling of shared FDB Shared FDB handling is using the assumption that shared FDB can only be created from two devices. In order to support shared FDB of more than two devices, iterate over all LAG ports instead of hard coding only the first two LAG ports whenever handling shared FDB. Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 66 +++++++++++++---------- 1 file changed, 38 insertions(+), 28 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index dd8a19d85617..00773aab9d20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -512,8 +512,11 @@ static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev, return; if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) && - tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) + tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) { + if (ldev->ports > 2) + ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS; set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags); + } } static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, @@ -782,7 +785,6 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) { bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; bool roce_lag; int err; int i; @@ -807,30 +809,35 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) if (shared_fdb || roce_lag) mlx5_lag_add_devices(ldev); - if (shared_fdb) { - if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) - mlx5_eswitch_reload_reps(dev0->priv.eswitch); - if (!(dev1->priv.flags & 
MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) - mlx5_eswitch_reload_reps(dev1->priv.eswitch); - } + if (shared_fdb) + for (i = 0; i < ldev->ports; i++) + if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) + mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); } bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; - - if (is_mdev_switchdev_mode(dev0) && - is_mdev_switchdev_mode(dev1) && - mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) && - mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) && - mlx5_devcom_comp_is_ready(dev0->priv.devcom, - MLX5_DEVCOM_ESW_OFFLOADS) && - MLX5_CAP_GEN(dev1, lag_native_fdb_selection) && - MLX5_CAP_ESW(dev1, root_ft_on_other_esw) && - MLX5_CAP_ESW(dev0, esw_shared_ingress_acl) && - mlx5_eswitch_get_npeers(dev0->priv.eswitch) == MLX5_CAP_GEN(dev0, num_lag_ports) - 1 && - mlx5_eswitch_get_npeers(dev1->priv.eswitch) == MLX5_CAP_GEN(dev1, num_lag_ports) - 1) + struct mlx5_core_dev *dev; + int i; + + for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + if (is_mdev_switchdev_mode(dev) && + mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && + MLX5_CAP_GEN(dev, lag_native_fdb_selection) && + MLX5_CAP_ESW(dev, root_ft_on_other_esw) && + mlx5_eswitch_get_npeers(dev->priv.eswitch) == + MLX5_CAP_GEN(dev, num_lag_ports) - 1) + continue; + return false; + } + + dev = ldev->pf[MLX5_LAG_P1].dev; + if (is_mdev_switchdev_mode(dev) && + mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && + mlx5_devcom_comp_is_ready(dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS) && + MLX5_CAP_ESW(dev, esw_shared_ingress_acl) && + mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1) return true; return false; @@ -867,7 +874,6 @@ static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond) static void mlx5_do_bond(struct 
mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; struct lag_tracker tracker = { }; bool do_bond, roce_lag; int err; @@ -908,20 +914,24 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) for (i = 1; i < ldev->ports; i++) mlx5_nic_vport_enable_roce(ldev->pf[i].dev); } else if (shared_fdb) { + int i; + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); - err = mlx5_eswitch_reload_reps(dev0->priv.eswitch); - if (!err) - err = mlx5_eswitch_reload_reps(dev1->priv.eswitch); + for (i = 0; i < ldev->ports; i++) { + err = mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); + if (err) + break; + } if (err) { dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); mlx5_deactivate_lag(ldev); mlx5_lag_add_devices(ldev); - mlx5_eswitch_reload_reps(dev0->priv.eswitch); - mlx5_eswitch_reload_reps(dev1->priv.eswitch); + for (i = 0; i < ldev->ports; i++) + mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); mlx5_core_err(dev0, "Failed to enable lag\n"); return; } -- cgit v1.2.3 From c83e6ab96ef20bcdb5bfb12f42bd2e6734920bfd Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Jun 2023 00:12:09 -0700 Subject: net/mlx5: LAG, change mlx5_shared_fdb_supported() to static mlx5_shared_fdb_supported() is used only in a single file. Change the function to be static. 
Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 00773aab9d20..6ce71c42c755 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -815,7 +815,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) mlx5_eswitch_reload_reps(ldev->pf[i].dev->priv.eswitch); } -bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) +static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev; int i; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index bc1f1dd3e283..d7e7fa2348a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -111,7 +111,6 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, bool shared_fdb); int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, struct net_device *ndev); -bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev); char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags); void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, -- cgit v1.2.3 From d61bab396115dafc9ad572afa57d464e2e00b396 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Jun 2023 00:12:10 -0700 Subject: net/mlx5: LAG, block multipath LAG in case ldev have more than 2 ports multipath LAG is not supported over more than two ports. Add a check in order to block multipath LAG over such configurations. 
Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index d85a8dfc153d..976caa8e6922 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -14,6 +14,7 @@ static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev) return ldev->mode == MLX5_LAG_MODE_MULTIPATH; } +#define MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS 2 static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) { if (!mlx5_lag_is_ready(ldev)) @@ -22,6 +23,9 @@ static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev)) return false; + if (ldev->ports > MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS) + return false; + return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev, ldev->pf[MLX5_LAG_P2].dev); } -- cgit v1.2.3 From 7718c1c8ac325019a3d727e994faab9cf2438263 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Jun 2023 00:12:11 -0700 Subject: net/mlx5: LAG, block multiport eswitch LAG in case ldev have more than 2 ports multiport eswitch LAG is not supported over more than two ports. Add a check in order to block multiport eswitch LAG over such devices. 
Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 0c0ef600f643..0e869a76dfe4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -65,6 +65,7 @@ err_metadata: return err; } +#define MLX5_LAG_MPESW_OFFLOADS_SUPPORTED_PORTS 2 static int enable_mpesw(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; @@ -74,6 +75,9 @@ static int enable_mpesw(struct mlx5_lag *ldev) if (ldev->mode != MLX5_LAG_MODE_NONE) return -EINVAL; + if (ldev->ports > MLX5_LAG_MPESW_OFFLOADS_SUPPORTED_PORTS) + return -EOPNOTSUPP; + if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS || !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) || !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) || -- cgit v1.2.3 From 6ec0b55e72a5c6fb056ad1eea12a3b5a74a402fe Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 6 Jun 2023 00:12:12 -0700 Subject: net/mlx5: Enable 4 ports VF LAG Now, after all preparation are done, enable 4 ports VF LAG Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c | 5 +++-- drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 6ce71c42c755..ffd7e17b8ebe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -711,7 +711,7 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev) return 0; } 
-#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2 +#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 4 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) { #ifdef CONFIG_MLX5_ESWITCH @@ -737,7 +737,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) return false; - if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS) + if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports > MLX5_LAG_OFFLOADS_SUPPORTED_PORTS) return false; #else for (i = 0; i < ldev->ports; i++) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index 8472bbb3cd58..78c94b22bdc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -75,13 +75,14 @@ struct mlx5_devcom *mlx5_devcom_register_device(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return NULL; - if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED) + if (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_DEVCOM_PORTS_SUPPORTED) return NULL; mlx5_dev_list_lock(); sguid0 = mlx5_query_nic_system_image_guid(dev); list_for_each_entry(iter, &devcom_list, list) { - struct mlx5_core_dev *tmp_dev = NULL; + /* There is at least one device in iter */ + struct mlx5_core_dev *tmp_dev; idx = -1; for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index bb1970ba8730..d953a01b8eaa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -6,7 +6,7 @@ #include -#define MLX5_DEVCOM_PORTS_SUPPORTED 2 +#define MLX5_DEVCOM_PORTS_SUPPORTED 4 enum mlx5_devcom_components { MLX5_DEVCOM_ESW_OFFLOADS, -- cgit v1.2.3 From a33682e4e78e249155abbe5e8ee880d5760b5e28 Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Tue, 6 Jun 2023 00:12:14 -0700 Subject: net/mlx5e: 
Expose catastrophic steering error counters Add generated_pkt_steering_fail and handled_pkt_steering_fail to devlink health reporter. generated_pkt_steering_fail indicates the number of packets dropped due to illegal steering operation within the vport steering domain. handled_pkt_steering_fail indicates the number of packets dropped due to illegal steering operation, originated by the vport. Also, update devlink reporter functionality documentation with the newly exposed counters. Signed-off-by: Lama Kayal Reviewed-by: Rahul Rameshbabu Signed-off-by: Saeed Mahameed --- .../device_drivers/ethernet/mellanox/mlx5/devlink.rst | 7 +++++++ drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c | 10 ++++++++++ include/linux/mlx5/mlx5_ifc.h | 12 ++++++++++-- 3 files changed, 27 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst index 3354ca3608ee..a4edf908b707 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst @@ -290,6 +290,13 @@ Description of the vnic counters: - nic_receive_steering_discard number of packets that completed RX flow steering but were discarded due to a mismatch in flow table. +- generated_pkt_steering_fail + number of packets generated by the VNIC experiencing unexpected steering + failure (at any point in steering flow). +- handled_pkt_steering_fail + number of packets handled by the VNIC experiencing unexpected steering + failure (at any point in steering flow owned by the VNIC, including the FDB + for the eswitch owner). 
User commands examples: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c index 9114661cd967..b0128336ff01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c @@ -76,6 +76,16 @@ int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, if (err) return err; + err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail", + VNIC_ENV_GET64(&vnic, generated_pkt_steering_fail)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail", + VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail)); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); if (err) return err; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index b89778d0d326..af3a92ad2e6b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1755,7 +1755,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_328[0x2]; u8 relaxed_ordering_read[0x1]; u8 log_max_pd[0x5]; - u8 reserved_at_330[0x9]; + u8 reserved_at_330[0x7]; + u8 vnic_env_cnt_steering_fail[0x1]; + u8 reserved_at_338[0x1]; u8 q_counter_aggregation[0x1]; u8 q_counter_other_vport[0x1]; u8 log_max_xrcd[0x5]; @@ -3673,7 +3675,13 @@ struct mlx5_ifc_vnic_diagnostic_statistics_bits { u8 eth_wqe_too_small[0x20]; - u8 reserved_at_220[0xdc0]; + u8 reserved_at_220[0xc0]; + + u8 generated_pkt_steering_fail[0x40]; + + u8 handled_pkt_steering_fail[0x40]; + + u8 reserved_at_360[0xc80]; }; struct mlx5_ifc_traffic_counter_bits { -- cgit v1.2.3 From f4692ab13a1f7ad2c2098b838c2820c113ce8a07 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 6 Jun 2023 00:12:15 -0700 Subject: net/mlx5e: Remove RX page cache leftovers Remove unused definitions left after the removal of the RX page cache feature. 
Signed-off-by: Tariq Toukan Reviewed-by: Dragos Tatulea Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 8e999f238194..ceabe57c511a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -594,13 +594,6 @@ struct mlx5e_mpw_info { #define MLX5E_MAX_RX_FRAGS 4 -/* a single cache unit is capable to serve one napi call (for non-striding rq) - * or a MPWQE (for striding rq). - */ -#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_MAX_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ - MLX5_MPWRQ_MAX_PAGES_PER_WQE : NAPI_POLL_WEIGHT) -#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) - struct mlx5e_rq; typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); typedef struct sk_buff * -- cgit v1.2.3 From de1f0a650824ed1905d5d48190f65f309cee5163 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Tue, 6 Jun 2023 00:12:16 -0700 Subject: net/mlx5e: TC, refactor access to hash key Currently, a temp object is filled and used as a key for rhashtable_lookup. Lookups will only work while key remains the first attribute in the relevant rhashtable node object. Fix this by passing a key, instead of an object containing the key. 
Signed-off-by: Oz Shlomo Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c index 07c1895a2b23..7aa926e542d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c @@ -25,8 +25,8 @@ struct mlx5e_tc_act_stats { static const struct rhashtable_params act_counters_ht_params = { .head_offset = offsetof(struct mlx5e_tc_act_stats, hash), - .key_offset = 0, - .key_len = offsetof(struct mlx5e_tc_act_stats, counter), + .key_offset = offsetof(struct mlx5e_tc_act_stats, tc_act_cookie), + .key_len = sizeof_field(struct mlx5e_tc_act_stats, tc_act_cookie), .automatic_shrinking = true, }; @@ -169,14 +169,11 @@ mlx5e_tc_act_stats_fill_stats(struct mlx5e_tc_act_stats_handle *handle, { struct rhashtable *ht = &handle->ht; struct mlx5e_tc_act_stats *item; - struct mlx5e_tc_act_stats key; u64 pkts, bytes, lastused; int err = 0; - key.tc_act_cookie = fl_act->cookie; - rcu_read_lock(); - item = rhashtable_lookup(ht, &key, act_counters_ht_params); + item = rhashtable_lookup(ht, &fl_act->cookie, act_counters_ht_params); if (!item) { rcu_read_unlock(); err = -ENOENT; -- cgit v1.2.3 From 97bd788efb9052963b43ba41c8aaff3ed12e1ede Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 6 Jun 2023 00:12:17 -0700 Subject: net/mlx5: Skip inline mode check after mlx5_eswitch_enable_locked() failure Commit bffaa916588e ("net/mlx5: E-Switch, Add control for inline mode") added inline mode checking to esw_offloads_start() with a warning printed out in case there is a problem. The inline mode checking was done even after mlx5_eswitch_enable_locked() call failed, which is pointless. 
Later on, commit 8c98ee77d911 ("net/mlx5e: E-Switch, Add extack messages to devlink callbacks") converted the error/warning prints to extack setting, which caused that the inline mode check error to overwrite possible previous extack message when mlx5_eswitch_enable_locked() failed. User then gets confusing error message. Fix this by skipping check of inline mode after mlx5_eswitch_enable_locked() call failed. Signed-off-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 29de4e759f4f..eafb098db6b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2178,6 +2178,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, "Failed setting eswitch to offloads"); esw->mode = MLX5_ESWITCH_LEGACY; mlx5_rescan_drivers(esw->dev); + return err; } if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) { if (mlx5_eswitch_inline_mode_get(esw, @@ -2187,7 +2188,7 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, "Inline mode is different between vports"); } } - return err; + return 0; } static void mlx5_esw_offloads_rep_mark_set(struct mlx5_eswitch *esw, -- cgit v1.2.3 From eb8e9fae0a22d07c6a09983ec52a2dcdc9d4d82b Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Tue, 6 Jun 2023 00:12:18 -0700 Subject: mlx5/core: E-Switch, Allocate ECPF vport if it's an eswitch manager Eswitch vport is needed for eswitch manager when creating LAG, to create egress rules. However, this was not handled when ECPF is an eswitch manager. 
Signed-off-by: Bodong Wang Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 31956cd9d1bb..ecd8864d5d11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1601,7 +1601,8 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) idx++; } - if (mlx5_ecpf_vport_exists(dev)) { + if (mlx5_ecpf_vport_exists(dev) || + mlx5_core_is_ecpf_esw_manager(dev)) { err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_ECPF); if (err) goto err; -- cgit v1.2.3 From 803ea346bd3ff1ac80fc65cf5899c0ad045d1788 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Jun 2023 00:12:19 -0700 Subject: net/mlx5e: simplify condition after napi budget handling change Since recent commit budget can't be 0 here. 
Signed-off-by: Jakub Kicinski Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index fbb2d963fb7e..a7d9b7cb4297 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -207,7 +207,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) } ch_stats->aff_change++; aff_change = true; - if (budget && work_done == budget) + if (work_done == budget) work_done--; } -- cgit v1.2.3 From 649c3fed36730a53447d8f479c14e431363563b6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Jun 2023 18:08:25 -0700 Subject: eth: bnxt: fix the wake condition The down condition should be the negation of the wake condition, IOW when I moved it from: if (cond && wake()) to if (__netif_txq_completed_wake(cond)) Cond should have been negated. Flip it now. This bug leads to occasional crashes with netconsole. It may also lead to queue never waking up in case BQL is not enabled. 
Reported-by: David Wei Fixes: 08a096780d92 ("bnxt: use new queue try_stop/try_wake macros") Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20230607010826.960226-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index dcd9367f05af..1f04cd4cfab9 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -692,7 +692,7 @@ next_tx_int: __netif_txq_completed_wake(txq, nr_pkts, tx_bytes, bnxt_tx_avail(bp, txr), bp->tx_wake_thresh, - READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING); + READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING); } static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, -- cgit v1.2.3 From f0d751973f739feb3742d4e5f4c0914e8b84e7c7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 6 Jun 2023 18:08:26 -0700 Subject: eth: ixgbe: fix the wake condition Flip the netif_carrier_ok() condition in queue wake logic. When I moved it to inside __netif_txq_completed_wake() I missed negating it. This made the condition ineffective and could probably lead to crashes. 
Fixes: 301f227fc860 ("net: piggy back on the memory barrier in bql when waking queues") Reviewed-by: Tony Nguyen Link: https://lore.kernel.org/r/20230607010826.960226-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5d83c887a3fc..1726297f2e0d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1256,7 +1256,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, if (!__netif_txq_completed_wake(txq, total_packets, total_bytes, ixgbe_desc_unused(tx_ring), TX_WAKE_THRESHOLD, - netif_carrier_ok(tx_ring->netdev) && + !netif_carrier_ok(tx_ring->netdev) || test_bit(__IXGBE_DOWN, &adapter->state))) ++tx_ring->tx_stats.restart_queue; -- cgit v1.2.3 From a9f31047baca57d47440c879cf259b86f900260c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 6 Jun 2023 14:43:47 -0700 Subject: net: bcmgenet: Fix EEE implementation We had a number of shortcomings: - EEE must be re-evaluated whenever the state machine detects a link change as we might be switching from a link partner with EEE enabled/disabled - tx_lpi_enabled controls whether EEE should be enabled/disabled for the transmit path, which applies to the TBUF block - We do not need to forcibly enable EEE upon system resume, as the PHY state machine will trigger a link event that will do that, too Fixes: 6ef398ea60d9 ("net: bcmgenet: add EEE support") Signed-off-by: Florian Fainelli Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20230606214348.2408018-1-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 22 ++++++++-------------- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 3 +++ 
drivers/net/ethernet/broadcom/genet/bcmmii.c | 5 +++++ 3 files changed, 16 insertions(+), 14 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index eca0c92c0c84..2b5761ad2f92 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1272,7 +1272,8 @@ static void bcmgenet_get_ethtool_stats(struct net_device *dev, } } -static void bcmgenet_eee_enable_set(struct net_device *dev, bool enable) +void bcmgenet_eee_enable_set(struct net_device *dev, bool enable, + bool tx_lpi_enabled) { struct bcmgenet_priv *priv = netdev_priv(dev); u32 off = priv->hw_params->tbuf_offset + TBUF_ENERGY_CTRL; @@ -1292,7 +1293,7 @@ static void bcmgenet_eee_enable_set(struct net_device *dev, bool enable) /* Enable EEE and switch to a 27Mhz clock automatically */ reg = bcmgenet_readl(priv->base + off); - if (enable) + if (tx_lpi_enabled) reg |= TBUF_EEE_EN | TBUF_PM_EN; else reg &= ~(TBUF_EEE_EN | TBUF_PM_EN); @@ -1313,6 +1314,7 @@ static void bcmgenet_eee_enable_set(struct net_device *dev, bool enable) priv->eee.eee_enabled = enable; priv->eee.eee_active = enable; + priv->eee.tx_lpi_enabled = tx_lpi_enabled; } static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e) @@ -1328,6 +1330,7 @@ static int bcmgenet_get_eee(struct net_device *dev, struct ethtool_eee *e) e->eee_enabled = p->eee_enabled; e->eee_active = p->eee_active; + e->tx_lpi_enabled = p->tx_lpi_enabled; e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER); return phy_ethtool_get_eee(dev->phydev, e); @@ -1337,7 +1340,6 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) { struct bcmgenet_priv *priv = netdev_priv(dev); struct ethtool_eee *p = &priv->eee; - int ret = 0; if (GENET_IS_V1(priv)) return -EOPNOTSUPP; @@ -1348,16 +1350,11 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e) 
p->eee_enabled = e->eee_enabled; if (!p->eee_enabled) { - bcmgenet_eee_enable_set(dev, false); + bcmgenet_eee_enable_set(dev, false, false); } else { - ret = phy_init_eee(dev->phydev, false); - if (ret) { - netif_err(priv, hw, dev, "EEE initialization failed\n"); - return ret; - } - + p->eee_active = phy_init_eee(dev->phydev, false) >= 0; bcmgenet_umac_writel(priv, e->tx_lpi_timer, UMAC_EEE_LPI_TIMER); - bcmgenet_eee_enable_set(dev, true); + bcmgenet_eee_enable_set(dev, p->eee_active, e->tx_lpi_enabled); } return phy_ethtool_set_eee(dev->phydev, e); @@ -4279,9 +4276,6 @@ static int bcmgenet_resume(struct device *d) if (!device_may_wakeup(d)) phy_resume(dev->phydev); - if (priv->eee.eee_enabled) - bcmgenet_eee_enable_set(dev, true); - bcmgenet_netif_start(dev); netif_device_attach(dev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 946f6e283c4e..1985c0ec4da2 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -703,4 +703,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv, enum bcmgenet_power_mode mode); +void bcmgenet_eee_enable_set(struct net_device *dev, bool enable, + bool tx_lpi_enabled); + #endif /* __BCMGENET_H__ */ diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index be042905ada2..c15ed0acdb77 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -87,6 +87,11 @@ static void bcmgenet_mac_config(struct net_device *dev) reg |= CMD_TX_EN | CMD_RX_EN; } bcmgenet_umac_writel(priv, reg, UMAC_CMD); + + priv->eee.eee_active = phy_init_eee(phydev, 0) >= 0; + bcmgenet_eee_enable_set(dev, + priv->eee.eee_enabled && priv->eee.eee_active, + priv->eee.tx_lpi_enabled); } /* setup netdev link state when PHY link status change and -- cgit v1.2.3 From 
095d5dc0c1d9f3284e3c575ccf4c0e8b04b548f8 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Wed, 7 Jun 2023 00:54:04 -0700 Subject: bnxt_en: Fix bnxt_hwrm_update_rss_hash_cfg() We must specify the vnic id of the vnic in the input structure of this firmware message. Otherwise we will get an error from the firmware. Fixes: 98a4322b70e8 ("bnxt_en: update RSS config using difference algorithm") Reviewed-by: Kalesh Anakkur Purayil Reviewed-by: Somnath Kotur Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1f04cd4cfab9..45739b4a84a2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5350,6 +5350,7 @@ static void bnxt_hwrm_update_rss_hash_cfg(struct bnxt *bp) if (hwrm_req_init(bp, req, HWRM_VNIC_RSS_QCFG)) return; + req->vnic_id = cpu_to_le16(vnic->fw_vnic_id); /* all contexts configured to same hash_type, zero always exists */ req->rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[0]); resp = hwrm_req_hold(bp, req); -- cgit v1.2.3 From 1d997801c7cc6a7f542e46d5a6bf16f893ad3fe9 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Wed, 7 Jun 2023 00:54:05 -0700 Subject: bnxt_en: Don't issue AP reset during ethtool's reset operation Only older NIC controller's firmware uses the PROC AP reset type. Firmware on 5731X/5741X and newer chips does not support this reset type. When bnxt_reset() issues a series of resets, this PROC AP reset may actually fail on these newer chips because the firmware is not ready to accept this unsupported command yet. Avoid this unnecessary error by skipping this reset type on chips that don't support it. 
Fixes: 7a13240e3718 ("bnxt_en: fix ethtool_reset_flags ABI violations") Reviewed-by: Pavan Chebbi Signed-off-by: Sreekanth Reddy Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 2dd8ee4a6f75..8fd5071d8b09 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -3831,7 +3831,7 @@ static int bnxt_reset(struct net_device *dev, u32 *flags) } } - if (req & BNXT_FW_RESET_AP) { + if (!BNXT_CHIP_P4_PLUS(bp) && (req & BNXT_FW_RESET_AP)) { /* This feature is not supported in older firmware versions */ if (bp->hwrm_spec_code >= 0x10803) { if (!bnxt_firmware_reset_ap(dev)) { -- cgit v1.2.3 From 1a9e4f501bc6ff1b6ecb60df54fbf2b54db43bfe Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Wed, 7 Jun 2023 00:54:06 -0700 Subject: bnxt_en: Query default VLAN before VNIC setup on a VF We need to call bnxt_hwrm_func_qcfg() on a VF to query the default VLAN that may be setup by the PF. If a default VLAN is enabled, the VF cannot support VLAN acceleration on the receive side and the VNIC must be setup to strip out the default VLAN tag. If a default VLAN is not enabled, the VF can support VLAN acceleration on the receive side. The VNIC should be set up to strip or not strip the VLAN based on the RX VLAN acceleration setting. Without this call to determine the default VLAN before calling bnxt_setup_vnic(), the VNIC may not be set up correctly. For example, bnxt_setup_vnic() may set up to strip the VLAN tag based on stale default VLAN information. If RX VLAN acceleration is not enabled, the VLAN tag will be incorrectly stripped and the RX data path will not work correctly. 
Fixes: cf6645f8ebc6 ("bnxt_en: Add function for VF driver to query default VLAN.") Reviewed-by: Pavan Chebbi Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 45739b4a84a2..b3fa840cb71f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8813,6 +8813,9 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) goto err_out; } + if (BNXT_VF(bp)) + bnxt_hwrm_func_qcfg(bp); + rc = bnxt_setup_vnic(bp, 0); if (rc) goto err_out; -- cgit v1.2.3 From 83474a9b252ab23e6003865c2775024344cb9c09 Mon Sep 17 00:00:00 2001 From: Vikas Gupta Date: Wed, 7 Jun 2023 00:54:07 -0700 Subject: bnxt_en: Skip firmware fatal error recovery if chip is not accessible Driver starts firmware fatal error recovery by detecting heartbeat failure or fw reset count register changing. But these checks are not reliable if the device is not accessible. This can happen while DPC (Downstream Port containment) is in progress. Skip firmware fatal recovery if pci_device_is_present() returns false. 
Fixes: acfb50e4e773 ("bnxt_en: Add FW fatal devlink_health_reporter.") Reviewed-by: Somnath Kotur Reviewed-by: Pavan Chebbi Signed-off-by: Vikas Gupta Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b3fa840cb71f..546eb5d22b7a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11602,6 +11602,7 @@ static void bnxt_tx_timeout(struct net_device *dev, unsigned int txqueue) static void bnxt_fw_health_check(struct bnxt *bp) { struct bnxt_fw_health *fw_health = bp->fw_health; + struct pci_dev *pdev = bp->pdev; u32 val; if (!fw_health->enabled || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) @@ -11615,7 +11616,7 @@ static void bnxt_fw_health_check(struct bnxt *bp) } val = bnxt_fw_health_readl(bp, BNXT_FW_HEARTBEAT_REG); - if (val == fw_health->last_fw_heartbeat) { + if (val == fw_health->last_fw_heartbeat && pci_device_is_present(pdev)) { fw_health->arrests++; goto fw_reset; } @@ -11623,7 +11624,7 @@ static void bnxt_fw_health_check(struct bnxt *bp) fw_health->last_fw_heartbeat = val; val = bnxt_fw_health_readl(bp, BNXT_FW_RESET_CNT_REG); - if (val != fw_health->last_fw_reset_cnt) { + if (val != fw_health->last_fw_reset_cnt && pci_device_is_present(pdev)) { fw_health->discoveries++; goto fw_reset; } -- cgit v1.2.3 From 319a7827df9784048abe072afe6b4fb4501d8de4 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Wed, 7 Jun 2023 00:54:08 -0700 Subject: bnxt_en: Prevent kernel panic when receiving unexpected PHC_UPDATE event The firmware can send PHC_RTC_UPDATE async event on a PF that may not have PTP registered. In such a case, there will be a null pointer dereference for bp->ptp_cfg when we try to handle the event. 
Fix it by not registering for this event with the firmware if !bp->ptp_cfg. Also, check that bp->ptp_cfg is valid before proceeding when we receive the event. Fixes: 8bcf6f04d4a5 ("bnxt_en: Handle async event when the PHC is updated in RTC mode") Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 1 + 2 files changed, 7 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 546eb5d22b7a..bb0332210674 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -2365,6 +2365,9 @@ static int bnxt_async_event_process(struct bnxt *bp, struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; u64 ns; + if (!ptp) + goto async_event_process_exit; + spin_lock_bh(&ptp->ptp_lock); bnxt_ptp_update_current_time(bp); ns = (((u64)BNXT_EVENT_PHC_RTC_UPDATE(data1) << @@ -4763,6 +4766,9 @@ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size, if (event_id == ASYNC_EVENT_CMPL_EVENT_ID_ERROR_RECOVERY && !(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)) continue; + if (event_id == ASYNC_EVENT_CMPL_EVENT_ID_PHC_UPDATE && + !bp->ptp_cfg) + continue; __set_bit(bnxt_async_events_arr[i], async_events_bmap); } if (bmap && bmap_size) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index e46689128e32..f3886710e778 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -952,6 +952,7 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) bnxt_ptp_timecounter_init(bp, true); bnxt_ptp_adjfine_rtc(bp, 0); } + bnxt_hwrm_func_drv_rgtr(bp, NULL, 0, true); ptp->ptp_info = bnxt_ptp_caps; if ((bp->fw_cap & BNXT_FW_CAP_PTP_PPS)) { -- cgit v1.2.3 From 1eb4ef12591348c440ac9d6efcf7521e73cf2b10 Mon Sep 17 00:00:00 
2001 From: Somnath Kotur Date: Wed, 7 Jun 2023 00:54:09 -0700 Subject: bnxt_en: Implement .set_port / .unset_port UDP tunnel callbacks As per the new udp tunnel framework, drivers which need to know the details of a port entry (i.e. port type) when it gets deleted should use the .set_port / .unset_port callbacks. Implementing the current .udp_tunnel_sync callback would mean that the deleted tunnel port entry would be all zeros. This used to work on older firmware because it would not check the input when deleting a tunnel port. With newer firmware, the delete will now fail and subsequent tunnel port allocation will fail as a result. Fixes: 442a35a5a7aa ("bnxt: convert to new udp_tunnel_nic infra") Reviewed-by: Kalesh Anakkur Purayil Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index bb0332210674..b499bc9c4e06 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -13036,26 +13036,37 @@ static void bnxt_cfg_ntp_filters(struct bnxt *bp) #endif /* CONFIG_RFS_ACCEL */ -static int bnxt_udp_tunnel_sync(struct net_device *netdev, unsigned int table) +static int bnxt_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, + unsigned int entry, struct udp_tunnel_info *ti) { struct bnxt *bp = netdev_priv(netdev); - struct udp_tunnel_info ti; unsigned int cmd; - udp_tunnel_nic_get_port(netdev, table, 0, &ti); - if (ti.type == UDP_TUNNEL_TYPE_VXLAN) + if (ti->type == UDP_TUNNEL_TYPE_VXLAN) cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN; else cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE; - if (ti.port) - return bnxt_hwrm_tunnel_dst_port_alloc(bp, ti.port, cmd); + return bnxt_hwrm_tunnel_dst_port_alloc(bp, ti->port, 
cmd); +} + +static int bnxt_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, + unsigned int entry, struct udp_tunnel_info *ti) +{ + struct bnxt *bp = netdev_priv(netdev); + unsigned int cmd; + + if (ti->type == UDP_TUNNEL_TYPE_VXLAN) + cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN; + else + cmd = TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE; return bnxt_hwrm_tunnel_dst_port_free(bp, cmd); } static const struct udp_tunnel_nic_info bnxt_udp_tunnels = { - .sync_table = bnxt_udp_tunnel_sync, + .set_port = bnxt_udp_tunnel_set_port, + .unset_port = bnxt_udp_tunnel_unset_port, .flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP | UDP_TUNNEL_NIC_INFO_OPEN_ONLY, .tables = { -- cgit v1.2.3 From c3e382ad6d15a8041ab8a168ad3ff90137ee8a45 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 6 Jun 2023 17:21:00 +0800 Subject: net: txgbe: Add software nodes to support phylink Register software nodes for GPIO, I2C, SFP and PHYLINK. Define the device properties. Signed-off-by: Jiawen Wu Reviewed-by: Andrew Lunn Reviewed-by: Piotr Raczynski Reviewed-by: Maciej Fijalkowski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/libwx/wx_type.h | 1 + drivers/net/ethernet/wangxun/txgbe/Makefile | 1 + drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 22 +++++- drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c | 89 +++++++++++++++++++++++++ drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h | 10 +++ drivers/net/ethernet/wangxun/txgbe/txgbe_type.h | 49 ++++++++++++++ 6 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c create mode 100644 drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 5063846e1b52..c61c18a842c4 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -814,6 +814,7 @@ enum wx_isb_idx { struct wx { unsigned 
long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; + void *priv; u8 __iomem *hw_addr; struct pci_dev *pdev; struct net_device *netdev; diff --git a/drivers/net/ethernet/wangxun/txgbe/Makefile b/drivers/net/ethernet/wangxun/txgbe/Makefile index 6db14a2cb2d0..7507f762edfe 100644 --- a/drivers/net/ethernet/wangxun/txgbe/Makefile +++ b/drivers/net/ethernet/wangxun/txgbe/Makefile @@ -8,4 +8,5 @@ obj-$(CONFIG_TXGBE) += txgbe.o txgbe-objs := txgbe_main.o \ txgbe_hw.o \ + txgbe_phy.o \ txgbe_ethtool.o diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index cfe47f3d2503..920ee3a3bfa3 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -15,6 +15,7 @@ #include "../libwx/wx_hw.h" #include "txgbe_type.h" #include "txgbe_hw.h" +#include "txgbe_phy.h" #include "txgbe_ethtool.h" char txgbe_driver_name[] = "txgbe"; @@ -516,6 +517,7 @@ static int txgbe_probe(struct pci_dev *pdev, struct net_device *netdev; int err, expected_gts; struct wx *wx = NULL; + struct txgbe *txgbe; u16 eeprom_verh = 0, eeprom_verl = 0, offset = 0; u16 eeprom_cfg_blkh = 0, eeprom_cfg_blkl = 0; @@ -680,10 +682,23 @@ static int txgbe_probe(struct pci_dev *pdev, "0x%08x", etrack_id); } - err = register_netdev(netdev); + txgbe = devm_kzalloc(&pdev->dev, sizeof(*txgbe), GFP_KERNEL); + if (!txgbe) { + err = -ENOMEM; + goto err_release_hw; + } + + txgbe->wx = wx; + wx->priv = txgbe; + + err = txgbe_init_phy(txgbe); if (err) goto err_release_hw; + err = register_netdev(netdev); + if (err) + goto err_remove_phy; + pci_set_drvdata(pdev, wx); netif_tx_stop_all_queues(netdev); @@ -711,6 +726,8 @@ static int txgbe_probe(struct pci_dev *pdev, return 0; +err_remove_phy: + txgbe_remove_phy(txgbe); err_release_hw: wx_clear_interrupt_scheme(wx); wx_control_hw(wx, false); @@ -736,11 +753,14 @@ err_pci_disable_dev: static void txgbe_remove(struct pci_dev *pdev) { struct wx *wx = pci_get_drvdata(pdev); + 
struct txgbe *txgbe = wx->priv; struct net_device *netdev; netdev = wx->netdev; unregister_netdev(netdev); + txgbe_remove_phy(txgbe); + pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c new file mode 100644 index 000000000000..be4b5ad74a3c --- /dev/null +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2015 - 2023 Beijing WangXun Technology Co., Ltd. */ + +#include +#include +#include + +#include "../libwx/wx_type.h" +#include "txgbe_type.h" +#include "txgbe_phy.h" + +static int txgbe_swnodes_register(struct txgbe *txgbe) +{ + struct txgbe_nodes *nodes = &txgbe->nodes; + struct pci_dev *pdev = txgbe->wx->pdev; + struct software_node *swnodes; + u32 id; + + id = (pdev->bus->number << 8) | pdev->devfn; + + snprintf(nodes->gpio_name, sizeof(nodes->gpio_name), "txgbe_gpio-%x", id); + snprintf(nodes->i2c_name, sizeof(nodes->i2c_name), "txgbe_i2c-%x", id); + snprintf(nodes->sfp_name, sizeof(nodes->sfp_name), "txgbe_sfp-%x", id); + snprintf(nodes->phylink_name, sizeof(nodes->phylink_name), "txgbe_phylink-%x", id); + + swnodes = nodes->swnodes; + + /* GPIO 0: tx fault + * GPIO 1: tx disable + * GPIO 2: sfp module absent + * GPIO 3: rx signal lost + * GPIO 4: rate select, 1G(0) 10G(1) + * GPIO 5: rate select, 1G(0) 10G(1) + */ + nodes->gpio_props[0] = PROPERTY_ENTRY_STRING("pinctrl-names", "default"); + swnodes[SWNODE_GPIO] = NODE_PROP(nodes->gpio_name, nodes->gpio_props); + nodes->gpio0_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 0, GPIO_ACTIVE_HIGH); + nodes->gpio1_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 1, GPIO_ACTIVE_HIGH); + nodes->gpio2_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 2, GPIO_ACTIVE_LOW); + nodes->gpio3_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 3, GPIO_ACTIVE_HIGH); + nodes->gpio4_ref[0] = 
SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 4, GPIO_ACTIVE_HIGH); + nodes->gpio5_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_GPIO], 5, GPIO_ACTIVE_HIGH); + + nodes->i2c_props[0] = PROPERTY_ENTRY_STRING("compatible", "snps,designware-i2c"); + nodes->i2c_props[1] = PROPERTY_ENTRY_BOOL("wx,i2c-snps-model"); + nodes->i2c_props[2] = PROPERTY_ENTRY_U32("clock-frequency", I2C_MAX_STANDARD_MODE_FREQ); + swnodes[SWNODE_I2C] = NODE_PROP(nodes->i2c_name, nodes->i2c_props); + nodes->i2c_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_I2C]); + + nodes->sfp_props[0] = PROPERTY_ENTRY_STRING("compatible", "sff,sfp"); + nodes->sfp_props[1] = PROPERTY_ENTRY_REF_ARRAY("i2c-bus", nodes->i2c_ref); + nodes->sfp_props[2] = PROPERTY_ENTRY_REF_ARRAY("tx-fault-gpios", nodes->gpio0_ref); + nodes->sfp_props[3] = PROPERTY_ENTRY_REF_ARRAY("tx-disable-gpios", nodes->gpio1_ref); + nodes->sfp_props[4] = PROPERTY_ENTRY_REF_ARRAY("mod-def0-gpios", nodes->gpio2_ref); + nodes->sfp_props[5] = PROPERTY_ENTRY_REF_ARRAY("los-gpios", nodes->gpio3_ref); + nodes->sfp_props[6] = PROPERTY_ENTRY_REF_ARRAY("rate-select1-gpios", nodes->gpio4_ref); + nodes->sfp_props[7] = PROPERTY_ENTRY_REF_ARRAY("rate-select0-gpios", nodes->gpio5_ref); + swnodes[SWNODE_SFP] = NODE_PROP(nodes->sfp_name, nodes->sfp_props); + nodes->sfp_ref[0] = SOFTWARE_NODE_REFERENCE(&swnodes[SWNODE_SFP]); + + nodes->phylink_props[0] = PROPERTY_ENTRY_STRING("managed", "in-band-status"); + nodes->phylink_props[1] = PROPERTY_ENTRY_REF_ARRAY("sfp", nodes->sfp_ref); + swnodes[SWNODE_PHYLINK] = NODE_PROP(nodes->phylink_name, nodes->phylink_props); + + nodes->group[SWNODE_GPIO] = &swnodes[SWNODE_GPIO]; + nodes->group[SWNODE_I2C] = &swnodes[SWNODE_I2C]; + nodes->group[SWNODE_SFP] = &swnodes[SWNODE_SFP]; + nodes->group[SWNODE_PHYLINK] = &swnodes[SWNODE_PHYLINK]; + + return software_node_register_node_group(nodes->group); +} + +int txgbe_init_phy(struct txgbe *txgbe) +{ + int ret; + + ret = txgbe_swnodes_register(txgbe); + if (ret) { + 
wx_err(txgbe->wx, "failed to register software nodes\n"); + return ret; + } + + return 0; +} + +void txgbe_remove_phy(struct txgbe *txgbe) +{ + software_node_unregister_node_group(txgbe->nodes.group); +} diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h new file mode 100644 index 000000000000..1ab592124986 --- /dev/null +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2015 - 2023 Beijing WangXun Technology Co., Ltd. */ + +#ifndef _TXGBE_PHY_H_ +#define _TXGBE_PHY_H_ + +int txgbe_init_phy(struct txgbe *txgbe); +void txgbe_remove_phy(struct txgbe *txgbe); + +#endif /* _TXGBE_NODE_H_ */ diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 032972369965..9aa399acd9a0 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -4,6 +4,8 @@ #ifndef _TXGBE_TYPE_H_ #define _TXGBE_TYPE_H_ +#include + /* Device IDs */ #define TXGBE_DEV_ID_SP1000 0x1001 #define TXGBE_DEV_ID_WX1820 0x2001 @@ -100,4 +102,51 @@ extern char txgbe_driver_name[]; +static inline struct txgbe *netdev_to_txgbe(struct net_device *netdev) +{ + struct wx *wx = netdev_priv(netdev); + + return wx->priv; +} + +#define NODE_PROP(_NAME, _PROP) \ + (const struct software_node) { \ + .name = _NAME, \ + .properties = _PROP, \ + } + +enum txgbe_swnodes { + SWNODE_GPIO = 0, + SWNODE_I2C, + SWNODE_SFP, + SWNODE_PHYLINK, + SWNODE_MAX +}; + +struct txgbe_nodes { + char gpio_name[32]; + char i2c_name[32]; + char sfp_name[32]; + char phylink_name[32]; + struct property_entry gpio_props[1]; + struct property_entry i2c_props[3]; + struct property_entry sfp_props[8]; + struct property_entry phylink_props[2]; + struct software_node_ref_args i2c_ref[1]; + struct software_node_ref_args gpio0_ref[1]; + struct software_node_ref_args gpio1_ref[1]; + struct 
software_node_ref_args gpio2_ref[1]; + struct software_node_ref_args gpio3_ref[1]; + struct software_node_ref_args gpio4_ref[1]; + struct software_node_ref_args gpio5_ref[1]; + struct software_node_ref_args sfp_ref[1]; + struct software_node swnodes[SWNODE_MAX]; + const struct software_node *group[SWNODE_MAX + 1]; +}; + +struct txgbe { + struct wx *wx; + struct txgbe_nodes nodes; +}; + #endif /* _TXGBE_TYPE_H_ */ -- cgit v1.2.3 From b63f20485e433e6e548df81f5c76556d7f187266 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 6 Jun 2023 17:21:01 +0800 Subject: net: txgbe: Register fixed rate clock In order for I2C to be able to work in standard mode, register a fixed rate clock for each I2C device. Signed-off-by: Jiawen Wu Reviewed-by: Andrew Lunn Reviewed-by: Maciej Fijalkowski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/Kconfig | 1 + drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c | 41 +++++++++++++++++++++++++ drivers/net/ethernet/wangxun/txgbe/txgbe_type.h | 2 ++ 3 files changed, 44 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig index c9d88673d306..190d42a203b4 100644 --- a/drivers/net/ethernet/wangxun/Kconfig +++ b/drivers/net/ethernet/wangxun/Kconfig @@ -40,6 +40,7 @@ config NGBE config TXGBE tristate "Wangxun(R) 10GbE PCI Express adapters support" depends on PCI + depends on COMMON_CLK select LIBWX help This driver supports Wangxun(R) 10GbE PCI Express family of diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index be4b5ad74a3c..06506cfb8d06 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -2,6 +2,8 @@ /* Copyright (c) 2015 - 2023 Beijing WangXun Technology Co., Ltd. 
*/ #include +#include +#include #include #include @@ -70,6 +72,32 @@ static int txgbe_swnodes_register(struct txgbe *txgbe) return software_node_register_node_group(nodes->group); } +static int txgbe_clock_register(struct txgbe *txgbe) +{ + struct pci_dev *pdev = txgbe->wx->pdev; + struct clk_lookup *clock; + char clk_name[32]; + struct clk *clk; + + snprintf(clk_name, sizeof(clk_name), "i2c_designware.%d", + (pdev->bus->number << 8) | pdev->devfn); + + clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + clock = clkdev_create(clk, NULL, clk_name); + if (!clock) { + clk_unregister(clk); + return -ENOMEM; + } + + txgbe->clk = clk; + txgbe->clock = clock; + + return 0; +} + int txgbe_init_phy(struct txgbe *txgbe) { int ret; @@ -80,10 +108,23 @@ int txgbe_init_phy(struct txgbe *txgbe) return ret; } + ret = txgbe_clock_register(txgbe); + if (ret) { + wx_err(txgbe->wx, "failed to register clock: %d\n", ret); + goto err_unregister_swnode; + } + return 0; + +err_unregister_swnode: + software_node_unregister_node_group(txgbe->nodes.group); + + return ret; } void txgbe_remove_phy(struct txgbe *txgbe) { + clkdev_drop(txgbe->clock); + clk_unregister(txgbe->clk); software_node_unregister_node_group(txgbe->nodes.group); } diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 9aa399acd9a0..856d0f9d045b 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -147,6 +147,8 @@ struct txgbe_nodes { struct txgbe { struct wx *wx; struct txgbe_nodes nodes; + struct clk_lookup *clock; + struct clk *clk; }; #endif /* _TXGBE_TYPE_H_ */ -- cgit v1.2.3 From c625e72561f6c9bd5d1a86aa4d1d5a68db43d2e0 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 6 Jun 2023 17:21:02 +0800 Subject: net: txgbe: Register I2C platform device Register the platform device to use Designware I2C bus master driver. 
Use regmap to read/write I2C device region from given base offset. Signed-off-by: Jiawen Wu Reviewed-by: Andrew Lunn Reviewed-by: Piotr Raczynski Reviewed-by: Andy Shevchenko Reviewed-by: Maciej Fijalkowski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/Kconfig | 3 ++ drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c | 70 +++++++++++++++++++++++++ drivers/net/ethernet/wangxun/txgbe/txgbe_type.h | 4 ++ 3 files changed, 77 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig index 190d42a203b4..128cc1cb0605 100644 --- a/drivers/net/ethernet/wangxun/Kconfig +++ b/drivers/net/ethernet/wangxun/Kconfig @@ -41,6 +41,9 @@ config TXGBE tristate "Wangxun(R) 10GbE PCI Express adapters support" depends on PCI depends on COMMON_CLK + select REGMAP + select I2C + select I2C_DESIGNWARE_PLATFORM select LIBWX help This driver supports Wangxun(R) 10GbE PCI Express family of diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 06506cfb8d06..24a729150e08 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include "../libwx/wx_type.h" #include "txgbe_type.h" @@ -98,6 +100,64 @@ static int txgbe_clock_register(struct txgbe *txgbe) return 0; } +static int txgbe_i2c_read(void *context, unsigned int reg, unsigned int *val) +{ + struct wx *wx = context; + + *val = rd32(wx, reg + TXGBE_I2C_BASE); + + return 0; +} + +static int txgbe_i2c_write(void *context, unsigned int reg, unsigned int val) +{ + struct wx *wx = context; + + wr32(wx, reg + TXGBE_I2C_BASE, val); + + return 0; +} + +static const struct regmap_config i2c_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_read = txgbe_i2c_read, + .reg_write = txgbe_i2c_write, + .fast_io = true, +}; + +static int txgbe_i2c_register(struct txgbe *txgbe) +{ + 
struct platform_device_info info = {}; + struct platform_device *i2c_dev; + struct regmap *i2c_regmap; + struct pci_dev *pdev; + struct wx *wx; + + wx = txgbe->wx; + pdev = wx->pdev; + i2c_regmap = devm_regmap_init(&pdev->dev, NULL, wx, &i2c_regmap_config); + if (IS_ERR(i2c_regmap)) { + wx_err(wx, "failed to init I2C regmap\n"); + return PTR_ERR(i2c_regmap); + } + + info.parent = &pdev->dev; + info.fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_I2C]); + info.name = "i2c_designware"; + info.id = (pdev->bus->number << 8) | pdev->devfn; + + info.res = &DEFINE_RES_IRQ(pdev->irq); + info.num_res = 1; + i2c_dev = platform_device_register_full(&info); + if (IS_ERR(i2c_dev)) + return PTR_ERR(i2c_dev); + + txgbe->i2c_dev = i2c_dev; + + return 0; +} + int txgbe_init_phy(struct txgbe *txgbe) { int ret; @@ -114,8 +174,17 @@ int txgbe_init_phy(struct txgbe *txgbe) goto err_unregister_swnode; } + ret = txgbe_i2c_register(txgbe); + if (ret) { + wx_err(txgbe->wx, "failed to init i2c interface: %d\n", ret); + goto err_unregister_clk; + } + return 0; +err_unregister_clk: + clkdev_drop(txgbe->clock); + clk_unregister(txgbe->clk); err_unregister_swnode: software_node_unregister_node_group(txgbe->nodes.group); @@ -124,6 +193,7 @@ err_unregister_swnode: void txgbe_remove_phy(struct txgbe *txgbe) { + platform_device_unregister(txgbe->i2c_dev); clkdev_drop(txgbe->clock); clk_unregister(txgbe->clk); software_node_unregister_node_group(txgbe->nodes.group); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 856d0f9d045b..6e471a4d68cc 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -55,6 +55,9 @@ #define TXGBE_TS_CTL 0x10300 #define TXGBE_TS_CTL_EVAL_MD BIT(31) +/* I2C registers */ +#define TXGBE_I2C_BASE 0x14900 + /* Part Number String Length */ #define TXGBE_PBANUM_LENGTH 32 @@ -147,6 +150,7 @@ struct txgbe_nodes { struct txgbe { struct wx *wx; 
struct txgbe_nodes nodes; + struct platform_device *i2c_dev; struct clk_lookup *clock; struct clk *clk; }; -- cgit v1.2.3 From 04d94236182e4f46bea6a48e3830b27f361f090f Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 6 Jun 2023 17:21:03 +0800 Subject: net: txgbe: Add SFP module identify Register SFP platform device to get modules information. Signed-off-by: Jiawen Wu Reviewed-by: Andrew Lunn Reviewed-by: Piotr Raczynski Reviewed-by: Maciej Fijalkowski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/Kconfig | 3 +++ drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c | 28 +++++++++++++++++++++++++ drivers/net/ethernet/wangxun/txgbe/txgbe_type.h | 1 + 3 files changed, 32 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig index 128cc1cb0605..59f3a3f492cf 100644 --- a/drivers/net/ethernet/wangxun/Kconfig +++ b/drivers/net/ethernet/wangxun/Kconfig @@ -44,6 +44,9 @@ config TXGBE select REGMAP select I2C select I2C_DESIGNWARE_PLATFORM + select PHYLINK + select HWMON if TXGBE=y + select SFP select LIBWX help This driver supports Wangxun(R) 10GbE PCI Express family of diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 24a729150e08..d95dc131e91b 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -158,6 +158,25 @@ static int txgbe_i2c_register(struct txgbe *txgbe) return 0; } +static int txgbe_sfp_register(struct txgbe *txgbe) +{ + struct pci_dev *pdev = txgbe->wx->pdev; + struct platform_device_info info = {}; + struct platform_device *sfp_dev; + + info.parent = &pdev->dev; + info.fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_SFP]); + info.name = "sfp"; + info.id = (pdev->bus->number << 8) | pdev->devfn; + sfp_dev = platform_device_register_full(&info); + if (IS_ERR(sfp_dev)) + return PTR_ERR(sfp_dev); + + txgbe->sfp_dev = sfp_dev; + + return 
0; +} + int txgbe_init_phy(struct txgbe *txgbe) { int ret; @@ -180,8 +199,16 @@ int txgbe_init_phy(struct txgbe *txgbe) goto err_unregister_clk; } + ret = txgbe_sfp_register(txgbe); + if (ret) { + wx_err(txgbe->wx, "failed to register sfp\n"); + goto err_unregister_i2c; + } + return 0; +err_unregister_i2c: + platform_device_unregister(txgbe->i2c_dev); err_unregister_clk: clkdev_drop(txgbe->clock); clk_unregister(txgbe->clk); @@ -193,6 +220,7 @@ err_unregister_swnode: void txgbe_remove_phy(struct txgbe *txgbe) { + platform_device_unregister(txgbe->sfp_dev); platform_device_unregister(txgbe->i2c_dev); clkdev_drop(txgbe->clock); clk_unregister(txgbe->clk); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 6e471a4d68cc..f420e2569247 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -150,6 +150,7 @@ struct txgbe_nodes { struct txgbe { struct wx *wx; struct txgbe_nodes nodes; + struct platform_device *sfp_dev; struct platform_device *i2c_dev; struct clk_lookup *clock; struct clk *clk; -- cgit v1.2.3 From b83c37315a620fc8dcb5f3cffe4753765228d1f4 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 6 Jun 2023 17:21:04 +0800 Subject: net: txgbe: Support GPIO to SFP socket Register GPIO chip and handle GPIO IRQ for SFP socket. 
Signed-off-by: Jiawen Wu Reviewed-by: Andy Shevchenko Reviewed-by: Maciej Fijalkowski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/Kconfig | 2 + drivers/net/ethernet/wangxun/libwx/wx_lib.c | 3 +- drivers/net/ethernet/wangxun/libwx/wx_type.h | 3 + drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 20 +- drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c | 251 ++++++++++++++++++++++++ drivers/net/ethernet/wangxun/txgbe/txgbe_type.h | 23 +++ 6 files changed, 283 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig index 59f3a3f492cf..3744735fa708 100644 --- a/drivers/net/ethernet/wangxun/Kconfig +++ b/drivers/net/ethernet/wangxun/Kconfig @@ -47,6 +47,8 @@ config TXGBE select PHYLINK select HWMON if TXGBE=y select SFP + select GPIOLIB + select GPIOLIB_IRQCHIP select LIBWX help This driver supports Wangxun(R) 10GbE PCI Express family of diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 3dd328d33fcc..2c3f08be8c37 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -2048,7 +2048,8 @@ void wx_free_irq(struct wx *wx) free_irq(entry->vector, q_vector); } - free_irq(wx->msix_entries[vector].vector, wx); + if (wx->mac.type == wx_mac_em) + free_irq(wx->msix_entries[vector].vector, wx); } EXPORT_SYMBOL(wx_free_irq); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index c61c18a842c4..29dfb561887d 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -83,7 +83,9 @@ #define WX_GPIO_INTMASK 0x14834 #define WX_GPIO_INTTYPE_LEVEL 0x14838 #define WX_GPIO_POLARITY 0x1483C +#define WX_GPIO_INTSTATUS 0x14844 #define WX_GPIO_EOI 0x1484C +#define WX_GPIO_EXT 0x14850 /*********************** Transmit DMA registers **************************/ /* transmit 
global control */ @@ -847,6 +849,7 @@ struct wx { bool wol_enabled; bool ncsi_enabled; bool gpio_ctrl; + raw_spinlock_t gpio_lock; /* Tx fast path data */ int num_tx_queues; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index 920ee3a3bfa3..edfab3859dc3 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -82,6 +82,8 @@ static int txgbe_enumerate_functions(struct wx *wx) **/ static void txgbe_irq_enable(struct wx *wx, bool queues) { + wr32(wx, WX_PX_MISC_IEN, TXGBE_PX_MISC_IEN_MASK); + /* unmask interrupt */ wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); if (queues) @@ -129,17 +131,6 @@ static irqreturn_t txgbe_intr(int __always_unused irq, void *data) return IRQ_HANDLED; } -static irqreturn_t txgbe_msix_other(int __always_unused irq, void *data) -{ - struct wx *wx = data; - - /* re-enable the original interrupt state */ - if (netif_running(wx->netdev)) - txgbe_irq_enable(wx, false); - - return IRQ_HANDLED; -} - /** * txgbe_request_msix_irqs - Initialize MSI-X interrupts * @wx: board private structure @@ -171,13 +162,6 @@ static int txgbe_request_msix_irqs(struct wx *wx) } } - err = request_irq(wx->msix_entries[vector].vector, - txgbe_msix_other, 0, netdev->name, wx); - if (err) { - wx_err(wx, "request_irq for msix_other failed: %d\n", err); - goto free_queue_irqs; - } - return 0; free_queue_irqs: diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index d95dc131e91b..97c018a10c39 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2015 - 2023 Beijing WangXun Technology Co., Ltd. 
*/ +#include +#include #include #include #include @@ -10,6 +12,7 @@ #include #include "../libwx/wx_type.h" +#include "../libwx/wx_hw.h" #include "txgbe_type.h" #include "txgbe_phy.h" @@ -74,6 +77,248 @@ static int txgbe_swnodes_register(struct txgbe *txgbe) return software_node_register_node_group(nodes->group); } +static int txgbe_gpio_get(struct gpio_chip *chip, unsigned int offset) +{ + struct wx *wx = gpiochip_get_data(chip); + int val; + + val = rd32m(wx, WX_GPIO_EXT, BIT(offset)); + + return !!(val & BIT(offset)); +} + +static int txgbe_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) +{ + struct wx *wx = gpiochip_get_data(chip); + u32 val; + + val = rd32(wx, WX_GPIO_DDR); + if (BIT(offset) & val) + return GPIO_LINE_DIRECTION_OUT; + + return GPIO_LINE_DIRECTION_IN; +} + +static int txgbe_gpio_direction_in(struct gpio_chip *chip, unsigned int offset) +{ + struct wx *wx = gpiochip_get_data(chip); + unsigned long flags; + + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + wr32m(wx, WX_GPIO_DDR, BIT(offset), 0); + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); + + return 0; +} + +static int txgbe_gpio_direction_out(struct gpio_chip *chip, unsigned int offset, + int val) +{ + struct wx *wx = gpiochip_get_data(chip); + unsigned long flags; + u32 set; + + set = val ? 
BIT(offset) : 0; + + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + wr32m(wx, WX_GPIO_DR, BIT(offset), set); + wr32m(wx, WX_GPIO_DDR, BIT(offset), BIT(offset)); + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); + + return 0; +} + +static void txgbe_gpio_irq_ack(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + struct wx *wx = gpiochip_get_data(gc); + unsigned long flags; + + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + wr32(wx, WX_GPIO_EOI, BIT(hwirq)); + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); +} + +static void txgbe_gpio_irq_mask(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + struct wx *wx = gpiochip_get_data(gc); + unsigned long flags; + + gpiochip_disable_irq(gc, hwirq); + + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + wr32m(wx, WX_GPIO_INTMASK, BIT(hwirq), BIT(hwirq)); + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); +} + +static void txgbe_gpio_irq_unmask(struct irq_data *d) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + struct wx *wx = gpiochip_get_data(gc); + unsigned long flags; + + gpiochip_enable_irq(gc, hwirq); + + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + wr32m(wx, WX_GPIO_INTMASK, BIT(hwirq), 0); + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); +} + +static void txgbe_toggle_trigger(struct gpio_chip *gc, unsigned int offset) +{ + struct wx *wx = gpiochip_get_data(gc); + u32 pol, val; + + pol = rd32(wx, WX_GPIO_POLARITY); + val = rd32(wx, WX_GPIO_EXT); + + if (val & BIT(offset)) + pol &= ~BIT(offset); + else + pol |= BIT(offset); + + wr32(wx, WX_GPIO_POLARITY, pol); +} + +static int txgbe_gpio_set_type(struct irq_data *d, unsigned int type) +{ + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + irq_hw_number_t hwirq = irqd_to_hwirq(d); + struct wx *wx = gpiochip_get_data(gc); + u32 level, polarity, mask; 
+ unsigned long flags; + + mask = BIT(hwirq); + + if (type & IRQ_TYPE_LEVEL_MASK) { + level = 0; + irq_set_handler_locked(d, handle_level_irq); + } else { + level = mask; + irq_set_handler_locked(d, handle_edge_irq); + } + + if (type == IRQ_TYPE_EDGE_RISING || type == IRQ_TYPE_LEVEL_HIGH) + polarity = mask; + else + polarity = 0; + + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + + wr32m(wx, WX_GPIO_INTEN, mask, mask); + wr32m(wx, WX_GPIO_INTTYPE_LEVEL, mask, level); + if (type == IRQ_TYPE_EDGE_BOTH) + txgbe_toggle_trigger(gc, hwirq); + else + wr32m(wx, WX_GPIO_POLARITY, mask, polarity); + + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); + + return 0; +} + +static const struct irq_chip txgbe_gpio_irq_chip = { + .name = "txgbe_gpio_irq", + .irq_ack = txgbe_gpio_irq_ack, + .irq_mask = txgbe_gpio_irq_mask, + .irq_unmask = txgbe_gpio_irq_unmask, + .irq_set_type = txgbe_gpio_set_type, + .flags = IRQCHIP_IMMUTABLE, + GPIOCHIP_IRQ_RESOURCE_HELPERS, +}; + +static void txgbe_irq_handler(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct wx *wx = irq_desc_get_handler_data(desc); + struct txgbe *txgbe = wx->priv; + irq_hw_number_t hwirq; + unsigned long gpioirq; + struct gpio_chip *gc; + unsigned long flags; + + chained_irq_enter(chip, desc); + + gpioirq = rd32(wx, WX_GPIO_INTSTATUS); + + gc = txgbe->gpio; + for_each_set_bit(hwirq, &gpioirq, gc->ngpio) { + int gpio = irq_find_mapping(gc->irq.domain, hwirq); + u32 irq_type = irq_get_trigger_type(gpio); + + generic_handle_domain_irq(gc->irq.domain, hwirq); + + if ((irq_type & IRQ_TYPE_SENSE_MASK) == IRQ_TYPE_EDGE_BOTH) { + raw_spin_lock_irqsave(&wx->gpio_lock, flags); + txgbe_toggle_trigger(gc, hwirq); + raw_spin_unlock_irqrestore(&wx->gpio_lock, flags); + } + } + + chained_irq_exit(chip, desc); + + /* unmask interrupt */ + wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); +} + +static int txgbe_gpio_init(struct txgbe *txgbe) +{ + struct gpio_irq_chip *girq; + struct gpio_chip *gc; + struct device 
*dev; + struct wx *wx; + int ret; + + wx = txgbe->wx; + dev = &wx->pdev->dev; + + raw_spin_lock_init(&wx->gpio_lock); + + gc = devm_kzalloc(dev, sizeof(*gc), GFP_KERNEL); + if (!gc) + return -ENOMEM; + + gc->label = devm_kasprintf(dev, GFP_KERNEL, "txgbe_gpio-%x", + (wx->pdev->bus->number << 8) | wx->pdev->devfn); + if (!gc->label) + return -ENOMEM; + + gc->base = -1; + gc->ngpio = 6; + gc->owner = THIS_MODULE; + gc->parent = dev; + gc->fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_GPIO]); + gc->get = txgbe_gpio_get; + gc->get_direction = txgbe_gpio_get_direction; + gc->direction_input = txgbe_gpio_direction_in; + gc->direction_output = txgbe_gpio_direction_out; + + girq = &gc->irq; + gpio_irq_chip_set_chip(girq, &txgbe_gpio_irq_chip); + girq->parent_handler = txgbe_irq_handler; + girq->parent_handler_data = wx; + girq->num_parents = 1; + girq->parents = devm_kcalloc(dev, girq->num_parents, + sizeof(*girq->parents), GFP_KERNEL); + if (!girq->parents) + return -ENOMEM; + girq->parents[0] = wx->msix_entries[wx->num_q_vectors].vector; + girq->default_type = IRQ_TYPE_NONE; + girq->handler = handle_bad_irq; + + ret = devm_gpiochip_add_data(dev, gc, wx); + if (ret) + return ret; + + txgbe->gpio = gc; + + return 0; +} + static int txgbe_clock_register(struct txgbe *txgbe) { struct pci_dev *pdev = txgbe->wx->pdev; @@ -187,6 +432,12 @@ int txgbe_init_phy(struct txgbe *txgbe) return ret; } + ret = txgbe_gpio_init(txgbe); + if (ret) { + wx_err(txgbe->wx, "failed to init gpio\n"); + goto err_unregister_swnode; + } + ret = txgbe_clock_register(txgbe); if (ret) { wx_err(txgbe->wx, "failed to register clock: %d\n", ret); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index f420e2569247..60ecc5a90203 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -55,6 +55,28 @@ #define TXGBE_TS_CTL 0x10300 #define TXGBE_TS_CTL_EVAL_MD BIT(31) +/* GPIO 
register bit */ +#define TXGBE_GPIOBIT_0 BIT(0) /* I:tx fault */ +#define TXGBE_GPIOBIT_1 BIT(1) /* O:tx disabled */ +#define TXGBE_GPIOBIT_2 BIT(2) /* I:sfp module absent */ +#define TXGBE_GPIOBIT_3 BIT(3) /* I:rx signal lost */ +#define TXGBE_GPIOBIT_4 BIT(4) /* O:rate select, 1G(0) 10G(1) */ +#define TXGBE_GPIOBIT_5 BIT(5) /* O:rate select, 1G(0) 10G(1) */ + +/* Extended Interrupt Enable Set */ +#define TXGBE_PX_MISC_ETH_LKDN BIT(8) +#define TXGBE_PX_MISC_DEV_RST BIT(10) +#define TXGBE_PX_MISC_ETH_EVENT BIT(17) +#define TXGBE_PX_MISC_ETH_LK BIT(18) +#define TXGBE_PX_MISC_ETH_AN BIT(19) +#define TXGBE_PX_MISC_INT_ERR BIT(20) +#define TXGBE_PX_MISC_GPIO BIT(26) +#define TXGBE_PX_MISC_IEN_MASK \ + (TXGBE_PX_MISC_ETH_LKDN | TXGBE_PX_MISC_DEV_RST | \ + TXGBE_PX_MISC_ETH_EVENT | TXGBE_PX_MISC_ETH_LK | \ + TXGBE_PX_MISC_ETH_AN | TXGBE_PX_MISC_INT_ERR | \ + TXGBE_PX_MISC_GPIO) + /* I2C registers */ #define TXGBE_I2C_BASE 0x14900 @@ -154,6 +176,7 @@ struct txgbe { struct platform_device *i2c_dev; struct clk_lookup *clock; struct clk *clk; + struct gpio_chip *gpio; }; #endif /* _TXGBE_TYPE_H_ */ -- cgit v1.2.3 From 854cace61387b6f60734d9ec254443a6894c480d Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 6 Jun 2023 17:21:06 +0800 Subject: net: txgbe: Implement phylink pcs Register MDIO bus for PCS layer to use Synopsys designware XPCS, support 10GBASE-R interface to the controller. 
Signed-off-by: Jiawen Wu Reviewed-by: Andrew Lunn Reviewed-by: Maciej Fijalkowski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/Kconfig | 1 + drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c | 89 ++++++++++++++++++++++++- drivers/net/ethernet/wangxun/txgbe/txgbe_type.h | 5 ++ 3 files changed, 93 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig index 3744735fa708..39596cd13539 100644 --- a/drivers/net/ethernet/wangxun/Kconfig +++ b/drivers/net/ethernet/wangxun/Kconfig @@ -49,6 +49,7 @@ config TXGBE select SFP select GPIOLIB select GPIOLIB_IRQCHIP + select PCS_XPCS select LIBWX help This driver supports Wangxun(R) 10GbE PCI Express family of diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 97c018a10c39..58e12c35627a 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "../libwx/wx_type.h" #include "../libwx/wx_hw.h" @@ -77,6 +78,81 @@ static int txgbe_swnodes_register(struct txgbe *txgbe) return software_node_register_node_group(nodes->group); } +static int txgbe_pcs_read(struct mii_bus *bus, int addr, int devnum, int regnum) +{ + struct wx *wx = bus->priv; + u32 offset, val; + + if (addr) + return -EOPNOTSUPP; + + offset = devnum << 16 | regnum; + + /* Set the LAN port indicator to IDA_ADDR */ + wr32(wx, TXGBE_XPCS_IDA_ADDR, offset); + + /* Read the data from IDA_DATA register */ + val = rd32(wx, TXGBE_XPCS_IDA_DATA); + + return (u16)val; +} + +static int txgbe_pcs_write(struct mii_bus *bus, int addr, int devnum, int regnum, u16 val) +{ + struct wx *wx = bus->priv; + u32 offset; + + if (addr) + return -EOPNOTSUPP; + + offset = devnum << 16 | regnum; + + /* Set the LAN port indicator to IDA_ADDR */ + wr32(wx, TXGBE_XPCS_IDA_ADDR, offset); + + /* Write the data to 
IDA_DATA register */ + wr32(wx, TXGBE_XPCS_IDA_DATA, val); + + return 0; +} + +static int txgbe_mdio_pcs_init(struct txgbe *txgbe) +{ + struct mii_bus *mii_bus; + struct dw_xpcs *xpcs; + struct pci_dev *pdev; + struct wx *wx; + int ret = 0; + + wx = txgbe->wx; + pdev = wx->pdev; + + mii_bus = devm_mdiobus_alloc(&pdev->dev); + if (!mii_bus) + return -ENOMEM; + + mii_bus->name = "txgbe_pcs_mdio_bus"; + mii_bus->read_c45 = &txgbe_pcs_read; + mii_bus->write_c45 = &txgbe_pcs_write; + mii_bus->parent = &pdev->dev; + mii_bus->phy_mask = ~0; + mii_bus->priv = wx; + snprintf(mii_bus->id, MII_BUS_ID_SIZE, "txgbe_pcs-%x", + (pdev->bus->number << 8) | pdev->devfn); + + ret = devm_mdiobus_register(&pdev->dev, mii_bus); + if (ret) + return ret; + + xpcs = xpcs_create_mdiodev(mii_bus, 0, PHY_INTERFACE_MODE_10GBASER); + if (IS_ERR(xpcs)) + return PTR_ERR(xpcs); + + txgbe->xpcs = xpcs; + + return 0; +} + static int txgbe_gpio_get(struct gpio_chip *chip, unsigned int offset) { struct wx *wx = gpiochip_get_data(chip); @@ -432,16 +508,22 @@ int txgbe_init_phy(struct txgbe *txgbe) return ret; } + ret = txgbe_mdio_pcs_init(txgbe); + if (ret) { + wx_err(txgbe->wx, "failed to init mdio pcs: %d\n", ret); + goto err_unregister_swnode; + } + ret = txgbe_gpio_init(txgbe); if (ret) { wx_err(txgbe->wx, "failed to init gpio\n"); - goto err_unregister_swnode; + goto err_destroy_xpcs; } ret = txgbe_clock_register(txgbe); if (ret) { wx_err(txgbe->wx, "failed to register clock: %d\n", ret); - goto err_unregister_swnode; + goto err_destroy_xpcs; } ret = txgbe_i2c_register(txgbe); @@ -463,6 +545,8 @@ err_unregister_i2c: err_unregister_clk: clkdev_drop(txgbe->clock); clk_unregister(txgbe->clk); +err_destroy_xpcs: + xpcs_destroy(txgbe->xpcs); err_unregister_swnode: software_node_unregister_node_group(txgbe->nodes.group); @@ -475,5 +559,6 @@ void txgbe_remove_phy(struct txgbe *txgbe) platform_device_unregister(txgbe->i2c_dev); clkdev_drop(txgbe->clock); clk_unregister(txgbe->clk); + 
xpcs_destroy(txgbe->xpcs); software_node_unregister_node_group(txgbe->nodes.group); } diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 60ecc5a90203..76470582ba1e 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -80,6 +80,10 @@ /* I2C registers */ #define TXGBE_I2C_BASE 0x14900 +/************************************** ETH PHY ******************************/ +#define TXGBE_XPCS_IDA_ADDR 0x13000 +#define TXGBE_XPCS_IDA_DATA 0x13004 + /* Part Number String Length */ #define TXGBE_PBANUM_LENGTH 32 @@ -172,6 +176,7 @@ struct txgbe_nodes { struct txgbe { struct wx *wx; struct txgbe_nodes nodes; + struct dw_xpcs *xpcs; struct platform_device *sfp_dev; struct platform_device *i2c_dev; struct clk_lookup *clock; -- cgit v1.2.3 From 08f08f9390e4d7770e400b98eb4a25909593508e Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 6 Jun 2023 17:21:07 +0800 Subject: net: txgbe: Support phylink MAC layer Add phylink support to Wangxun 10Gb Ethernet controller for the 10GBASE-R interface. 
Signed-off-by: Jiawen Wu Reviewed-by: Maciej Fijalkowski Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c | 28 +++++ drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 23 ++--- drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c | 113 ++++++++++++++++++++- drivers/net/ethernet/wangxun/txgbe/txgbe_type.h | 5 + 4 files changed, 154 insertions(+), 15 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c index d914e9a05404..859da112586a 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c @@ -6,11 +6,39 @@ #include #include "../libwx/wx_ethtool.h" +#include "../libwx/wx_type.h" +#include "txgbe_type.h" #include "txgbe_ethtool.h" +static int txgbe_nway_reset(struct net_device *netdev) +{ + struct txgbe *txgbe = netdev_to_txgbe(netdev); + + return phylink_ethtool_nway_reset(txgbe->phylink); +} + +static int txgbe_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct txgbe *txgbe = netdev_to_txgbe(netdev); + + return phylink_ethtool_ksettings_get(txgbe->phylink, cmd); +} + +static int txgbe_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *cmd) +{ + struct txgbe *txgbe = netdev_to_txgbe(netdev); + + return phylink_ethtool_ksettings_set(txgbe->phylink, cmd); +} + static const struct ethtool_ops txgbe_ethtool_ops = { .get_drvinfo = wx_get_drvinfo, + .nway_reset = txgbe_nway_reset, .get_link = ethtool_op_get_link, + .get_link_ksettings = txgbe_get_link_ksettings, + .set_link_ksettings = txgbe_set_link_ksettings, }; void txgbe_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index edfab3859dc3..46eba6d6188b 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ 
b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -204,7 +205,8 @@ static int txgbe_request_irq(struct wx *wx) static void txgbe_up_complete(struct wx *wx) { - u32 reg; + struct net_device *netdev = wx->netdev; + struct txgbe *txgbe; wx_control_hw(wx, true); wx_configure_vectors(wx); @@ -213,24 +215,17 @@ static void txgbe_up_complete(struct wx *wx) smp_mb__before_atomic(); wx_napi_enable_all(wx); + txgbe = netdev_to_txgbe(netdev); + phylink_start(txgbe->phylink); + /* clear any pending interrupts, may auto mask */ rd32(wx, WX_PX_IC(0)); rd32(wx, WX_PX_IC(1)); rd32(wx, WX_PX_MISC_IC); txgbe_irq_enable(wx, true); - /* Configure MAC Rx and Tx when link is up */ - reg = rd32(wx, WX_MAC_RX_CFG); - wr32(wx, WX_MAC_RX_CFG, reg); - wr32(wx, WX_MAC_PKT_FLT, WX_MAC_PKT_FLT_PR); - reg = rd32(wx, WX_MAC_WDG_TIMEOUT); - wr32(wx, WX_MAC_WDG_TIMEOUT, reg); - reg = rd32(wx, WX_MAC_TX_CFG); - wr32(wx, WX_MAC_TX_CFG, (reg & ~WX_MAC_TX_CFG_SPEED_MASK) | WX_MAC_TX_CFG_SPEED_10G); - /* enable transmits */ - netif_tx_start_all_queues(wx->netdev); - netif_carrier_on(wx->netdev); + netif_tx_start_all_queues(netdev); } static void txgbe_reset(struct wx *wx) @@ -265,7 +260,6 @@ static void txgbe_disable_device(struct wx *wx) wx_disable_rx_queue(wx, wx->rx_ring[i]); netif_tx_stop_all_queues(netdev); - netif_carrier_off(netdev); netif_tx_disable(netdev); wx_irq_disable(wx); @@ -296,8 +290,11 @@ static void txgbe_disable_device(struct wx *wx) static void txgbe_down(struct wx *wx) { + struct txgbe *txgbe = netdev_to_txgbe(wx->netdev); + txgbe_disable_device(wx); txgbe_reset(wx); + phylink_stop(txgbe->phylink); wx_clean_all_tx_rings(wx); wx_clean_all_rx_rings(wx); diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c index 58e12c35627a..8779645a54be 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c @@ 
-11,8 +11,10 @@ #include #include #include +#include #include "../libwx/wx_type.h" +#include "../libwx/wx_lib.h" #include "../libwx/wx_hw.h" #include "txgbe_type.h" #include "txgbe_phy.h" @@ -153,6 +155,95 @@ static int txgbe_mdio_pcs_init(struct txgbe *txgbe) return 0; } +static struct phylink_pcs *txgbe_phylink_mac_select(struct phylink_config *config, + phy_interface_t interface) +{ + struct txgbe *txgbe = netdev_to_txgbe(to_net_dev(config->dev)); + + return &txgbe->xpcs->pcs; +} + +static void txgbe_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) +{ +} + +static void txgbe_mac_link_down(struct phylink_config *config, + unsigned int mode, phy_interface_t interface) +{ + struct wx *wx = netdev_priv(to_net_dev(config->dev)); + + wr32m(wx, WX_MAC_TX_CFG, WX_MAC_TX_CFG_TE, 0); +} + +static void txgbe_mac_link_up(struct phylink_config *config, + struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, + bool tx_pause, bool rx_pause) +{ + struct wx *wx = netdev_priv(to_net_dev(config->dev)); + u32 txcfg, wdg; + + txcfg = rd32(wx, WX_MAC_TX_CFG); + txcfg &= ~WX_MAC_TX_CFG_SPEED_MASK; + + switch (speed) { + case SPEED_10000: + txcfg |= WX_MAC_TX_CFG_SPEED_10G; + break; + case SPEED_1000: + case SPEED_100: + case SPEED_10: + txcfg |= WX_MAC_TX_CFG_SPEED_1G; + break; + default: + break; + } + + wr32(wx, WX_MAC_TX_CFG, txcfg | WX_MAC_TX_CFG_TE); + + /* Re configure MAC Rx */ + wr32m(wx, WX_MAC_RX_CFG, WX_MAC_RX_CFG_RE, WX_MAC_RX_CFG_RE); + wr32(wx, WX_MAC_PKT_FLT, WX_MAC_PKT_FLT_PR); + wdg = rd32(wx, WX_MAC_WDG_TIMEOUT); + wr32(wx, WX_MAC_WDG_TIMEOUT, wdg); +} + +static const struct phylink_mac_ops txgbe_mac_ops = { + .mac_select_pcs = txgbe_phylink_mac_select, + .mac_config = txgbe_mac_config, + .mac_link_down = txgbe_mac_link_down, + .mac_link_up = txgbe_mac_link_up, +}; + +static int txgbe_phylink_init(struct txgbe *txgbe) +{ + struct phylink_config *config; + struct 
fwnode_handle *fwnode; + struct wx *wx = txgbe->wx; + phy_interface_t phy_mode; + struct phylink *phylink; + + config = devm_kzalloc(&wx->pdev->dev, sizeof(*config), GFP_KERNEL); + if (!config) + return -ENOMEM; + + config->dev = &wx->netdev->dev; + config->type = PHYLINK_NETDEV; + config->mac_capabilities = MAC_10000FD | MAC_1000FD | MAC_SYM_PAUSE | MAC_ASYM_PAUSE; + phy_mode = PHY_INTERFACE_MODE_10GBASER; + __set_bit(PHY_INTERFACE_MODE_10GBASER, config->supported_interfaces); + fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_PHYLINK]); + phylink = phylink_create(config, fwnode, phy_mode, &txgbe_mac_ops); + if (IS_ERR(phylink)) + return PTR_ERR(phylink); + + txgbe->phylink = phylink; + + return 0; +} + static int txgbe_gpio_get(struct gpio_chip *chip, unsigned int offset) { struct wx *wx = gpiochip_get_data(chip); @@ -316,6 +407,9 @@ static void txgbe_irq_handler(struct irq_desc *desc) unsigned long gpioirq; struct gpio_chip *gc; unsigned long flags; + u32 eicr; + + eicr = wx_misc_isb(wx, WX_ISB_MISC); chained_irq_enter(chip, desc); @@ -337,6 +431,12 @@ static void txgbe_irq_handler(struct irq_desc *desc) chained_irq_exit(chip, desc); + if (eicr & (TXGBE_PX_MISC_ETH_LK | TXGBE_PX_MISC_ETH_LKDN)) { + u32 reg = rd32(wx, TXGBE_CFG_PORT_ST); + + phylink_mac_change(txgbe->phylink, !!(reg & TXGBE_CFG_PORT_ST_LINK_UP)); + } + /* unmask interrupt */ wx_intr_enable(wx, TXGBE_INTR_MISC(wx)); } @@ -514,16 +614,22 @@ int txgbe_init_phy(struct txgbe *txgbe) goto err_unregister_swnode; } + ret = txgbe_phylink_init(txgbe); + if (ret) { + wx_err(txgbe->wx, "failed to init phylink\n"); + goto err_destroy_xpcs; + } + ret = txgbe_gpio_init(txgbe); if (ret) { wx_err(txgbe->wx, "failed to init gpio\n"); - goto err_destroy_xpcs; + goto err_destroy_phylink; } ret = txgbe_clock_register(txgbe); if (ret) { wx_err(txgbe->wx, "failed to register clock: %d\n", ret); - goto err_destroy_xpcs; + goto err_destroy_phylink; } ret = txgbe_i2c_register(txgbe); @@ -545,6 +651,8 @@ 
err_unregister_i2c: err_unregister_clk: clkdev_drop(txgbe->clock); clk_unregister(txgbe->clk); +err_destroy_phylink: + phylink_destroy(txgbe->phylink); err_destroy_xpcs: xpcs_destroy(txgbe->xpcs); err_unregister_swnode: @@ -559,6 +667,7 @@ void txgbe_remove_phy(struct txgbe *txgbe) platform_device_unregister(txgbe->i2c_dev); clkdev_drop(txgbe->clock); clk_unregister(txgbe->clk); + phylink_destroy(txgbe->phylink); xpcs_destroy(txgbe->xpcs); software_node_unregister_node_group(txgbe->nodes.group); } diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h index 76470582ba1e..51199c355f95 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h @@ -77,6 +77,10 @@ TXGBE_PX_MISC_ETH_AN | TXGBE_PX_MISC_INT_ERR | \ TXGBE_PX_MISC_GPIO) +/* Port cfg registers */ +#define TXGBE_CFG_PORT_ST 0x14404 +#define TXGBE_CFG_PORT_ST_LINK_UP BIT(0) + /* I2C registers */ #define TXGBE_I2C_BASE 0x14900 @@ -177,6 +181,7 @@ struct txgbe { struct wx *wx; struct txgbe_nodes nodes; struct dw_xpcs *xpcs; + struct phylink *phylink; struct platform_device *sfp_dev; struct platform_device *i2c_dev; struct clk_lookup *clock; -- cgit v1.2.3 From 05a1308a2e08e4a375bf60eb4c6c057a201d81fc Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 25 May 2023 12:52:58 +0200 Subject: ice: Don't dereference NULL in ice_gnss_read error path If pf is NULL in ice_gnss_read() then it will be dereferenced in the error path by a call to dev_dbg(ice_pf_to_dev(pf), ...). Avoid this by simply returning in this case. If logging is desired an alternate approach might be to use pr_err() before returning. 
Flagged by Smatch as: .../ice_gnss.c:196 ice_gnss_read() error: we previously assumed 'pf' could be null (see line 131) Fixes: 43113ff73453 ("ice: add TTY for GNSS module for E810T device") Signed-off-by: Simon Horman Reviewed-by: Tariq Toukan Tested-by: Sunitha Mekala (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_gnss.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index bd0ed155e11b..75c9de675f20 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -96,12 +96,7 @@ static void ice_gnss_read(struct kthread_work *work) int err = 0; pf = gnss->back; - if (!pf) { - err = -EFAULT; - goto exit; - } - - if (!test_bit(ICE_FLAG_GNSS, pf->flags)) + if (!pf || !test_bit(ICE_FLAG_GNSS, pf->flags)) return; hw = &pf->hw; @@ -159,7 +154,6 @@ free_buf: free_page((unsigned long)buf); requeue: kthread_queue_delayed_work(gnss->kworker, &gnss->read_work, delay); -exit: if (err) dev_dbg(ice_pf_to_dev(pf), "GNSS failed to read err=%d\n", err); } -- cgit v1.2.3 From 6e8b2c88fc8cf95ed09de25946b20b7536c88cd5 Mon Sep 17 00:00:00 2001 From: Karol Kolacinski Date: Thu, 1 Jun 2023 14:15:03 -0700 Subject: ice: handle extts in the miscellaneous interrupt thread The ice_ptp_extts_work() and ice_ptp_periodic_work() functions are both scheduled on the same kthread worker, pf.ptp.kworker. The ice_ptp_periodic_work() function sends to the firmware to interact with the PHY, and must block to wait for responses. This can cause delay in responding to the PFINT_OICR_TSYN_EVNT interrupt cause, ultimately resulting in disruption to processing an input signal if the frequency is high enough. In our testing, even 100 Hz signals get disrupted. Fix this by instead processing the signal inside the miscellaneous interrupt thread prior to handling Tx timestamps. 
Use atomic bits in a new pf->misc_thread bitmap in order to safely communicate which tasks require processing within the ice_misc_intr_thread_fn(). This ensures the communication of desired tasks from the ice_misc_intr() are correctly processed without racing even in the event that the interrupt triggers again before the thread function exits. Fixes: 172db5f91d5f ("ice: add support for auxiliary input/output pins") Signed-off-by: Karol Kolacinski Signed-off-by: Jacob Keller Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 7 +++++++ drivers/net/ethernet/intel/ice/ice_main.c | 29 +++++++++++++++++++++-------- drivers/net/ethernet/intel/ice/ice_ptp.c | 12 +++--------- drivers/net/ethernet/intel/ice/ice_ptp.h | 4 ++-- 4 files changed, 33 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index b4bca1d964a9..4ba3d99439a0 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -508,6 +508,12 @@ enum ice_pf_flags { ICE_PF_FLAGS_NBITS /* must be last */ }; +enum ice_misc_thread_tasks { + ICE_MISC_THREAD_EXTTS_EVENT, + ICE_MISC_THREAD_TX_TSTAMP, + ICE_MISC_THREAD_NBITS /* must be last */ +}; + struct ice_switchdev_info { struct ice_vsi *control_vsi; struct ice_vsi *uplink_vsi; @@ -550,6 +556,7 @@ struct ice_pf { DECLARE_BITMAP(features, ICE_F_MAX); DECLARE_BITMAP(state, ICE_STATE_NBITS); DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS); + DECLARE_BITMAP(misc_thread, ICE_MISC_THREAD_NBITS); unsigned long *avail_txqs; /* bitmap to track PF Tx queue usage */ unsigned long *avail_rxqs; /* bitmap to track PF Rx queue usage */ unsigned long serv_tmr_period; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 62e91512aeab..314a42808e39 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ 
b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3139,20 +3139,28 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & PFINT_OICR_TSYN_TX_M) { ena_mask &= ~PFINT_OICR_TSYN_TX_M; - if (!hw->reset_ongoing) + if (!hw->reset_ongoing) { + set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread); ret = IRQ_WAKE_THREAD; + } } if (oicr & PFINT_OICR_TSYN_EVNT_M) { u8 tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned; u32 gltsyn_stat = rd32(hw, GLTSYN_STAT(tmr_idx)); - /* Save EVENTs from GTSYN register */ - pf->ptp.ext_ts_irq |= gltsyn_stat & (GLTSYN_STAT_EVENT0_M | - GLTSYN_STAT_EVENT1_M | - GLTSYN_STAT_EVENT2_M); ena_mask &= ~PFINT_OICR_TSYN_EVNT_M; - kthread_queue_work(pf->ptp.kworker, &pf->ptp.extts_work); + + if (hw->func_caps.ts_func_info.src_tmr_owned) { + /* Save EVENTs from GLTSYN register */ + pf->ptp.ext_ts_irq |= gltsyn_stat & + (GLTSYN_STAT_EVENT0_M | + GLTSYN_STAT_EVENT1_M | + GLTSYN_STAT_EVENT2_M); + + set_bit(ICE_MISC_THREAD_EXTTS_EVENT, pf->misc_thread); + ret = IRQ_WAKE_THREAD; + } } #define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M) @@ -3196,8 +3204,13 @@ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) if (ice_is_reset_in_progress(pf->state)) return IRQ_HANDLED; - while (!ice_ptp_process_ts(pf)) - usleep_range(50, 100); + if (test_and_clear_bit(ICE_MISC_THREAD_EXTTS_EVENT, pf->misc_thread)) + ice_ptp_extts_event(pf); + + if (test_and_clear_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread)) { + while (!ice_ptp_process_ts(pf)) + usleep_range(50, 100); + } return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index d4b6c997141d..6f51ebaf1d70 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1458,15 +1458,11 @@ static int ice_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm) } /** - * ice_ptp_extts_work - Workqueue task function - 
* @work: external timestamp work structure - * - * Service for PTP external clock event + * ice_ptp_extts_event - Process PTP external clock event + * @pf: Board private structure */ -static void ice_ptp_extts_work(struct kthread_work *work) +void ice_ptp_extts_event(struct ice_pf *pf) { - struct ice_ptp *ptp = container_of(work, struct ice_ptp, extts_work); - struct ice_pf *pf = container_of(ptp, struct ice_pf, ptp); struct ptp_clock_event event; struct ice_hw *hw = &pf->hw; u8 chan, tmr_idx; @@ -2558,7 +2554,6 @@ void ice_ptp_prepare_for_reset(struct ice_pf *pf) ice_ptp_cfg_timestamp(pf, false); kthread_cancel_delayed_work_sync(&ptp->work); - kthread_cancel_work_sync(&ptp->extts_work); if (test_bit(ICE_PFR_REQ, pf->state)) return; @@ -2656,7 +2651,6 @@ static int ice_ptp_init_work(struct ice_pf *pf, struct ice_ptp *ptp) /* Initialize work functions */ kthread_init_delayed_work(&ptp->work, ice_ptp_periodic_work); - kthread_init_work(&ptp->extts_work, ice_ptp_extts_work); /* Allocate a kworker for handling work required for the ports * connected to the PTP hardware clock. 
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 9cda2f43e0e5..9f8902c1e743 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -169,7 +169,6 @@ struct ice_ptp_port { * struct ice_ptp - data used for integrating with CONFIG_PTP_1588_CLOCK * @port: data for the PHY port initialization procedure * @work: delayed work function for periodic tasks - * @extts_work: work function for handling external Tx timestamps * @cached_phc_time: a cached copy of the PHC time for timestamp extension * @cached_phc_jiffies: jiffies when cached_phc_time was last updated * @ext_ts_chan: the external timestamp channel in use @@ -190,7 +189,6 @@ struct ice_ptp_port { struct ice_ptp { struct ice_ptp_port port; struct kthread_delayed_work work; - struct kthread_work extts_work; u64 cached_phc_time; unsigned long cached_phc_jiffies; u8 ext_ts_chan; @@ -256,6 +254,7 @@ int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr); void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena); int ice_get_ptp_clock_index(struct ice_pf *pf); +void ice_ptp_extts_event(struct ice_pf *pf); s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); bool ice_ptp_process_ts(struct ice_pf *pf); @@ -284,6 +283,7 @@ static inline int ice_get_ptp_clock_index(struct ice_pf *pf) return -1; } +static inline void ice_ptp_extts_event(struct ice_pf *pf) { } static inline s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) { -- cgit v1.2.3 From d578e618f192f453baf4fd7e32fec88ed7e678b8 Mon Sep 17 00:00:00 2001 From: Karol Kolacinski Date: Thu, 1 Jun 2023 14:15:04 -0700 Subject: ice: always return IRQ_WAKE_THREAD in ice_misc_intr() Refactor the ice_misc_intr() function to always return IRQ_WAKE_THREAD, and schedule the service task during the soft IRQ thread function instead of at the end of the hard IRQ handler. 
Remove the duplicate call to ice_service_task_schedule() that happened when we got a PCI exception. Signed-off-by: Karol Kolacinski Signed-off-by: Jacob Keller Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 314a42808e39..45dab7f62198 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3058,7 +3058,6 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) { struct ice_pf *pf = (struct ice_pf *)data; struct ice_hw *hw = &pf->hw; - irqreturn_t ret = IRQ_NONE; struct device *dev; u32 oicr, ena_mask; @@ -3139,10 +3138,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & PFINT_OICR_TSYN_TX_M) { ena_mask &= ~PFINT_OICR_TSYN_TX_M; - if (!hw->reset_ongoing) { + if (!hw->reset_ongoing) set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread); - ret = IRQ_WAKE_THREAD; - } } if (oicr & PFINT_OICR_TSYN_EVNT_M) { @@ -3159,7 +3156,6 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) GLTSYN_STAT_EVENT2_M); set_bit(ICE_MISC_THREAD_EXTTS_EVENT, pf->misc_thread); - ret = IRQ_WAKE_THREAD; } } @@ -3180,16 +3176,12 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) if (oicr & (PFINT_OICR_PCI_EXCEPTION_M | PFINT_OICR_ECC_ERR_M)) { set_bit(ICE_PFR_REQ, pf->state); - ice_service_task_schedule(pf); } } - if (!ret) - ret = IRQ_HANDLED; - ice_service_task_schedule(pf); ice_irq_dynamic_ena(hw, NULL, NULL); - return ret; + return IRQ_WAKE_THREAD; } /** @@ -3204,6 +3196,8 @@ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) if (ice_is_reset_in_progress(pf->state)) return IRQ_HANDLED; + ice_service_task_schedule(pf); + if 
(test_and_clear_bit(ICE_MISC_THREAD_EXTTS_EVENT, pf->misc_thread)) ice_ptp_extts_event(pf); -- cgit v1.2.3 From ae39eb42dd06e058215d0f782365b84039d686d4 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 1 Jun 2023 14:15:05 -0700 Subject: ice: introduce ICE_TX_TSTAMP_WORK enumeration The ice_ptp_process_ts() function and its various helper functions return a boolean value indicating whether any work is remaining. This use of a boolean has grown confusing as we have multiple helpers that pass status between each other. Readers must be aware of what "true" and "false" mean, and it is very easy to get their meaning inverted. The names of the functions are not standard "yes/no" questions, which is the best practice for boolean returns. Replace this use of a boolean with a custom type, enum ice_tx_tstamp_work. This enumeration clearly indicates whether all work is done, or if more work is pending. To aid in readability, factor the actual list iteration and processing out into ice_ptp_process_tx_tstamp(), making it void. Then call this in ice_ptp_tx_tstamp() ensuring that we always check the Tracker list at the end when determining the appropriate return value. Now the return value is an explicit name instead of the true or false value. This is easier to follow and makes reading the resulting callers much simpler. In addition, this paves the way for future work to allow E822 hardware to process timestamps for all functions using a single interrupt on the clock owning PF. 
Signed-off-by: Jacob Keller Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- drivers/net/ethernet/intel/ice/ice_ptp.c | 50 ++++++++++++++++++------------- drivers/net/ethernet/intel/ice/ice_ptp.h | 12 +++++++- 3 files changed, 42 insertions(+), 22 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 45dab7f62198..6811e2a3c154 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3202,7 +3202,7 @@ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) ice_ptp_extts_event(pf); if (test_and_clear_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread)) { - while (!ice_ptp_process_ts(pf)) + while (ice_ptp_process_ts(pf) == ICE_TX_TSTAMP_WORK_PENDING) usleep_range(50, 100); } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 6f51ebaf1d70..81d96a40d5a7 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -617,7 +617,7 @@ ice_ptp_is_tx_tracker_up(struct ice_ptp_tx *tx) } /** - * ice_ptp_tx_tstamp - Process Tx timestamps for a port + * ice_ptp_process_tx_tstamp - Process Tx timestamps for a port * @tx: the PTP Tx timestamp tracker * * Process timestamps captured by the PHY associated with this port. To do @@ -633,15 +633,6 @@ ice_ptp_is_tx_tracker_up(struct ice_ptp_tx *tx) * 6) extend the 40 bit timestamp value to get a 64 bit timestamp value * 7) send this 64 bit timestamp to the stack * - * Returns true if all timestamps were handled, and false if any slots remain - * without a timestamp. - * - * After looping, if we still have waiting SKBs, return false. This may cause - * us effectively poll even when not strictly necessary. 
We do this because - * it's possible a new timestamp was requested around the same time as the - * interrupt. In some cases hardware might not interrupt us again when the - * timestamp is captured. - * * Note that we do not hold the tracking lock while reading the Tx timestamp. * This is because reading the timestamp requires taking a mutex that might * sleep. @@ -673,10 +664,9 @@ ice_ptp_is_tx_tracker_up(struct ice_ptp_tx *tx) * the packet will never be sent by hardware and discard it without reading * the timestamp register. */ -static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) +static void ice_ptp_process_tx_tstamp(struct ice_ptp_tx *tx) { struct ice_ptp_port *ptp_port; - bool more_timestamps; struct ice_pf *pf; struct ice_hw *hw; u64 tstamp_ready; @@ -685,7 +675,7 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) u8 idx; if (!tx->init) - return true; + return; ptp_port = container_of(tx, struct ice_ptp_port, tx); pf = ptp_port_to_pf(ptp_port); @@ -694,7 +684,7 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) /* Read the Tx ready status first */ err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready); if (err) - return false; + return; /* Drop packets if the link went down */ link_up = ptp_port->link_up; @@ -782,15 +772,34 @@ skip_ts_read: skb_tstamp_tx(skb, &shhwtstamps); dev_kfree_skb_any(skb); } +} - /* Check if we still have work to do. If so, re-queue this task to - * poll for remaining timestamps. - */ +/** + * ice_ptp_tx_tstamp - Process Tx timestamps for this function. + * @tx: Tx tracking structure to initialize + * + * Returns: ICE_TX_TSTAMP_WORK_PENDING if there are any outstanding incomplete + * Tx timestamps, or ICE_TX_TSTAMP_WORK_DONE otherwise. 
+ */ +static enum ice_tx_tstamp_work ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) +{ + bool more_timestamps; + + if (!tx->init) + return ICE_TX_TSTAMP_WORK_DONE; + + /* Process the Tx timestamp tracker */ + ice_ptp_process_tx_tstamp(tx); + + /* Check if there are outstanding Tx timestamps */ spin_lock(&tx->lock); more_timestamps = tx->init && !bitmap_empty(tx->in_use, tx->len); spin_unlock(&tx->lock); - return !more_timestamps; + if (more_timestamps) + return ICE_TX_TSTAMP_WORK_PENDING; + + return ICE_TX_TSTAMP_WORK_DONE; } /** @@ -2426,9 +2435,10 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) * ice_ptp_process_ts - Process the PTP Tx timestamps * @pf: Board private structure * - * Returns true if timestamps are processed. + * Returns: ICE_TX_TSTAMP_WORK_PENDING if there are any outstanding Tx + * timestamps that need processing, and ICE_TX_TSTAMP_WORK_DONE otherwise. */ -bool ice_ptp_process_ts(struct ice_pf *pf) +enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf) { return ice_ptp_tx_tstamp(&pf->ptp.port.tx); } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 9f8902c1e743..995a57019ba7 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -108,6 +108,16 @@ struct ice_tx_tstamp { u64 cached_tstamp; }; +/** + * enum ice_tx_tstamp_work - Status of Tx timestamp work function + * @ICE_TX_TSTAMP_WORK_DONE: Tx timestamp processing is complete + * @ICE_TX_TSTAMP_WORK_PENDING: More Tx timestamps are pending + */ +enum ice_tx_tstamp_work { + ICE_TX_TSTAMP_WORK_DONE = 0, + ICE_TX_TSTAMP_WORK_PENDING, +}; + /** * struct ice_ptp_tx - Tracking structure for all Tx timestamp requests on a port * @lock: lock to prevent concurrent access to fields of this struct @@ -256,7 +266,7 @@ int ice_get_ptp_clock_index(struct ice_pf *pf); void ice_ptp_extts_event(struct ice_pf *pf); s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); -bool 
ice_ptp_process_ts(struct ice_pf *pf); +enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf); void ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, -- cgit v1.2.3 From 78c50d6961fc05491ebbc71c35d87324b1a4f49a Mon Sep 17 00:00:00 2001 From: Kamil Maziarz Date: Tue, 6 Jun 2023 12:33:58 +0200 Subject: ice: Fix XDP memory leak when NIC is brought up and down Fix the buffer leak that occurs while switching the port up and down with traffic and XDP by checking for an active XDP program and freeing all empty TX buffers. Fixes: efc2214b6047 ("ice: Add support for XDP") Signed-off-by: Kamil Maziarz Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Acked-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index a1f7c8edc22f..03513d4871ab 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7056,6 +7056,10 @@ int ice_down(struct ice_vsi *vsi) ice_for_each_txq(vsi, i) ice_clean_tx_ring(vsi->tx_rings[i]); + if (ice_is_xdp_ena_vsi(vsi)) + ice_for_each_xdp_txq(vsi, i) + ice_clean_tx_ring(vsi->xdp_rings[i]); + ice_for_each_rxq(vsi, i) ice_clean_rx_ring(vsi->rx_rings[i]); -- cgit v1.2.3 From 21225873be1472b7c59ed3650396af0e40578112 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Wed, 7 Jun 2023 17:10:48 +0800 Subject: net: enetc: correct the indexes of highest and 2nd highest TCs For ENETC hardware, the TCs are numbered from 0 to N-1, where N is the number of TCs. Numerically higher TC has higher priority. It's obvious that the highest priority TC index should be N-1 and the 2nd highest priority TC index should be N-2. 
However, the previous logic uses netdev_get_prio_tc_map() to get the indexes of the highest priority and 2nd highest priority TCs, which does not make sense: it is incorrect to give a "tc" argument to netdev_get_prio_tc_map(). So the driver may get the wrong indexes of the two highest priority TCs, which would cause setting the CBS for the two highest priority TCs to fail. e.g. $ tc qdisc add dev eno0 parent root handle 100: mqprio num_tc 6 \ map 0 0 1 1 2 3 4 5 queues 1@0 1@1 1@2 1@3 2@4 2@6 hw 1 $ tc qdisc replace dev eno0 parent 100:6 cbs idleslope 100000 \ sendslope -900000 hicredit 12 locredit -113 offload 1 $ Error: Specified device failed to setup cbs hardware offload. ^^^^^ In this example, the previous logic deems the indexes of the two highest priority TCs should be 3 and 2. Actually, the indexes are 5 and 4, because the number of TCs is 6. So configuring the CBS for the two highest priority TCs would fail. Fixes: c431047c4efe ("enetc: add support Credit Based Shaper(CBS) for hardware offload") Signed-off-by: Wei Fang Reviewed-by: Vladimir Oltean Reviewed-by: Maciej Fijalkowski Signed-off-by: David S.
Miller --- drivers/net/ethernet/freescale/enetc/enetc_qos.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 83c27bbbc6ed..126007ab70f6 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -181,8 +181,8 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data) int bw_sum = 0; u8 bw; - prio_top = netdev_get_prio_tc_map(ndev, tc_nums - 1); - prio_next = netdev_get_prio_tc_map(ndev, tc_nums - 2); + prio_top = tc_nums - 1; + prio_next = tc_nums - 2; /* Support highest prio and second prio tc in cbs mode */ if (tc != prio_top && tc != prio_next) -- cgit v1.2.3 From 9a8648cce8d8a4a7770b45239912e20edb9736ad Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 1 Jun 2023 14:15:06 -0700 Subject: ice: trigger PFINT_OICR_TSYN_TX interrupt instead of polling In ice_misc_intr_thread_fn(), if we do not complete all Tx timestamp work, the thread function will poll continuously forever. For E822 hardware, this wastes time as the return value from ice_ptp_process_ts() is accurate and always reports correctly that the PHY actually has new timestamp data. In addition, if we receive enough timestamps with the right pacing, we may never exit this polling. Should this occur, other tasks handled by the ice_misc_intr_thread_fn() will never be processed. Fix this by instead writing to PFINT_OICR, causing an emulated interrupt to be triggered immediately. This does take slightly more processing than just re-checking the timestamps. However, it allows all of the other interrupt causes a chance to be processed first in the hard IRQ function. Note that the OICR interrupt is configured to be throttled to no more than once every 124 microseconds. This gives an effective interrupt rate of ~8000 interrupts per second. 
This should thus not cause a significant increase in overall CPU usage when compared to sleeping. Signed-off-by: Jacob Keller Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 6811e2a3c154..2665f72b5461 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3202,8 +3202,15 @@ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) ice_ptp_extts_event(pf); if (test_and_clear_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread)) { - while (ice_ptp_process_ts(pf) == ICE_TX_TSTAMP_WORK_PENDING) - usleep_range(50, 100); + struct ice_hw *hw = &pf->hw; + + /* Process outstanding Tx timestamps. If there is more work, + * re-arm the interrupt to trigger again. + */ + if (ice_ptp_process_ts(pf) == ICE_TX_TSTAMP_WORK_PENDING) { + wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M); + ice_flush(hw); + } } return IRQ_HANDLED; -- cgit v1.2.3 From 0ec38df36ea1cc4f21bf7cd61a89942b034883c5 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 1 Jun 2023 14:15:07 -0700 Subject: ice: do not re-enable miscellaneous interrupt until thread_fn completes The ice driver uses threaded IRQ for managing Tx timestamps via the devm_request_threaded_irq() interface. The ice_misc_intr() handler function is responsible for processing the hard interrupt context, and can wake the ice_misc_intr_thread_fn() by returning IRQ_WAKE_THREAD. The request_threaded_irq() function comment says: @handler is still called in hard interrupt context and has to check whether the interrupt originates from the device. If yes, it needs to disable the interrupt on the device and return IRQ_WAKE_THREAD which will wake up the handler thread and run the @thread_fn. 
We currently re-enable the Other Interrupt Cause Register (OICR) at the end of ice_misc_intr(). In practice, this seems to be ok, but it can make communicating between the handler function and the thread function difficult. This is because the interrupt can trigger again while the thread function is still processing. Move the OICR update to the end of the thread function, leaving the other interrupt cause disabled in hardware until we complete one pass of the thread function. This prevents the miscellaneous interrupt from firing until after we finish the thread function. Signed-off-by: Jacob Keller Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 2665f72b5461..aa57d26a0ac7 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3179,8 +3179,6 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) } } - ice_irq_dynamic_ena(hw, NULL, NULL); - return IRQ_WAKE_THREAD; } @@ -3192,6 +3190,9 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) { struct ice_pf *pf = data; + struct ice_hw *hw; + + hw = &pf->hw; if (ice_is_reset_in_progress(pf->state)) return IRQ_HANDLED; @@ -3202,8 +3203,6 @@ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) ice_ptp_extts_event(pf); if (test_and_clear_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread)) { - struct ice_hw *hw = &pf->hw; - /* Process outstanding Tx timestamps. If there is more work, * re-arm the interrupt to trigger again.
*/ @@ -3213,6 +3212,8 @@ static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data) } } + ice_irq_dynamic_ena(hw, NULL, NULL); + return IRQ_HANDLED; } -- cgit v1.2.3 From 37ff78e977f1a4676354a6c6ebbbf293e540abc1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 7 Jun 2023 14:19:26 +0200 Subject: mlxsw: spectrum_nve_vxlan: Fix unsupported flag regression The recently added 'VXLAN_F_LOCALBYPASS' flag is set by default on VXLAN devices and denotes a behavior that is irrelevant for the hardware data path. Add it to the lists of IPv4 and IPv6 supported flags to avoid rejecting offload of VXLAN devices which have this flag set. Fixes: 69474a8a5837 ("net: vxlan: Add nolocalbypass option to vxlan.") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/5533e63643bf719bbe286fef60f749c9cad35005.1686139716.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c index d309b77a0194..bb8eeb86edf7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c @@ -11,10 +11,12 @@ #include "spectrum_nve.h" #define MLXSW_SP_NVE_VXLAN_IPV4_SUPPORTED_FLAGS (VXLAN_F_UDP_ZERO_CSUM_TX | \ - VXLAN_F_LEARN) + VXLAN_F_LEARN | \ + VXLAN_F_LOCALBYPASS) #define MLXSW_SP_NVE_VXLAN_IPV6_SUPPORTED_FLAGS (VXLAN_F_IPV6 | \ VXLAN_F_UDP_ZERO_CSUM6_TX | \ - VXLAN_F_UDP_ZERO_CSUM6_RX) + VXLAN_F_UDP_ZERO_CSUM6_RX | \ + VXLAN_F_LOCALBYPASS) static bool mlxsw_sp_nve_vxlan_ipv4_flags_check(const struct vxlan_config *cfg, struct netlink_ext_ack *extack) -- cgit v1.2.3 From 6292d7436cf2f0a2ea8800a1d2cbb155d237818a Mon Sep 17 00:00:00 2001 From: Yuezhen 
Luan Date: Wed, 7 Jun 2023 09:41:16 -0700 Subject: igb: Fix extts capture value format for 82580/i354/i350 82580/i354/i350 feature circle-counter-like timestamp registers that are different from those of the newer i210. The EXTTS capture value in AUXTSMPx should be converted by the driver from the raw circle counter value to a timestamp value with a resolution of 1 nanosecond. This issue can be reproduced on i350 NICs by connecting a 1PPS signal to an SDP pin and running the 'ts2phc' command to read the external 1PPS timestamp value. On i210 this works fine, but on i350 the extts is not correctly converted. The i350/i354/82580's SYSTIM and other timestamp registers are 40-bit counters representing a time range of 2^40 ns, which means these registers overflow about every 1099 s. As a result, these registers cannot be used directly, in contrast to the newer i210/i211. The igb driver needs to convert these raw register values to a valid timestamp format by using the kernel timecounter APIs for the i350 family. igb_extts() simply omitted this conversion.
Fixes: 38970eac41db ("igb: support EXTTS on 82580/i354/i350") Signed-off-by: Yuezhen Luan Reviewed-by: Jacob Keller Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230607164116.3768175-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igb/igb_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 58872a4c2540..bb3db387d49c 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6947,6 +6947,7 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) struct e1000_hw *hw = &adapter->hw; struct ptp_clock_event event; struct timespec64 ts; + unsigned long flags; if (pin < 0 || pin >= IGB_N_SDP) return; @@ -6954,9 +6955,12 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) if (hw->mac.type == e1000_82580 || hw->mac.type == e1000_i354 || hw->mac.type == e1000_i350) { - s64 ns = rd32(auxstmpl); + u64 ns = rd32(auxstmpl); - ns += ((s64)(rd32(auxstmph) & 0xFF)) << 32; + ns += ((u64)(rd32(auxstmph) & 0xFF)) << 32; + spin_lock_irqsave(&adapter->tmreg_lock, flags); + ns = timecounter_cyc2time(&adapter->tc, ns); + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); ts = ns_to_timespec64(ns); } else { ts.tv_nsec = rd32(auxstmpl); -- cgit v1.2.3 From c8cc2ae229ff0aa9b7e67fd38f5d73bece111e71 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Wed, 7 Jun 2023 13:59:53 +0800 Subject: net: pch_gbe: Allow build on MIPS_GENERIC kernel MIPS Boston board, which is using MIPS_GENERIC kernel is using EG20T PCH and thus need this driver. Dependency of PCH_GBE, PTP_1588_CLOCK_PCH is also fixed for MIPS_GENERIC. 
Note that CONFIG_PCH_GBE has been selected in arch/mips/configs/generic/board-boston.config for a while, but somehow it was never wired up in Kconfig. Signed-off-by: Jiaxun Yang Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230607055953.34110-1-jiaxun.yang@flygoat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/oki-semi/pch_gbe/Kconfig | 2 +- drivers/ptp/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig index 4e18b64dceb9..9651cc714ef2 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig +++ b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig @@ -5,7 +5,7 @@ config PCH_GBE tristate "OKI SEMICONDUCTOR IOH(ML7223/ML7831) GbE" - depends on PCI && (X86_32 || COMPILE_TEST) + depends on PCI && (MIPS_GENERIC || X86_32 || COMPILE_TEST) depends on PTP_1588_CLOCK select MII select PTP_1588_CLOCK_PCH diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index b00201d81313..32dff1b4f891 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -102,7 +102,7 @@ config PTP_1588_CLOCK_INES config PTP_1588_CLOCK_PCH tristate "Intel PCH EG20T as PTP clock" - depends on X86_32 || COMPILE_TEST + depends on MIPS_GENERIC || X86_32 || COMPILE_TEST depends on HAS_IOMEM && PCI depends on NET depends on PTP_1588_CLOCK -- cgit v1.2.3 From 6c79a9c8b1f3eb00b336fc2557fc5e1a5c15bd20 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 7 Jun 2023 12:58:13 +0100 Subject: net: dpaa2-mac: allow lynx PCS to manage mdiodev lifetime Put the mdiodev after lynx_pcs_create() so that the Lynx PCS driver can manage the lifetime of the mdiodev it is using.
Signed-off-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index cb70855e2b9a..c0f7dd3b4ac1 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -271,9 +271,9 @@ static int dpaa2_pcs_create(struct dpaa2_mac *mac, } mac->pcs = lynx_pcs_create(mdiodev); + mdio_device_put(mdiodev); if (!mac->pcs) { netdev_err(mac->net_dev, "lynx_pcs_create() failed\n"); - mdio_device_free(mdiodev); return -ENOMEM; } @@ -285,10 +285,7 @@ static void dpaa2_pcs_destroy(struct dpaa2_mac *mac) struct phylink_pcs *phylink_pcs = mac->pcs; if (phylink_pcs) { - struct mdio_device *mdio = lynx_get_mdio_device(phylink_pcs); - lynx_pcs_destroy(phylink_pcs); - mdio_device_free(mdio); mac->pcs = NULL; } } -- cgit v1.2.3 From d7b6ea1a14e454ae480274a68daed9136409376f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 7 Jun 2023 12:58:18 +0100 Subject: net: fman_memac: allow lynx PCS to handle mdiodev lifetime Put the mdiodev after lynx_pcs_create() so that the Lynx PCS driver can manage the lifetime of the mdiodev it is using.
Signed-off-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fman/fman_memac.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 625c79d5636f..8f45caf4af12 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -976,14 +976,10 @@ static int memac_init(struct fman_mac *memac) static void pcs_put(struct phylink_pcs *pcs) { - struct mdio_device *mdiodev; - if (IS_ERR_OR_NULL(pcs)) return; - mdiodev = lynx_get_mdio_device(pcs); lynx_pcs_destroy(pcs); - mdio_device_free(mdiodev); } static int memac_free(struct fman_mac *memac) @@ -1055,8 +1051,7 @@ static struct phylink_pcs *memac_pcs_create(struct device_node *mac_node, return ERR_PTR(-EPROBE_DEFER); pcs = lynx_pcs_create(mdiodev); - if (!pcs) - mdio_device_free(mdiodev); + mdio_device_put(mdiodev); return pcs; } -- cgit v1.2.3 From 595fa7634d71be88689f61456f38b4dc252366b4 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 7 Jun 2023 12:58:34 +0100 Subject: net: dpaa2-mac: use lynx_pcs_create_fwnode() Use lynx_pcs_create_fwnode() to create a lynx PCS from a fwnode handle. 
Signed-off-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index c0f7dd3b4ac1..38e6208f9e1a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -247,8 +247,8 @@ static int dpaa2_pcs_create(struct dpaa2_mac *mac, struct fwnode_handle *dpmac_node, int id) { - struct mdio_device *mdiodev; struct fwnode_handle *node; + struct phylink_pcs *pcs; node = fwnode_find_reference(dpmac_node, "pcs-handle", 0); if (IS_ERR(node)) { @@ -263,20 +263,22 @@ static int dpaa2_pcs_create(struct dpaa2_mac *mac, return -ENODEV; } - mdiodev = fwnode_mdio_find_device(node); + pcs = lynx_pcs_create_fwnode(node); fwnode_handle_put(node); - if (!mdiodev) { + + if (pcs == ERR_PTR(-EPROBE_DEFER)) { netdev_dbg(mac->net_dev, "missing PCS device\n"); return -EPROBE_DEFER; } - mac->pcs = lynx_pcs_create(mdiodev); - mdio_device_put(mdiodev); - if (!mac->pcs) { - netdev_err(mac->net_dev, "lynx_pcs_create() failed\n"); - return -ENOMEM; + if (IS_ERR(pcs)) { + netdev_err(mac->net_dev, + "lynx_pcs_create_fwnode() failed: %pe\n", pcs); + return PTR_ERR(pcs); } + mac->pcs = pcs; + return 0; } -- cgit v1.2.3 From 929a629c211f1e6a4be1b941efab595fe6bfaa69 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 7 Jun 2023 12:58:39 +0100 Subject: net: fman_memac: use lynx_pcs_create_fwnode() Use lynx_pcs_create_fwnode() to create a lynx PCS from a fwnode handle. 
Signed-off-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fman/fman_memac.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 8f45caf4af12..4fbdae996d05 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -1039,19 +1039,14 @@ static struct phylink_pcs *memac_pcs_create(struct device_node *mac_node, int index) { struct device_node *node; - struct mdio_device *mdiodev = NULL; struct phylink_pcs *pcs; node = of_parse_phandle(mac_node, "pcsphy-handle", index); - if (node && of_device_is_available(node)) - mdiodev = of_mdio_find_device(node); - of_node_put(node); - - if (!mdiodev) - return ERR_PTR(-EPROBE_DEFER); + if (!node || !of_device_is_available(node)) + return ERR_PTR(-ENODEV); - pcs = lynx_pcs_create(mdiodev); - mdio_device_put(mdiodev); + pcs = lynx_pcs_create_fwnode(of_fwnode_handle(node)); + of_node_put(node); return pcs; } -- cgit v1.2.3 From 8c1d0b339d675366ad02fe8c571cdeed0dd28435 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 7 Jun 2023 12:58:59 +0100 Subject: net: dpaa2: use pcs-lynx's check for fwnode availability Use pcs-lynx's check rather than our own when determining if the device is available. 
Signed-off-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index 38e6208f9e1a..d860d9fe73af 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -257,12 +257,6 @@ static int dpaa2_pcs_create(struct dpaa2_mac *mac, return 0; } - if (!fwnode_device_is_available(node)) { - netdev_err(mac->net_dev, "pcs-handle node not available\n"); - fwnode_handle_put(node); - return -ENODEV; - } - pcs = lynx_pcs_create_fwnode(node); fwnode_handle_put(node); @@ -271,6 +265,11 @@ static int dpaa2_pcs_create(struct dpaa2_mac *mac, return -EPROBE_DEFER; } + if (pcs == ERR_PTR(-ENODEV)) { + netdev_err(mac->net_dev, "pcs-handle node not available\n"); + return PTR_ERR(pcs); + } + if (IS_ERR(pcs)) { netdev_err(mac->net_dev, "lynx_pcs_create_fwnode() failed: %pe\n", pcs); -- cgit v1.2.3 From 32fc30353f7c4d5370acf6ef8fb3be9363dce3c2 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 7 Jun 2023 12:59:04 +0100 Subject: net: fman_memac: use pcs-lynx's check for fwnode availability Use pcs-lynx's check rather than our own when determining if the device is available. This fixes a bug where the reference gained by of_parse_phandle() is not dropped if the device is not available. 
Signed-off-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fman/fman_memac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 4fbdae996d05..3b75cc543be9 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -1042,7 +1042,7 @@ static struct phylink_pcs *memac_pcs_create(struct device_node *mac_node, struct phylink_pcs *pcs; node = of_parse_phandle(mac_node, "pcsphy-handle", index); - if (!node || !of_device_is_available(node)) + if (!node) return ERR_PTR(-ENODEV); pcs = lynx_pcs_create_fwnode(of_fwnode_handle(node)); -- cgit v1.2.3 From c289a1601abd7313ef08ec13184df385cfb4d388 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Jun 2023 19:19:14 +0100 Subject: chelsio/chtls: Use splice_eof() to flush Allow splice to end a Chelsio TLS record after prematurely ending a splice/sendfile due to getting an EOF condition (->splice_read() returned 0) after splice had called sendmsg() with MSG_MORE set when the user didn't set MSG_MORE. 
Suggested-by: Linus Torvalds Link: https://lore.kernel.org/r/CAHk-=wh=V579PDYvkpnTobCLGczbgxpMgGmmhqiTyE34Cpi5Gg@mail.gmail.com/ Signed-off-by: David Howells cc: Ayush Sawal cc: Jens Axboe cc: Matthew Wilcox Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h | 1 + drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c | 9 +++++++++ drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c | 1 + 3 files changed, 11 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h index 41714203ace8..da4818d2c856 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h @@ -568,6 +568,7 @@ void chtls_destroy_sock(struct sock *sk); int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); +void chtls_splice_eof(struct socket *sock); int chtls_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); int send_tx_flowc_wr(struct sock *sk, int compl, diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index 5724bbbb6ee0..e08ac960c967 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -1237,6 +1237,15 @@ out_err: goto done; } +void chtls_splice_eof(struct socket *sock) +{ + struct sock *sk = sock->sk; + + lock_sock(sk); + chtls_tcp_push(sk, 0); + release_sock(sk); +} + int chtls_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c index 1e55b12fee51..6b6787eafd2f 100644 --- 
a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c @@ -606,6 +606,7 @@ static void __init chtls_init_ulp_ops(void) chtls_cpl_prot.destroy = chtls_destroy_sock; chtls_cpl_prot.shutdown = chtls_shutdown; chtls_cpl_prot.sendmsg = chtls_sendmsg; + chtls_cpl_prot.splice_eof = chtls_splice_eof; chtls_cpl_prot.sendpage = chtls_sendpage; chtls_cpl_prot.recvmsg = chtls_recvmsg; chtls_cpl_prot.setsockopt = chtls_setsockopt; -- cgit v1.2.3 From 0ad4982c520ed87ea7ebfc9381ea1f617ed75364 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 8 Jun 2023 10:57:27 +0900 Subject: net: renesas: rswitch: Fix timestamp feature after all descriptors are used The timestamp descriptors were intended to act cyclically. Descriptors from index 0 through gq->ring_size - 1 contain actual information, and the last index (gq->ring_size) should have LINKFIX to indicate the first index 0 descriptor. However, the LINKFIX value is missing, causing the timestamp feature to stop after all descriptors are used. To resolve this issue, set the LINKFIX to the timestamp descriptors. Reported-by: Phong Hoang Fixes: 33f5d733b589 ("net: renesas: rswitch: Improve TX timestamp accuracy") Signed-off-by: Yoshihiro Shimoda Reviewed-by: Simon Horman Signed-off-by: David S.
Miller --- drivers/net/ethernet/renesas/rswitch.c | 36 +++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index aace87139cea..fa6d6202b129 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -347,17 +347,6 @@ out: return -ENOMEM; } -static int rswitch_gwca_ts_queue_alloc(struct rswitch_private *priv) -{ - struct rswitch_gwca_queue *gq = &priv->gwca.ts_queue; - - gq->ring_size = TS_RING_SIZE; - gq->ts_ring = dma_alloc_coherent(&priv->pdev->dev, - sizeof(struct rswitch_ts_desc) * - (gq->ring_size + 1), &gq->ring_dma, GFP_KERNEL); - return !gq->ts_ring ? -ENOMEM : 0; -} - static void rswitch_desc_set_dptr(struct rswitch_desc *desc, dma_addr_t addr) { desc->dptrl = cpu_to_le32(lower_32_bits(addr)); @@ -533,6 +522,28 @@ static void rswitch_gwca_linkfix_free(struct rswitch_private *priv) gwca->linkfix_table = NULL; } +static int rswitch_gwca_ts_queue_alloc(struct rswitch_private *priv) +{ + struct rswitch_gwca_queue *gq = &priv->gwca.ts_queue; + struct rswitch_ts_desc *desc; + + gq->ring_size = TS_RING_SIZE; + gq->ts_ring = dma_alloc_coherent(&priv->pdev->dev, + sizeof(struct rswitch_ts_desc) * + (gq->ring_size + 1), &gq->ring_dma, GFP_KERNEL); + + if (!gq->ts_ring) + return -ENOMEM; + + rswitch_gwca_ts_queue_fill(priv, 0, TS_RING_SIZE); + desc = &gq->ts_ring[gq->ring_size]; + desc->desc.die_dt = DT_LINKFIX; + rswitch_desc_set_dptr(&desc->desc, gq->ring_dma); + INIT_LIST_HEAD(&priv->gwca.ts_info_list); + + return 0; +} + static struct rswitch_gwca_queue *rswitch_gwca_get(struct rswitch_private *priv) { struct rswitch_gwca_queue *gq; @@ -1780,9 +1791,6 @@ static int rswitch_init(struct rswitch_private *priv) if (err < 0) goto err_ts_queue_alloc; - rswitch_gwca_ts_queue_fill(priv, 0, TS_RING_SIZE); - INIT_LIST_HEAD(&priv->gwca.ts_info_list); - for (i = 0; i < 
RSWITCH_NUM_PORTS; i++) { err = rswitch_device_alloc(priv, i); if (err < 0) { -- cgit v1.2.3 From c0e489372a294044feea650b38f38c888eff57a4 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Thu, 8 Jun 2023 10:46:25 +0530 Subject: octeontx2-af: Fix promiscuous mode CN10KB silicon introduced a new exact match feature, which is used for DMAC filtering. The state of installed DMAC filters in this exact match table is getting corrupted when promiscuous mode is toggled. Fix this by not touching Exact match related config when promiscuous mode is toggled. Fixes: 2dba9459d2c9 ("octeontx2-af: Wrapper functions for MAC addr add/del/update/reset") Signed-off-by: Ratheesh Kannoth Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/af/rvu_npc_hash.c | 29 ++-------------------- 1 file changed, 2 insertions(+), 27 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c index 51209119f0f2..9f11c1e40737 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c @@ -1164,10 +1164,8 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i { struct npc_exact_table *table; u16 *cnt, old_cnt; - bool promisc; table = rvu->hw->table; - promisc = table->promisc_mode[drop_mcam_idx]; cnt = &table->cnt_cmd_rules[drop_mcam_idx]; old_cnt = *cnt; @@ -1179,16 +1177,13 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i *enable_or_disable_cam = false; - if (promisc) - goto done; - - /* If all rules are deleted and not already in promisc mode; disable cam */ + /* If all rules are deleted, disable cam */ if (!*cnt && val < 0) { *enable_or_disable_cam = true; goto done; } - /* If rule got added and not already in promisc mode; enable cam */ + /* If rule got added, enable cam */ if (!old_cnt && val > 0) { *enable_or_disable_cam = 
true; goto done; @@ -1443,7 +1438,6 @@ int rvu_npc_exact_promisc_disable(struct rvu *rvu, u16 pcifunc) u32 drop_mcam_idx; bool *promisc; bool rc; - u32 cnt; table = rvu->hw->table; @@ -1466,17 +1460,8 @@ int rvu_npc_exact_promisc_disable(struct rvu *rvu, u16 pcifunc) return LMAC_AF_ERR_INVALID_PARAM; } *promisc = false; - cnt = __rvu_npc_exact_cmd_rules_cnt_update(rvu, drop_mcam_idx, 0, NULL); mutex_unlock(&table->lock); - /* If no dmac filter entries configured, disable drop rule */ - if (!cnt) - rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, false); - else - rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, !*promisc); - - dev_dbg(rvu->dev, "%s: disabled promisc mode (cgx=%d lmac=%d, cnt=%d)\n", - __func__, cgx_id, lmac_id, cnt); return 0; } @@ -1494,7 +1479,6 @@ int rvu_npc_exact_promisc_enable(struct rvu *rvu, u16 pcifunc) u32 drop_mcam_idx; bool *promisc; bool rc; - u32 cnt; table = rvu->hw->table; @@ -1517,17 +1501,8 @@ int rvu_npc_exact_promisc_enable(struct rvu *rvu, u16 pcifunc) return LMAC_AF_ERR_INVALID_PARAM; } *promisc = true; - cnt = __rvu_npc_exact_cmd_rules_cnt_update(rvu, drop_mcam_idx, 0, NULL); mutex_unlock(&table->lock); - /* If no dmac filter entries configured, disable drop rule */ - if (!cnt) - rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, false); - else - rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, !*promisc); - - dev_dbg(rvu->dev, "%s: Enabled promisc mode (cgx=%d lmac=%d cnt=%d)\n", - __func__, cgx_id, lmac_id, cnt); return 0; } -- cgit v1.2.3 From 18a92b05425493c3d131c47689443d7ae860c986 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Wed, 8 Mar 2023 00:02:12 +0200 Subject: net/mlx5: Simplify unload all rep code Instead of using type specific iterators which are only used in one place just traverse the xarray. It will provide suitable ordering based on the vport numbers. This will also eliminate the need for changes here when new types are added. 
Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 48 +--------------------- 1 file changed, 1 insertion(+), 47 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index eafb098db6b0..625982454575 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -55,13 +55,6 @@ #define mlx5_esw_for_each_rep(esw, i, rep) \ xa_for_each(&((esw)->offloads.vport_reps), i, rep) -#define mlx5_esw_for_each_sf_rep(esw, i, rep) \ - xa_for_each_marked(&((esw)->offloads.vport_reps), i, rep, MLX5_ESW_VPT_SF) - -#define mlx5_esw_for_each_vf_rep(esw, index, rep) \ - mlx5_esw_for_each_entry_marked(&((esw)->offloads.vport_reps), index, \ - rep, (esw)->esw_funcs.num_vfs, MLX5_ESW_VPT_VF) - /* There are two match-all miss flows, one for unicast dst mac and * one for multicast. 
*/ @@ -2191,18 +2184,6 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return 0; } -static void mlx5_esw_offloads_rep_mark_set(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep, - xa_mark_t mark) -{ - bool mark_set; - - /* Copy the mark from vport to its rep */ - mark_set = xa_get_mark(&esw->vports, rep->vport, mark); - if (mark_set) - xa_set_mark(&esw->offloads.vport_reps, rep->vport, mark); -} - static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport) { struct mlx5_eswitch_rep *rep; @@ -2222,9 +2203,6 @@ static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx if (err) goto insert_err; - mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_HOST_FN); - mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_VF); - mlx5_esw_offloads_rep_mark_set(esw, rep, MLX5_ESW_VPT_SF); return 0; insert_err: @@ -2365,37 +2343,13 @@ static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, esw->offloads.rep_ops[rep_type]->unload(rep); } -static void __unload_reps_sf_vport(struct mlx5_eswitch *esw, u8 rep_type) -{ - struct mlx5_eswitch_rep *rep; - unsigned long i; - - mlx5_esw_for_each_sf_rep(esw, i, rep) - __esw_offloads_unload_rep(esw, rep, rep_type); -} - static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) { struct mlx5_eswitch_rep *rep; unsigned long i; - __unload_reps_sf_vport(esw, rep_type); - - mlx5_esw_for_each_vf_rep(esw, i, rep) - __esw_offloads_unload_rep(esw, rep, rep_type); - - if (mlx5_ecpf_vport_exists(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF); - __esw_offloads_unload_rep(esw, rep, rep_type); - } - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF); + mlx5_esw_for_each_rep(esw, i, rep) __esw_offloads_unload_rep(esw, rep, rep_type); - } - - rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - __esw_offloads_unload_rep(esw, rep, rep_type); } int mlx5_esw_offloads_rep_load(struct mlx5_eswitch 
*esw, u16 vport_num) -- cgit v1.2.3 From dc13180824b78e1e4e7ae1ce22160ae8e5fb858e Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 19:36:14 +0200 Subject: net/mlx5: Enable devlink port for embedded cpu VF vports Enable creation of a devlink port for EC VF vports. Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/esw/devlink_port.c | 8 +++++++- drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 20 ++++++++++++++++++++ include/linux/mlx5/driver.h | 6 ++++++ 3 files changed, 33 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index f370f67d9e33..af779c700278 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -18,7 +18,8 @@ static bool mlx5_esw_devlink_port_supported(struct mlx5_eswitch *esw, u16 vport_ { return vport_num == MLX5_VPORT_UPLINK || (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) || - mlx5_eswitch_is_vf_vport(esw, vport_num); + mlx5_eswitch_is_vf_vport(esw, vport_num) || + mlx5_core_is_ec_vf_vport(esw->dev, vport_num); } static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 vport_num) @@ -56,6 +57,11 @@ static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16 dl_port->attrs.switch_id.id_len = ppid.id_len; devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, vport_num - 1, external); + } else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) { + memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); + dl_port->attrs.switch_id.id_len = ppid.id_len; + devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, + vport_num - 1, false); } return dl_port; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h 
b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 1d879374acaa..0e7b5c6e4020 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -343,4 +343,24 @@ bool mlx5_rdma_supported(struct mlx5_core_dev *dev); bool mlx5_vnet_supported(struct mlx5_core_dev *dev); bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev); +static inline u16 mlx5_core_ec_vf_vport_base(const struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN_2(dev, ec_vf_vport_base); +} + +static inline u16 mlx5_core_ec_sriov_enabled(const struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf(dev) && mlx5_core_ec_vf_vport_base(dev); +} + +static inline bool mlx5_core_is_ec_vf_vport(const struct mlx5_core_dev *dev, u16 vport_num) +{ + int base_vport = mlx5_core_ec_vf_vport_base(dev); + int max_vport = base_vport + mlx5_core_max_ec_vfs(dev); + + if (!mlx5_core_ec_sriov_enabled(dev)) + return false; + + return (vport_num >= base_vport && vport_num < max_vport); +} #endif /* __MLX5_CORE_H__ */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9a744c48eec2..252b6a6965b8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -474,6 +474,7 @@ struct mlx5_core_sriov { struct mlx5_vf_context *vfs_ctx; int num_vfs; u16 max_vfs; + u16 max_ec_vfs; }; struct mlx5_fc_pool { @@ -1244,6 +1245,11 @@ static inline u16 mlx5_core_max_vfs(const struct mlx5_core_dev *dev) return dev->priv.sriov.max_vfs; } +static inline u16 mlx5_core_max_ec_vfs(const struct mlx5_core_dev *dev) +{ + return dev->priv.sriov.max_ec_vfs; +} + static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { -- cgit v1.2.3 From 9ac0b128248e19d06475f4592fe87f6ce18bc554 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 19:51:22 +0200 Subject: net/mlx5: Update vport caps query/set for EC VFs These functions are for query/set by vport, there was an underlying assumption that 
vport was equal to function ID. That's not the case for EC VF functions. Set the ec_vf_function bit accordingly. Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 19 +++++++++++++++---- include/linux/mlx5/vport.h | 2 +- 3 files changed, 19 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 0e7b5c6e4020..7ca0c7a547aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -325,10 +325,10 @@ void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend); int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery); int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery); -int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 function_id, +int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 vport, u16 opmod); -#define mlx5_vport_get_other_func_general_cap(dev, fid, out) \ - mlx5_vport_get_other_func_cap(dev, fid, out, MLX5_CAP_GENERAL) +#define mlx5_vport_get_other_func_general_cap(dev, vport, out) \ + mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL) void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work); static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index bc66b078a8a1..6d3984dd5b21 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1161,23 +1161,32 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); -int 
mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out, +static int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vport, bool ec_vf_func) +{ + return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev) + : vport; +} + +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out, u16 opmod) { + bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {}; opmod = (opmod << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01); MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, op_mod, opmod); - MLX5_SET(query_hca_cap_in, in, function_id, function_id); + MLX5_SET(query_hca_cap_in, in, function_id, mlx5_vport_to_func_id(dev, vport, ec_vf_func)); MLX5_SET(query_hca_cap_in, in, other_function, true); + MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); } EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap); int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, - u16 function_id, u16 opmod) + u16 vport, u16 opmod) { + bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); void *set_hca_cap; void *set_ctx; @@ -1191,8 +1200,10 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap MLX5_SET(set_hca_cap_in, set_ctx, op_mod, opmod << 1); set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); memcpy(set_hca_cap, hca_cap, MLX5_ST_SZ_BYTES(cmd_hca_cap)); - MLX5_SET(set_hca_cap_in, set_ctx, function_id, function_id); + MLX5_SET(set_hca_cap_in, set_ctx, function_id, + mlx5_vport_to_func_id(dev, vport, ec_vf_func)); MLX5_SET(set_hca_cap_in, set_ctx, other_function, true); + MLX5_SET(set_hca_cap_in, set_ctx, ec_vf_function, ec_vf_func); ret = mlx5_cmd_exec_in(dev, set_hca_cap, set_ctx); kfree(set_ctx); diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 
7f31432f44c2..fbb9bf447889 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -132,6 +132,6 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev); -int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out, +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out, u16 opmod); #endif /* __MLX5_VPORT_H__ */ -- cgit v1.2.3 From a7719b29a82199b90ebbf355d3332e0fbfbf6045 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 21:24:55 +0200 Subject: net/mlx5: Add management of EC VF vports Add init, load, unload, and cleanup of the EC VF vports. This includes changes in how eswitch SRIOV is managed. Previously, on an embedded CPU platform, the number of VFs provided when enabling the eswitch was always 0; host VF vports are handled in the eswitch functions change event handler. Now track the number of EC VFs as well, so they can be handled properly in the enable/disable flows. There are only 3 marks available for use in xarrays, all 3 were already in use for this use case. EC VF vports are in a known range so we can access them by index instead of marks. 
Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 125 ++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 13 +++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 22 ++++ 3 files changed, 143 insertions(+), 17 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index ecd8864d5d11..b33d852aae34 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1051,6 +1051,18 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) } } +static void mlx5_eswitch_clear_ec_vf_vports_info(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + memset(&vport->qos, 0, sizeof(vport->qos)); + memset(&vport->info, 0, sizeof(vport->info)); + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + } +} + /* Public E-Switch API */ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num, enum mlx5_eswitch_vport_event enabled_events) @@ -1090,6 +1102,19 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) } } +static void mlx5_eswitch_unload_ec_vf_vports(struct mlx5_eswitch *esw, + u16 num_ec_vfs) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) { + if (!vport->enabled) + continue; + mlx5_eswitch_unload_vport(esw, vport->vport); + } +} + int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, enum mlx5_eswitch_vport_event enabled_events) { @@ -1110,6 +1135,26 @@ vf_err: return err; } +static int mlx5_eswitch_load_ec_vf_vports(struct mlx5_eswitch *esw, u16 num_ec_vfs, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + 
+ mlx5_esw_for_each_ec_vf_vport(esw, i, vport, num_ec_vfs) { + err = mlx5_eswitch_load_vport(esw, vport->vport, enabled_events); + if (err) + goto vf_err; + } + + return 0; + +vf_err: + mlx5_eswitch_unload_ec_vf_vports(esw, num_ec_vfs); + return err; +} + static int host_pf_enable_hca(struct mlx5_core_dev *dev) { if (!mlx5_core_is_ecpf(dev)) @@ -1154,6 +1199,12 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, ret = mlx5_eswitch_load_vport(esw, MLX5_VPORT_ECPF, enabled_events); if (ret) goto ecpf_err; + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs, + enabled_events); + if (ret) + goto ec_vf_err; + } } /* Enable VF vports */ @@ -1164,6 +1215,9 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, return 0; vf_err: + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs); +ec_vf_err: if (mlx5_ecpf_vport_exists(esw->dev)) mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); ecpf_err: @@ -1180,8 +1234,11 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); - if (mlx5_ecpf_vport_exists(esw->dev)) + if (mlx5_ecpf_vport_exists(esw->dev)) { + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_vfs); mlx5_eswitch_unload_vport(esw, MLX5_VPORT_ECPF); + } host_pf_disable_hca(esw->dev); mlx5_eswitch_unload_vport(esw, MLX5_VPORT_PF); @@ -1225,6 +1282,9 @@ mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, int num_vfs) esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, host_params_context.host_num_of_vfs); + if (mlx5_core_ec_sriov_enabled(esw->dev)) + esw->esw_funcs.num_ec_vfs = num_vfs; + kvfree(out); } @@ -1332,9 +1392,9 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) mlx5_eswitch_event_handlers_register(esw); - esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active 
vports(%d)\n", + esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", - esw->esw_funcs.num_vfs, esw->enabled_vports); + esw->esw_funcs.num_vfs, esw->esw_funcs.num_ec_vfs, esw->enabled_vports); mlx5_esw_mode_change_notify(esw, esw->mode); @@ -1356,7 +1416,7 @@ abort: int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { bool toggle_lag; - int ret; + int ret = 0; if (!mlx5_esw_allowed(esw)) return 0; @@ -1376,10 +1436,21 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) vport_events = (esw->mode == MLX5_ESWITCH_LEGACY) ? MLX5_LEGACY_SRIOV_VPORT_EVENTS : MLX5_VPORT_UC_ADDR_CHANGE; - ret = mlx5_eswitch_load_vf_vports(esw, num_vfs, vport_events); - if (!ret) - esw->esw_funcs.num_vfs = num_vfs; + /* If this is the ECPF the number of host VFs is managed via the + * eswitch function change event handler, and any num_vfs provided + * here are intended to be EC VFs. + */ + if (!mlx5_core_is_ecpf(esw->dev)) { + ret = mlx5_eswitch_load_vf_vports(esw, num_vfs, vport_events); + if (!ret) + esw->esw_funcs.num_vfs = num_vfs; + } else if (mlx5_core_ec_sriov_enabled(esw->dev)) { + ret = mlx5_eswitch_load_ec_vf_vports(esw, num_vfs, vport_events); + if (!ret) + esw->esw_funcs.num_ec_vfs = num_vfs; + } } + up_write(&esw->mode_lock); if (toggle_lag) @@ -1399,16 +1470,22 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) /* If driver is unloaded, this function is called twice by remove_one() * and mlx5_unload(). Prevent the second call. */ - if (!esw->esw_funcs.num_vfs && !clear_vf) + if (!esw->esw_funcs.num_vfs && !esw->esw_funcs.num_ec_vfs && !clear_vf) goto unlock; - esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), active vports(%d)\n", + esw_info(esw->dev, "Unload vfs: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? 
"LEGACY" : "OFFLOADS", - esw->esw_funcs.num_vfs, esw->enabled_vports); - - mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); - if (clear_vf) - mlx5_eswitch_clear_vf_vports_info(esw); + esw->esw_funcs.num_vfs, esw->esw_funcs.num_ec_vfs, esw->enabled_vports); + + if (!mlx5_core_is_ecpf(esw->dev)) { + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); + if (clear_vf) + mlx5_eswitch_clear_vf_vports_info(esw); + } else if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs); + if (clear_vf) + mlx5_eswitch_clear_ec_vf_vports_info(esw); + } if (esw->mode == MLX5_ESWITCH_OFFLOADS) { struct devlink *devlink = priv_to_devlink(esw->dev); @@ -1419,7 +1496,10 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) if (esw->mode == MLX5_ESWITCH_LEGACY) mlx5_eswitch_disable_locked(esw); - esw->esw_funcs.num_vfs = 0; + if (!mlx5_core_is_ecpf(esw->dev)) + esw->esw_funcs.num_vfs = 0; + else + esw->esw_funcs.num_ec_vfs = 0; unlock: up_write(&esw->mode_lock); @@ -1439,9 +1519,9 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) mlx5_eswitch_event_handlers_unregister(esw); - esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", + esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? 
"LEGACY" : "OFFLOADS", - esw->esw_funcs.num_vfs, esw->enabled_vports); + esw->esw_funcs.num_vfs, esw->esw_funcs.num_ec_vfs, esw->enabled_vports); if (esw->fdb_table.flags & MLX5_ESW_FDB_CREATED) { esw->fdb_table.flags &= ~MLX5_ESW_FDB_CREATED; @@ -1601,6 +1681,17 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) idx++; } + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + int ec_vf_base_num = mlx5_core_ec_vf_vport_base(dev); + + for (i = 0; i < mlx5_core_max_ec_vfs(esw->dev); i++) { + err = mlx5_esw_vport_alloc(esw, idx, ec_vf_base_num + i); + if (err) + goto err; + idx++; + } + } + if (mlx5_ecpf_vport_exists(dev) || mlx5_core_is_ecpf_esw_manager(dev)) { err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_ECPF); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index d3608f198e0a..266b60fefe25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -289,6 +289,7 @@ struct mlx5_host_work { struct mlx5_esw_functions { struct mlx5_nb nb; u16 num_vfs; + u16 num_ec_vfs; }; enum { @@ -654,6 +655,18 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); #define mlx5_esw_for_each_host_func_vport(esw, index, vport, last) \ mlx5_esw_for_each_vport_marked(esw, index, vport, last, MLX5_ESW_VPT_HOST_FN) +/* This macro should only be used if EC SRIOV is enabled. + * + * Because there were no more marks available on the xarray this uses a + * for_each_range approach. 
The range is only valid when EC SRIOV is enabled + */ +#define mlx5_esw_for_each_ec_vf_vport(esw, index, vport, last) \ + xa_for_each_range(&((esw)->vports), \ + index, \ + vport, \ + MLX5_CAP_GEN_2((esw->dev), ec_vf_vport_base), \ + (last) - 1) + struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink); struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 625982454575..68798aed792f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3287,6 +3287,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) /* Representor will control the vport link state */ mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; + if (mlx5_core_ec_sriov_enabled(esw->dev)) + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) + vport->info.link_state = MLX5_VPORT_ADMIN_STATE_DOWN; /* Uplink vport rep must load first. 
*/ err = esw_offloads_load_rep(esw, MLX5_VPORT_UPLINK); @@ -3524,8 +3527,27 @@ static int mlx5_esw_vports_inline_set(struct mlx5_eswitch *esw, u8 mlx5_mode, goto revert_inline_mode; } } + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + err = mlx5_modify_nic_vport_min_inline(dev, vport->vport, mlx5_mode); + if (err) { + err_vport_num = vport->vport; + NL_SET_ERR_MSG_MOD(extack, + "Failed to set min inline on vport"); + goto revert_ec_vf_inline_mode; + } + } + } return 0; +revert_ec_vf_inline_mode: + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + if (vport->vport == err_vport_num) + break; + mlx5_modify_nic_vport_min_inline(dev, + vport->vport, + esw->offloads.inline_mode); + } revert_inline_mode: mlx5_esw_for_each_host_func_vport(esw, i, vport, esw->esw_funcs.num_vfs) { if (vport->vport == err_vport_num) -- cgit v1.2.3 From fa3c73eee641cf76bc232373303aa51a1cad8b8e Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 21:36:39 +0200 Subject: net/mlx5: Add/remove peer miss rules for EC VFs Add and remove the peer miss rules for EC VFs. It's possible that there are different amounts of total VFs per function so only create rules for the minimum number of max VFs. 
Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 68798aed792f..fdf482f6fb34 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1125,11 +1125,32 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows[vport->index] = flow; } + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { + if (i >= mlx5_core_max_ec_vfs(peer_dev)) + break; + esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, + spec, vport->vport); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_ec_vf_flow_err; + } + flows[vport->index] = flow; + } + } esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)] = flows; kvfree(spec); return 0; +add_ec_vf_flow_err: + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { + if (!flows[vport->index]) + continue; + mlx5_del_flow_rules(flows[vport->index]); + } add_vf_flow_err: mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) { if (!flows[vport->index]) @@ -1162,6 +1183,17 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows = esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)]; + if (mlx5_core_ec_sriov_enabled(esw->dev)) { + mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { + /* The flow for a particular vport could be NULL if the other ECPF + * has fewer or no VFs enabled + */ + if (!flows[vport->index]) + 
continue; + mlx5_del_flow_rules(flows[vport->index]); + } + } + mlx5_esw_for_each_vf_vport(esw, i, vport, mlx5_core_max_vfs(esw->dev)) mlx5_del_flow_rules(flows[vport->index]); -- cgit v1.2.3 From 395ccd6eb49a12b021ac5deaa56e6b0b8f93241b Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 00:53:21 +0200 Subject: net/mlx5: Add new page type for EC VF pages When the embedded cpu supports SRIOV it can be enabled and disabled independently from the host SRIOV. Track the pages separately so we can properly wait for returned VF pages. Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 11 ++++++++++- include/linux/mlx5/driver.h | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index bb95b40d25eb..fc13b41cc9b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -246,6 +246,7 @@ void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev) debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages); debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.page_counters[MLX5_VF]); + debugfs_create_u32("fw_pages_ec_vfs", 0400, pages, &dev->priv.page_counters[MLX5_EC_VF]); debugfs_create_u32("fw_pages_sfs", 0400, pages, &dev->priv.page_counters[MLX5_SF]); debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.page_counters[MLX5_HOST_PF]); debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 95dc67fb3001..dcf58efac159 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -79,7 +79,13 @@ static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_funct if (!func_id) return mlx5_core_is_ecpf(dev) && !ec_function ? MLX5_HOST_PF : MLX5_PF; - return func_id <= mlx5_core_max_vfs(dev) ? MLX5_VF : MLX5_SF; + if (func_id <= max(mlx5_core_max_vfs(dev), mlx5_core_max_ec_vfs(dev))) { + if (ec_function) + return MLX5_EC_VF; + else + return MLX5_VF; + } + return MLX5_SF; } static u32 mlx5_get_ec_function(u32 function) @@ -730,6 +736,9 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) WARN(dev->priv.page_counters[MLX5_HOST_PF], "External host PF FW pages counter is %d after reclaiming all pages\n", dev->priv.page_counters[MLX5_HOST_PF]); + WARN(dev->priv.page_counters[MLX5_EC_VF], + "EC VFs FW pages counter is %d after reclaiming all pages\n", + dev->priv.page_counters[MLX5_EC_VF]); return 0; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 252b6a6965b8..18a608a1f567 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -581,6 +581,7 @@ enum mlx5_func_type { MLX5_VF, MLX5_SF, MLX5_HOST_PF, + MLX5_EC_VF, MLX5_FUNC_TYPE_NUM, }; -- cgit v1.2.3 From 2ee3db806e851b9f3bfc46a1004a1ccee180b0a8 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 19:06:58 +0200 Subject: net/mlx5: Use correct vport when restoring GUIDs Prior to enabling EC VF functionality the vport number and function ID were always the same. That's not the case now. Use the correct vport number to modify the HCA vport context. 
Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index f07d00929162..c2463a1d7035 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -37,7 +37,7 @@ #include "mlx5_irq.h" #include "eswitch.h" -static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) +static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf, u16 func_id) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; struct mlx5_hca_vport_context *in; @@ -59,7 +59,7 @@ static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf) !!(in->node_guid) * MLX5_HCA_VPORT_SEL_NODE_GUID | !!(in->policy) * MLX5_HCA_VPORT_SEL_STATE_POLICY; - err = mlx5_core_modify_hca_vport_context(dev, 1, 1, vf + 1, in); + err = mlx5_core_modify_hca_vport_context(dev, 1, 1, func_id, in); if (err) mlx5_core_warn(dev, "modify vport context failed, unable to restore VF %d settings\n", vf); @@ -73,6 +73,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; int err, vf, num_msix_count; + int vport_num; err = mlx5_eswitch_enable(dev->priv.eswitch, num_vfs); if (err) { @@ -104,7 +105,10 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) sriov->vfs_ctx[vf].enabled = 1; if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { - err = sriov_restore_guids(dev, vf); + vport_num = mlx5_core_ec_sriov_enabled(dev) ? 
+ mlx5_core_ec_vf_vport_base(dev) + vf + : vf + 1; + err = sriov_restore_guids(dev, vf, vport_num); if (err) { mlx5_core_warn(dev, "failed to restore VF %d settings, err %d\n", -- cgit v1.2.3 From 42a84a430931afe2ccf31a6910dec86e87de5d2a Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 19:13:43 +0200 Subject: net/mlx5: Query correct caps for min msix vectors The VFs on the host and the embedded CPU platform share function numbers. Set the ec_vf_function field to query the caps for the correct function. Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 843da89a9035..b2dbae763ca6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -41,6 +41,15 @@ struct mlx5_irq_table { struct mlx5_irq_pool *sf_comp_pool; }; +static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev, + int func, + bool ec_vf_func) +{ + if (!ec_vf_func) + return func; + return mlx5_core_ec_vf_vport_base(dev) + func - 1; +} + /** * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors * to be ssigned to each VF. 
@@ -79,6 +88,8 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); void *hca_cap = NULL, *query_cap = NULL, *cap; int num_vf_msix, min_msix, max_msix; + bool ec_vf_function; + int vport; int ret; num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix); @@ -104,7 +115,9 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, goto out; } - ret = mlx5_vport_get_other_func_general_cap(dev, function_id, query_cap); + ec_vf_function = mlx5_core_ec_sriov_enabled(dev); + vport = mlx5_core_func_to_vport(dev, function_id, ec_vf_function); + ret = mlx5_vport_get_other_func_general_cap(dev, vport, query_cap); if (ret) goto out; @@ -115,6 +128,7 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP); MLX5_SET(set_hca_cap_in, hca_cap, other_function, 1); + MLX5_SET(set_hca_cap_in, hca_cap, ec_vf_function, ec_vf_function); MLX5_SET(set_hca_cap_in, hca_cap, function_id, function_id); MLX5_SET(set_hca_cap_in, hca_cap, op_mod, -- cgit v1.2.3 From 6d98f314bfca10cebf66e42573c4b362ed2ee17c Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 7 Mar 2023 18:52:29 +0200 Subject: net/mlx5: Update SRIOV enable/disable to handle EC/VFs Previously on the embedded CPU platform SRIOV was never enabled/disabled via mlx5_core_sriov_configure. Host VF updates are provided by an event handler. Now in the disable flow it must be known if this is a disable due to driver unload or SRIOV detach, or if the user updated the number of VFs. If due to change in the number of VFs only wait for the pages of ECVFs. 
Signed-off-by: Daniel Jurgens Reviewed-by: William Tu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 35 +++++++++++++++++----- 3 files changed, 30 insertions(+), 9 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d6ee016deae1..fed8b48a5b20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1809,7 +1809,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_drain_fw_reset(dev); mlx5_drain_health_wq(dev); devlink_unregister(devlink); - mlx5_sriov_disable(pdev); + mlx5_sriov_disable(pdev, false); mlx5_thermal_uninit(dev); mlx5_crdump_disable(dev); mlx5_uninit_one(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 7ca0c7a547aa..7a5f04082058 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -195,7 +195,7 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); int mlx5_sriov_attach(struct mlx5_core_dev *dev); void mlx5_sriov_detach(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); -void mlx5_sriov_disable(struct pci_dev *pdev); +void mlx5_sriov_disable(struct pci_dev *pdev, bool num_vf_change); int mlx5_core_sriov_set_msix_vec_count(struct pci_dev *vf, int msix_vec_count); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index c2463a1d7035..b73583b0a0fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -123,9 +123,11 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) } static void -mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) +mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf, bool num_vf_change) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; + bool wait_for_ec_vf_pages = true; + bool wait_for_vf_pages = true; int err; int vf; @@ -147,11 +149,30 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf); + /* There are a number of scenarios when SRIOV is being disabled: + * 1. VFs or ECVFs had been created, and now set back to 0 (num_vf_change == true). + * - If EC SRIOV is enabled then this flow is happening on the + * embedded platform, wait for only EC VF pages. + * - If EC SRIOV is not enabled this flow is happening on non-embedded + * platform, wait for the VF pages. + * + * 2. The driver is being unloaded. In this case wait for all pages. 
+ */ + if (num_vf_change) { + if (mlx5_core_ec_sriov_enabled(dev)) + wait_for_vf_pages = false; + else + wait_for_ec_vf_pages = false; + } + + if (wait_for_ec_vf_pages && mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_EC_VF])) + mlx5_core_warn(dev, "timeout reclaiming EC VFs pages\n"); + /* For ECPFs, skip waiting for host VF pages until ECPF is destroyed */ if (mlx5_core_is_ecpf(dev)) return; - if (mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF])) + if (wait_for_vf_pages && mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF])) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } @@ -172,12 +193,12 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) err = pci_enable_sriov(pdev, num_vfs); if (err) { mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); - mlx5_device_disable_sriov(dev, num_vfs, true); + mlx5_device_disable_sriov(dev, num_vfs, true, true); } return err; } -void mlx5_sriov_disable(struct pci_dev *pdev) +void mlx5_sriov_disable(struct pci_dev *pdev, bool num_vf_change) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); @@ -185,7 +206,7 @@ void mlx5_sriov_disable(struct pci_dev *pdev) pci_disable_sriov(pdev); devl_lock(devlink); - mlx5_device_disable_sriov(dev, num_vfs, true); + mlx5_device_disable_sriov(dev, num_vfs, true, num_vf_change); devl_unlock(devlink); } @@ -200,7 +221,7 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) if (num_vfs) err = mlx5_sriov_enable(pdev, num_vfs); else - mlx5_sriov_disable(pdev); + mlx5_sriov_disable(pdev, true); if (!err) sriov->num_vfs = num_vfs; @@ -245,7 +266,7 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev) if (!mlx5_core_is_pf(dev)) return; - mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false); + mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false, false); } static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) -- cgit v1.2.3 From 7057fe561988effa0b044b99262bb3712a5892c0 Mon 
Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Wed, 15 Mar 2023 17:29:13 +0200 Subject: net/mlx5: Set max number of embedded CPU VFs Set the maximum number of embedded cpu VF functions available. Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/sriov.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index b73583b0a0fe..4e42a3b9b8ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -305,6 +305,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) total_vfs = pci_sriov_get_totalvfs(pdev); sriov->max_vfs = mlx5_get_max_vfs(dev); sriov->num_vfs = pci_num_vf(pdev); + sriov->max_ec_vfs = mlx5_core_ec_sriov_enabled(dev) ? pci_sriov_get_totalvfs(dev->pdev) : 0; sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) return -ENOMEM; -- cgit v1.2.3 From 2059cf51f318681a4cdd3eb1a01a2d62b6a9c442 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 3 May 2023 12:08:48 +0300 Subject: net/mlx5: Split function_setup() to enable and open functions mlx5_cmd_init_hca() is taking ~0.2 seconds. In case of a user who desires to disable some of the SF aux devices, and with large scale-1K SFs for example, this user will waste more than 3 minutes on mlx5_cmd_init_hca() which isn't needed at this stage. Downstream patch will change SFs which are probed over the E-switch, local SFs, to be probed without any aux dev. In order to support this, split function_setup() to avoid executing mlx5_cmd_init_hca(). 
Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 83 ++++++++++++++++++-------- 1 file changed, 58 insertions(+), 25 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index fed8b48a5b20..0faae77d84e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1118,7 +1118,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_devcom_unregister_device(dev->priv.devcom); } -static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout) +static int mlx5_function_enable(struct mlx5_core_dev *dev, bool boot, u64 timeout) { int err; @@ -1183,28 +1183,56 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout goto reclaim_boot_pages; } + return 0; + +reclaim_boot_pages: + mlx5_reclaim_startup_pages(dev); +err_disable_hca: + mlx5_core_disable_hca(dev, 0); +stop_health_poll: + mlx5_stop_health_poll(dev, boot); +err_cmd_cleanup: + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); + mlx5_cmd_cleanup(dev); + + return err; +} + +static void mlx5_function_disable(struct mlx5_core_dev *dev, bool boot) +{ + mlx5_reclaim_startup_pages(dev); + mlx5_core_disable_hca(dev, 0); + mlx5_stop_health_poll(dev, boot); + mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); + mlx5_cmd_cleanup(dev); +} + +static int mlx5_function_open(struct mlx5_core_dev *dev) +{ + int err; + err = set_hca_ctrl(dev); if (err) { mlx5_core_err(dev, "set_hca_ctrl failed\n"); - goto reclaim_boot_pages; + return err; } err = set_hca_cap(dev); if (err) { mlx5_core_err(dev, "set_hca_cap failed\n"); - goto reclaim_boot_pages; + return err; } err = mlx5_satisfy_startup_pages(dev, 0); if (err) { mlx5_core_err(dev, "failed to allocate init pages\n"); - goto reclaim_boot_pages; + return err; } err = mlx5_cmd_init_hca(dev, 
sw_owner_id); if (err) { mlx5_core_err(dev, "init hca failed\n"); - goto reclaim_boot_pages; + return err; } mlx5_set_driver_version(dev); @@ -1212,26 +1240,13 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout err = mlx5_query_hca_caps(dev); if (err) { mlx5_core_err(dev, "query hca failed\n"); - goto reclaim_boot_pages; + return err; } mlx5_start_health_fw_log_up(dev); - return 0; - -reclaim_boot_pages: - mlx5_reclaim_startup_pages(dev); -err_disable_hca: - mlx5_core_disable_hca(dev, 0); -stop_health_poll: - mlx5_stop_health_poll(dev, boot); -err_cmd_cleanup: - mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); - mlx5_cmd_cleanup(dev); - - return err; } -static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) +static int mlx5_function_close(struct mlx5_core_dev *dev) { int err; @@ -1240,15 +1255,33 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n"); return err; } - mlx5_reclaim_startup_pages(dev); - mlx5_core_disable_hca(dev, 0); - mlx5_stop_health_poll(dev, boot); - mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); - mlx5_cmd_cleanup(dev); return 0; } +static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout) +{ + int err; + + err = mlx5_function_enable(dev, boot, timeout); + if (err) + return err; + + err = mlx5_function_open(dev); + if (err) + mlx5_function_disable(dev, boot); + return err; +} + +static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) +{ + int err = mlx5_function_close(dev); + + if (!err) + mlx5_function_disable(dev, boot); + return err; +} + static int mlx5_load(struct mlx5_core_dev *dev) { int err; -- cgit v1.2.3 From 3f90840305e2b240749aec7dde23f5262e513641 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 17 May 2023 17:39:54 +0300 Subject: net/mlx5: Move esw multiport devlink param to eswitch code Move the param registration and handling code into the eswitch code as they 
are related to each other. No point in having the devlink param registration done in separate file. Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 34 ---------------- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 49 ++++++++++++++++++++++- 2 files changed, 47 insertions(+), 36 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 63635cc44479..27197acdb4d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -7,7 +7,6 @@ #include "fw_reset.h" #include "fs_core.h" #include "eswitch.h" -#include "lag/lag.h" #include "esw/qos.h" #include "sf/dev/dev.h" #include "sf/sf.h" @@ -427,33 +426,6 @@ static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id return 0; } - -static int mlx5_devlink_esw_multiport_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - - if (!MLX5_ESWITCH_MANAGER(dev)) - return -EOPNOTSUPP; - - if (ctx->val.vbool) - return mlx5_lag_mpesw_enable(dev); - - mlx5_lag_mpesw_disable(dev); - return 0; -} - -static int mlx5_devlink_esw_multiport_get(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - - if (!MLX5_ESWITCH_MANAGER(dev)) - return -EOPNOTSUPP; - - ctx->val.vbool = mlx5_lag_is_mpesw(dev); - return 0; -} #endif static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, @@ -527,12 +499,6 @@ static const struct devlink_param mlx5_devlink_params[] = { BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_large_group_num_validate), - DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, - "esw_multiport", DEVLINK_PARAM_TYPE_BOOL, - BIT(DEVLINK_PARAM_CMODE_RUNTIME), - 
mlx5_devlink_esw_multiport_get, - mlx5_devlink_esw_multiport_set, - NULL), #endif DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_eq_depth_validate), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index b33d852aae34..2af9c4646bc7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -41,6 +41,7 @@ #include "esw/qos.h" #include "mlx5_core.h" #include "lib/eq.h" +#include "lag/lag.h" #include "eswitch.h" #include "fs_core.h" #include "devlink.h" @@ -1709,6 +1710,38 @@ err: return err; } +static int mlx5_devlink_esw_multiport_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!MLX5_ESWITCH_MANAGER(dev)) + return -EOPNOTSUPP; + + if (ctx->val.vbool) + return mlx5_lag_mpesw_enable(dev); + + mlx5_lag_mpesw_disable(dev); + return 0; +} + +static int mlx5_devlink_esw_multiport_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + ctx->val.vbool = mlx5_lag_is_mpesw(dev); + return 0; +} + +static const struct devlink_param mlx5_eswitch_params[] = { + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, + "esw_multiport", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_devlink_esw_multiport_get, + mlx5_devlink_esw_multiport_set, NULL), +}; + int mlx5_eswitch_init(struct mlx5_core_dev *dev) { struct mlx5_eswitch *esw; @@ -1717,9 +1750,16 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) if (!MLX5_VPORT_MANAGER(dev) && !MLX5_ESWITCH_MANAGER(dev)) return 0; + err = devl_params_register(priv_to_devlink(dev), mlx5_eswitch_params, + ARRAY_SIZE(mlx5_eswitch_params)); + if (err) + return err; + esw = kzalloc(sizeof(*esw), GFP_KERNEL); - if (!esw) - return -ENOMEM; + if (!esw) { + err = -ENOMEM; + goto 
unregister_param; + } esw->dev = dev; esw->manager_vport = mlx5_eswitch_manager_vport(dev); @@ -1779,6 +1819,9 @@ abort: if (esw->work_queue) destroy_workqueue(esw->work_queue); kfree(esw); +unregister_param: + devl_params_unregister(priv_to_devlink(dev), mlx5_eswitch_params, + ARRAY_SIZE(mlx5_eswitch_params)); return err; } @@ -1802,6 +1845,8 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw_offloads_cleanup(esw); mlx5_esw_vports_cleanup(esw); kfree(esw); + devl_params_unregister(priv_to_devlink(esw->dev), mlx5_eswitch_params, + ARRAY_SIZE(mlx5_eswitch_params)); } /* Vport Administration */ -- cgit v1.2.3 From e71383fb9cd15a28d6c01d2c165a96f1c0bcf418 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 3 May 2023 14:18:23 +0300 Subject: net/mlx5: Light probe local SFs In case a user wants to configure the SFs, for example: to use only vdpa functionality, he needs to fully probe a SF, configure what he wants, and afterward reload the SF. In order to save the time of the reload, local SFs will probe without any auxiliary sub-device, so that the SFs can be configured prior to their full probe. The defaults of the enable_* devlink params of these SFs are set to false. Usage example: Create SF: $ devlink port add pci/0000:08:00.0 flavour pcisf pfnum 0 sfnum 11 $ devlink port function set pci/0000:08:00.0/32768 \ hw_addr 00:00:00:00:00:11 state active Enable ETH auxiliary device: $ devlink dev param set auxiliary/mlx5_core.sf.1 \ name enable_eth value true cmode driverinit Now, in order to fully probe the SF, use devlink reload: $ devlink dev reload auxiliary/mlx5_core.sf.1 At this point the user has an SF devlink instance with an auxiliary device for the Ethernet functionality only. 
Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/switchdev.rst | 20 ++++ drivers/net/ethernet/mellanox/mlx5/core/dev.c | 16 +++ drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 20 +++- drivers/net/ethernet/mellanox/mlx5/core/health.c | 24 ++-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 124 +++++++++++++++++++-- .../net/ethernet/mellanox/mlx5/core/mlx5_core.h | 7 ++ .../ethernet/mellanox/mlx5/core/sf/dev/driver.c | 15 ++- 7 files changed, 203 insertions(+), 23 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst index 01deedb71597..db62187eebce 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/switchdev.rst @@ -45,6 +45,26 @@ Following bridge VLAN functions are supported by mlx5: Subfunction =========== +Subfunction which are spawned over the E-switch are created only with devlink +device, and by default all the SF auxiliary devices are disabled. +This will allow user to configure the SF before the SF have been fully probed, +which will save time. 
+ +Usage example: +Create SF: +$ devlink port add pci/0000:08:00.0 flavour pcisf pfnum 0 sfnum 11 +$ devlink port function set pci/0000:08:00.0/32768 \ + hw_addr 00:00:00:00:00:11 state active + +Enable ETH auxiliary device: +$ devlink dev param set auxiliary/mlx5_core.sf.1 \ + name enable_eth value true cmode driverinit + +Now, in order to fully probe the SF, use devlink reload: +$ devlink dev reload auxiliary/mlx5_core.sf.1 + +mlx5 supports ETH,rdma and vdpa (vnet) auxiliary devices devlink params (see :ref:`Documentation/networking/devlink/devlink-params.rst`) + mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst `) interface. A subfunction has its own function capabilities and its own resources. This diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 1b33533b15de..617ac7e5d75c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -323,6 +323,18 @@ static void del_adev(struct auxiliary_device *adev) auxiliary_device_uninit(adev); } +void mlx5_dev_set_lightweight(struct mlx5_core_dev *dev) +{ + mutex_lock(&mlx5_intf_mutex); + dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; + mutex_unlock(&mlx5_intf_mutex); +} + +bool mlx5_dev_is_lightweight(struct mlx5_core_dev *dev) +{ + return dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV; +} + int mlx5_attach_device(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; @@ -457,6 +469,10 @@ static int add_drivers(struct mlx5_core_dev *dev) if (priv->adev[i]) continue; + if (mlx5_adev_devices[i].is_enabled && + !(mlx5_adev_devices[i].is_enabled(dev))) + continue; + if (mlx5_adev_devices[i].is_supported) is_supported = mlx5_adev_devices[i].is_supported(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 27197acdb4d8..3d82ec890666 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -141,6 +141,13 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, bool sf_dev_allocated; int ret = 0; + if (mlx5_dev_is_lightweight(dev)) { + if (action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) + return -EOPNOTSUPP; + mlx5_unload_one_light(dev); + return 0; + } + sf_dev_allocated = mlx5_sf_dev_allocated(dev); if (sf_dev_allocated) { /* Reload results in deleting SF device which further results in @@ -193,6 +200,10 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a *actions_performed = BIT(action); switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: + if (mlx5_dev_is_lightweight(dev)) { + mlx5_fw_reporters_create(dev); + return mlx5_init_one_devl_locked(dev); + } ret = mlx5_load_one_devl_locked(dev, false); break; case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: @@ -511,7 +522,7 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) struct mlx5_core_dev *dev = devlink_priv(devlink); union devlink_param_value value; - value.vbool = MLX5_CAP_GEN(dev, roce); + value.vbool = MLX5_CAP_GEN(dev, roce) && !mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, value); @@ -561,7 +572,7 @@ static int mlx5_devlink_eth_params_register(struct devlink *devlink) if (err) return err; - value.vbool = true; + value.vbool = !mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, value); @@ -601,6 +612,7 @@ static const struct devlink_param mlx5_devlink_rdma_params[] = { static int mlx5_devlink_rdma_params_register(struct devlink *devlink) { + struct mlx5_core_dev *dev = devlink_priv(devlink); union devlink_param_value value; int err; @@ -612,7 +624,7 @@ static int mlx5_devlink_rdma_params_register(struct devlink *devlink) if (err) return err; - value.vbool = true; + value.vbool = 
!mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, value); @@ -647,7 +659,7 @@ static int mlx5_devlink_vnet_params_register(struct devlink *devlink) if (err) return err; - value.vbool = true; + value.vbool = !mlx5_dev_is_lightweight(dev); devl_param_driverinit_value_set(devlink, DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, value); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 871c32dda66e..210100a4064a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -719,7 +719,7 @@ static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { #define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000 #define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD -static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) +void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; struct devlink *devlink = priv_to_devlink(dev); @@ -735,17 +735,17 @@ static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) } health->fw_reporter = - devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops, - 0, dev); + devl_health_reporter_create(devlink, &mlx5_fw_reporter_ops, + 0, dev); if (IS_ERR(health->fw_reporter)) mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", PTR_ERR(health->fw_reporter)); health->fw_fatal_reporter = - devlink_health_reporter_create(devlink, - &mlx5_fw_fatal_reporter_ops, - grace_period, - dev); + devl_health_reporter_create(devlink, + &mlx5_fw_fatal_reporter_ops, + grace_period, + dev); if (IS_ERR(health->fw_fatal_reporter)) mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", PTR_ERR(health->fw_fatal_reporter)); @@ -777,7 +777,8 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - 
queue_work(health->wq, &health->fatal_report_work); + if (!mlx5_dev_is_lightweight(dev)) + queue_work(health->wq, &health->fatal_report_work); } #define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60) @@ -905,10 +906,15 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev) int mlx5_health_init(struct mlx5_core_dev *dev) { + struct devlink *devlink = priv_to_devlink(dev); struct mlx5_core_health *health; char *name; - mlx5_fw_reporters_create(dev); + if (!mlx5_dev_is_lightweight(dev)) { + devl_lock(devlink); + mlx5_fw_reporters_create(dev); + devl_unlock(devlink); + } mlx5_reporter_vnic_create(dev); health = &dev->priv.health; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0faae77d84e6..6fa314f8e5ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1424,12 +1424,11 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_put_uars_page(dev, dev->priv.uar); } -int mlx5_init_one(struct mlx5_core_dev *dev) +int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev) { - struct devlink *devlink = priv_to_devlink(dev); + bool light_probe = mlx5_dev_is_lightweight(dev); int err = 0; - devl_lock(devlink); mutex_lock(&dev->intf_state_mutex); dev->state = MLX5_DEVICE_STATE_UP; @@ -1443,9 +1442,14 @@ int mlx5_init_one(struct mlx5_core_dev *dev) goto function_teardown; } - err = mlx5_devlink_params_register(priv_to_devlink(dev)); - if (err) - goto err_devlink_params_reg; + /* In case of light_probe, mlx5_devlink is already registered. + * Hence, don't register devlink again. 
+ */ + if (!light_probe) { + err = mlx5_devlink_params_register(priv_to_devlink(dev)); + if (err) + goto err_devlink_params_reg; + } err = mlx5_load(dev); if (err) @@ -1458,14 +1462,14 @@ int mlx5_init_one(struct mlx5_core_dev *dev) goto err_register; mutex_unlock(&dev->intf_state_mutex); - devl_unlock(devlink); return 0; err_register: clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); mlx5_unload(dev); err_load: - mlx5_devlink_params_unregister(priv_to_devlink(dev)); + if (!light_probe) + mlx5_devlink_params_unregister(priv_to_devlink(dev)); err_devlink_params_reg: mlx5_cleanup_once(dev); function_teardown: @@ -1473,6 +1477,16 @@ function_teardown: err_function: dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mutex_unlock(&dev->intf_state_mutex); + return err; +} + +int mlx5_init_one(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + int err; + + devl_lock(devlink); + err = mlx5_init_one_devl_locked(dev); devl_unlock(devlink); return err; } @@ -1590,6 +1604,100 @@ void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend) devl_unlock(devlink); } +/* In case of light probe, we don't need a full query of hca_caps, but only the bellow caps. + * A full query of hca_caps will be done when the device will reload. 
+ */ +static int mlx5_query_hca_caps_light(struct mlx5_core_dev *dev) +{ + int err; + + err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); + if (err) + return err; + + if (MLX5_CAP_GEN(dev, eth_net_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ETHERNET_OFFLOADS); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, nic_flow_table) || + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE); + if (err) + return err; + } + + if (MLX5_CAP_GEN_64(dev, general_obj_types) & + MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { + err = mlx5_core_get_caps(dev, MLX5_CAP_VDPA_EMULATION); + if (err) + return err; + } + + return 0; +} + +int mlx5_init_one_light(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + int err; + + dev->state = MLX5_DEVICE_STATE_UP; + err = mlx5_function_enable(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); + if (err) { + mlx5_core_warn(dev, "mlx5_function_enable err=%d\n", err); + goto out; + } + + err = mlx5_query_hca_caps_light(dev); + if (err) { + mlx5_core_warn(dev, "mlx5_query_hca_caps_light err=%d\n", err); + goto query_hca_caps_err; + } + + devl_lock(devlink); + err = mlx5_devlink_params_register(priv_to_devlink(dev)); + devl_unlock(devlink); + if (err) { + mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err); + goto query_hca_caps_err; + } + + return 0; + +query_hca_caps_err: + mlx5_function_disable(dev, true); +out: + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + return err; +} + +void mlx5_uninit_one_light(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + + devl_lock(devlink); + mlx5_devlink_params_unregister(priv_to_devlink(dev)); + devl_unlock(devlink); + if (dev->state != MLX5_DEVICE_STATE_UP) + return; + mlx5_function_disable(dev, true); +} + +/* xxx_light() function are used in order to configure the device without full + * init (light init). e.g.: There isn't a point in reload a device to light state. 
+ * Hence, mlx5_load_one_light() isn't needed. + */ + +void mlx5_unload_one_light(struct mlx5_core_dev *dev) +{ + if (dev->state != MLX5_DEVICE_STATE_UP) + return; + mlx5_function_disable(dev, false); + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; +} + static const int types[] = { MLX5_CAP_GENERAL, MLX5_CAP_GENERAL_2, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 7a5f04082058..464c6885a01c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -240,11 +240,14 @@ int mlx5_attach_device(struct mlx5_core_dev *dev); void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend); int mlx5_register_device(struct mlx5_core_dev *dev); void mlx5_unregister_device(struct mlx5_core_dev *dev); +void mlx5_dev_set_lightweight(struct mlx5_core_dev *dev); +bool mlx5_dev_is_lightweight(struct mlx5_core_dev *dev); struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev); void mlx5_dev_list_lock(void); void mlx5_dev_list_unlock(void); int mlx5_dev_list_trylock(void); +void mlx5_fw_reporters_create(struct mlx5_core_dev *dev); int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size); int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size); int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode); @@ -319,11 +322,15 @@ static inline bool mlx5_core_is_sf(const struct mlx5_core_dev *dev) int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx); void mlx5_mdev_uninit(struct mlx5_core_dev *dev); int mlx5_init_one(struct mlx5_core_dev *dev); +int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev); void mlx5_uninit_one(struct mlx5_core_dev *dev); void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend); void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend); int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery); int 
mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery); +int mlx5_init_one_light(struct mlx5_core_dev *dev); +void mlx5_uninit_one_light(struct mlx5_core_dev *dev); +void mlx5_unload_one_light(struct mlx5_core_dev *dev); int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 vport, u16 opmod); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index 0692363cf80e..8fe82f1191bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -3,6 +3,7 @@ #include #include +#include #include "mlx5_core.h" #include "dev.h" #include "devlink.h" @@ -28,6 +29,10 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia mdev->priv.adev_idx = adev->id; sf_dev->mdev = mdev; + /* Only local SFs do light probe */ + if (MLX5_ESWITCH_MANAGER(sf_dev->parent_mdev)) + mlx5_dev_set_lightweight(mdev); + err = mlx5_mdev_init(mdev, MLX5_SF_PROF); if (err) { mlx5_core_warn(mdev, "mlx5_mdev_init on err=%d\n", err); @@ -41,7 +46,10 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia goto remap_err; } - err = mlx5_init_one(mdev); + if (MLX5_ESWITCH_MANAGER(sf_dev->parent_mdev)) + err = mlx5_init_one_light(mdev); + else + err = mlx5_init_one(mdev); if (err) { mlx5_core_warn(mdev, "mlx5_init_one err=%d\n", err); goto init_one_err; @@ -65,7 +73,10 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev) mlx5_drain_health_wq(sf_dev->mdev); devlink_unregister(devlink); - mlx5_uninit_one(sf_dev->mdev); + if (mlx5_dev_is_lightweight(sf_dev->mdev)) + mlx5_uninit_one_light(sf_dev->mdev); + else + mlx5_uninit_one(sf_dev->mdev); iounmap(sf_dev->mdev->iseg); mlx5_mdev_uninit(sf_dev->mdev); mlx5_devlink_free(devlink); -- cgit v1.2.3 From 978015f7ef9240acfb078f4c1c0d79459b42f951 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 29 May 2023 
10:34:59 +0200 Subject: net/mlx5e: Remove a useless function call 'handle' is known to be NULL here. There is no need to kfree() it. Signed-off-by: Christophe JAILLET Reviewed-by: Simon Horman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index 0290e0dea539..4e923a2874ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -112,10 +112,8 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *po int err; handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (!handle) { - kfree(handle); + if (!handle) return ERR_PTR(-ENOMEM); - } post_attr->chain = 0; post_attr->prio = 0; -- cgit v1.2.3 From c37cf54c12cfaa51e7aaf88708167b0d3259e64e Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Thu, 8 Jun 2023 13:02:26 -0700 Subject: iavf: remove mask from iavf_irq_enable_queues() Enable more than 32 IRQs by removing the u32 bit mask in iavf_irq_enable_queues(). There is no need for the mask as there are no callers that select individual IRQs through the bitmask. Also, if the PF allocates more than 32 IRQs, this mask will prevent us from using all of them. Modify the comment in iavf_register.h to show that the maximum number allowed for the IRQ index is 63 as per the iAVF standard 1.0 [1]. 
link: [1] https://www.intel.com/content/dam/www/public/us/en/documents/product-specifications/ethernet-adaptive-virtual-function-hardware-spec.pdf Fixes: 5eae00c57f5e ("i40evf: main driver core") Signed-off-by: Ahmed Zaki Tested-by: Rafal Romanowski Reviewed-by: Simon Horman Reviewed-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20230608200226.451861-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/iavf/iavf.h | 2 +- drivers/net/ethernet/intel/iavf/iavf_main.c | 15 ++++++--------- drivers/net/ethernet/intel/iavf/iavf_register.h | 2 +- 3 files changed, 8 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 9abaff1f2aff..39d0fe76a38f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -525,7 +525,7 @@ void iavf_set_ethtool_ops(struct net_device *netdev); void iavf_update_stats(struct iavf_adapter *adapter); void iavf_reset_interrupt_capability(struct iavf_adapter *adapter); int iavf_init_interrupt_scheme(struct iavf_adapter *adapter); -void iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask); +void iavf_irq_enable_queues(struct iavf_adapter *adapter); void iavf_free_all_tx_resources(struct iavf_adapter *adapter); void iavf_free_all_rx_resources(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 2de4baff4c20..4a66873882d1 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -359,21 +359,18 @@ static void iavf_irq_disable(struct iavf_adapter *adapter) } /** - * iavf_irq_enable_queues - Enable interrupt for specified queues + * iavf_irq_enable_queues - Enable interrupt for all queues * @adapter: board private structure - * @mask: bitmap of queues to enable **/ -void 
iavf_irq_enable_queues(struct iavf_adapter *adapter, u32 mask) +void iavf_irq_enable_queues(struct iavf_adapter *adapter) { struct iavf_hw *hw = &adapter->hw; int i; for (i = 1; i < adapter->num_msix_vectors; i++) { - if (mask & BIT(i - 1)) { - wr32(hw, IAVF_VFINT_DYN_CTLN1(i - 1), - IAVF_VFINT_DYN_CTLN1_INTENA_MASK | - IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK); - } + wr32(hw, IAVF_VFINT_DYN_CTLN1(i - 1), + IAVF_VFINT_DYN_CTLN1_INTENA_MASK | + IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK); } } @@ -387,7 +384,7 @@ void iavf_irq_enable(struct iavf_adapter *adapter, bool flush) struct iavf_hw *hw = &adapter->hw; iavf_misc_irq_enable(adapter); - iavf_irq_enable_queues(adapter, ~0); + iavf_irq_enable_queues(adapter); if (flush) iavf_flush(hw); diff --git a/drivers/net/ethernet/intel/iavf/iavf_register.h b/drivers/net/ethernet/intel/iavf/iavf_register.h index bf793332fc9d..a19e88898a0b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_register.h +++ b/drivers/net/ethernet/intel/iavf/iavf_register.h @@ -40,7 +40,7 @@ #define IAVF_VFINT_DYN_CTL01_INTENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTL01_INTENA_SHIFT) #define IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT 3 #define IAVF_VFINT_DYN_CTL01_ITR_INDX_MASK IAVF_MASK(0x3, IAVF_VFINT_DYN_CTL01_ITR_INDX_SHIFT) -#define IAVF_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...15 */ /* Reset: VFR */ +#define IAVF_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4)) /* _i=0...63 */ /* Reset: VFR */ #define IAVF_VFINT_DYN_CTLN1_INTENA_SHIFT 0 #define IAVF_VFINT_DYN_CTLN1_INTENA_MASK IAVF_MASK(0x1, IAVF_VFINT_DYN_CTLN1_INTENA_SHIFT) #define IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_SHIFT 2 -- cgit v1.2.3 From d457a0e329b0bfd3a1450e0b1a18cd2b47a25a08 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Jun 2023 19:17:37 +0000 Subject: net: move gso declarations and functions to their own files Move declarations into include/net/gso.h and code into net/core/gso.c Signed-off-by: Eric Dumazet Cc: Stanislav Fomichev Reviewed-by: Simon Horman Reviewed-by: 
David Ahern Link: https://lore.kernel.org/r/20230608191738.3947077-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/tg3.c | 1 + drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 1 + drivers/net/ethernet/sfc/siena/tx_common.c | 1 + drivers/net/ethernet/sfc/tx_common.c | 1 + drivers/net/tap.c | 1 + drivers/net/usb/r8152.c | 1 + drivers/net/wireguard/device.c | 1 + drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 1 + include/linux/netdevice.h | 26 +-- include/linux/skbuff.h | 71 ------ include/net/gro.h | 1 + include/net/gso.h | 109 +++++++++ include/net/udp.h | 1 + net/core/Makefile | 2 +- net/core/dev.c | 70 +----- net/core/gro.c | 59 +---- net/core/gso.c | 273 +++++++++++++++++++++++ net/core/skbuff.c | 142 +----------- net/ipv4/af_inet.c | 1 + net/ipv4/esp4_offload.c | 1 + net/ipv4/gre_offload.c | 1 + net/ipv4/ip_output.c | 1 + net/ipv4/tcp_offload.c | 1 + net/ipv4/udp.c | 1 + net/ipv4/udp_offload.c | 1 + net/ipv6/esp6_offload.c | 1 + net/ipv6/ip6_offload.c | 1 + net/ipv6/ip6_output.c | 1 + net/ipv6/udp_offload.c | 1 + net/mac80211/tx.c | 1 + net/mpls/af_mpls.c | 1 + net/mpls/mpls_gso.c | 1 + net/netfilter/nf_flow_table_ip.c | 1 + net/netfilter/nfnetlink_queue.c | 1 + net/nsh/nsh.c | 1 + net/openvswitch/actions.c | 1 + net/openvswitch/datapath.c | 1 + net/sched/act_police.c | 1 + net/sched/sch_cake.c | 1 + net/sched/sch_netem.c | 1 + net/sched/sch_taprio.c | 1 + net/sched/sch_tbf.c | 1 + net/sctp/offload.c | 1 + net/xfrm/xfrm_device.c | 1 + net/xfrm/xfrm_interface_core.c | 1 + net/xfrm/xfrm_output.c | 1 + 46 files changed, 425 insertions(+), 365 deletions(-) create mode 100644 include/net/gso.h create mode 100644 net/core/gso.c (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 58747292521d..5e68a6a4b2af 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -57,6 +57,7 @@ #include #include +#include #include 
#include diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index c5687d94ea88..7b7e1c5b00f4 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -66,6 +66,7 @@ #include #include #include +#include <net/gso.h> #include #include #include diff --git a/drivers/net/ethernet/sfc/siena/tx_common.c b/drivers/net/ethernet/sfc/siena/tx_common.c index 93a32d61944f..a7a9ab304e13 100644 --- a/drivers/net/ethernet/sfc/siena/tx_common.c +++ b/drivers/net/ethernet/sfc/siena/tx_common.c @@ -12,6 +12,7 @@ #include "efx.h" #include "nic_common.h" #include "tx_common.h" +#include <net/gso.h> static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue) { diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c index 67e789b96c43..4ce7d00e697d 100644 --- a/drivers/net/ethernet/sfc/tx_common.c +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -12,6 +12,7 @@ #include "efx.h" #include "nic_common.h" #include "tx_common.h" +#include <net/gso.h> static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue) { diff --git a/drivers/net/tap.c b/drivers/net/tap.c index d30d730ed5a7..9137fb8c1c42 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -18,6 +18,7 @@ #include #include +#include <net/gso.h> #include #include #include diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 0999a58ca9d2..0738baa5b82e 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -27,6 +27,7 @@ #include #include #include +#include <net/gso.h> /* Information for net-next */ #define NETNEXT_VERSION "12" diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index d58e9f818d3b..258dcc103921 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -20,6 +20,7 @@ #include #include #include +#include <net/gso.h> #include #include #include diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 5fa6f98b8e55..ef0f53b3b89f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c2f0c6002a84..2d6cb2bf2f05 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4827,13 +4827,6 @@ int skb_crc32c_csum_help(struct sk_buff *skb); int skb_csum_hwoffload_help(struct sk_buff *skb, const netdev_features_t features); -struct sk_buff *__skb_gso_segment(struct sk_buff *skb, - netdev_features_t features, bool tx_path); -struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb, - netdev_features_t features, __be16 type); -struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, - netdev_features_t features); - struct netdev_bonding_info { ifslave slave; ifbond master; @@ -4856,11 +4849,6 @@ static inline void ethtool_notify(struct net_device *dev, unsigned int cmd, } #endif -static inline -struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) -{ - return __skb_gso_segment(skb, features, true); -} __be16 skb_network_protocol(struct sk_buff *skb, int *depth); static inline bool can_checksum_protocol(netdev_features_t features, @@ -4987,6 +4975,7 @@ netdev_features_t passthru_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); netdev_features_t netif_skb_features(struct sk_buff *skb); +void skb_warn_bad_offload(const struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) { @@ -5035,19 +5024,6 @@ void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs); void netif_inherit_tso_max(struct net_device *to, const struct net_device *from); -static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, - int pulled_hlen, u16 mac_offset, - int mac_len) -{ - skb->protocol = protocol; - 
skb->encapsulation = 1; - skb_push(skb, pulled_hlen); - skb_reset_transport_header(skb); - skb->mac_header = mac_offset; - skb->network_header = skb->mac_header + mac_len; - skb->mac_len = mac_len; -} - static inline bool netif_is_macsec(const struct net_device *dev) { return dev->priv_flags & IFF_MACSEC; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e2f48ddb2f7c..91ed66952580 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3974,8 +3974,6 @@ int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); -bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu); -bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features, unsigned int offset); @@ -4841,75 +4839,6 @@ static inline struct sec_path *skb_sec_path(const struct sk_buff *skb) #endif } -/* Keeps track of mac header offset relative to skb->head. - * It is useful for TSO of Tunneling protocol. e.g. GRE. - * For non-tunnel skb it points to skb_mac_header() and for - * tunnel skb it points to outer mac header. - * Keeps track of level of encapsulation of network headers. 
- */ -struct skb_gso_cb { - union { - int mac_offset; - int data_offset; - }; - int encap_level; - __wsum csum; - __u16 csum_start; -}; -#define SKB_GSO_CB_OFFSET 32 -#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_GSO_CB_OFFSET)) - -static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) -{ - return (skb_mac_header(inner_skb) - inner_skb->head) - - SKB_GSO_CB(inner_skb)->mac_offset; -} - -static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) -{ - int new_headroom, headroom; - int ret; - - headroom = skb_headroom(skb); - ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC); - if (ret) - return ret; - - new_headroom = skb_headroom(skb); - SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom); - return 0; -} - -static inline void gso_reset_checksum(struct sk_buff *skb, __wsum res) -{ - /* Do not update partial checksums if remote checksum is enabled. */ - if (skb->remcsum_offload) - return; - - SKB_GSO_CB(skb)->csum = res; - SKB_GSO_CB(skb)->csum_start = skb_checksum_start(skb) - skb->head; -} - -/* Compute the checksum for a gso segment. First compute the checksum value - * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and - * then add in skb->csum (checksum from csum_start to end of packet). - * skb->csum and csum_start are then updated to reflect the checksum of the - * resultant packet starting from the transport header-- the resultant checksum - * is in the res argument (i.e. normally zero or ~ of checksum of a pseudo - * header. 
- */ -static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) -{ - unsigned char *csum_start = skb_transport_header(skb); - int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start; - __wsum partial = SKB_GSO_CB(skb)->csum; - - SKB_GSO_CB(skb)->csum = res; - SKB_GSO_CB(skb)->csum_start = csum_start - skb->head; - - return csum_fold(csum_partial(csum_start, plen, partial)); -} - static inline bool skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; diff --git a/include/net/gro.h b/include/net/gro.h index 7b47dd6ce94f..75efa6fb8441 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -452,5 +452,6 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, gro_normal_list(napi); } +extern struct list_head offload_base; #endif /* _NET_IPV6_GRO_H */ diff --git a/include/net/gso.h b/include/net/gso.h new file mode 100644 index 000000000000..29975440cad5 --- /dev/null +++ b/include/net/gso.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _NET_GSO_H +#define _NET_GSO_H + +#include + +/* Keeps track of mac header offset relative to skb->head. + * It is useful for TSO of Tunneling protocol. e.g. GRE. + * For non-tunnel skb it points to skb_mac_header() and for + * tunnel skb it points to outer mac header. + * Keeps track of level of encapsulation of network headers. 
+ */ +struct skb_gso_cb { + union { + int mac_offset; + int data_offset; + }; + int encap_level; + __wsum csum; + __u16 csum_start; +}; +#define SKB_GSO_CB_OFFSET 32 +#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_GSO_CB_OFFSET)) + +static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) +{ + return (skb_mac_header(inner_skb) - inner_skb->head) - + SKB_GSO_CB(inner_skb)->mac_offset; +} + +static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra) +{ + int new_headroom, headroom; + int ret; + + headroom = skb_headroom(skb); + ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC); + if (ret) + return ret; + + new_headroom = skb_headroom(skb); + SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom); + return 0; +} + +static inline void gso_reset_checksum(struct sk_buff *skb, __wsum res) +{ + /* Do not update partial checksums if remote checksum is enabled. */ + if (skb->remcsum_offload) + return; + + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = skb_checksum_start(skb) - skb->head; +} + +/* Compute the checksum for a gso segment. First compute the checksum value + * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and + * then add in skb->csum (checksum from csum_start to end of packet). + * skb->csum and csum_start are then updated to reflect the checksum of the + * resultant packet starting from the transport header-- the resultant checksum + * is in the res argument (i.e. normally zero or ~ of checksum of a pseudo + * header. 
+ */ +static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) +{ + unsigned char *csum_start = skb_transport_header(skb); + int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start; + __wsum partial = SKB_GSO_CB(skb)->csum; + + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = csum_start - skb->head; + + return csum_fold(csum_partial(csum_start, plen, partial)); +} + +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path); + +static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + return __skb_gso_segment(skb, features, true); +} + +struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb, + netdev_features_t features, __be16 type); + +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features); + +bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu); + +bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len); + +static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol, + int pulled_hlen, u16 mac_offset, + int mac_len) +{ + skb->protocol = protocol; + skb->encapsulation = 1; + skb_push(skb, pulled_hlen); + skb_reset_transport_header(skb); + skb->mac_header = mac_offset; + skb->network_header = skb->mac_header + mac_len; + skb->mac_len = mac_len; +} + +#endif /* _NET_GSO_H */ diff --git a/include/net/udp.h b/include/net/udp.h index 4ed0b47c5582..e01340a27155 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/net/core/Makefile b/net/core/Makefile index 8f367813bc68..731db2eaa610 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -13,7 +13,7 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ fib_notifier.o xdp.o flow_offload.o gro.o \ - 
netdev-genl.o netdev-genl-gen.o + netdev-genl.o netdev-genl-gen.o gso.o obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o diff --git a/net/core/dev.c b/net/core/dev.c index 6d6f8a7fe6b4..c2456b3667fe 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3209,7 +3209,7 @@ static u16 skb_tx_hash(const struct net_device *dev, return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset; } -static void skb_warn_bad_offload(const struct sk_buff *skb) +void skb_warn_bad_offload(const struct sk_buff *skb) { static const netdev_features_t null_features; struct net_device *dev = skb->dev; @@ -3338,74 +3338,6 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) return vlan_get_protocol_and_depth(skb, type, depth); } -/* openvswitch calls this on rx path, so we need a different check. - */ -static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) -{ - if (tx_path) - return skb->ip_summed != CHECKSUM_PARTIAL && - skb->ip_summed != CHECKSUM_UNNECESSARY; - - return skb->ip_summed == CHECKSUM_NONE; -} - -/** - * __skb_gso_segment - Perform segmentation on skb. - * @skb: buffer to segment - * @features: features for the output path (see dev->features) - * @tx_path: whether it is called in TX path - * - * This function segments the given skb and returns a list of segments. - * - * It may return NULL if the skb requires no segmentation. This is - * only possible when GSO is used for verifying header integrity. - * - * Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb. 
- */ -struct sk_buff *__skb_gso_segment(struct sk_buff *skb, - netdev_features_t features, bool tx_path) -{ - struct sk_buff *segs; - - if (unlikely(skb_needs_check(skb, tx_path))) { - int err; - - /* We're going to init ->check field in TCP or UDP header */ - err = skb_cow_head(skb, 0); - if (err < 0) - return ERR_PTR(err); - } - - /* Only report GSO partial support if it will enable us to - * support segmentation on this frame without needing additional - * work. - */ - if (features & NETIF_F_GSO_PARTIAL) { - netdev_features_t partial_features = NETIF_F_GSO_ROBUST; - struct net_device *dev = skb->dev; - - partial_features |= dev->features & dev->gso_partial_features; - if (!skb_gso_ok(skb, features | partial_features)) - features &= ~NETIF_F_GSO_PARTIAL; - } - - BUILD_BUG_ON(SKB_GSO_CB_OFFSET + - sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); - - SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); - SKB_GSO_CB(skb)->encap_level = 0; - - skb_reset_mac_header(skb); - skb_reset_mac_len(skb); - - segs = skb_mac_gso_segment(skb, features); - - if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) - skb_warn_bad_offload(skb); - - return segs; -} -EXPORT_SYMBOL(__skb_gso_segment); /* Take action when hardware reception checksum errors are detected. */ #ifdef CONFIG_BUG diff --git a/net/core/gro.c b/net/core/gro.c index 4d45f78e2fac..dca800068e41 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -10,7 +10,7 @@ #define GRO_MAX_HEAD (MAX_HEADER + 128) static DEFINE_SPINLOCK(offload_lock); -static struct list_head offload_base __read_mostly = LIST_HEAD_INIT(offload_base); +struct list_head offload_base __read_mostly = LIST_HEAD_INIT(offload_base); /* Maximum number of GRO_NORMAL skbs to batch up for list-RX */ int gro_normal_batch __read_mostly = 8; @@ -92,63 +92,6 @@ void dev_remove_offload(struct packet_offload *po) } EXPORT_SYMBOL(dev_remove_offload); -/** - * skb_eth_gso_segment - segmentation handler for ethernet protocols. 
- * @skb: buffer to segment - * @features: features for the output path (see dev->features) - * @type: Ethernet Protocol ID - */ -struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb, - netdev_features_t features, __be16 type) -{ - struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_offload *ptype; - - rcu_read_lock(); - list_for_each_entry_rcu(ptype, &offload_base, list) { - if (ptype->type == type && ptype->callbacks.gso_segment) { - segs = ptype->callbacks.gso_segment(skb, features); - break; - } - } - rcu_read_unlock(); - - return segs; -} -EXPORT_SYMBOL(skb_eth_gso_segment); - -/** - * skb_mac_gso_segment - mac layer segmentation handler. - * @skb: buffer to segment - * @features: features for the output path (see dev->features) - */ -struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_offload *ptype; - int vlan_depth = skb->mac_len; - __be16 type = skb_network_protocol(skb, &vlan_depth); - - if (unlikely(!type)) - return ERR_PTR(-EINVAL); - - __skb_pull(skb, vlan_depth); - - rcu_read_lock(); - list_for_each_entry_rcu(ptype, &offload_base, list) { - if (ptype->type == type && ptype->callbacks.gso_segment) { - segs = ptype->callbacks.gso_segment(skb, features); - break; - } - } - rcu_read_unlock(); - - __skb_push(skb, skb->data - skb_mac_header(skb)); - - return segs; -} -EXPORT_SYMBOL(skb_mac_gso_segment); int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) { diff --git a/net/core/gso.c b/net/core/gso.c new file mode 100644 index 000000000000..9e1803bfc9c6 --- /dev/null +++ b/net/core/gso.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include +#include + +/** + * skb_eth_gso_segment - segmentation handler for ethernet protocols. 
+ * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * @type: Ethernet Protocol ID + */ +struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb, + netdev_features_t features, __be16 type) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_offload *ptype; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &offload_base, list) { + if (ptype->type == type && ptype->callbacks.gso_segment) { + segs = ptype->callbacks.gso_segment(skb, features); + break; + } + } + rcu_read_unlock(); + + return segs; +} +EXPORT_SYMBOL(skb_eth_gso_segment); + +/** + * skb_mac_gso_segment - mac layer segmentation handler. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + */ +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_offload *ptype; + int vlan_depth = skb->mac_len; + __be16 type = skb_network_protocol(skb, &vlan_depth); + + if (unlikely(!type)) + return ERR_PTR(-EINVAL); + + __skb_pull(skb, vlan_depth); + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &offload_base, list) { + if (ptype->type == type && ptype->callbacks.gso_segment) { + segs = ptype->callbacks.gso_segment(skb, features); + break; + } + } + rcu_read_unlock(); + + __skb_push(skb, skb->data - skb_mac_header(skb)); + + return segs; +} +EXPORT_SYMBOL(skb_mac_gso_segment); +/* openvswitch calls this on rx path, so we need a different check. + */ +static bool skb_needs_check(const struct sk_buff *skb, bool tx_path) +{ + if (tx_path) + return skb->ip_summed != CHECKSUM_PARTIAL && + skb->ip_summed != CHECKSUM_UNNECESSARY; + + return skb->ip_summed == CHECKSUM_NONE; +} + +/** + * __skb_gso_segment - Perform segmentation on skb. 
+ * @skb: buffer to segment + * @features: features for the output path (see dev->features) + * @tx_path: whether it is called in TX path + * + * This function segments the given skb and returns a list of segments. + * + * It may return NULL if the skb requires no segmentation. This is + * only possible when GSO is used for verifying header integrity. + * + * Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb. + */ +struct sk_buff *__skb_gso_segment(struct sk_buff *skb, + netdev_features_t features, bool tx_path) +{ + struct sk_buff *segs; + + if (unlikely(skb_needs_check(skb, tx_path))) { + int err; + + /* We're going to init ->check field in TCP or UDP header */ + err = skb_cow_head(skb, 0); + if (err < 0) + return ERR_PTR(err); + } + + /* Only report GSO partial support if it will enable us to + * support segmentation on this frame without needing additional + * work. + */ + if (features & NETIF_F_GSO_PARTIAL) { + netdev_features_t partial_features = NETIF_F_GSO_ROBUST; + struct net_device *dev = skb->dev; + + partial_features |= dev->features & dev->gso_partial_features; + if (!skb_gso_ok(skb, features | partial_features)) + features &= ~NETIF_F_GSO_PARTIAL; + } + + BUILD_BUG_ON(SKB_GSO_CB_OFFSET + + sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); + + SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); + SKB_GSO_CB(skb)->encap_level = 0; + + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + + segs = skb_mac_gso_segment(skb, features); + + if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs))) + skb_warn_bad_offload(skb); + + return segs; +} +EXPORT_SYMBOL(__skb_gso_segment); + +/** + * skb_gso_transport_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_transport_seglen is used to determine the real size of the + * individual segments, including Layer4 headers (TCP/UDP). + * + * The MAC/L2 or network (IP, IPv6) headers are not accounted for. 
+ */ +static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + unsigned int thlen = 0; + + if (skb->encapsulation) { + thlen = skb_inner_transport_header(skb) - + skb_transport_header(skb); + + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + thlen += inner_tcp_hdrlen(skb); + } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + thlen = tcp_hdrlen(skb); + } else if (unlikely(skb_is_gso_sctp(skb))) { + thlen = sizeof(struct sctphdr); + } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { + thlen = sizeof(struct udphdr); + } + /* UFO sets gso_size to the size of the fragmentation + * payload, i.e. the size of the L4 (UDP) header is already + * accounted for. + */ + return thlen + shinfo->gso_size; +} + +/** + * skb_gso_network_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_network_seglen is used to determine the real size of the + * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP). + * + * The MAC/L2 header is not accounted for. + */ +static unsigned int skb_gso_network_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - + skb_network_header(skb); + + return hdr_len + skb_gso_transport_seglen(skb); +} + +/** + * skb_gso_mac_seglen - Return length of individual segments of a gso packet + * + * @skb: GSO skb + * + * skb_gso_mac_seglen is used to determine the real size of the + * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4 + * headers (TCP/UDP). 
+ */ +static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + + return hdr_len + skb_gso_transport_seglen(skb); +} + +/** + * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS + * + * There are a couple of instances where we have a GSO skb, and we + * want to determine what size it would be after it is segmented. + * + * We might want to check: + * - L3+L4+payload size (e.g. IP forwarding) + * - L2+L3+L4+payload size (e.g. sanity check before passing to driver) + * + * This is a helper to do that correctly considering GSO_BY_FRAGS. + * + * @skb: GSO skb + * + * @seg_len: The segmented length (from skb_gso_*_seglen). In the + * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS]. + * + * @max_len: The maximum permissible length. + * + * Returns true if the segmented length <= max length. + */ +static inline bool skb_gso_size_check(const struct sk_buff *skb, + unsigned int seg_len, + unsigned int max_len) { + const struct skb_shared_info *shinfo = skb_shinfo(skb); + const struct sk_buff *iter; + + if (shinfo->gso_size != GSO_BY_FRAGS) + return seg_len <= max_len; + + /* Undo this so we can re-use header sizes */ + seg_len -= GSO_BY_FRAGS; + + skb_walk_frags(skb, iter) { + if (seg_len + skb_headlen(iter) > max_len) + return false; + } + + return true; +} + +/** + * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU? + * + * @skb: GSO skb + * @mtu: MTU to validate against + * + * skb_gso_validate_network_len validates if a given skb will fit a + * wanted MTU once split. It considers L3 headers, L4 headers, and the + * payload. + */ +bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu) +{ + return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu); +} +EXPORT_SYMBOL_GPL(skb_gso_validate_network_len); + +/** + * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length? 
+ * + * @skb: GSO skb + * @len: length to validate against + * + * skb_gso_validate_mac_len validates if a given skb will fit a wanted + * length once split, including L2, L3 and L4 headers and the payload. + */ +bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len) +{ + return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len); +} +EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); + diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7c4338221b17..fee2b1c105fe 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include #include @@ -5766,147 +5767,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) } EXPORT_SYMBOL_GPL(skb_scrub_packet); -/** - * skb_gso_transport_seglen - Return length of individual segments of a gso packet - * - * @skb: GSO skb - * - * skb_gso_transport_seglen is used to determine the real size of the - * individual segments, including Layer4 headers (TCP/UDP). - * - * The MAC/L2 or network (IP, IPv6) headers are not accounted for. - */ -static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb) -{ - const struct skb_shared_info *shinfo = skb_shinfo(skb); - unsigned int thlen = 0; - - if (skb->encapsulation) { - thlen = skb_inner_transport_header(skb) - - skb_transport_header(skb); - - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - thlen += inner_tcp_hdrlen(skb); - } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { - thlen = tcp_hdrlen(skb); - } else if (unlikely(skb_is_gso_sctp(skb))) { - thlen = sizeof(struct sctphdr); - } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { - thlen = sizeof(struct udphdr); - } - /* UFO sets gso_size to the size of the fragmentation - * payload, i.e. the size of the L4 (UDP) header is already - * accounted for. 
- */ - return thlen + shinfo->gso_size; -} - -/** - * skb_gso_network_seglen - Return length of individual segments of a gso packet - * - * @skb: GSO skb - * - * skb_gso_network_seglen is used to determine the real size of the - * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP). - * - * The MAC/L2 header is not accounted for. - */ -static unsigned int skb_gso_network_seglen(const struct sk_buff *skb) -{ - unsigned int hdr_len = skb_transport_header(skb) - - skb_network_header(skb); - - return hdr_len + skb_gso_transport_seglen(skb); -} - -/** - * skb_gso_mac_seglen - Return length of individual segments of a gso packet - * - * @skb: GSO skb - * - * skb_gso_mac_seglen is used to determine the real size of the - * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4 - * headers (TCP/UDP). - */ -static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb) -{ - unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); - - return hdr_len + skb_gso_transport_seglen(skb); -} - -/** - * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS - * - * There are a couple of instances where we have a GSO skb, and we - * want to determine what size it would be after it is segmented. - * - * We might want to check: - * - L3+L4+payload size (e.g. IP forwarding) - * - L2+L3+L4+payload size (e.g. sanity check before passing to driver) - * - * This is a helper to do that correctly considering GSO_BY_FRAGS. - * - * @skb: GSO skb - * - * @seg_len: The segmented length (from skb_gso_*_seglen). In the - * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS]. - * - * @max_len: The maximum permissible length. - * - * Returns true if the segmented length <= max length. 
- */ -static inline bool skb_gso_size_check(const struct sk_buff *skb, - unsigned int seg_len, - unsigned int max_len) { - const struct skb_shared_info *shinfo = skb_shinfo(skb); - const struct sk_buff *iter; - - if (shinfo->gso_size != GSO_BY_FRAGS) - return seg_len <= max_len; - - /* Undo this so we can re-use header sizes */ - seg_len -= GSO_BY_FRAGS; - - skb_walk_frags(skb, iter) { - if (seg_len + skb_headlen(iter) > max_len) - return false; - } - - return true; -} - -/** - * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU? - * - * @skb: GSO skb - * @mtu: MTU to validate against - * - * skb_gso_validate_network_len validates if a given skb will fit a - * wanted MTU once split. It considers L3 headers, L4 headers, and the - * payload. - */ -bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu) -{ - return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu); -} -EXPORT_SYMBOL_GPL(skb_gso_validate_network_len); - -/** - * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length? - * - * @skb: GSO skb - * @len: length to validate against - * - * skb_gso_validate_mac_len validates if a given skb will fit a wanted - * length once split, including L2, L3 and L4 headers and the payload. 
- */ -bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len) -{ - return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len); -} -EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len); - static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb) { int mac_len, meta_len; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fd233c4195ac..0e16ac8282c5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -100,6 +100,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 3969fa805679..12c5fb3c6e1e 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 2b9cb5398335..311e70bfce40 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -11,6 +11,7 @@ #include #include #include +#include static struct sk_buff *gre_gso_segment(struct sk_buff *skb, netdev_features_t features) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 244fb9365d87..457598dfa128 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 05b38f58b404..8311c38267b5 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index df5e407286d7..7e0542c10471 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -103,6 +103,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 1f01e15ca24f..75aa4de5b731 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git 
a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 75c02992c520..b33c7de5bdbc 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 00dc2e3b0184..d6314287338d 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "ip6_offload.h" diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c722cb881b2d..c06ff7519f19 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -42,6 +42,7 @@ #include #include +#include #include #include #include diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index c39c1e32f980..ad3b8726873e 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -14,6 +14,7 @@ #include #include "ip6_offload.h" #include +#include static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, netdev_features_t features) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 148a0e2aa740..cfbe4beb8f1c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "ieee80211_i.h" #include "driver-ops.h" diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index dc5165d3eec4..bf6e81d56263 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 1482259de9b5..533d082f0701 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -14,6 +14,7 @@ #include #include #include +#include #include static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index d248763917ad..d885d34edfe1 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -8,6 +8,7 @@ #include #include 
#include +#include #include #include #include diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index e311462f6d98..556bc902af00 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c index 0f23e5e8e03e..f4a38bd6a7e0 100644 --- a/net/nsh/nsh.c +++ b/net/nsh/nsh.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index a8cf9a88758e..8074ea00d577 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 58f530f60172..a6d2a0b1aa21 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 2e9dce03d1ec..f3121c5a85e9 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 891e007d5c0b..9cff99558694 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 6ef3021e1169..0c9e93d66c50 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 5076da103f63..4a4e6ff894c1 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sched/sch_tbf.c 
b/net/sched/sch_tbf.c index 277ad11f4d61..17d2d00ddb18 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/offload.c b/net/sctp/offload.c index eb874e3c399a..502095173d88 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -22,6 +22,7 @@ #include #include #include +#include static __le32 sctp_gso_make_checksum(struct sk_buff *skb) { diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 408f5e55744e..533697e2488f 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 1f99dc469027..0ee864a76579 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 369e5de8558f..662c83beb345 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From e16ca7fb9ffb0d51ddf01e450a1043ea65b5be3f Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 8 Jun 2023 17:42:30 +0100 Subject: sfc: add fallback action-set-lists for TC offload When offloading a TC encap action, the action information for the hardware might not be "ready": if there's currently no neighbour entry available for the destination address, we can't construct the Ethernet header to prepend to the packet. In this case, we still offload the flow rule, but with its action-set-list ID pointing at a "fallback" action which simply delivers the packet to its default destination (as though no flow rule had matched), thus allowing software TC to handle it. 
Later, when we receive a neighbouring update that allows us to construct the encap header, the rule will become "ready" and we will update its action-set-list ID in hardware to point at the actual offloaded actions. This patch sets up these fallback ASLs, but does not yet use them. Reviewed-by: Pieter Jansen van Vuuren Signed-off-by: Edward Cree Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/tc.c | 68 +++++++++++++++++++++++++++++++++++++++++++ drivers/net/ethernet/sfc/tc.h | 9 ++++++ 2 files changed, 77 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index bb9ec1e761d3..24c67a163910 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -1391,6 +1391,58 @@ void efx_tc_deconfigure_default_rule(struct efx_nic *efx, rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL; } +static int efx_tc_configure_fallback_acts(struct efx_nic *efx, u32 eg_port, + struct efx_tc_action_set_list *acts) +{ + struct efx_tc_action_set *act; + int rc; + + act = kzalloc(sizeof(*act), GFP_KERNEL); + if (!act) + return -ENOMEM; + act->deliver = 1; + act->dest_mport = eg_port; + rc = efx_mae_alloc_action_set(efx, act); + if (rc) + goto fail1; + EFX_WARN_ON_PARANOID(!list_empty(&acts->list)); + list_add_tail(&act->list, &acts->list); + rc = efx_mae_alloc_action_set_list(efx, acts); + if (rc) + goto fail2; + return 0; +fail2: + list_del(&act->list); + efx_mae_free_action_set(efx, act->fw_id); +fail1: + kfree(act); + return rc; +} + +static int efx_tc_configure_fallback_acts_pf(struct efx_nic *efx) +{ + struct efx_tc_action_set_list *acts = &efx->tc->facts.pf; + u32 eg_port; + + efx_mae_mport_uplink(efx, &eg_port); + return efx_tc_configure_fallback_acts(efx, eg_port, acts); +} + +static int efx_tc_configure_fallback_acts_reps(struct efx_nic *efx) +{ + struct efx_tc_action_set_list *acts = &efx->tc->facts.reps; + u32 eg_port; + + 
efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port); + return efx_tc_configure_fallback_acts(efx, eg_port, acts); +} + +static void efx_tc_deconfigure_fallback_acts(struct efx_nic *efx, + struct efx_tc_action_set_list *acts) +{ + efx_tc_free_action_set_list(efx, acts, true); +} + static int efx_tc_configure_rep_mport(struct efx_nic *efx) { u32 rep_mport_label; @@ -1481,6 +1533,12 @@ int efx_init_tc(struct efx_nic *efx) if (rc) return rc; rc = efx_tc_configure_rep_mport(efx); + if (rc) + return rc; + rc = efx_tc_configure_fallback_acts_pf(efx); + if (rc) + return rc; + rc = efx_tc_configure_fallback_acts_reps(efx); if (rc) return rc; efx->tc->up = true; @@ -1500,6 +1558,8 @@ void efx_fini_tc(struct efx_nic *efx) efx_tc_deconfigure_rep_mport(efx); efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.pf); efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.wire); + efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.pf); + efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.reps); efx->tc->up = false; } @@ -1564,6 +1624,10 @@ int efx_init_struct_tc(struct efx_nic *efx) efx->tc->dflt.pf.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL; INIT_LIST_HEAD(&efx->tc->dflt.wire.acts.list); efx->tc->dflt.wire.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL; + INIT_LIST_HEAD(&efx->tc->facts.pf.list); + efx->tc->facts.pf.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL; + INIT_LIST_HEAD(&efx->tc->facts.reps.list); + efx->tc->facts.reps.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL; efx->extra_channel_type[EFX_EXTRA_CHANNEL_TC] = &efx_tc_channel_type; return 0; fail_match_action_ht: @@ -1589,6 +1653,10 @@ void efx_fini_struct_tc(struct efx_nic *efx) MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL); EFX_WARN_ON_PARANOID(efx->tc->dflt.wire.fw_id != MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL); + EFX_WARN_ON_PARANOID(efx->tc->facts.pf.fw_id != + MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL); + 
EFX_WARN_ON_PARANOID(efx->tc->facts.reps.fw_id != + MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL); rhashtable_free_and_destroy(&efx->tc->match_action_ht, efx_tc_flow_free, efx); rhashtable_free_and_destroy(&efx->tc->encap_match_ht, diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h index 24e9640c74e9..ae182553514d 100644 --- a/drivers/net/ethernet/sfc/tc.h +++ b/drivers/net/ethernet/sfc/tc.h @@ -160,6 +160,11 @@ enum efx_tc_rule_prios { * %EFX_TC_PRIO_DFLT. Named by *ingress* port * @dflt.pf: rule for traffic ingressing from PF (egresses to wire) * @dflt.wire: rule for traffic ingressing from wire (egresses to PF) + * @facts: Fallback action-set-lists for unready rules. Named by *egress* port + * @facts.pf: action-set-list for unready rules on PF netdev, hence applying to + * traffic from wire, and egressing to PF + * @facts.reps: action-set-list for unready rules on representors, hence + * applying to traffic from representees, and egressing to the reps mport * @up: have TC datastructures been set up? */ struct efx_tc_state { @@ -180,6 +185,10 @@ struct efx_tc_state { struct efx_tc_flow_rule pf; struct efx_tc_flow_rule wire; } dflt; + struct { + struct efx_tc_action_set_list pf; + struct efx_tc_action_set_list reps; + } facts; bool up; }; -- cgit v1.2.3 From b4da4235dc69427fbdb66c9fbdf094ac76cdf745 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 8 Jun 2023 17:42:31 +0100 Subject: sfc: some plumbing towards TC encap action offload Create software objects to manage the metadata for encap actions that can be attached to TC rules. However, since we don't yet have the neighbouring information (needed to generate the Ethernet header), all rules with encap actions are marked as "unready" and thus insert the fallback action into hardware rather than actually offloading the encapsulation action. 
Reviewed-by: Pieter Jansen van Vuuren Signed-off-by: Edward Cree Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/Makefile | 3 +- drivers/net/ethernet/sfc/tc.c | 104 ++++++++++++++++++++++- drivers/net/ethernet/sfc/tc.h | 7 ++ drivers/net/ethernet/sfc/tc_encap_actions.c | 126 ++++++++++++++++++++++++++++ drivers/net/ethernet/sfc/tc_encap_actions.h | 47 +++++++++++ 5 files changed, 284 insertions(+), 3 deletions(-) create mode 100644 drivers/net/ethernet/sfc/tc_encap_actions.c create mode 100644 drivers/net/ethernet/sfc/tc_encap_actions.h (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index 55b9c73cd8ef..16293b58e0a8 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -10,7 +10,8 @@ sfc-y += efx.o efx_common.o efx_channels.o nic.o \ efx_devlink.o sfc-$(CONFIG_SFC_MTD) += mtd.o sfc-$(CONFIG_SFC_SRIOV) += sriov.o ef10_sriov.o ef100_sriov.o ef100_rep.o \ - mae.o tc.o tc_bindings.o tc_counters.o + mae.o tc.o tc_bindings.o tc_counters.o \ + tc_encap_actions.o obj-$(CONFIG_SFC) += sfc.o diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 24c67a163910..4177feced3e6 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -14,11 +14,12 @@ #include #include "tc.h" #include "tc_bindings.h" +#include "tc_encap_actions.h" #include "mae.h" #include "ef100_rep.h" #include "efx.h" -static enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev) +enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev) { if (netif_is_vxlan(net_dev)) return EFX_ENCAP_TYPE_VXLAN; @@ -111,6 +112,8 @@ static void efx_tc_free_action_set(struct efx_nic *efx, } if (act->count) efx_tc_flower_put_counter_index(efx, act->count); + if (act->encap_md) + efx_tc_flower_release_encap_md(efx, act->encap_md); kfree(act); } @@ -594,6 +597,7 @@ enum efx_tc_action_order { EFX_TC_AO_VLAN_POP, 
EFX_TC_AO_VLAN_PUSH, EFX_TC_AO_COUNT, + EFX_TC_AO_ENCAP, EFX_TC_AO_DELIVER }; /* Determine whether we can add @new action without violating order */ @@ -623,6 +627,10 @@ static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act, if (act->count) return false; fallthrough; + case EFX_TC_AO_ENCAP: + if (act->encap_md) + return false; + fallthrough; case EFX_TC_AO_DELIVER: return !act->deliver; default: @@ -918,11 +926,13 @@ static int efx_tc_flower_replace(struct efx_nic *efx, { struct flow_rule *fr = flow_cls_offload_flow_rule(tc); struct netlink_ext_ack *extack = tc->common.extack; + const struct ip_tunnel_info *encap_info = NULL; struct efx_tc_flow_rule *rule = NULL, *old; struct efx_tc_action_set *act = NULL; const struct flow_action_entry *fa; struct efx_rep *from_efv, *to_efv; struct efx_tc_match match; + u32 acts_id; s64 rc; int i; @@ -1087,6 +1097,46 @@ static int efx_tc_flower_replace(struct efx_nic *efx, case FLOW_ACTION_MIRRED: save = *act; + if (encap_info) { + struct efx_tc_encap_action *encap; + + if (!efx_tc_flower_action_order_ok(act, + EFX_TC_AO_ENCAP)) { + rc = -EOPNOTSUPP; + NL_SET_ERR_MSG_MOD(extack, "Encap action violates action order"); + goto release; + } + encap = efx_tc_flower_create_encap_md( + efx, encap_info, fa->dev, extack); + if (IS_ERR_OR_NULL(encap)) { + rc = PTR_ERR(encap); + if (!rc) + rc = -EIO; /* arbitrary */ + goto release; + } + act->encap_md = encap; + act->dest_mport = encap->dest_mport; + act->deliver = 1; + rc = efx_mae_alloc_action_set(efx, act); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (encap)"); + goto release; + } + list_add_tail(&act->list, &rule->acts.list); + act = NULL; + if (fa->id == FLOW_ACTION_REDIRECT) + break; /* end of the line */ + /* Mirror, so continue on with saved act */ + save.count = NULL; + act = kzalloc(sizeof(*act), GFP_USER); + if (!act) { + rc = -ENOMEM; + goto release; + } + *act = save; + break; + } + if (!efx_tc_flower_action_order_ok(act, 
EFX_TC_AO_DELIVER)) { /* can't happen */ rc = -EOPNOTSUPP; @@ -1150,6 +1200,37 @@ static int efx_tc_flower_replace(struct efx_nic *efx, act->vlan_proto[act->vlan_push] = fa->vlan.proto; act->vlan_push++; break; + case FLOW_ACTION_TUNNEL_ENCAP: + if (encap_info) { + /* Can't specify encap multiple times. + * If you want to overwrite an existing + * encap_info, use an intervening + * FLOW_ACTION_TUNNEL_DECAP to clear it. + */ + NL_SET_ERR_MSG_MOD(extack, "Tunnel key set when already set"); + rc = -EINVAL; + goto release; + } + if (!fa->tunnel) { + NL_SET_ERR_MSG_MOD(extack, "Tunnel key set is missing key"); + rc = -EOPNOTSUPP; + goto release; + } + encap_info = fa->tunnel; + break; + case FLOW_ACTION_TUNNEL_DECAP: + if (encap_info) { + encap_info = NULL; + break; + } + /* Since we don't support enc_key matches on ingress + * (and if we did there'd be no tunnel-device to give + * us a type), we can't offload a decap that's not + * just undoing a previous encap action. + */ + NL_SET_ERR_MSG_MOD(extack, "Cannot offload tunnel decap action without tunnel device"); + rc = -EOPNOTSUPP; + goto release; default: NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u", fa->id); @@ -1193,8 +1274,21 @@ static int efx_tc_flower_replace(struct efx_nic *efx, NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw"); goto release; } + if (from_efv == EFX_EFV_PF) + /* PF netdev, so rule applies to traffic from wire */ + rule->fallback = &efx->tc->facts.pf; + else + /* repdev, so rule applies to traffic from representee */ + rule->fallback = &efx->tc->facts.reps; + if (!efx_tc_check_ready(efx, rule)) { + netif_dbg(efx, drv, efx->net_dev, "action not ready for hw\n"); + acts_id = rule->fallback->fw_id; + } else { + netif_dbg(efx, drv, efx->net_dev, "ready for hw\n"); + acts_id = rule->acts.fw_id; + } rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC, - rule->acts.fw_id, &rule->fw_id); + acts_id, &rule->fw_id); if (rc) { NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule 
in hw"); goto release_acts; @@ -1609,6 +1703,9 @@ int efx_init_struct_tc(struct efx_nic *efx) mutex_init(&efx->tc->mutex); init_waitqueue_head(&efx->tc->flush_wq); + rc = efx_tc_init_encap_actions(efx); + if (rc < 0) + goto fail_encap_actions; rc = efx_tc_init_counters(efx); if (rc < 0) goto fail_counters; @@ -1635,6 +1732,8 @@ fail_match_action_ht: fail_encap_match_ht: efx_tc_destroy_counters(efx); fail_counters: + efx_tc_destroy_encap_actions(efx); +fail_encap_actions: mutex_destroy(&efx->tc->mutex); kfree(efx->tc->caps); fail_alloc_caps: @@ -1662,6 +1761,7 @@ void efx_fini_struct_tc(struct efx_nic *efx) rhashtable_free_and_destroy(&efx->tc->encap_match_ht, efx_tc_encap_match_free, NULL); efx_tc_fini_counters(efx); + efx_tc_fini_encap_actions(efx); mutex_unlock(&efx->tc->mutex); mutex_destroy(&efx->tc->mutex); kfree(efx->tc->caps); diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h index ae182553514d..5a8f701b05c5 100644 --- a/drivers/net/ethernet/sfc/tc.h +++ b/drivers/net/ethernet/sfc/tc.h @@ -25,6 +25,8 @@ static inline bool efx_ipv6_addr_all_ones(struct in6_addr *addr) } #endif +struct efx_tc_encap_action; /* see tc_encap_actions.h */ + struct efx_tc_action_set { u16 vlan_push:2; u16 vlan_pop:2; @@ -33,6 +35,7 @@ struct efx_tc_action_set { __be16 vlan_tci[2]; /* TCIs for vlan_push */ __be16 vlan_proto[2]; /* Ethertypes for vlan_push */ struct efx_tc_counter_index *count; + struct efx_tc_encap_action *encap_md; /* entry in tc_encap_ht table */ u32 dest_mport; u32 fw_id; /* index of this entry in firmware actions table */ struct list_head list; @@ -127,6 +130,7 @@ struct efx_tc_flow_rule { struct rhash_head linkage; struct efx_tc_match match; struct efx_tc_action_set_list acts; + struct efx_tc_action_set_list *fallback; /* what to use when unready? 
*/ u32 fw_id; }; @@ -144,6 +148,7 @@ enum efx_tc_rule_prios { * @mutex: Used to serialise operations on TC hashtables * @counter_ht: Hashtable of TC counters (FW IDs and counter values) * @counter_id_ht: Hashtable mapping TC counter cookies to counters + * @encap_ht: Hashtable of TC encap actions * @encap_match_ht: Hashtable of TC encap matches * @match_action_ht: Hashtable of TC match-action rules * @reps_mport_id: MAE port allocated for representor RX @@ -173,6 +178,7 @@ struct efx_tc_state { struct mutex mutex; struct rhashtable counter_ht; struct rhashtable counter_id_ht; + struct rhashtable encap_ht; struct rhashtable encap_match_ht; struct rhashtable match_action_ht; u32 reps_mport_id, reps_mport_vport_id; @@ -194,6 +200,7 @@ struct efx_tc_state { struct efx_rep; +enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev); int efx_tc_configure_default_rule_rep(struct efx_rep *efv); void efx_tc_deconfigure_default_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule); diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c new file mode 100644 index 000000000000..c41493e659a3 --- /dev/null +++ b/drivers/net/ethernet/sfc/tc_encap_actions.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2023, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. 
+ */ + +#include "tc_encap_actions.h" +#include "tc.h" +#include "mae.h" +#include +#include + +static const struct rhashtable_params efx_tc_encap_ht_params = { + .key_len = offsetofend(struct efx_tc_encap_action, key), + .key_offset = 0, + .head_offset = offsetof(struct efx_tc_encap_action, linkage), +}; + +static void efx_tc_encap_free(void *ptr, void *__unused) +{ + struct efx_tc_encap_action *enc = ptr; + + WARN_ON(refcount_read(&enc->ref)); + kfree(enc); +} + +int efx_tc_init_encap_actions(struct efx_nic *efx) +{ + return rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params); +} + +/* Only call this in init failure teardown. + * Normal exit should fini instead as there may be entries in the table. + */ +void efx_tc_destroy_encap_actions(struct efx_nic *efx) +{ + rhashtable_destroy(&efx->tc->encap_ht); +} + +void efx_tc_fini_encap_actions(struct efx_nic *efx) +{ + rhashtable_free_and_destroy(&efx->tc->encap_ht, efx_tc_encap_free, NULL); +} + +bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule) +{ + struct efx_tc_action_set *act; + + /* Encap actions can only be offloaded if they have valid + * neighbour info for the outer Ethernet header. 
+ */ + list_for_each_entry(act, &rule->acts.list, list) + if (act->encap_md) /* neigh bindings not implemented yet */ + return false; + return true; +} + +struct efx_tc_encap_action *efx_tc_flower_create_encap_md( + struct efx_nic *efx, const struct ip_tunnel_info *info, + struct net_device *egdev, struct netlink_ext_ack *extack) +{ + enum efx_encap_type type = efx_tc_indr_netdev_type(egdev); + struct efx_tc_encap_action *encap, *old; + s64 rc; + + if (type == EFX_ENCAP_TYPE_NONE) { + /* dest is not an encap device */ + NL_SET_ERR_MSG_MOD(extack, "Not a (supported) tunnel device but tunnel_key is set"); + return ERR_PTR(-EOPNOTSUPP); + } + rc = efx_mae_check_encap_type_supported(efx, type); + if (rc < 0) { + NL_SET_ERR_MSG_MOD(extack, "Firmware reports no support for this tunnel type"); + return ERR_PTR(rc); + } + /* No support yet for Geneve options */ + if (info->options_len) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel options"); + return ERR_PTR(-EOPNOTSUPP); + } + switch (info->mode) { + case IP_TUNNEL_INFO_TX: + break; + case IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_IPV6: + type |= EFX_ENCAP_FLAG_IPV6; + break; + default: + NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported tunnel mode %u", + info->mode); + return ERR_PTR(-EOPNOTSUPP); + } + encap = kzalloc(sizeof(*encap), GFP_KERNEL_ACCOUNT); + if (!encap) + return ERR_PTR(-ENOMEM); + encap->type = type; + encap->key = info->key; + old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_ht, + &encap->linkage, + efx_tc_encap_ht_params); + if (old) { + /* don't need our new entry */ + kfree(encap); + if (!refcount_inc_not_zero(&old->ref)) + return ERR_PTR(-EAGAIN); + /* existing entry found, ref taken */ + return old; + } + + /* ref and return */ + refcount_set(&encap->ref, 1); + return encap; +} + +void efx_tc_flower_release_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + if (!refcount_dec_and_test(&encap->ref)) + return; /* still in use */ + rhashtable_remove_fast(&efx->tc->encap_ht, 
&encap->linkage, + efx_tc_encap_ht_params); + kfree(encap); +} diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.h b/drivers/net/ethernet/sfc/tc_encap_actions.h new file mode 100644 index 000000000000..1a3679e81f09 --- /dev/null +++ b/drivers/net/ethernet/sfc/tc_encap_actions.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2023, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_TC_ENCAP_ACTIONS_H +#define EFX_TC_ENCAP_ACTIONS_H +#include "net_driver.h" + +#include +#include + +/* This limit is arbitrary; current hardware (SN1022) handles encap headers + * of up to 126 bytes, but that limit is not enshrined in the MCDI protocol. 
+ */ +#define EFX_TC_MAX_ENCAP_HDR 126 +struct efx_tc_encap_action { + enum efx_encap_type type; + struct ip_tunnel_key key; /* 52 bytes */ + u32 dest_mport; /* is copied into struct efx_tc_action_set */ + u8 encap_hdr_len; + u8 encap_hdr[EFX_TC_MAX_ENCAP_HDR]; + struct rhash_head linkage; /* efx->tc->encap_ht */ + refcount_t ref; + u32 fw_id; /* index of this entry in firmware encap table */ +}; + +/* create/uncreate/teardown hashtables */ +int efx_tc_init_encap_actions(struct efx_nic *efx); +void efx_tc_destroy_encap_actions(struct efx_nic *efx); +void efx_tc_fini_encap_actions(struct efx_nic *efx); + +struct efx_tc_flow_rule; +bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule); + +struct efx_tc_encap_action *efx_tc_flower_create_encap_md( + struct efx_nic *efx, const struct ip_tunnel_info *info, + struct net_device *egdev, struct netlink_ext_ack *extack); +void efx_tc_flower_release_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap); + +#endif /* EFX_TC_ENCAP_ACTIONS_H */ -- cgit v1.2.3 From 69819d3bc4086dd5a268600d1cbd65c39eda1672 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 8 Jun 2023 17:42:32 +0100 Subject: sfc: add function to atomically update a rule in the MAE efx_mae_update_rule() changes the action-set-list attached to an MAE flow rule in the Action Rule Table. We will use this when neighbouring updates change encap actions.
Reviewed-by: Pieter Jansen van Vuuren Signed-off-by: Edward Cree Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/mae.c | 23 +++++++++++++++++++++++ drivers/net/ethernet/sfc/mae.h | 1 + 2 files changed, 24 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c index 37a4c6925ad4..4eef5d18817a 100644 --- a/drivers/net/ethernet/sfc/mae.c +++ b/drivers/net/ethernet/sfc/mae.c @@ -1229,6 +1229,29 @@ int efx_mae_insert_rule(struct efx_nic *efx, const struct efx_tc_match *match, return 0; } +int efx_mae_update_rule(struct efx_nic *efx, u32 acts_id, u32 id) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ACTION_RULE_UPDATE_IN_LEN); + MCDI_DECLARE_STRUCT_PTR(response); + + BUILD_BUG_ON(MC_CMD_MAE_ACTION_RULE_UPDATE_OUT_LEN); + response = _MCDI_DWORD(inbuf, MAE_ACTION_RULE_UPDATE_IN_RESPONSE); + + MCDI_SET_DWORD(inbuf, MAE_ACTION_RULE_UPDATE_IN_AR_ID, id); + if (efx_mae_asl_id(acts_id)) { + MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_ASL_ID, acts_id); + MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_AS_ID, + MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL); + } else { + /* We only had one AS, so we didn't wrap it in an ASL */ + MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_ASL_ID, + MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL); + MCDI_STRUCT_SET_DWORD(response, MAE_ACTION_RULE_RESPONSE_AS_ID, acts_id); + } + return efx_mcdi_rpc(efx, MC_CMD_MAE_ACTION_RULE_UPDATE, inbuf, sizeof(inbuf), + NULL, 0, NULL); +} + int efx_mae_delete_rule(struct efx_nic *efx, u32 id) { MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_ACTION_RULE_DELETE_OUT_LEN(1)); diff --git a/drivers/net/ethernet/sfc/mae.h b/drivers/net/ethernet/sfc/mae.h index 1cf8dfeb0c28..c542aab43ea1 100644 --- a/drivers/net/ethernet/sfc/mae.h +++ b/drivers/net/ethernet/sfc/mae.h @@ -105,6 +105,7 @@ int efx_mae_unregister_encap_match(struct efx_nic *efx, int efx_mae_insert_rule(struct 
efx_nic *efx, const struct efx_tc_match *match, u32 prio, u32 acts_id, u32 *id); +int efx_mae_update_rule(struct efx_nic *efx, u32 acts_id, u32 id); int efx_mae_delete_rule(struct efx_nic *efx, u32 id); int efx_init_mae(struct efx_nic *efx); -- cgit v1.2.3 From f1363154c47468725611f264fc2e50833800dc3b Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 8 Jun 2023 17:42:33 +0100 Subject: sfc: MAE functions to create/update/delete encap headers Besides the raw header data, also pass the tunnel type, so that the hardware knows it needs to update the IP Total Length and UDP Length fields (and corresponding checksums) for each packet. Also, populate the ENCAP_HEADER_ID field in efx_mae_alloc_action_set() with the fw_id returned from efx_mae_allocate_encap_md(). Reviewed-by: Pieter Jansen van Vuuren Signed-off-by: Edward Cree Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/mae.c | 90 +++++++++++++++++++++++++++++++++++++++++- drivers/net/ethernet/sfc/mae.h | 7 ++++ 2 files changed, 95 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c index 4eef5d18817a..0cab508f2f9d 100644 --- a/drivers/net/ethernet/sfc/mae.c +++ b/drivers/net/ethernet/sfc/mae.c @@ -15,6 +15,7 @@ #include "mcdi.h" #include "mcdi_pcol.h" #include "mcdi_pcol_mae.h" +#include "tc_encap_actions.h" int efx_mae_allocate_mport(struct efx_nic *efx, u32 *id, u32 *label) { @@ -610,6 +611,87 @@ static int efx_mae_encap_type_to_mae_type(enum efx_encap_type type) } } +int efx_mae_allocate_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ENCAP_HEADER_ALLOC_IN_LEN(EFX_TC_MAX_ENCAP_HDR)); + MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_LEN); + size_t inlen, outlen; + int rc; + + rc = efx_mae_encap_type_to_mae_type(encap->type); + if (rc < 0) + return rc; + MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_ALLOC_IN_ENCAP_TYPE, rc); 
+ inlen = MC_CMD_MAE_ENCAP_HEADER_ALLOC_IN_LEN(encap->encap_hdr_len); + if (WARN_ON(inlen > sizeof(inbuf))) /* can't happen */ + return -EINVAL; + memcpy(MCDI_PTR(inbuf, MAE_ENCAP_HEADER_ALLOC_IN_HDR_DATA), + encap->encap_hdr, + encap->encap_hdr_len); + rc = efx_mcdi_rpc(efx, MC_CMD_MAE_ENCAP_HEADER_ALLOC, inbuf, + inlen, outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < sizeof(outbuf)) + return -EIO; + encap->fw_id = MCDI_DWORD(outbuf, MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID); + return 0; +} + +int efx_mae_update_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ENCAP_HEADER_UPDATE_IN_LEN(EFX_TC_MAX_ENCAP_HDR)); + size_t inlen; + int rc; + + rc = efx_mae_encap_type_to_mae_type(encap->type); + if (rc < 0) + return rc; + MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_UPDATE_IN_ENCAP_TYPE, rc); + MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_UPDATE_IN_EH_ID, + encap->fw_id); + inlen = MC_CMD_MAE_ENCAP_HEADER_UPDATE_IN_LEN(encap->encap_hdr_len); + if (WARN_ON(inlen > sizeof(inbuf))) /* can't happen */ + return -EINVAL; + memcpy(MCDI_PTR(inbuf, MAE_ENCAP_HEADER_UPDATE_IN_HDR_DATA), + encap->encap_hdr, + encap->encap_hdr_len); + + BUILD_BUG_ON(MC_CMD_MAE_ENCAP_HEADER_UPDATE_OUT_LEN != 0); + return efx_mcdi_rpc(efx, MC_CMD_MAE_ENCAP_HEADER_UPDATE, inbuf, + inlen, NULL, 0, NULL); +} + +int efx_mae_free_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_MAE_ENCAP_HEADER_FREE_OUT_LEN(1)); + MCDI_DECLARE_BUF(inbuf, MC_CMD_MAE_ENCAP_HEADER_FREE_IN_LEN(1)); + size_t outlen; + int rc; + + MCDI_SET_DWORD(inbuf, MAE_ENCAP_HEADER_FREE_IN_EH_ID, encap->fw_id); + rc = efx_mcdi_rpc(efx, MC_CMD_MAE_ENCAP_HEADER_FREE, inbuf, + sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); + if (rc) + return rc; + if (outlen < sizeof(outbuf)) + return -EIO; + /* FW freed a different ID than we asked for, should also never happen. 
+ * Warn because it means we've now got a different idea to the FW of + * what encap_mds exist, which could cause mayhem later. + */ + if (WARN_ON(MCDI_DWORD(outbuf, MAE_ENCAP_HEADER_FREE_OUT_FREED_EH_ID) != encap->fw_id)) + return -EIO; + /* We're probably about to free @encap, but let's just make sure its + * fw_id is blatted so that it won't look valid if it leaks out. + */ + encap->fw_id = MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID_NULL; + return 0; +} + int efx_mae_lookup_mport(struct efx_nic *efx, u32 vf_idx, u32 *id) { struct ef100_nic_data *nic_data = efx->nic_data; @@ -833,8 +915,12 @@ int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set *act) MCDI_SET_WORD_BE(inbuf, MAE_ACTION_SET_ALLOC_IN_VLAN1_PROTO_BE, act->vlan_proto[1]); } - MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_ENCAP_HEADER_ID, - MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID_NULL); + if (act->encap_md) + MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_ENCAP_HEADER_ID, + act->encap_md->fw_id); + else + MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_ENCAP_HEADER_ID, + MC_CMD_MAE_ENCAP_HEADER_ALLOC_OUT_ENCAP_HEADER_ID_NULL); if (act->deliver) MCDI_SET_DWORD(inbuf, MAE_ACTION_SET_ALLOC_IN_DELIVER, act->dest_mport); diff --git a/drivers/net/ethernet/sfc/mae.h b/drivers/net/ethernet/sfc/mae.h index c542aab43ea1..24abfe509690 100644 --- a/drivers/net/ethernet/sfc/mae.h +++ b/drivers/net/ethernet/sfc/mae.h @@ -90,6 +90,13 @@ int efx_mae_check_encap_type_supported(struct efx_nic *efx, int efx_mae_allocate_counter(struct efx_nic *efx, struct efx_tc_counter *cnt); int efx_mae_free_counter(struct efx_nic *efx, struct efx_tc_counter *cnt); +int efx_mae_allocate_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap); +int efx_mae_update_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap); +int efx_mae_free_encap_md(struct efx_nic *efx, + struct efx_tc_encap_action *encap); + int efx_mae_alloc_action_set(struct efx_nic *efx, struct efx_tc_action_set 
*act); int efx_mae_free_action_set(struct efx_nic *efx, u32 fw_id); -- cgit v1.2.3 From 7e5e7d800011adf4aeda615f8a1bc31c0c1e2bb9 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 8 Jun 2023 17:42:34 +0100 Subject: sfc: neighbour lookup for TC encap action offload For each neighbour we're interested in, create a struct efx_neigh_binder object which has a list of all the encap_actions using it. When we receive a neighbouring update (through the netevent notifier), find the corresponding efx_neigh_binder and update all its users. Since the actual generation of encap headers is still only a stub, the resulting rules still get left on fallback actions. Signed-off-by: Edward Cree Reviewed-by: Simon Horman Reviewed-by: Pieter Jansen van Vuuren Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/ef100_netdev.c | 34 +++ drivers/net/ethernet/sfc/net_driver.h | 3 + drivers/net/ethernet/sfc/tc.c | 12 +- drivers/net/ethernet/sfc/tc.h | 7 + drivers/net/ethernet/sfc/tc_bindings.c | 13 + drivers/net/ethernet/sfc/tc_bindings.h | 2 + drivers/net/ethernet/sfc/tc_encap_actions.c | 448 +++++++++++++++++++++++++++- drivers/net/ethernet/sfc/tc_encap_actions.h | 56 ++++ 8 files changed, 569 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/ef100_netdev.c b/drivers/net/ethernet/sfc/ef100_netdev.c index 274f3a2562ad..7f7d560cb2b4 100644 --- a/drivers/net/ethernet/sfc/ef100_netdev.c +++ b/drivers/net/ethernet/sfc/ef100_netdev.c @@ -24,6 +24,7 @@ #include "rx_common.h" #include "ef100_sriov.h" #include "tc_bindings.h" +#include "tc_encap_actions.h" #include "efx_devlink.h" static void ef100_update_name(struct efx_nic *efx) @@ -300,14 +301,38 @@ int ef100_netdev_event(struct notifier_block *this, { struct efx_nic *efx = container_of(this, struct efx_nic, netdev_notifier); struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); + struct ef100_nic_data *nic_data = efx->nic_data; + int err; if (efx->net_dev == net_dev && 
(event == NETDEV_CHANGENAME || event == NETDEV_REGISTER)) ef100_update_name(efx); + if (!nic_data->grp_mae) + return NOTIFY_DONE; + err = efx_tc_netdev_event(efx, event, net_dev); + if (err & NOTIFY_STOP_MASK) + return err; + return NOTIFY_DONE; } +static int ef100_netevent_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct efx_nic *efx = container_of(this, struct efx_nic, netevent_notifier); + struct ef100_nic_data *nic_data = efx->nic_data; + int err; + + if (!nic_data->grp_mae) + return NOTIFY_DONE; + err = efx_tc_netevent_event(efx, event, ptr); + if (err & NOTIFY_STOP_MASK) + return err; + + return NOTIFY_DONE; +}; + static int ef100_register_netdev(struct efx_nic *efx) { struct net_device *net_dev = efx->net_dev; @@ -367,6 +392,7 @@ void ef100_remove_netdev(struct efx_probe_data *probe_data) rtnl_unlock(); unregister_netdevice_notifier(&efx->netdev_notifier); + unregister_netevent_notifier(&efx->netevent_notifier); #if defined(CONFIG_SFC_SRIOV) if (!efx->type->is_vf) efx_ef100_pci_sriov_disable(efx, true); @@ -487,6 +513,14 @@ int ef100_probe_netdev(struct efx_probe_data *probe_data) goto fail; } + efx->netevent_notifier.notifier_call = ef100_netevent_event; + rc = register_netevent_notifier(&efx->netevent_notifier); + if (rc) { + netif_err(efx, probe, efx->net_dev, + "Failed to register netevent notifier, rc=%d\n", rc); + goto fail; + } + efx_probe_devlink_unlock(efx); return rc; fail: diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index fcd51d3992fa..a7a22b019794 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -27,6 +27,7 @@ #include #include #include +#include #include "enum.h" #include "bitfield.h" @@ -996,6 +997,7 @@ struct efx_mae; * @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their * xdp_rxq_info structures? * @netdev_notifier: Netdevice notifier. 
+ * @netevent_notifier: Netevent notifier (for neighbour updates). * @tc: state for TC offload (EF100). * @devlink: reference to devlink structure owned by this device * @dl_port: devlink port associated with the PF @@ -1183,6 +1185,7 @@ struct efx_nic { bool xdp_rxq_info_failed; struct notifier_block netdev_notifier; + struct notifier_block netevent_notifier; struct efx_tc_state *tc; struct devlink *devlink; diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 4177feced3e6..77acdb60381e 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -34,8 +34,8 @@ enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev) * May return NULL for the PF (us), or an error pointer for a device that * isn't supported as a TC offload endpoint */ -static struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx, - struct net_device *dev) +struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx, + struct net_device *dev) { struct efx_rep *efv; @@ -71,7 +71,7 @@ static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv } /* Convert a driver-internal vport ID into an external device (wire or VF) */ -static s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv) +s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv) { u32 mport; @@ -112,8 +112,10 @@ static void efx_tc_free_action_set(struct efx_nic *efx, } if (act->count) efx_tc_flower_put_counter_index(efx, act->count); - if (act->encap_md) + if (act->encap_md) { + list_del(&act->encap_user); efx_tc_flower_release_encap_md(efx, act->encap_md); + } kfree(act); } @@ -1115,6 +1117,7 @@ static int efx_tc_flower_replace(struct efx_nic *efx, goto release; } act->encap_md = encap; + list_add_tail(&act->encap_user, &encap->users); act->dest_mport = encap->dest_mport; act->deliver = 1; rc = efx_mae_alloc_action_set(efx, act); @@ -1123,6 +1126,7 @@ static int efx_tc_flower_replace(struct efx_nic *efx, goto 
release; } list_add_tail(&act->list, &rule->acts.list); + act->user = &rule->acts; act = NULL; if (fa->id == FLOW_ACTION_REDIRECT) break; /* end of the line */ diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h index 5a8f701b05c5..607429f8bb28 100644 --- a/drivers/net/ethernet/sfc/tc.h +++ b/drivers/net/ethernet/sfc/tc.h @@ -36,6 +36,8 @@ struct efx_tc_action_set { __be16 vlan_proto[2]; /* Ethertypes for vlan_push */ struct efx_tc_counter_index *count; struct efx_tc_encap_action *encap_md; /* entry in tc_encap_ht table */ + struct list_head encap_user; /* entry on encap_md->users list */ + struct efx_tc_action_set_list *user; /* Only populated if encap_md */ u32 dest_mport; u32 fw_id; /* index of this entry in firmware actions table */ struct list_head list; @@ -151,6 +153,7 @@ enum efx_tc_rule_prios { * @encap_ht: Hashtable of TC encap actions * @encap_match_ht: Hashtable of TC encap matches * @match_action_ht: Hashtable of TC match-action rules + * @neigh_ht: Hashtable of neighbour watches (&struct efx_neigh_binder) * @reps_mport_id: MAE port allocated for representor RX * @reps_filter_uc: VNIC filter for representor unicast RX (promisc) * @reps_filter_mc: VNIC filter for representor multicast RX (allmulti) @@ -181,6 +184,7 @@ struct efx_tc_state { struct rhashtable encap_ht; struct rhashtable encap_match_ht; struct rhashtable match_action_ht; + struct rhashtable neigh_ht; u32 reps_mport_id, reps_mport_vport_id; s32 reps_filter_uc, reps_filter_mc; bool flush_counters; @@ -201,6 +205,9 @@ struct efx_tc_state { struct efx_rep; enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev); +struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx, + struct net_device *dev); +s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv); int efx_tc_configure_default_rule_rep(struct efx_rep *efv); void efx_tc_deconfigure_default_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule); diff --git 
a/drivers/net/ethernet/sfc/tc_bindings.c b/drivers/net/ethernet/sfc/tc_bindings.c index c18d64519c2d..1b79c535c54e 100644 --- a/drivers/net/ethernet/sfc/tc_bindings.c +++ b/drivers/net/ethernet/sfc/tc_bindings.c @@ -10,6 +10,7 @@ #include "tc_bindings.h" #include "tc.h" +#include "tc_encap_actions.h" struct efx_tc_block_binding { struct list_head list; @@ -226,3 +227,15 @@ int efx_tc_setup(struct net_device *net_dev, enum tc_setup_type type, return -EOPNOTSUPP; } + +int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event, + struct net_device *net_dev) +{ + if (efx->type->is_vf) + return NOTIFY_DONE; + + if (event == NETDEV_UNREGISTER) + efx_tc_unregister_egdev(efx, net_dev); + + return NOTIFY_OK; +} diff --git a/drivers/net/ethernet/sfc/tc_bindings.h b/drivers/net/ethernet/sfc/tc_bindings.h index c210bb09150e..095ddeb59eb3 100644 --- a/drivers/net/ethernet/sfc/tc_bindings.h +++ b/drivers/net/ethernet/sfc/tc_bindings.h @@ -26,4 +26,6 @@ int efx_tc_indr_setup_cb(struct net_device *net_dev, struct Qdisc *sch, void *cb_priv, enum tc_setup_type type, void *type_data, void *data, void (*cleanup)(struct flow_block_cb *block_cb)); +int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event, + struct net_device *net_dev); #endif /* EFX_TC_BINDINGS_H */ diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c index c41493e659a3..c1bd7d468343 100644 --- a/drivers/net/ethernet/sfc/tc_encap_actions.c +++ b/drivers/net/ethernet/sfc/tc_encap_actions.c @@ -13,6 +13,14 @@ #include "mae.h" #include #include +#include +#include + +static const struct rhashtable_params efx_neigh_ht_params = { + .key_len = offsetof(struct efx_neigh_binder, ha), + .key_offset = 0, + .head_offset = offsetof(struct efx_neigh_binder, linkage), +}; static const struct rhashtable_params efx_tc_encap_ht_params = { .key_len = offsetofend(struct efx_tc_encap_action, key), @@ -28,9 +36,32 @@ static void efx_tc_encap_free(void *ptr, void *__unused) 
kfree(enc); } +static void efx_neigh_free(void *ptr, void *__unused) +{ + struct efx_neigh_binder *neigh = ptr; + + WARN_ON(refcount_read(&neigh->ref)); + WARN_ON(!list_empty(&neigh->users)); + put_net_track(neigh->net, &neigh->ns_tracker); + netdev_put(neigh->egdev, &neigh->dev_tracker); + kfree(neigh); +} + int efx_tc_init_encap_actions(struct efx_nic *efx) { - return rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params); + int rc; + + rc = rhashtable_init(&efx->tc->neigh_ht, &efx_neigh_ht_params); + if (rc < 0) + goto fail_neigh_ht; + rc = rhashtable_init(&efx->tc->encap_ht, &efx_tc_encap_ht_params); + if (rc < 0) + goto fail_encap_ht; + return 0; +fail_encap_ht: + rhashtable_destroy(&efx->tc->neigh_ht); +fail_neigh_ht: + return rc; } /* Only call this in init failure teardown. @@ -39,11 +70,337 @@ int efx_tc_init_encap_actions(struct efx_nic *efx) void efx_tc_destroy_encap_actions(struct efx_nic *efx) { rhashtable_destroy(&efx->tc->encap_ht); + rhashtable_destroy(&efx->tc->neigh_ht); } void efx_tc_fini_encap_actions(struct efx_nic *efx) { rhashtable_free_and_destroy(&efx->tc->encap_ht, efx_tc_encap_free, NULL); + rhashtable_free_and_destroy(&efx->tc->neigh_ht, efx_neigh_free, NULL); +} + +static void efx_neigh_update(struct work_struct *work); + +static int efx_bind_neigh(struct efx_nic *efx, + struct efx_tc_encap_action *encap, struct net *net, + struct netlink_ext_ack *extack) +{ + struct efx_neigh_binder *neigh, *old; + struct flowi6 flow6 = {}; + struct flowi4 flow4 = {}; + int rc; + + /* GCC stupidly thinks that only values explicitly listed in the enum + * definition can _possibly_ be sensible case values, so without this + * cast it complains about the IPv6 versions. 
+ */ + switch ((int)encap->type) { + case EFX_ENCAP_TYPE_VXLAN: + case EFX_ENCAP_TYPE_GENEVE: + flow4.flowi4_proto = IPPROTO_UDP; + flow4.fl4_dport = encap->key.tp_dst; + flow4.flowi4_tos = encap->key.tos; + flow4.daddr = encap->key.u.ipv4.dst; + flow4.saddr = encap->key.u.ipv4.src; + break; + case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6: + case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6: + flow6.flowi6_proto = IPPROTO_UDP; + flow6.fl6_dport = encap->key.tp_dst; + flow6.flowlabel = ip6_make_flowinfo(encap->key.tos, + encap->key.label); + flow6.daddr = encap->key.u.ipv6.dst; + flow6.saddr = encap->key.u.ipv6.src; + break; + default: + NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported encap type %d", + (int)encap->type); + return -EOPNOTSUPP; + } + + neigh = kzalloc(sizeof(*neigh), GFP_KERNEL_ACCOUNT); + if (!neigh) + return -ENOMEM; + neigh->net = get_net_track(net, &neigh->ns_tracker, GFP_KERNEL_ACCOUNT); + neigh->dst_ip = flow4.daddr; + neigh->dst_ip6 = flow6.daddr; + + old = rhashtable_lookup_get_insert_fast(&efx->tc->neigh_ht, + &neigh->linkage, + efx_neigh_ht_params); + if (old) { + /* don't need our new entry */ + put_net_track(neigh->net, &neigh->ns_tracker); + kfree(neigh); + if (!refcount_inc_not_zero(&old->ref)) + return -EAGAIN; + /* existing entry found, ref taken */ + neigh = old; + } else { + /* New entry. 
We need to initiate a lookup */ + struct neighbour *n; + struct rtable *rt; + + if (encap->type & EFX_ENCAP_FLAG_IPV6) { +#if IS_ENABLED(CONFIG_IPV6) + struct dst_entry *dst; + + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow6, + NULL); + rc = PTR_ERR_OR_ZERO(dst); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for IPv6 encap"); + goto out_free; + } + neigh->egdev = dst->dev; + netdev_hold(neigh->egdev, &neigh->dev_tracker, + GFP_KERNEL_ACCOUNT); + neigh->ttl = ip6_dst_hoplimit(dst); + n = dst_neigh_lookup(dst, &flow6.daddr); + dst_release(dst); +#else + /* We shouldn't ever get here, because if IPv6 isn't + * enabled how did someone create an IPv6 tunnel_key? + */ + rc = -EOPNOTSUPP; + NL_SET_ERR_MSG_MOD(extack, "No IPv6 support (neigh bind)"); +#endif + } else { + rt = ip_route_output_key(net, &flow4); + if (IS_ERR_OR_NULL(rt)) { + rc = PTR_ERR_OR_ZERO(rt); + if (!rc) + rc = -EIO; + NL_SET_ERR_MSG_MOD(extack, "Failed to lookup route for encap"); + goto out_free; + } + neigh->egdev = rt->dst.dev; + netdev_hold(neigh->egdev, &neigh->dev_tracker, + GFP_KERNEL_ACCOUNT); + neigh->ttl = ip4_dst_hoplimit(&rt->dst); + n = dst_neigh_lookup(&rt->dst, &flow4.daddr); + ip_rt_put(rt); + } + if (!n) { + rc = -ENETUNREACH; + NL_SET_ERR_MSG_MOD(extack, "Failed to lookup neighbour for encap"); + netdev_put(neigh->egdev, &neigh->dev_tracker); + goto out_free; + } + refcount_set(&neigh->ref, 1); + INIT_LIST_HEAD(&neigh->users); + read_lock_bh(&n->lock); + ether_addr_copy(neigh->ha, n->ha); + neigh->n_valid = n->nud_state & NUD_VALID; + read_unlock_bh(&n->lock); + rwlock_init(&neigh->lock); + INIT_WORK(&neigh->work, efx_neigh_update); + neigh->efx = efx; + neigh->used = jiffies; + if (!neigh->n_valid) + /* Prod ARP to find us a neighbour */ + neigh_event_send(n, NULL); + neigh_release(n); + } + /* Add us to this neigh */ + encap->neigh = neigh; + list_add_tail(&encap->list, &neigh->users); + return 0; + +out_free: + /* cleanup common to several error paths 
*/ + rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage, + efx_neigh_ht_params); + synchronize_rcu(); + put_net_track(net, &neigh->ns_tracker); + kfree(neigh); + return rc; +} + +static void efx_free_neigh(struct efx_neigh_binder *neigh) +{ + struct efx_nic *efx = neigh->efx; + + rhashtable_remove_fast(&efx->tc->neigh_ht, &neigh->linkage, + efx_neigh_ht_params); + synchronize_rcu(); + netdev_put(neigh->egdev, &neigh->dev_tracker); + put_net_track(neigh->net, &neigh->ns_tracker); + kfree(neigh); +} + +static void efx_release_neigh(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + struct efx_neigh_binder *neigh = encap->neigh; + + if (!neigh) + return; + list_del(&encap->list); + encap->neigh = NULL; + if (!refcount_dec_and_test(&neigh->ref)) + return; /* still in use */ + efx_free_neigh(neigh); +} + +static void efx_gen_encap_header(struct efx_tc_encap_action *encap) +{ + /* stub for now */ + encap->n_valid = false; + memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr)); + encap->encap_hdr_len = ETH_HLEN; +} + +static void efx_tc_update_encap(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + struct efx_tc_action_set_list *acts, *fallback; + struct efx_tc_flow_rule *rule; + struct efx_tc_action_set *act; + int rc; + + if (encap->n_valid) { + /* Make sure no rules are using this encap while we change it */ + list_for_each_entry(act, &encap->users, encap_user) { + acts = act->user; + if (WARN_ON(!acts)) /* can't happen */ + continue; + rule = container_of(acts, struct efx_tc_flow_rule, acts); + if (rule->fallback) + fallback = rule->fallback; + else /* fallback fallback: deliver to PF */ + fallback = &efx->tc->facts.pf; + rc = efx_mae_update_rule(efx, fallback->fw_id, + rule->fw_id); + if (rc) + netif_err(efx, drv, efx->net_dev, + "Failed to update (f) rule %08x rc %d\n", + rule->fw_id, rc); + else + netif_dbg(efx, drv, efx->net_dev, "Updated (f) rule %08x\n", + rule->fw_id); + } + } + + if (encap->neigh) { + 
read_lock_bh(&encap->neigh->lock); + efx_gen_encap_header(encap); + read_unlock_bh(&encap->neigh->lock); + } else { + encap->n_valid = false; + memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr)); + encap->encap_hdr_len = ETH_HLEN; + } + + rc = efx_mae_update_encap_md(efx, encap); + if (rc) { + netif_err(efx, drv, efx->net_dev, + "Failed to update encap hdr %08x rc %d\n", + encap->fw_id, rc); + return; + } + netif_dbg(efx, drv, efx->net_dev, "Updated encap hdr %08x\n", + encap->fw_id); + if (!encap->n_valid) + return; + /* Update rule users: use the action if they are now ready */ + list_for_each_entry(act, &encap->users, encap_user) { + acts = act->user; + if (WARN_ON(!acts)) /* can't happen */ + continue; + rule = container_of(acts, struct efx_tc_flow_rule, acts); + if (!efx_tc_check_ready(efx, rule)) + continue; + rc = efx_mae_update_rule(efx, acts->fw_id, rule->fw_id); + if (rc) + netif_err(efx, drv, efx->net_dev, + "Failed to update rule %08x rc %d\n", + rule->fw_id, rc); + else + netif_dbg(efx, drv, efx->net_dev, "Updated rule %08x\n", + rule->fw_id); + } +} + +static void efx_neigh_update(struct work_struct *work) +{ + struct efx_neigh_binder *neigh = container_of(work, struct efx_neigh_binder, work); + struct efx_tc_encap_action *encap; + struct efx_nic *efx = neigh->efx; + + mutex_lock(&efx->tc->mutex); + list_for_each_entry(encap, &neigh->users, list) + efx_tc_update_encap(neigh->efx, encap); + /* release ref taken in efx_neigh_event() */ + if (refcount_dec_and_test(&neigh->ref)) + efx_free_neigh(neigh); + mutex_unlock(&efx->tc->mutex); +} + +static int efx_neigh_event(struct efx_nic *efx, struct neighbour *n) +{ + struct efx_neigh_binder keys = {NULL}, *neigh; + bool n_valid, ipv6 = false; + char ha[ETH_ALEN]; + size_t keysize; + + if (WARN_ON(!efx->tc)) + return NOTIFY_DONE; + + if (n->tbl == &arp_tbl) { + keysize = sizeof(keys.dst_ip); +#if IS_ENABLED(CONFIG_IPV6) + } else if (n->tbl == ipv6_stub->nd_tbl) { + ipv6 = true; + keysize = 
sizeof(keys.dst_ip6); +#endif + } else { + return NOTIFY_DONE; + } + if (!n->parms) { + netif_warn(efx, drv, efx->net_dev, "neigh_event with no parms!\n"); + return NOTIFY_DONE; + } + keys.net = read_pnet(&n->parms->net); + if (n->tbl->key_len != keysize) { + netif_warn(efx, drv, efx->net_dev, "neigh_event with bad key_len %u\n", + n->tbl->key_len); + return NOTIFY_DONE; + } + read_lock_bh(&n->lock); /* Get a consistent view */ + memcpy(ha, n->ha, ETH_ALEN); + n_valid = (n->nud_state & NUD_VALID) && !n->dead; + read_unlock_bh(&n->lock); + if (ipv6) + memcpy(&keys.dst_ip6, n->primary_key, n->tbl->key_len); + else + memcpy(&keys.dst_ip, n->primary_key, n->tbl->key_len); + rcu_read_lock(); + neigh = rhashtable_lookup_fast(&efx->tc->neigh_ht, &keys, + efx_neigh_ht_params); + if (!neigh || neigh->dying) + /* We're not interested in this neighbour */ + goto done; + write_lock_bh(&neigh->lock); + if (n_valid == neigh->n_valid && !memcmp(ha, neigh->ha, ETH_ALEN)) { + write_unlock_bh(&neigh->lock); + /* Nothing has changed; no work to do */ + goto done; + } + neigh->n_valid = n_valid; + memcpy(neigh->ha, ha, ETH_ALEN); + write_unlock_bh(&neigh->lock); + if (refcount_inc_not_zero(&neigh->ref)) { + rcu_read_unlock(); + if (!schedule_work(&neigh->work)) + /* failed to schedule, release the ref we just took */ + if (refcount_dec_and_test(&neigh->ref)) + efx_free_neigh(neigh); + } else { +done: + rcu_read_unlock(); + } + return NOTIFY_DONE; } bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule) @@ -54,7 +411,7 @@ bool efx_tc_check_ready(struct efx_nic *efx, struct efx_tc_flow_rule *rule) * neighbour info for the outer Ethernet header. 
*/ list_for_each_entry(act, &rule->acts.list, list) - if (act->encap_md) /* neigh bindings not implemented yet */ + if (act->encap_md && !act->encap_md->n_valid) return false; return true; } @@ -65,6 +422,7 @@ struct efx_tc_encap_action *efx_tc_flower_create_encap_md( { enum efx_encap_type type = efx_tc_indr_netdev_type(egdev); struct efx_tc_encap_action *encap, *old; + struct efx_rep *to_efv; s64 rc; if (type == EFX_ENCAP_TYPE_NONE) { @@ -98,6 +456,7 @@ struct efx_tc_encap_action *efx_tc_flower_create_encap_md( return ERR_PTR(-ENOMEM); encap->type = type; encap->key = info->key; + INIT_LIST_HEAD(&encap->users); old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_ht, &encap->linkage, efx_tc_encap_ht_params); @@ -110,9 +469,42 @@ struct efx_tc_encap_action *efx_tc_flower_create_encap_md( return old; } + rc = efx_bind_neigh(efx, encap, dev_net(egdev), extack); + if (rc < 0) + goto out_remove; + to_efv = efx_tc_flower_lookup_efv(efx, encap->neigh->egdev); + if (IS_ERR(to_efv)) { + /* neigh->egdev isn't ours */ + NL_SET_ERR_MSG_MOD(extack, "Tunnel egress device not on switch"); + rc = PTR_ERR(to_efv); + goto out_release; + } + rc = efx_tc_flower_external_mport(efx, to_efv); + if (rc < 0) { + NL_SET_ERR_MSG_MOD(extack, "Failed to identify tunnel egress m-port"); + goto out_release; + } + encap->dest_mport = rc; + read_lock_bh(&encap->neigh->lock); + efx_gen_encap_header(encap); + read_unlock_bh(&encap->neigh->lock); + + rc = efx_mae_allocate_encap_md(efx, encap); + if (rc < 0) { + NL_SET_ERR_MSG_MOD(extack, "Failed to write tunnel header to hw"); + goto out_release; + } + /* ref and return */ refcount_set(&encap->ref, 1); return encap; +out_release: + efx_release_neigh(efx, encap); +out_remove: + rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage, + efx_tc_encap_ht_params); + kfree(encap); + return ERR_PTR(rc); } void efx_tc_flower_release_encap_md(struct efx_nic *efx, @@ -120,7 +512,59 @@ void efx_tc_flower_release_encap_md(struct efx_nic *efx, { if 
(!refcount_dec_and_test(&encap->ref)) return; /* still in use */ + efx_release_neigh(efx, encap); rhashtable_remove_fast(&efx->tc->encap_ht, &encap->linkage, efx_tc_encap_ht_params); + efx_mae_free_encap_md(efx, encap); kfree(encap); } + +static void efx_tc_remove_neigh_users(struct efx_nic *efx, struct efx_neigh_binder *neigh) +{ + struct efx_tc_encap_action *encap, *next; + + list_for_each_entry_safe(encap, next, &neigh->users, list) { + /* Should cause neigh usage count to fall to zero, freeing it */ + efx_release_neigh(efx, encap); + /* The encap has lost its neigh, so it's now unready */ + efx_tc_update_encap(efx, encap); + } +} + +void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev) +{ + struct efx_neigh_binder *neigh; + struct rhashtable_iter walk; + + mutex_lock(&efx->tc->mutex); + rhashtable_walk_enter(&efx->tc->neigh_ht, &walk); + rhashtable_walk_start(&walk); + while ((neigh = rhashtable_walk_next(&walk)) != NULL) { + if (IS_ERR(neigh)) + continue; + if (neigh->egdev != net_dev) + continue; + neigh->dying = true; + rhashtable_walk_stop(&walk); + synchronize_rcu(); /* Make sure any updates see dying flag */ + efx_tc_remove_neigh_users(efx, neigh); /* might sleep */ + rhashtable_walk_start(&walk); + } + rhashtable_walk_stop(&walk); + rhashtable_walk_exit(&walk); + mutex_unlock(&efx->tc->mutex); +} + +int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event, + void *ptr) +{ + if (efx->type->is_vf) + return NOTIFY_DONE; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + return efx_neigh_event(efx, ptr); + default: + return NOTIFY_DONE; + } +} diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.h b/drivers/net/ethernet/sfc/tc_encap_actions.h index 1a3679e81f09..4d755fb92daf 100644 --- a/drivers/net/ethernet/sfc/tc_encap_actions.h +++ b/drivers/net/ethernet/sfc/tc_encap_actions.h @@ -15,6 +15,54 @@ #include #include +/** + * struct efx_neigh_binder - driver state for a neighbour entry + * @net: the network namespace in 
which this neigh resides + * @dst_ip: the IPv4 destination address resolved by this neigh + * @dst_ip6: the IPv6 destination address resolved by this neigh + * @ha: the hardware (Ethernet) address of the neighbour + * @n_valid: true if the neighbour is in NUD_VALID state + * @lock: protects @ha and @n_valid + * @ttl: Time To Live associated with the route used + * @dying: set when egdev is going away, to skip further updates + * @egdev: egress device from the route lookup. Holds a reference + * @dev_tracker: reference tracker entry for @egdev + * @ns_tracker: reference tracker entry for @ns + * @ref: counts encap actions referencing this entry + * @used: jiffies of last time traffic hit any encap action using this. + * When counter reads update this, a new neighbour event is sent to + * indicate that the neighbour entry is still in use. + * @users: list of &struct efx_tc_encap_action + * @linkage: entry in efx->neigh_ht (keys are @net, @dst_ip, @dst_ip6). + * @work: processes neighbour state changes, updates the encap actions + * @efx: owning NIC instance. + * + * Associates a neighbour entry with the encap actions that are + * interested in it, allowing the latter to be updated when the + * neighbour details change. + * Whichever of @dst_ip and @dst_ip6 is not in use will be all-zeroes, + * this distinguishes IPv4 from IPv6 entries. + */ +struct efx_neigh_binder { + struct net *net; + __be32 dst_ip; + struct in6_addr dst_ip6; + char ha[ETH_ALEN]; + bool n_valid; + rwlock_t lock; + u8 ttl; + bool dying; + struct net_device *egdev; + netdevice_tracker dev_tracker; + netns_tracker ns_tracker; + refcount_t ref; + unsigned long used; + struct list_head users; + struct rhash_head linkage; + struct work_struct work; + struct efx_nic *efx; +}; + /* This limit is arbitrary; current hardware (SN1022) handles encap headers * of up to 126 bytes, but that limit is not enshrined in the MCDI protocol. 
*/ @@ -24,7 +72,11 @@ struct efx_tc_encap_action { struct ip_tunnel_key key; /* 52 bytes */ u32 dest_mport; /* is copied into struct efx_tc_action_set */ u8 encap_hdr_len; + bool n_valid; u8 encap_hdr[EFX_TC_MAX_ENCAP_HDR]; + struct efx_neigh_binder *neigh; + struct list_head list; /* entry on neigh->users list */ + struct list_head users; /* action sets using this encap_md */ struct rhash_head linkage; /* efx->tc_encap_ht */ refcount_t ref; u32 fw_id; /* index of this entry in firmware encap table */ @@ -44,4 +96,8 @@ struct efx_tc_encap_action *efx_tc_flower_create_encap_md( void efx_tc_flower_release_encap_md(struct efx_nic *efx, struct efx_tc_encap_action *encap); +void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev); +int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event, + void *ptr); + #endif /* EFX_TC_ENCAP_ACTIONS_H */ -- cgit v1.2.3 From a1e82162af0b8ae9e65320ca405c6327edb99648 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 8 Jun 2023 17:42:35 +0100 Subject: sfc: generate encap headers for TC offload Support constructing VxLAN and GENEVE headers, on either IPv4 or IPv6, using the neighbouring information obtained in encap->neigh to populate the Ethernet header. Note that the ef100 hardware does not insert UDP checksums when performing encap, so for IPv6 the remote endpoint will need to be configured with udp6zerocsumrx or equivalent. 
Signed-off-by: Edward Cree Reviewed-by: Simon Horman Reviewed-by: Pieter Jansen van Vuuren Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/tc_encap_actions.c | 194 ++++++++++++++++++++++++++-- 1 file changed, 185 insertions(+), 9 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c index c1bd7d468343..aac259528e73 100644 --- a/drivers/net/ethernet/sfc/tc_encap_actions.c +++ b/drivers/net/ethernet/sfc/tc_encap_actions.c @@ -243,12 +243,183 @@ static void efx_release_neigh(struct efx_nic *efx, efx_free_neigh(neigh); } -static void efx_gen_encap_header(struct efx_tc_encap_action *encap) +static void efx_gen_tun_header_eth(struct efx_tc_encap_action *encap, u16 proto) { - /* stub for now */ - encap->n_valid = false; - memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr)); - encap->encap_hdr_len = ETH_HLEN; + struct efx_neigh_binder *neigh = encap->neigh; + struct ethhdr *eth; + + encap->encap_hdr_len = sizeof(*eth); + eth = (struct ethhdr *)encap->encap_hdr; + + if (encap->neigh->n_valid) + ether_addr_copy(eth->h_dest, neigh->ha); + else + eth_zero_addr(eth->h_dest); + ether_addr_copy(eth->h_source, neigh->egdev->dev_addr); + eth->h_proto = htons(proto); +} + +static void efx_gen_tun_header_ipv4(struct efx_tc_encap_action *encap, u8 ipproto, u8 len) +{ + struct efx_neigh_binder *neigh = encap->neigh; + struct ip_tunnel_key *key = &encap->key; + struct iphdr *ip; + + ip = (struct iphdr *)(encap->encap_hdr + encap->encap_hdr_len); + encap->encap_hdr_len += sizeof(*ip); + + ip->daddr = key->u.ipv4.dst; + ip->saddr = key->u.ipv4.src; + ip->ttl = neigh->ttl; + ip->protocol = ipproto; + ip->version = 0x4; + ip->ihl = 0x5; + ip->tot_len = cpu_to_be16(ip->ihl * 4 + len); + ip_send_check(ip); +} + +#ifdef CONFIG_IPV6 +static void efx_gen_tun_header_ipv6(struct efx_tc_encap_action *encap, u8 ipproto, u8 len) +{ + struct efx_neigh_binder *neigh = encap->neigh; + struct 
ip_tunnel_key *key = &encap->key; + struct ipv6hdr *ip; + + ip = (struct ipv6hdr *)(encap->encap_hdr + encap->encap_hdr_len); + encap->encap_hdr_len += sizeof(*ip); + + ip6_flow_hdr(ip, key->tos, key->label); + ip->daddr = key->u.ipv6.dst; + ip->saddr = key->u.ipv6.src; + ip->hop_limit = neigh->ttl; + ip->nexthdr = ipproto; + ip->version = 0x6; + ip->payload_len = cpu_to_be16(len); +} +#endif + +static void efx_gen_tun_header_udp(struct efx_tc_encap_action *encap, u8 len) +{ + struct ip_tunnel_key *key = &encap->key; + struct udphdr *udp; + + udp = (struct udphdr *)(encap->encap_hdr + encap->encap_hdr_len); + encap->encap_hdr_len += sizeof(*udp); + + udp->dest = key->tp_dst; + udp->len = cpu_to_be16(sizeof(*udp) + len); +} + +static void efx_gen_tun_header_vxlan(struct efx_tc_encap_action *encap) +{ + struct ip_tunnel_key *key = &encap->key; + struct vxlanhdr *vxlan; + + vxlan = (struct vxlanhdr *)(encap->encap_hdr + encap->encap_hdr_len); + encap->encap_hdr_len += sizeof(*vxlan); + + vxlan->vx_flags = VXLAN_HF_VNI; + vxlan->vx_vni = vxlan_vni_field(tunnel_id_to_key32(key->tun_id)); +} + +static void efx_gen_tun_header_geneve(struct efx_tc_encap_action *encap) +{ + struct ip_tunnel_key *key = &encap->key; + struct genevehdr *geneve; + u32 vni; + + geneve = (struct genevehdr *)(encap->encap_hdr + encap->encap_hdr_len); + encap->encap_hdr_len += sizeof(*geneve); + + geneve->proto_type = htons(ETH_P_TEB); + /* convert tun_id to host-endian so we can use host arithmetic to + * extract individual bytes. 
+ */ + vni = ntohl(tunnel_id_to_key32(key->tun_id)); + geneve->vni[0] = vni >> 16; + geneve->vni[1] = vni >> 8; + geneve->vni[2] = vni; +} + +#define vxlan_header_l4_len (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) +#define vxlan4_header_len (sizeof(struct ethhdr) + sizeof(struct iphdr) + vxlan_header_l4_len) +static void efx_gen_vxlan_header_ipv4(struct efx_tc_encap_action *encap) +{ + BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan4_header_len); + efx_gen_tun_header_eth(encap, ETH_P_IP); + efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, vxlan_header_l4_len); + efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr)); + efx_gen_tun_header_vxlan(encap); +} + +#define geneve_header_l4_len (sizeof(struct udphdr) + sizeof(struct genevehdr)) +#define geneve4_header_len (sizeof(struct ethhdr) + sizeof(struct iphdr) + geneve_header_l4_len) +static void efx_gen_geneve_header_ipv4(struct efx_tc_encap_action *encap) +{ + BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve4_header_len); + efx_gen_tun_header_eth(encap, ETH_P_IP); + efx_gen_tun_header_ipv4(encap, IPPROTO_UDP, geneve_header_l4_len); + efx_gen_tun_header_udp(encap, sizeof(struct genevehdr)); + efx_gen_tun_header_geneve(encap); +} + +#ifdef CONFIG_IPV6 +#define vxlan6_header_len (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + vxlan_header_l4_len) +static void efx_gen_vxlan_header_ipv6(struct efx_tc_encap_action *encap) +{ + BUILD_BUG_ON(sizeof(encap->encap_hdr) < vxlan6_header_len); + efx_gen_tun_header_eth(encap, ETH_P_IPV6); + efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, vxlan_header_l4_len); + efx_gen_tun_header_udp(encap, sizeof(struct vxlanhdr)); + efx_gen_tun_header_vxlan(encap); +} + +#define geneve6_header_len (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + geneve_header_l4_len) +static void efx_gen_geneve_header_ipv6(struct efx_tc_encap_action *encap) +{ + BUILD_BUG_ON(sizeof(encap->encap_hdr) < geneve6_header_len); + efx_gen_tun_header_eth(encap, ETH_P_IPV6); + efx_gen_tun_header_ipv6(encap, IPPROTO_UDP, 
geneve_header_l4_len); + efx_gen_tun_header_udp(encap, sizeof(struct genevehdr)); + efx_gen_tun_header_geneve(encap); +} +#endif + +static void efx_gen_encap_header(struct efx_nic *efx, + struct efx_tc_encap_action *encap) +{ + encap->n_valid = encap->neigh->n_valid; + + /* GCC stupidly thinks that only values explicitly listed in the enum + * definition can _possibly_ be sensible case values, so without this + * cast it complains about the IPv6 versions. + */ + switch ((int)encap->type) { + case EFX_ENCAP_TYPE_VXLAN: + efx_gen_vxlan_header_ipv4(encap); + break; + case EFX_ENCAP_TYPE_GENEVE: + efx_gen_geneve_header_ipv4(encap); + break; +#ifdef CONFIG_IPV6 + case EFX_ENCAP_TYPE_VXLAN | EFX_ENCAP_FLAG_IPV6: + efx_gen_vxlan_header_ipv6(encap); + break; + case EFX_ENCAP_TYPE_GENEVE | EFX_ENCAP_FLAG_IPV6: + efx_gen_geneve_header_ipv6(encap); + break; +#endif + default: + /* unhandled encap type, can't happen */ + if (net_ratelimit()) + netif_err(efx, drv, efx->net_dev, + "Bogus encap type %d, can't generate\n", + encap->type); + + /* Use fallback action. */ + encap->n_valid = false; + break; + } } static void efx_tc_update_encap(struct efx_nic *efx, @@ -282,14 +453,19 @@ static void efx_tc_update_encap(struct efx_nic *efx, } } + /* Make sure we don't leak arbitrary bytes on the wire; + * set an all-0s ethernet header. A successful call to + * efx_gen_encap_header() will overwrite this. 
+ */ + memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr)); + encap->encap_hdr_len = ETH_HLEN; + if (encap->neigh) { read_lock_bh(&encap->neigh->lock); - efx_gen_encap_header(encap); + efx_gen_encap_header(efx, encap); read_unlock_bh(&encap->neigh->lock); } else { encap->n_valid = false; - memset(encap->encap_hdr, 0, sizeof(encap->encap_hdr)); - encap->encap_hdr_len = ETH_HLEN; } rc = efx_mae_update_encap_md(efx, encap); @@ -486,7 +662,7 @@ struct efx_tc_encap_action *efx_tc_flower_create_encap_md( } encap->dest_mport = rc; read_lock_bh(&encap->neigh->lock); - efx_gen_encap_header(encap); + efx_gen_encap_header(efx, encap); read_unlock_bh(&encap->neigh->lock); rc = efx_mae_allocate_encap_md(efx, encap); -- cgit v1.2.3 From dc510c6d2ecfff1faaf95b642c5fc01c86d6fdff Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 8 Jun 2023 11:20:06 +0900 Subject: net: renesas: rswitch: Use napi_gro_receive() in RX This hardware can receive multiple frames so that using napi_gro_receive() instead of netif_receive_skb() gets good performance of RX. Signed-off-by: Yoshihiro Shimoda Reviewed-by: Maciej Fijalkowski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/renesas/rswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index aace87139cea..7bb0a6d594a0 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -729,7 +729,7 @@ static bool rswitch_rx(struct net_device *ndev, int *quota) } skb_put(skb, pkt_len); skb->protocol = eth_type_trans(skb, ndev); - netif_receive_skb(skb); + napi_gro_receive(&rdev->napi, skb); rdev->ndev->stats.rx_packets++; rdev->ndev->stats.rx_bytes += pkt_len; -- cgit v1.2.3 From c87bd91e34e1593584c3b309e8fead833c985855 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 8 Jun 2023 11:20:07 +0900 Subject: net: renesas: rswitch: Use hardware pause features Since this driver used the "global rate limiter" feature of GWCA, the TX performance of each port was reduced when multiple ports transmitted frames simultaneously. To improve performance, remove the use of the "global rate limiter" feature and use "hardware pause" features of the following: - "per priority pause" of GWCA - "global pause" of COMA Note that these features are not related to the ethernet PAUSE frame. Signed-off-by: Yoshihiro Shimoda Reviewed-by: Maciej Fijalkowski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/renesas/rswitch.c | 36 +++++++++++++--------------------- drivers/net/ethernet/renesas/rswitch.h | 7 +++++++ 2 files changed, 21 insertions(+), 22 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 7bb0a6d594a0..84f62c77eb8f 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -90,6 +90,11 @@ static int rswitch_bpool_config(struct rswitch_private *priv) return rswitch_reg_wait(priv->addr, CABPIRM, CABPIRM_BPR, CABPIRM_BPR); } +static void rswitch_coma_init(struct rswitch_private *priv) +{ + iowrite32(CABPPFLC_INIT_VALUE, priv->addr + CABPPFLC0); +} + /* R-Switch-2 block (TOP) */ static void rswitch_top_init(struct rswitch_private *priv) { @@ -156,24 +161,6 @@ static int rswitch_gwca_axi_ram_reset(struct rswitch_private *priv) return rswitch_reg_wait(priv->addr, GWARIRM, GWARIRM_ARR, GWARIRM_ARR); } -static void rswitch_gwca_set_rate_limit(struct rswitch_private *priv, int rate) -{ - u32 gwgrlulc, gwgrlc; - - switch (rate) { - case 1000: - gwgrlulc = 0x0000005f; - gwgrlc = 0x00010260; - break; - default: - dev_err(&priv->pdev->dev, "%s: This rate is not supported (%d)\n", __func__, rate); - return; - } - - iowrite32(gwgrlulc, priv->addr + GWGRLULC); - iowrite32(gwgrlc, priv->addr + GWGRLC); -} - static bool rswitch_is_any_data_irq(struct rswitch_private *priv, u32 *dis, bool tx) { u32 *mask = tx ? priv->gwca.tx_irq_bits : priv->gwca.rx_irq_bits; @@ -402,7 +389,7 @@ static int rswitch_gwca_queue_format(struct net_device *ndev, linkfix->die_dt = DT_LINKFIX; rswitch_desc_set_dptr(linkfix, gq->ring_dma); - iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DQT : 0) | GWDCC_EDE, + iowrite32(GWDCC_BALR | (gq->dir_tx ? 
GWDCC_DCP(GWCA_IPV_NUM) | GWDCC_DQT : 0) | GWDCC_EDE, priv->addr + GWDCC_OFFS(gq->index)); return 0; @@ -500,7 +487,8 @@ static int rswitch_gwca_queue_ext_ts_format(struct net_device *ndev, linkfix->die_dt = DT_LINKFIX; rswitch_desc_set_dptr(linkfix, gq->ring_dma); - iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DQT : 0) | GWDCC_ETS | GWDCC_EDE, + iowrite32(GWDCC_BALR | (gq->dir_tx ? GWDCC_DCP(GWCA_IPV_NUM) | GWDCC_DQT : 0) | + GWDCC_ETS | GWDCC_EDE, priv->addr + GWDCC_OFFS(gq->index)); return 0; @@ -649,7 +637,8 @@ static int rswitch_gwca_hw_init(struct rswitch_private *priv) iowrite32(lower_32_bits(priv->gwca.ts_queue.ring_dma), priv->addr + GWTDCAC10); iowrite32(upper_32_bits(priv->gwca.ts_queue.ring_dma), priv->addr + GWTDCAC00); iowrite32(GWCA_TS_IRQ_BIT, priv->addr + GWTSDCC0); - rswitch_gwca_set_rate_limit(priv, priv->gwca.speed); + + iowrite32(GWTPC_PPPL(GWCA_IPV_NUM), priv->addr + GWTPC0); for (i = 0; i < RSWITCH_NUM_PORTS; i++) { err = rswitch_rxdmac_init(priv, i); @@ -1502,7 +1491,8 @@ static netdev_tx_t rswitch_start_xmit(struct sk_buff *skb, struct net_device *nd rswitch_desc_set_dptr(&desc->desc, dma_addr); desc->desc.info_ds = cpu_to_le16(skb->len); - desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) | INFO1_FMT); + desc->info1 = cpu_to_le64(INFO1_DV(BIT(rdev->etha->index)) | + INFO1_IPV(GWCA_IPV_NUM) | INFO1_FMT); if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { struct rswitch_gwca_ts_info *ts_info; @@ -1772,6 +1762,8 @@ static int rswitch_init(struct rswitch_private *priv) if (err < 0) return err; + rswitch_coma_init(priv); + err = rswitch_gwca_linkfix_alloc(priv); if (err < 0) return -ENOMEM; diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h index b3e0411b408e..bb9ed971a97c 100644 --- a/drivers/net/ethernet/renesas/rswitch.h +++ b/drivers/net/ethernet/renesas/rswitch.h @@ -48,6 +48,7 @@ #define GWCA_NUM_IRQS 8 #define GWCA_INDEX 0 #define AGENT_INDEX_GWCA 3 +#define GWCA_IPV_NUM 0 #define GWRO 
RSWITCH_GWCA0_OFFSET #define GWCA_TS_IRQ_RESOURCE_NAME "gwca0_rxts0" @@ -768,11 +769,14 @@ enum rswitch_gwca_mode { #define GWARIRM_ARR BIT(1) #define GWDCC_BALR BIT(24) +#define GWDCC_DCP_MASK GENMASK(18, 16) +#define GWDCC_DCP(prio) FIELD_PREP(GWDCC_DCP_MASK, (prio)) #define GWDCC_DQT BIT(11) #define GWDCC_ETS BIT(9) #define GWDCC_EDE BIT(8) #define GWTRC(queue) (GWTRC0 + (queue) / 32 * 4) +#define GWTPC_PPPL(ipv) BIT(ipv) #define GWDCC_OFFS(queue) (GWDCC0 + (queue) * 4) #define GWDIS(i) (GWDIS0 + (i) * 0x10) @@ -789,6 +793,8 @@ enum rswitch_gwca_mode { #define CABPIRM_BPIOG BIT(0) #define CABPIRM_BPR BIT(1) +#define CABPPFLC_INIT_VALUE 0x00800080 + /* MFWD */ #define FWPC0_LTHTA BIT(0) #define FWPC0_IP4UE BIT(3) @@ -863,6 +869,7 @@ enum DIE_DT { /* For transmission */ #define INFO1_TSUN(val) ((u64)(val) << 8ULL) +#define INFO1_IPV(prio) ((u64)(prio) << 28ULL) #define INFO1_CSD0(index) ((u64)(index) << 32ULL) #define INFO1_CSD1(index) ((u64)(index) << 40ULL) #define INFO1_DV(port_vector) ((u64)(port_vector) << 48ULL) -- cgit v1.2.3 From 4e635f9d86165e47f5440196f2ebdb258efb8341 Mon Sep 17 00:00:00 2001 From: Satha Rao Date: Thu, 8 Jun 2023 17:12:00 +0530 Subject: octeontx2-af: fixed resource availability check txschq_alloc response have two different arrays to store continuous and non-continuous schedulers of each level. Requested count should be checked for each array separately. Fixes: 5d9b976d4480 ("octeontx2-af: Support fixed transmit scheduler topology") Signed-off-by: Satha Rao Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Naveen Mamindlapalli Reviewed-by: Sridhar Samudrala Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 4ad707e758b9..1e058b96cbe2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -1878,7 +1878,8 @@ static int nix_check_txschq_alloc_req(struct rvu *rvu, int lvl, u16 pcifunc, free_cnt = rvu_rsrc_free_count(&txsch->schq); } - if (free_cnt < req_schq || req_schq > MAX_TXSCHQ_PER_FUNC) + if (free_cnt < req_schq || req->schq[lvl] > MAX_TXSCHQ_PER_FUNC || + req->schq_contig[lvl] > MAX_TXSCHQ_PER_FUNC) return NIX_AF_ERR_TLX_ALLOC_FAIL; /* If contiguous queues are needed, check for availability */ -- cgit v1.2.3 From 87e12a17eef476bbf768dc3a74419ad461f36fbc Mon Sep 17 00:00:00 2001 From: Nithin Dabilpuram Date: Thu, 8 Jun 2023 17:12:01 +0530 Subject: octeontx2-af: fix lbk link credits on cn10k Fix LBK link credits on CN10K to be same as CN9K i.e 16 * MAX_LBK_DATA_RATE instead of current scheme of calculation based on LBK buf length / FIFO size. Fixes: 6e54e1c5399a ("octeontx2-af: cn10K: Add MTU configuration") Signed-off-by: Nithin Dabilpuram Signed-off-by: Naveen Mamindlapalli Reviewed-by: Sridhar Samudrala Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 1e058b96cbe2..f01d057ad025 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4081,10 +4081,6 @@ int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req, static u64 rvu_get_lbk_link_credits(struct rvu *rvu, u16 lbk_max_frs) { - /* CN10k supports 72KB FIFO size and max packet size of 64k */ - if (rvu->hw->lbk_bufsize == 0x12000) - return (rvu->hw->lbk_bufsize - lbk_max_frs) / 16; - return 1600; /* 16 * max LBK datarate = 16 * 100Gbps */ } -- cgit v1.2.3 From 7ebe4eda4265642859507d1b3ca330d8c196cfe5 Mon Sep 17 00:00:00 2001 From: David Christensen Date: Thu, 8 Jun 2023 16:01:43 -0400 Subject: bnx2x: fix page fault following EEH recovery In the last step of the EEH recovery process, the EEH driver calls into bnx2x_io_resume() to re-initialize the NIC hardware via the function bnx2x_nic_load(). If an error occurs during bnx2x_nic_load(), OS and hardware resources are released and an error code is returned to the caller. When called from bnx2x_io_resume(), the return code is ignored and the network interface is brought up unconditionally. Later attempts to send a packet via this interface result in a page fault due to a null pointer reference. This patch checks the return code of bnx2x_nic_load(), prints an error message if necessary, and does not enable the interface. Signed-off-by: David Christensen Reviewed-by: Sridhar Samudrala Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 637d162bbcfa..1e7a6f1d4223 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -14294,11 +14294,16 @@ static void bnx2x_io_resume(struct pci_dev *pdev) bp->fw_seq = SHMEM_RD(bp, func_mb[BP_FW_MB_IDX(bp)].drv_mb_header) & DRV_MSG_SEQ_NUMBER_MASK; - if (netif_running(dev)) - bnx2x_nic_load(bp, LOAD_NORMAL); + if (netif_running(dev)) { + if (bnx2x_nic_load(bp, LOAD_NORMAL)) { + netdev_err(bp->dev, "Error during driver initialization, try unloading/reloading the driver\n"); + goto done; + } + } netif_device_attach(dev); +done: rtnl_unlock(); } -- cgit v1.2.3 From c023b61ac8285dc6b2b2f275bf9d97cfd36b56fb Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Mon, 5 Jun 2023 13:14:04 +0300 Subject: net/mlx5: Nullify qp->dbg pointer post destruction Nullifying qp->dbg is a preparation for the next patches from the series in which mlx5_core_destroy_qp() could actually fail, and then it can be called again which causes a kernel crash, since qp->dbg was not nullified in previous call. 
Signed-off-by: Patrisious Haddad Link: https://lore.kernel.org/r/1677e52bb642fd8d6062d73a5aa69083c0283dc9.1685953497.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index bb95b40d25eb..b08b5695ee45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -513,11 +513,11 @@ EXPORT_SYMBOL(mlx5_debug_qp_add); void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) { - if (!mlx5_debugfs_root) + if (!mlx5_debugfs_root || !qp->dbg) return; - if (qp->dbg) - rem_res_tree(qp->dbg); + rem_res_tree(qp->dbg); + qp->dbg = NULL; } EXPORT_SYMBOL(mlx5_debug_qp_remove); -- cgit v1.2.3 From 617f5db1a626f18d5cbb7c7faf7bf8f9ea12be78 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 5 Jun 2023 13:33:26 +0300 Subject: RDMA/mlx5: Fix affinity assignment The cited commit aimed to ensure that Virtual Functions (VFs) assign a queue affinity to a Queue Pair (QP) to distribute traffic when the LAG master creates a hardware LAG. If the affinity was set while the hardware was not in LAG, the firmware would ignore the affinity value. However, this commit unintentionally assigned an affinity to QPs on the LAG master's VPORT even if the RDMA device was not marked as LAG-enabled. In most cases, this was not an issue because when the hardware entered hardware LAG configuration, the RDMA device of the LAG master would be destroyed and a new one would be created, marked as LAG-enabled. The problem arises when a user configures Equal-Cost Multipath (ECMP). In ECMP mode, traffic can be directed to different physical ports based on the queue affinity, which is intended for use by VPORTS other than the E-Switch manager. 
ECMP mode is supported only if both E-Switch managers are in switchdev mode and the appropriate route is configured via IP. In this configuration, the RDMA device is not destroyed, and we retain the RDMA device that is not marked as LAG-enabled. To ensure correct behavior, Send Queues (SQs) opened by the E-Switch manager through verbs should be assigned strict affinity. This means they will only be able to communicate through the native physical port associated with the E-Switch manager. This will prevent the firmware from assigning affinity and will not allow the SQs to be remapped in case of failover. Fixes: 802dcc7fc5ec ("RDMA/mlx5: Support TX port affinity for VF drivers in LAG mode") Reviewed-by: Maor Gottlieb Signed-off-by: Mark Bloch Link: https://lore.kernel.org/r/425b05f4da840bc684b0f7e8ebf61aeb5cef09b0.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ drivers/infiniband/hw/mlx5/qp.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 12 ------------ include/linux/mlx5/driver.h | 12 ++++++++++++ 4 files changed, 18 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 91fc0cdf377d..2dfa6f49a6f4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1598,6 +1598,9 @@ static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev) MLX5_CAP_PORT_SELECTION(dev->mdev, port_select_flow_table_bypass)) return 0; + if (mlx5_lag_is_lacp_owner(dev->mdev) && !dev->lag_active) + return 0; + return dev->lag_active || (MLX5_CAP_GEN(dev->mdev, num_lag_ports) > 1 && MLX5_CAP_GEN(dev->mdev, lag_tx_port_affinity)); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 70ca8ffa9256..78b96bfb4e6a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1237,6 +1237,9 @@ static int 
create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid); MLX5_SET(tisc, tisc, transport_domain, tdn); + if (!mlx5_ib_lag_should_assign_affinity(dev) && + mlx5_lag_is_lacp_owner(dev->mdev)) + MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); if (qp->flags & IB_QP_CREATE_SOURCE_QPN) MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 1d879374acaa..229520405d4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -276,18 +276,6 @@ static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) return pci_num_vf(dev->pdev) ? true : false; } -static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) -{ - /* LACP owner conditions: - * 1) Function is physical. - * 2) LAG is supported by FW. - * 3) LAG is managed by driver (currently the only option). - */ - return MLX5_CAP_GEN(dev, vport_group_manager) && - (MLX5_CAP_GEN(dev, num_lag_ports) > 1) && - MLX5_CAP_GEN(dev, lag_master); -} - int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev); static inline int mlx5_rescan_drivers(struct mlx5_core_dev *dev) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a4c4f737f9c1..8ad16b779898 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1237,6 +1237,18 @@ static inline u16 mlx5_core_max_vfs(const struct mlx5_core_dev *dev) return dev->priv.sriov.max_vfs; } +static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) +{ + /* LACP owner conditions: + * 1) Function is physical. + * 2) LAG is supported by FW. + * 3) LAG is managed by driver (currently the only option). 
+ */ + return MLX5_CAP_GEN(dev, vport_group_manager) && + (MLX5_CAP_GEN(dev, num_lag_ports) > 1) && + MLX5_CAP_GEN(dev, lag_master); +} + static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { -- cgit v1.2.3 From 132b4ebfa090492663f84144a1e7afaca54cd58a Mon Sep 17 00:00:00 2001 From: Nitya Sunkad Date: Thu, 8 Jun 2023 22:50:16 -0700 Subject: ionic: add support for ethtool extended stat link_down_count Following the example of 'commit 9a0f830f8026 ("ethtool: linkstate: add a statistic for PHY down events")', added support for link down events. Add callback ionic_get_link_ext_stats to ionic_ethtool.c to support link_down_count, a property of netdev that gets reported exclusively on physical link down events. Run ethtool -I to display the device link down count. Signed-off-by: Nitya Sunkad Signed-off-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_ethtool.c | 10 ++++++++++ drivers/net/ethernet/pensando/ionic/ionic_lif.c | 1 + drivers/net/ethernet/pensando/ionic/ionic_lif.h | 1 + 3 files changed, 12 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 9b2b96fa36af..3a6b0a9bc241 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -104,6 +104,15 @@ static void ionic_get_regs(struct net_device *netdev, struct ethtool_regs *regs, memcpy_fromio(p + offset, lif->ionic->idev.dev_cmd_regs->words, size); } +static void ionic_get_link_ext_stats(struct net_device *netdev, + struct ethtool_link_ext_stats *stats) +{ + struct ionic_lif *lif = netdev_priv(netdev); + + if (lif->ionic->pdev->is_physfn) + stats->link_down_events = lif->link_down_count; +} + static int ionic_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *ks) { @@ -1074,6 +1083,7 @@ static const struct ethtool_ops ionic_ethtool_ops = { 
.get_regs_len = ionic_get_regs_len, .get_regs = ionic_get_regs, .get_link = ethtool_op_get_link, + .get_link_ext_stats = ionic_get_link_ext_stats, .get_link_ksettings = ionic_get_link_ksettings, .set_link_ksettings = ionic_set_link_ksettings, .get_coalesce = ionic_get_coalesce, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 957027e546b3..6ccc1ea91992 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -168,6 +168,7 @@ static void ionic_link_status_check(struct ionic_lif *lif) } } else { if (netif_carrier_ok(netdev)) { + lif->link_down_count++; netdev_info(netdev, "Link down\n"); netif_carrier_off(netdev); } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index c9c4c46d5a16..fd2ea670e7d8 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -201,6 +201,7 @@ struct ionic_lif { u64 hw_features; bool registered; u16 lif_type; + unsigned int link_down_count; unsigned int nmcast; unsigned int nucast; unsigned int nvlans; -- cgit v1.2.3 From 998b85f0468f0b4784da69c087f52149ae7ded13 Mon Sep 17 00:00:00 2001 From: Martin Habets Date: Fri, 9 Jun 2023 08:57:36 +0100 Subject: sfc: Add devlink dev info support for EF10 Reuse the work done for EF100 to add devlink support for EF10. There is no devlink port support for EF10. Signed-off-by: Martin Habets Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/efx.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index a4f22d8e6ac7..d670a319b379 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -32,6 +32,7 @@ #include "io.h" #include "selftest.h" #include "sriov.h" +#include "efx_devlink.h" #include "mcdi_port_common.h" #include "mcdi_pcol.h" @@ -877,6 +878,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev) if (efx->type->sriov_fini) efx->type->sriov_fini(efx); + efx_fini_devlink_lock(efx); efx_unregister_netdev(efx); efx_mtd_remove(efx); @@ -886,6 +888,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev) efx_fini_io(efx); pci_dbg(efx->pci_dev, "shutdown successful\n"); + efx_fini_devlink_and_unlock(efx); efx_fini_struct(efx); free_netdev(efx->net_dev); probe_data = container_of(efx, struct efx_probe_data, efx); @@ -1025,7 +1028,13 @@ static int efx_pci_probe_post_io(struct efx_nic *efx) NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_NDO_XMIT; + /* devlink creation, registration and lock */ + rc = efx_probe_devlink_and_lock(efx); + if (rc) + pci_err(efx->pci_dev, "devlink registration failed"); + rc = efx_register_netdev(efx); + efx_probe_devlink_unlock(efx); if (!rc) return 0; -- cgit v1.2.3 From b803d1fded4085d268507a432dac8077ead68971 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 9 Jun 2023 05:47:17 -0700 Subject: net: mana: Add support for vlan tagging To support vlan, use MANA_LONG_PKT_FMT if vlan tag is present in TX skb. Then extract the vlan tag from the skb struct, and save it to tx_oob for the NIC to transmit. For vlan tags on the payload, they are accepted by the NIC too. For RX, extract the vlan tag from CQE and put it into skb. Signed-off-by: Haiyang Zhang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/microsoft/mana/mana_en.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index d907727c7b7a..cd4d5ceb9f2d 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -179,6 +179,14 @@ netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev) pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset; } + if (skb_vlan_tag_present(skb)) { + pkt_fmt = MANA_LONG_PKT_FMT; + pkg.tx_oob.l_oob.inject_vlan_pri_tag = 1; + pkg.tx_oob.l_oob.pcp = skb_vlan_tag_get_prio(skb); + pkg.tx_oob.l_oob.dei = skb_vlan_tag_get_cfi(skb); + pkg.tx_oob.l_oob.vlan_id = skb_vlan_tag_get_id(skb); + } + pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt; if (pkt_fmt == MANA_SHORT_PKT_FMT) { @@ -1457,6 +1465,12 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe, skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3); } + if (cqe->rx_vlantag_present) { + u16 vlan_tci = cqe->rx_vlan_id; + + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); + } + u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; rx_stats->bytes += pkt_len; @@ -2451,8 +2465,9 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, ndev->hw_features |= NETIF_F_RXCSUM; ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6; ndev->hw_features |= NETIF_F_RXHASH; - ndev->features = ndev->hw_features; - ndev->vlan_features = 0; + ndev->features = ndev->hw_features | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; + ndev->vlan_features = ndev->features; ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | NETDEV_XDP_ACT_NDO_XMIT; -- cgit v1.2.3 From 2b84960fc5dd38a19241388fb33f20936cb217e2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 9 Jun 2023 16:59:16 +0300 Subject: net/sched: taprio: report class offload stats per TXQ, not 
per TC The taprio Qdisc creates child classes per netdev TX queue, but taprio_dump_class_stats() currently reports offload statistics per traffic class. Traffic classes are groups of TXQs sharing the same dequeue priority, so this is incorrect and we shouldn't be bundling up the TXQ stats when reporting them, as we currently do in enetc. Modify the API from taprio to drivers such that they report TXQ offload stats and not TC offload stats. There is no change in the UAPI or in the global Qdisc stats. Fixes: 6c1adb650c8d ("net/sched: taprio: add netlink reporting for offload statistics counters") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/enetc/enetc_qos.c | 20 +++++++------------- include/net/pkt_sched.h | 10 +++++----- net/sched/sch_taprio.c | 8 ++++---- 3 files changed, 16 insertions(+), 22 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 71157eba1fbe..58cdd67bb573 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -160,20 +160,14 @@ static void enetc_taprio_stats(struct net_device *ndev, stats->window_drops = window_drops; } -static void enetc_taprio_tc_stats(struct net_device *ndev, - struct tc_taprio_qopt_tc_stats *tc_stats) +static void enetc_taprio_queue_stats(struct net_device *ndev, + struct tc_taprio_qopt_queue_stats *queue_stats) { - struct tc_taprio_qopt_stats *stats = &tc_stats->stats; + struct tc_taprio_qopt_stats *stats = &queue_stats->stats; struct enetc_ndev_priv *priv = netdev_priv(ndev); - int tc = tc_stats->tc; - u64 window_drops = 0; - int i; + int queue = queue_stats->queue; - for (i = 0; i < priv->num_tx_rings; i++) - if (priv->tx_ring[i]->prio == tc) - window_drops += priv->tx_ring[i]->stats.win_drop; - - stats->window_drops = window_drops; + stats->window_drops = priv->tx_ring[queue]->stats.win_drop; } 
static int enetc_taprio_replace(struct net_device *ndev, @@ -208,8 +202,8 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) case TAPRIO_CMD_STATS: enetc_taprio_stats(ndev, &offload->stats); break; - case TAPRIO_CMD_TC_STATS: - enetc_taprio_tc_stats(ndev, &offload->tc_stats); + case TAPRIO_CMD_QUEUE_STATS: + enetc_taprio_queue_stats(ndev, &offload->queue_stats); break; default: err = -EOPNOTSUPP; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 518febb91c9f..e98aac9d5ad5 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -191,7 +191,7 @@ enum tc_taprio_qopt_cmd { TAPRIO_CMD_REPLACE, TAPRIO_CMD_DESTROY, TAPRIO_CMD_STATS, - TAPRIO_CMD_TC_STATS, + TAPRIO_CMD_QUEUE_STATS, }; /** @@ -208,8 +208,8 @@ struct tc_taprio_qopt_stats { u64 tx_overruns; }; -struct tc_taprio_qopt_tc_stats { - int tc; +struct tc_taprio_qopt_queue_stats { + int queue; struct tc_taprio_qopt_stats stats; }; @@ -227,8 +227,8 @@ struct tc_taprio_qopt_offload { union { /* TAPRIO_CMD_STATS */ struct tc_taprio_qopt_stats stats; - /* TAPRIO_CMD_TC_STATS */ - struct tc_taprio_qopt_tc_stats tc_stats; + /* TAPRIO_CMD_QUEUE_STATS */ + struct tc_taprio_qopt_queue_stats queue_stats; /* TAPRIO_CMD_REPLACE */ struct { struct tc_mqprio_qopt_offload mqprio; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 4a4e6ff894c1..c6627f5abdfa 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -2458,9 +2458,9 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, { struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); struct tc_taprio_qopt_offload offload = { - .cmd = TAPRIO_CMD_TC_STATS, - .tc_stats = { - .tc = cl - 1, + .cmd = TAPRIO_CMD_QUEUE_STATS, + .queue_stats = { + .queue = cl - 1, }, }; struct Qdisc *child; @@ -2470,7 +2470,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, qdisc_qstats_copy(d, child) < 0) return -1; - return taprio_dump_xstats(sch, d, &offload, 
&offload.tc_stats.stats); + return taprio_dump_xstats(sch, d, &offload, &offload.queue_stats.stats); } static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) -- cgit v1.2.3 From f1e668d29c57499f734a291bfb96a82142322f41 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 9 Jun 2023 16:59:17 +0300 Subject: net: enetc: reset taprio stats when taprio is deleted Currently, the window_drop stats persist even if an incorrect Qdisc was removed from the interface and a new one is installed. This is because the enetc driver keeps the state, and that is persistent across multiple Qdiscs. To resolve the issue, clear all win_drop counters from all TX queues when the currently active Qdisc is removed. These counters are zero by default. The counters visible in ethtool -S are also affected, but I don't care very much about preserving those enough to keep them monotonically incrementing. Fixes: 4802fca8d1af ("net: enetc: report statistics counters for taprio") Signed-off-by: Vladimir Oltean Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/enetc/enetc_qos.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 58cdd67bb573..9d74104df7c8 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -127,6 +127,14 @@ static int enetc_setup_taprio(struct enetc_ndev_priv *priv, return 0; } +static void enetc_reset_taprio_stats(struct enetc_ndev_priv *priv) +{ + int i; + + for (i = 0; i < priv->num_tx_rings; i++) + priv->tx_ring[i]->stats.win_drop = 0; +} + static void enetc_reset_taprio(struct enetc_ndev_priv *priv) { struct enetc_hw *hw = &priv->si->hw; @@ -145,6 +153,7 @@ static void enetc_taprio_destroy(struct net_device *ndev) enetc_reset_taprio(priv); enetc_reset_tc_mqprio(ndev); + enetc_reset_taprio_stats(priv); } static void enetc_taprio_stats(struct net_device *ndev, -- cgit v1.2.3 From 50f6c3d57e9a7d11ff935198c1d55d37975c2fa4 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 9 Jun 2023 19:32:06 +0200 Subject: mlxsw: spectrum_router: mlxsw_sp_router_fini(): Extract a helper variable Make mlxsw_sp_router_fini() more similar to the _init() function (and more concise) by extracting the `router' handle to a named variable and using that throughout. The availability of a dedicated `router' variable will come in handy in following patches. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 25 +++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 7304e8a29cf9..583d0b717e25 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -10664,15 +10664,16 @@ err_router_ops_init: void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_sp_router *router = mlxsw_sp->router; + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), - &mlxsw_sp->router->netdevice_nb); - unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), - &mlxsw_sp->router->fib_nb); + &router->netdevice_nb); + unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), &router->fib_nb); unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), - &mlxsw_sp->router->nexthop_nb); - unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); - unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb); - unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb); + &router->nexthop_nb); + unregister_netevent_notifier(&router->netevent_nb); + unregister_inet6addr_notifier(&router->inet6addr_nb); + unregister_inetaddr_notifier(&router->inetaddr_nb); mlxsw_core_flush_owq(); mlxsw_sp_mp_hash_fini(mlxsw_sp); mlxsw_sp_neigh_fini(mlxsw_sp); @@ -10680,12 +10681,12 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_vrs_fini(mlxsw_sp); mlxsw_sp_mr_fini(mlxsw_sp); mlxsw_sp_lpm_fini(mlxsw_sp); - rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); - rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); + rhashtable_destroy(&router->nexthop_group_ht); + rhashtable_destroy(&router->nexthop_ht); mlxsw_sp_ipips_fini(mlxsw_sp); mlxsw_sp_rifs_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); - cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw); - 
mutex_destroy(&mlxsw_sp->router->lock); - kfree(mlxsw_sp->router); + cancel_delayed_work_sync(&router->nh_grp_activity_dw); + mutex_destroy(&router->lock); + kfree(router); } -- cgit v1.2.3 From 41b2bd208e8acf0f453e258f342a593332e3d2c8 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 9 Jun 2023 19:32:07 +0200 Subject: mlxsw: spectrum_router: Move here inetaddr validator notifiers The validation logic is already in the router code. Move there the notifier blocks themselves as well. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 18 +-------------- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 4 ---- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 26 ++++++++++++++++++---- .../net/ethernet/mellanox/mlxsw/spectrum_router.h | 2 ++ 4 files changed, 25 insertions(+), 25 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 02a327744a61..4609b13bda02 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -5139,14 +5139,6 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, return notifier_from_errno(err); } -static struct notifier_block mlxsw_sp_inetaddr_valid_nb __read_mostly = { - .notifier_call = mlxsw_sp_inetaddr_valid_event, -}; - -static struct notifier_block mlxsw_sp_inet6addr_valid_nb __read_mostly = { - .notifier_call = mlxsw_sp_inet6addr_valid_event, -}; - static const struct pci_device_id mlxsw_sp1_pci_id_table[] = { {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0}, {0, }, @@ -5191,12 +5183,9 @@ static int __init mlxsw_sp_module_init(void) { int err; - register_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); - register_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); - err = mlxsw_core_driver_register(&mlxsw_sp1_driver); if 
(err) - goto err_sp1_core_driver_register; + return err; err = mlxsw_core_driver_register(&mlxsw_sp2_driver); if (err) @@ -5242,9 +5231,6 @@ err_sp3_core_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp2_driver); err_sp2_core_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp1_driver); -err_sp1_core_driver_register: - unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); - unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); return err; } @@ -5258,8 +5244,6 @@ static void __exit mlxsw_sp_module_exit(void) mlxsw_core_driver_unregister(&mlxsw_sp3_driver); mlxsw_core_driver_unregister(&mlxsw_sp2_driver); mlxsw_core_driver_unregister(&mlxsw_sp1_driver); - unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb); - unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb); } module_init(mlxsw_sp_module_init); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 4c22f8004514..0b57c8d0cce0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -755,10 +755,6 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, const struct net_device *macvlan_dev); -int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, - unsigned long event, void *ptr); -int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, - unsigned long event, void *ptr); int mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, struct net_device *l3_dev, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 583d0b717e25..edfc42230285 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8879,8 +8879,8 @@ out: return 
notifier_from_errno(err); } -int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr) { struct in_validator_info *ivi = (struct in_validator_info *) ptr; struct net_device *dev = ivi->ivi_dev->dev; @@ -8962,8 +8962,8 @@ static int mlxsw_sp_inet6addr_event(struct notifier_block *nb, return NOTIFY_DONE; } -int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, - unsigned long event, void *ptr) +static int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, + unsigned long event, void *ptr) { struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr; struct net_device *dev = i6vi->i6vi_dev->dev; @@ -10510,6 +10510,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack) { struct mlxsw_sp_router *router; + struct notifier_block *nb; int err; router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL); @@ -10588,6 +10589,17 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_register_inet6addr_notifier; + router->inetaddr_valid_nb.notifier_call = mlxsw_sp_inetaddr_valid_event; + err = register_inetaddr_validator_notifier(&router->inetaddr_valid_nb); + if (err) + goto err_register_inetaddr_valid_notifier; + + nb = &router->inet6addr_valid_nb; + nb->notifier_call = mlxsw_sp_inet6addr_valid_event; + err = register_inet6addr_validator_notifier(nb); + if (err) + goto err_register_inet6addr_valid_notifier; + mlxsw_sp->router->netevent_nb.notifier_call = mlxsw_sp_router_netevent_event; err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb); @@ -10627,6 +10639,10 @@ err_register_fib_notifier: err_register_nexthop_notifier: unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); err_register_netevent_notifier: + unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb); +err_register_inet6addr_valid_notifier: + 
unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb); +err_register_inetaddr_valid_notifier: unregister_inet6addr_notifier(&router->inet6addr_nb); err_register_inet6addr_notifier: unregister_inetaddr_notifier(&router->inetaddr_nb); @@ -10672,6 +10688,8 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), &router->nexthop_nb); unregister_netevent_notifier(&router->netevent_nb); + unregister_inet6addr_validator_notifier(&router->inet6addr_valid_nb); + unregister_inetaddr_validator_notifier(&router->inetaddr_valid_nb); unregister_inet6addr_notifier(&router->inet6addr_nb); unregister_inetaddr_notifier(&router->inetaddr_nb); mlxsw_core_flush_owq(); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 37d6e4c80e6a..229d38c514b9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -52,6 +52,8 @@ struct mlxsw_sp_router { struct notifier_block inetaddr_nb; struct notifier_block inet6addr_nb; struct notifier_block netdevice_nb; + struct notifier_block inetaddr_valid_nb; + struct notifier_block inet6addr_valid_nb; const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_ipip_ops **ipip_ops_arr; struct mlxsw_sp_router_nve_decap nve_decap_config; -- cgit v1.2.3 From 48dde35ea157b4b17bcccb8cd6fed3dfd9627a7a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 9 Jun 2023 19:32:08 +0200 Subject: mlxsw: spectrum_router: Pass router to mlxsw_sp_router_schedule_work() directly Instead of passing a notifier block and deducing the router pointer from that in the helper, do that in the caller, and pass the result. In the following patches, the pointer will also be made useful in the caller. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index edfc42230285..7b1877c116ed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2748,13 +2748,11 @@ static void mlxsw_sp_router_update_priority_work(struct work_struct *work) } static int mlxsw_sp_router_schedule_work(struct net *net, - struct notifier_block *nb, + struct mlxsw_sp_router *router, void (*cb)(struct work_struct *)) { struct mlxsw_sp_netevent_work *net_work; - struct mlxsw_sp_router *router; - router = container_of(nb, struct mlxsw_sp_router, netevent_nb); if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp))) return NOTIFY_DONE; @@ -2773,11 +2771,14 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, { struct mlxsw_sp_netevent_work *net_work; struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp_router *router; struct mlxsw_sp *mlxsw_sp; unsigned long interval; struct neigh_parms *p; struct neighbour *n; + router = container_of(nb, struct mlxsw_sp_router, netevent_nb); + switch (event) { case NETEVENT_DELAY_PROBE_TIME_UPDATE: p = ptr; @@ -2830,11 +2831,11 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, break; case NETEVENT_IPV4_MPATH_HASH_UPDATE: case NETEVENT_IPV6_MPATH_HASH_UPDATE: - return mlxsw_sp_router_schedule_work(ptr, nb, + return mlxsw_sp_router_schedule_work(ptr, router, mlxsw_sp_router_mp_hash_event_work); case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE: - return mlxsw_sp_router_schedule_work(ptr, nb, + return mlxsw_sp_router_schedule_work(ptr, router, mlxsw_sp_router_update_priority_work); } -- cgit v1.2.3 From 14304e70634cb03913ec11b7f418df752d9ee3f8 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 9 Jun 2023 19:32:09 +0200 
Subject: mlxsw: spectrum_router: Use the available router pointer for netevent handling This code handles NETEVENT_DELAY_PROBE_TIME_UPDATE, which is invoked every time the delay_probe_time changes. mlxsw router currently only maintains one timer, so the last delay_probe_time set wins. Currently, mlxsw uses mlxsw_sp_port_lower_dev_hold() to find a reference to the router. This is no longer necessary. But as a side effect, this makes sure that only updates to "interesting netdevices" (ones that have a physical netdevice lower) are projected. Retain that side effect by calling mlxsw_sp_port_dev_lower_find_rcu() and punting if there is none. Then just proceed using the router pointer that's already at hand in the helper. Note that previously, the code took and put a reference of the netdevice. Because the mlxsw_sp pointer is now obtained from the notifier block, the port pointer (non-) NULL-ness is all that's relevant, and the reference does not need to be taken anymore. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 7b1877c116ed..9d34fc846b93 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2766,13 +2766,22 @@ static int mlxsw_sp_router_schedule_work(struct net *net, return NOTIFY_DONE; } +static bool mlxsw_sp_dev_lower_is_port(struct net_device *dev) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + rcu_read_lock(); + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find_rcu(dev); + rcu_read_unlock(); + return !!mlxsw_sp_port; +} + static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct mlxsw_sp_netevent_work *net_work; struct mlxsw_sp_port *mlxsw_sp_port; struct mlxsw_sp_router *router; - struct mlxsw_sp *mlxsw_sp; unsigned long interval; struct neigh_parms *p; struct neighbour *n; @@ -2791,15 +2800,11 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, /* We are in atomic context and can't take RTNL mutex, * so use RCU variant to walk the device chain. 
*/ - mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev); - if (!mlxsw_sp_port) + if (!mlxsw_sp_dev_lower_is_port(p->dev)) return NOTIFY_DONE; - mlxsw_sp = mlxsw_sp_port->mlxsw_sp; interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME)); - mlxsw_sp->router->neighs_update.interval = interval; - - mlxsw_sp_port_dev_put(mlxsw_sp_port); + router->neighs_update.interval = interval; break; case NETEVENT_NEIGH_UPDATE: n = ptr; -- cgit v1.2.3 From 151b89f6025a95dd8083d5844f2746a9450653ca Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 9 Jun 2023 19:32:10 +0200 Subject: mlxsw: spectrum_router: Reuse work neighbor initialization in work scheduler After the struct mlxsw_sp_netevent_work.n field initialization is moved here, the body of code that handles NETEVENT_NEIGH_UPDATE is almost identical to the one in the helper function. Therefore defer to the helper instead of inlining the equivalent. Note that previously, the code took and put a reference of the netdevice. The new code defers to mlxsw_sp_dev_lower_is_port() to obviate the need for taking the reference. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 29 ++++++++-------------- 1 file changed, 10 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 9d34fc846b93..a0598aa4cb5d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2749,6 +2749,7 @@ static void mlxsw_sp_router_update_priority_work(struct work_struct *work) static int mlxsw_sp_router_schedule_work(struct net *net, struct mlxsw_sp_router *router, + struct neighbour *n, void (*cb)(struct work_struct *)) { struct mlxsw_sp_netevent_work *net_work; @@ -2762,6 +2763,7 @@ static int mlxsw_sp_router_schedule_work(struct net *net, INIT_WORK(&net_work->work, cb); net_work->mlxsw_sp = router->mlxsw_sp; + net_work->n = n; mlxsw_core_schedule_work(&net_work->work); return NOTIFY_DONE; } @@ -2779,12 +2781,11 @@ static bool mlxsw_sp_dev_lower_is_port(struct net_device *dev) static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, unsigned long event, void *ptr) { - struct mlxsw_sp_netevent_work *net_work; - struct mlxsw_sp_port *mlxsw_sp_port; struct mlxsw_sp_router *router; unsigned long interval; struct neigh_parms *p; struct neighbour *n; + struct net *net; router = container_of(nb, struct mlxsw_sp_router, netevent_nb); @@ -2808,39 +2809,29 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, break; case NETEVENT_NEIGH_UPDATE: n = ptr; + net = neigh_parms_net(n->parms); if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6) return NOTIFY_DONE; - mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev); - if (!mlxsw_sp_port) + if (!mlxsw_sp_dev_lower_is_port(n->dev)) return NOTIFY_DONE; - net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC); - if (!net_work) { - mlxsw_sp_port_dev_put(mlxsw_sp_port); - return NOTIFY_BAD; - } - - INIT_WORK(&net_work->work, 
mlxsw_sp_router_neigh_event_work); - net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - net_work->n = n; - /* Take a reference to ensure the neighbour won't be * destructed until we drop the reference in delayed * work. */ neigh_clone(n); - mlxsw_core_schedule_work(&net_work->work); - mlxsw_sp_port_dev_put(mlxsw_sp_port); - break; + return mlxsw_sp_router_schedule_work(net, router, n, + mlxsw_sp_router_neigh_event_work); + case NETEVENT_IPV4_MPATH_HASH_UPDATE: case NETEVENT_IPV6_MPATH_HASH_UPDATE: - return mlxsw_sp_router_schedule_work(ptr, router, + return mlxsw_sp_router_schedule_work(ptr, router, NULL, mlxsw_sp_router_mp_hash_event_work); case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE: - return mlxsw_sp_router_schedule_work(ptr, router, + return mlxsw_sp_router_schedule_work(ptr, router, NULL, mlxsw_sp_router_update_priority_work); } -- cgit v1.2.3 From 0255f74845c0035d918697b7abff1fcf45ed2789 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 9 Jun 2023 19:32:11 +0200 Subject: mlxsw: Convert RIF-has-netdevice queries to a dedicated helper In a number of places, a netdevice underlying a RIF is obtained only to check if it is a NULL pointer. In order to clean up the interface between the router and the other modules, add a new helper to specifically answer this question, and convert the relevant uses to this new interface. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c | 6 +++--- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 5 +++++ drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h | 1 + 4 files changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 5416093c0e35..c8a356accdf8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -221,7 +221,7 @@ start_again: for (; i < rif_count; i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); - if (!rif || !mlxsw_sp_rif_dev(rif)) + if (!rif || !mlxsw_sp_rif_has_dev(rif)) continue; err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry, rif, counters_enabled); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index 1f6bc0c7e91d..b0f03009c130 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -720,7 +720,7 @@ int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table, const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp_mr_vif *mr_vif; - if (!rif_dev) + if (!mlxsw_sp_rif_has_dev(rif)) return 0; mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); @@ -736,7 +736,7 @@ void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table, const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp_mr_vif *mr_vif; - if (!rif_dev) + if (!mlxsw_sp_rif_has_dev(rif)) return; mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); @@ -754,7 +754,7 @@ void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table, struct mlxsw_sp_mr *mr = mlxsw_sp->mr; struct mlxsw_sp_mr_vif *mr_vif; - if (!rif_dev) + if (!mlxsw_sp_rif_has_dev(rif)) return; /* Search for a VIF that use that 
RIF */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index a0598aa4cb5d..3259aede09ec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8080,6 +8080,11 @@ const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) return rif->dev; } +bool mlxsw_sp_rif_has_dev(const struct mlxsw_sp_rif *rif) +{ + return !!mlxsw_sp_rif_dev(rif); +} + static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif) { struct rtnl_hw_stats64 stats = {}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 229d38c514b9..a6a8cf0b4500 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -94,6 +94,7 @@ u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif); u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); +bool mlxsw_sp_rif_has_dev(const struct mlxsw_sp_rif *rif); int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir, -- cgit v1.2.3 From 5374a50f2eb617022cdd1aab6b1b7d4d2a952d56 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 9 Jun 2023 19:32:12 +0200 Subject: mlxsw: Convert does-RIF-have-this-netdev queries to a dedicated helper In a number of places, a netdevice underlying a RIF is obtained only to compare it to another pointer. In order to clean up the interface between the router and the other modules, add a new helper to specifically answer this question, and convert the relevant uses to this new interface. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c | 13 +++++-------- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 8 +++++++- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h | 2 ++ 3 files changed, 14 insertions(+), 9 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c index b0f03009c130..69cd689dbc83 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c @@ -704,12 +704,12 @@ void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index) static struct mlxsw_sp_mr_vif * mlxsw_sp_mr_dev_vif_lookup(struct mlxsw_sp_mr_table *mr_table, - const struct net_device *dev) + const struct mlxsw_sp_rif *rif) { vifi_t vif_index; for (vif_index = 0; vif_index < MAXVIFS; vif_index++) - if (mr_table->vifs[vif_index].dev == dev) + if (mlxsw_sp_rif_dev_is(rif, mr_table->vifs[vif_index].dev)) return &mr_table->vifs[vif_index]; return NULL; } @@ -717,13 +717,12 @@ mlxsw_sp_mr_dev_vif_lookup(struct mlxsw_sp_mr_table *mr_table, int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table, const struct mlxsw_sp_rif *rif) { - const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp_mr_vif *mr_vif; if (!mlxsw_sp_rif_has_dev(rif)) return 0; - mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif); if (!mr_vif) return 0; return mlxsw_sp_mr_vif_resolve(mr_table, mr_vif->dev, mr_vif, @@ -733,13 +732,12 @@ int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table, void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table, const struct mlxsw_sp_rif *rif) { - const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp_mr_vif *mr_vif; if (!mlxsw_sp_rif_has_dev(rif)) return; - mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif); if (!mr_vif) 
return; mlxsw_sp_mr_vif_unresolve(mr_table, mr_vif->dev, mr_vif); @@ -748,7 +746,6 @@ void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table, void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table, const struct mlxsw_sp_rif *rif, int mtu) { - const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp; struct mlxsw_sp_mr_route_vif_entry *rve; struct mlxsw_sp_mr *mr = mlxsw_sp->mr; @@ -758,7 +755,7 @@ void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table, return; /* Search for a VIF that use that RIF */ - mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev); + mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif); if (!mr_vif) return; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 3259aede09ec..537730b22c7a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -7705,7 +7705,7 @@ mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, for (i = 0; i < max_rifs; i++) if (mlxsw_sp->router->rifs[i] && - mlxsw_sp->router->rifs[i]->dev == dev) + mlxsw_sp_rif_dev_is(mlxsw_sp->router->rifs[i], dev)) return mlxsw_sp->router->rifs[i]; return NULL; @@ -8085,6 +8085,12 @@ bool mlxsw_sp_rif_has_dev(const struct mlxsw_sp_rif *rif) return !!mlxsw_sp_rif_dev(rif); } +bool mlxsw_sp_rif_dev_is(const struct mlxsw_sp_rif *rif, + const struct net_device *dev) +{ + return mlxsw_sp_rif_dev(rif) == dev; +} + static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif) { struct rtnl_hw_stats64 stats = {}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index a6a8cf0b4500..b941e781e476 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -95,6 +95,8 @@ u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device 
*ol_dev); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); bool mlxsw_sp_rif_has_dev(const struct mlxsw_sp_rif *rif); +bool mlxsw_sp_rif_dev_is(const struct mlxsw_sp_rif *rif, + const struct net_device *dev); int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir, -- cgit v1.2.3 From df95ae66cc0a1606278677b1be4f2170c73876a9 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 9 Jun 2023 19:32:13 +0200 Subject: mlxsw: spectrum_router: Privatize mlxsw_sp_rif_dev() Now that the external users of mlxsw_sp_rif_dev() have been converted in the preceding patches, make the function static. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 537730b22c7a..f9328e8410f5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8075,7 +8075,7 @@ int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) return rif->dev->ifindex; } -const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) +static const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) { return rif->dev; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index b941e781e476..5ff443f27136 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -93,7 +93,6 @@ u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct 
mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif); u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); -const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); bool mlxsw_sp_rif_has_dev(const struct mlxsw_sp_rif *rif); bool mlxsw_sp_rif_dev_is(const struct mlxsw_sp_rif *rif, const struct net_device *dev); -- cgit v1.2.3 From 3a2cb45ca0ccb5dab9b701f50cfd981f8dfd1673 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 12 Jun 2023 09:22:22 +0200 Subject: net: mlxsw: i2c: Switch back to use struct i2c_driver's .probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit b8a1a4cd5a98 ("i2c: Provide a temporary .probe_new() call-back type"), all drivers being converted to .probe_new() and then commit 03c835f498b5 ("i2c: Switch .probe() to not take an id parameter") convert back to (the new) .probe() to be able to eventually drop .probe_new() from struct i2c_driver. Signed-off-by: Uwe Kleine-König Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlxsw/i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 2c586c2308ae..41298835a11e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -751,7 +751,7 @@ static void mlxsw_i2c_remove(struct i2c_client *client) int mlxsw_i2c_driver_register(struct i2c_driver *i2c_driver) { - i2c_driver->probe_new = mlxsw_i2c_probe; + i2c_driver->probe = mlxsw_i2c_probe; i2c_driver->remove = mlxsw_i2c_remove; return i2c_add_driver(i2c_driver); } -- cgit v1.2.3 From e43516f5978d11d36511ce63d31d1da4db916510 Mon Sep 17 00:00:00 2001 From: Muhammad Husaini Zulkifli Date: Mon, 15 May 2023 23:49:36 +0800 Subject: igc: Clean the TX buffer and TX descriptor ring There could be a race condition during link down where an interrupt is generated and igc_clean_tx_irq() is called to perform the TX completion. Properly clear the TX buffer/descriptor ring and disable the TX Queue ring in igc_free_tx_resources() to avoid that. 
Kernel trace: [ 108.237177] Hardware name: Intel Corporation Tiger Lake Client Platform/TigerLake U DDR4 SODIMM RVP, BIOS TGLIFUI1.R00.4204.A00.2105270302 05/27/2021 [ 108.237178] RIP: 0010:refcount_warn_saturate+0x55/0x110 [ 108.242143] RSP: 0018:ffff9e7980003db0 EFLAGS: 00010286 [ 108.245555] Code: 84 bc 00 00 00 c3 cc cc cc cc 85 f6 74 46 80 3d 20 8c 4d 01 00 75 ee 48 c7 c7 88 f4 03 ab c6 05 10 8c 4d 01 01 e8 0b 10 96 ff <0f> 0b c3 cc cc cc cc 80 3d fc 8b 4d 01 00 75 cb 48 c7 c7 b0 f4 03 [ 108.250434] [ 108.250434] RSP: 0018:ffff9e798125f910 EFLAGS: 00010286 [ 108.254358] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 108.259325] [ 108.259325] RAX: 0000000000000000 RBX: ffff8ddb935b8000 RCX: 0000000000000027 [ 108.261868] RDX: ffff8de250a28800 RSI: ffff8de250a1c580 RDI: ffff8de250a1c580 [ 108.265538] RDX: 0000000000000027 RSI: 0000000000000002 RDI: ffff8de250a9c588 [ 108.265539] RBP: ffff8ddb935b8000 R08: ffffffffab2655a0 R09: ffff9e798125f898 [ 108.267914] RBP: ffff8ddb8a5b8d80 R08: 0000005648eba354 R09: 0000000000000000 [ 108.270196] R10: 0000000000000001 R11: 000000002d2d2d2d R12: ffff9e798125f948 [ 108.270197] R13: ffff9e798125fa1c R14: ffff8ddb8a5b8d80 R15: 7fffffffffffffff [ 108.273001] R10: 000000002d2d2d2d R11: 000000002d2d2d2d R12: ffff8ddb8a5b8ed4 [ 108.276410] FS: 00007f605851b740(0000) GS:ffff8de250a80000(0000) knlGS:0000000000000000 [ 108.280597] R13: 00000000000002ac R14: 00000000ffffff99 R15: ffff8ddb92561b80 [ 108.282966] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 108.282967] CR2: 00007f053c039248 CR3: 0000000185850003 CR4: 0000000000f70ee0 [ 108.286206] FS: 0000000000000000(0000) GS:ffff8de250a00000(0000) knlGS:0000000000000000 [ 108.289701] PKRU: 55555554 [ 108.289702] Call Trace: [ 108.289704] [ 108.293977] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 108.297562] sock_alloc_send_pskb+0x20c/0x240 [ 108.301494] CR2: 00007f053c03a168 CR3: 0000000184394002 CR4: 0000000000f70ef0 [ 108.301495] PKRU: 55555554 [ 
108.306464] __ip_append_data.isra.0+0x96f/0x1040 [ 108.309441] Call Trace: [ 108.309443] ? __pfx_ip_generic_getfrag+0x10/0x10 [ 108.314927] [ 108.314928] sock_wfree+0x1c7/0x1d0 [ 108.318078] ? __pfx_ip_generic_getfrag+0x10/0x10 [ 108.320276] skb_release_head_state+0x32/0x90 [ 108.324812] ip_make_skb+0xf6/0x130 [ 108.327188] skb_release_all+0x16/0x40 [ 108.330775] ? udp_sendmsg+0x9f3/0xcb0 [ 108.332626] napi_consume_skb+0x48/0xf0 [ 108.334134] ? xfrm_lookup_route+0x23/0xb0 [ 108.344285] igc_poll+0x787/0x1620 [igc] [ 108.346659] udp_sendmsg+0x9f3/0xcb0 [ 108.360010] ? ttwu_do_activate+0x40/0x220 [ 108.365237] ? __pfx_ip_generic_getfrag+0x10/0x10 [ 108.366744] ? try_to_wake_up+0x289/0x5e0 [ 108.376987] ? sock_sendmsg+0x81/0x90 [ 108.395698] ? __pfx_process_timeout+0x10/0x10 [ 108.395701] sock_sendmsg+0x81/0x90 [ 108.409052] __napi_poll+0x29/0x1c0 [ 108.414279] ____sys_sendmsg+0x284/0x310 [ 108.419507] net_rx_action+0x257/0x2d0 [ 108.438216] ___sys_sendmsg+0x7c/0xc0 [ 108.439723] __do_softirq+0xc1/0x2a8 [ 108.444950] ? finish_task_switch+0xb4/0x2f0 [ 108.452077] irq_exit_rcu+0xa9/0xd0 [ 108.453584] ? __schedule+0x372/0xd00 [ 108.460713] common_interrupt+0x84/0xa0 [ 108.467840] ? clockevents_program_event+0x95/0x100 [ 108.474968] [ 108.482096] ? do_nanosleep+0x88/0x130 [ 108.489224] [ 108.489225] asm_common_interrupt+0x26/0x40 [ 108.496353] ? 
__rseq_handle_notify_resume+0xa9/0x4f0 [ 108.503478] RIP: 0010:cpu_idle_poll+0x2c/0x100 [ 108.510607] __sys_sendmsg+0x5d/0xb0 [ 108.518687] Code: 05 e1 d9 c8 00 65 8b 15 de 64 85 55 85 c0 7f 57 e8 b9 ef ff ff fb 65 48 8b 1c 25 00 cc 02 00 48 8b 03 a8 08 74 0b eb 1c f3 90 <48> 8b 03 a8 08 75 13 8b 05 77 63 cd 00 85 c0 75 ed e8 ce ec ff ff [ 108.525817] do_syscall_64+0x44/0xa0 [ 108.531563] RSP: 0018:ffffffffab203e70 EFLAGS: 00000202 [ 108.538693] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 108.546775] [ 108.546777] RIP: 0033:0x7f605862b7f7 [ 108.549495] RAX: 0000000000000001 RBX: ffffffffab20c940 RCX: 000000000000003b [ 108.551955] Code: 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [ 108.554068] RDX: 4000000000000000 RSI: 000000002da97f6a RDI: 00000000002b8ff4 [ 108.559816] RSP: 002b:00007ffc99264058 EFLAGS: 00000246 [ 108.564178] RBP: 0000000000000000 R08: 00000000002b8ff4 R09: ffff8ddb01554c80 [ 108.571302] ORIG_RAX: 000000000000002e [ 108.571303] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f605862b7f7 [ 108.574023] R10: 000000000000015b R11: 000000000000000f R12: ffffffffab20c940 [ 108.574024] R13: 0000000000000000 R14: ffff8de26fbeef40 R15: ffffffffab20c940 [ 108.578727] RDX: 0000000000000000 RSI: 00007ffc992640a0 RDI: 0000000000000003 [ 108.578728] RBP: 00007ffc99264110 R08: 0000000000000000 R09: 175f48ad1c3a9c00 [ 108.581187] do_idle+0x62/0x230 [ 108.585890] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffc992642d8 [ 108.585891] R13: 00005577814ab2ba R14: 00005577814addf0 R15: 00007f605876d000 [ 108.587920] cpu_startup_entry+0x1d/0x20 [ 108.591422] [ 108.596127] rest_init+0xc5/0xd0 [ 108.600490] ---[ end trace 0000000000000000 ]--- Test Setup: DUT: - Change mac address on DUT Side. Ensure NIC not having same MAC Address - Running udp_tai on DUT side. 
Keep udp_tai running throughout the test Example: ./udp_tai -i enp170s0 -P 100000 -p 90 -c 1 -t 0 -u 30004 Host: - Perform link up/down every 5 seconds. Result: Kernel panic will happen on DUT Side. Fixes: 13b5b7fd6a4a ("igc: Add support for Tx/Rx rings") Signed-off-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Reviewed-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 1c4676882082..f986e88be5c1 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -254,6 +254,13 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) /* reset BQL for queue */ netdev_tx_reset_queue(txring_txq(tx_ring)); + /* Zero out the buffer ring */ + memset(tx_ring->tx_buffer_info, 0, + sizeof(*tx_ring->tx_buffer_info) * tx_ring->count); + + /* Zero out the descriptor ring */ + memset(tx_ring->desc, 0, tx_ring->size); + /* reset next_to_use and next_to_clean */ tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; @@ -267,7 +274,7 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) */ void igc_free_tx_resources(struct igc_ring *tx_ring) { - igc_clean_tx_ring(tx_ring); + igc_disable_tx_ring(tx_ring); vfree(tx_ring->tx_buffer_info); tx_ring->tx_buffer_info = NULL; -- cgit v1.2.3 From c080fe262f9e73a00934b70c16b1479cf40cd2bd Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Mon, 17 Apr 2023 15:18:39 -0700 Subject: igc: Fix possible system crash when loading module Guarantee that when probe() is run again, PTM and PCI busmaster will be in the same state as they would be if the driver had never been loaded. Avoid an i225/i226 hardware issue where PTM requests can be made even though PCI bus mastering is not enabled. These unexpected PTM requests can crash some systems. 
So, "force" disable PTM and busmastering before removing the driver, so they can be re-enabled in the right order during probe(). This is more like a workaround and should be applicable for i225 and i226, in any platform. Fixes: 1b5d73fb8624 ("igc: Enable PCIe PTM") Signed-off-by: Vinicius Costa Gomes Reviewed-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index f986e88be5c1..fa764190f270 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6730,6 +6730,9 @@ static void igc_remove(struct pci_dev *pdev) igc_ptp_stop(adapter); + pci_disable_ptm(pdev); + pci_clear_master(pdev); + set_bit(__IGC_DOWN, &adapter->state); del_timer_sync(&adapter->watchdog_timer); -- cgit v1.2.3 From 48a821fd58837800750ec1b3962f0f799630a844 Mon Sep 17 00:00:00 2001 From: Aleksandr Loktionov Date: Tue, 25 Apr 2023 17:44:14 +0200 Subject: igb: fix nvm.ops.read() error handling Add error handling into igb_set_eeprom() function, in case nvm.ops.read() fails just quit with error code asap. 
Fixes: 9d5c824399de ("igb: PCI-Express 82575 Gigabit Ethernet driver") Signed-off-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 7d60da1b7bf4..319ed601eaa1 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -822,6 +822,8 @@ static int igb_set_eeprom(struct net_device *netdev, */ ret_val = hw->nvm.ops.read(hw, last_word, 1, &eeprom_buff[last_word - first_word]); + if (ret_val) + goto out; } /* Device's eeprom is always little-endian, word addressable */ @@ -841,6 +843,7 @@ static int igb_set_eeprom(struct net_device *netdev, hw->nvm.ops.update(hw); igb_set_fw_version(adapter); +out: kfree(eeprom_buff); return ret_val; } -- cgit v1.2.3 From 09de114c770fef0c8c586b4dd59431226d873387 Mon Sep 17 00:00:00 2001 From: Naveen Mamindlapalli Date: Mon, 12 Jun 2023 11:34:19 +0530 Subject: octeontx2-af: Add devlink option to adjust mcam high prio zone entries The NPC MCAM entries are currently divided into three priority zones in AF driver: high, mid, and low. The high priority zone and low priority zone take up 1/8th (each) of the available MCAM entries, and remaining going to the mid priority zone. The current allocation scheme may not meet certain requirements, such as when a requester needs more high priority zone entries than are reserved. This patch adds a devlink configurable option to increase the number of high priority zone entries that can be allocated by requester. The max number of entries that can be reserved for high priority usage is 100% of available MCAM entries. 
Usage: 1) Change high priority zone percentage to 75%: devlink -p dev param set pci/0002:01:00.0 name npc_mcam_high_zone_percent \ value 75 cmode runtime 2) Read high priority zone percentage: devlink -p dev param show pci/0002:01:00.0 name npc_mcam_high_zone_percent The devlink set configuration is only permitted when no MCAM entries are assigned, i.e., all MCAM entries are free, indicating that no PF/VF driver is loaded. So user must unload/unbind PF/VF driver/devices before modifying the high priority zone percentage. Signed-off-by: Naveen Mamindlapalli Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/af/rvu_devlink.c | 68 ++++++++++++++++++++++ 1 file changed, 68 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index e4407f09c9d3..548549604c49 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1438,6 +1438,7 @@ enum rvu_af_dl_param_id { RVU_AF_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU, RVU_AF_DEVLINK_PARAM_ID_NPC_EXACT_FEATURE_DISABLE, + RVU_AF_DEVLINK_PARAM_ID_NPC_MCAM_ZONE_PERCENT, }; static int rvu_af_npc_exact_feature_get(struct devlink *devlink, u32 id, @@ -1494,6 +1495,67 @@ static int rvu_af_npc_exact_feature_validate(struct devlink *devlink, u32 id, return -EFAULT; } +static int rvu_af_dl_npc_mcam_high_zone_percent_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + struct npc_mcam *mcam; + u32 percent; + + mcam = &rvu->hw->mcam; + percent = (mcam->hprio_count * 100) / mcam->bmap_entries; + ctx->val.vu8 = (u8)percent; + + return 0; +} + +static int rvu_af_dl_npc_mcam_high_zone_percent_set(struct devlink *devlink, u32 id, + struct 
devlink_param_gset_ctx *ctx) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + struct npc_mcam *mcam; + u32 percent; + + percent = ctx->val.vu8; + mcam = &rvu->hw->mcam; + mcam->hprio_count = (mcam->bmap_entries * percent) / 100; + mcam->hprio_end = mcam->hprio_count; + mcam->lprio_count = (mcam->bmap_entries - mcam->hprio_count) / 2; + mcam->lprio_start = mcam->bmap_entries - mcam->lprio_count; + + return 0; +} + +static int rvu_af_dl_npc_mcam_high_zone_percent_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + struct npc_mcam *mcam; + + /* The percent of high prio zone must range from 12% to 100% of unreserved mcam space */ + if (val.vu8 < 12 || val.vu8 > 100) { + NL_SET_ERR_MSG_MOD(extack, + "mcam high zone percent must be between 12% to 100%"); + return -EINVAL; + } + + /* Do not allow user to modify the high priority zone entries while mcam entries + * have already been assigned. 
+ */ + mcam = &rvu->hw->mcam; + if (mcam->bmap_fcnt < mcam->bmap_entries) { + NL_SET_ERR_MSG_MOD(extack, + "mcam entries have already been assigned, can't resize"); + return -EPERM; + } + + return 0; +} + static const struct devlink_param rvu_af_dl_params[] = { DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU, "dwrr_mtu", DEVLINK_PARAM_TYPE_U32, @@ -1509,6 +1571,12 @@ static const struct devlink_param rvu_af_dl_param_exact_match[] = { rvu_af_npc_exact_feature_get, rvu_af_npc_exact_feature_disable, rvu_af_npc_exact_feature_validate), + DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_NPC_MCAM_ZONE_PERCENT, + "npc_mcam_high_zone_percent", DEVLINK_PARAM_TYPE_U8, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + rvu_af_dl_npc_mcam_high_zone_percent_get, + rvu_af_dl_npc_mcam_high_zone_percent_set, + rvu_af_dl_npc_mcam_high_zone_percent_validate), }; /* Devlink switch mode */ -- cgit v1.2.3 From 79bc788c038c9c87224d41ba6bbab20b6bf1a141 Mon Sep 17 00:00:00 2001 From: Kiran Kumar K Date: Mon, 12 Jun 2023 11:34:20 +0530 Subject: octeontx2-af: extend RSS supported offload types Add support to select L3 SRC or DST only, L4 SRC or DST only for RSS calculation. AF consumer may have requirement as we can select only SRC or DST data for RSS calculation in L3, L4 layers. With this requirement there will be following combinations, IPV[4,6]_SRC_ONLY, IPV[4,6]_DST_ONLY, [TCP,UDP,SCTP]_SRC_ONLY, [TCP,UDP,SCTP]_DST_ONLY. So, instead of creating a bit for each combination, we are using upper 4 bits (31:28) in the flow_key_cfg to represent the SRC, DST selection. 31 => L3_SRC, 30 => L3_DST, 29 => L4_SRC, 28 => L4_DST. These won't be part of flow_cfg, so that we don't need to change the existing ABI. Signed-off-by: Kiran Kumar K Signed-off-by: Geetha sowjanya Signed-off-by: Naveen Mamindlapalli Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 6 +++ .../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 57 ++++++++++++++++++++++ 2 files changed, 63 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 6389ed83637d..671fcf86ed87 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1080,6 +1080,8 @@ struct nix_vtag_config_rsp { */ }; +#define NIX_FLOW_KEY_TYPE_L3_L4_MASK (~(0xf << 28)) + struct nix_rss_flowkey_cfg { struct mbox_msghdr hdr; int mcam_index; /* MCAM entry index to modify */ @@ -1105,6 +1107,10 @@ struct nix_rss_flowkey_cfg { #define NIX_FLOW_KEY_TYPE_IPV4_PROTO BIT(21) #define NIX_FLOW_KEY_TYPE_AH BIT(22) #define NIX_FLOW_KEY_TYPE_ESP BIT(23) +#define NIX_FLOW_KEY_TYPE_L4_DST_ONLY BIT(28) +#define NIX_FLOW_KEY_TYPE_L4_SRC_ONLY BIT(29) +#define NIX_FLOW_KEY_TYPE_L3_DST_ONLY BIT(30) +#define NIX_FLOW_KEY_TYPE_L3_SRC_ONLY BIT(31) u32 flowkey_cfg; /* Flowkey types selected */ u8 group; /* RSS context or group */ }; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 79ed7af0b0a4..ee52b86c061f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -3353,6 +3353,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) struct nix_rx_flowkey_alg *field; struct nix_rx_flowkey_alg tmp; u32 key_type, valid_key; + u32 l3_l4_src_dst; int l4_key_offset = 0; if (!alg) @@ -3380,6 +3381,15 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) * group_member - Enabled when protocol is part of a group. 
*/ + /* Last 4 bits (31:28) are reserved to specify SRC, DST + * selection for L3, L4 i.e IPV[4,6]_SRC, IPV[4,6]_DST, + * [TCP,UDP,SCTP]_SRC, [TCP,UDP,SCTP]_DST + * 31 => L3_SRC, 30 => L3_DST, 29 => L4_SRC, 28 => L4_DST + */ + l3_l4_src_dst = flow_cfg; + /* Reset these 4 bits, so that these won't be part of key */ + flow_cfg &= NIX_FLOW_KEY_TYPE_L3_L4_MASK; + keyoff_marker = 0; max_key_off = 0; group_member = 0; nr_field = 0; key_off = 0; field_marker = 1; field = &tmp; max_bit_pos = fls(flow_cfg); @@ -3417,6 +3427,22 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) } field->hdr_offset = 12; /* SIP offset */ field->bytesm1 = 7; /* SIP + DIP, 8 bytes */ + + /* Only SIP */ + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L3_SRC_ONLY) + field->bytesm1 = 3; /* SIP, 4 bytes */ + + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L3_DST_ONLY) { + /* Both SIP + DIP */ + if (field->bytesm1 == 3) { + field->bytesm1 = 7; /* SIP + DIP, 8B */ + } else { + /* Only DIP */ + field->hdr_offset = 16; /* DIP off */ + field->bytesm1 = 3; /* DIP, 4 bytes */ + } + } + field->ltype_mask = 0xF; /* Match only IPv4 */ keyoff_marker = false; break; @@ -3430,6 +3456,22 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) } field->hdr_offset = 8; /* SIP offset */ field->bytesm1 = 31; /* SIP + DIP, 32 bytes */ + + /* Only SIP */ + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L3_SRC_ONLY) + field->bytesm1 = 15; /* SIP, 16 bytes */ + + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L3_DST_ONLY) { + /* Both SIP + DIP */ + if (field->bytesm1 == 15) { + /* SIP + DIP, 32 bytes */ + field->bytesm1 = 31; + } else { + /* Only DIP */ + field->hdr_offset = 24; /* DIP off */ + field->bytesm1 = 15; /* DIP,16 bytes */ + } + } field->ltype_mask = 0xF; /* Match only IPv6 */ break; case NIX_FLOW_KEY_TYPE_TCP: @@ -3445,6 +3487,21 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg) field->lid = NPC_LID_LH; field->bytesm1 = 3; /* Sport + Dport, 4 bytes */ + if 
(l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L4_SRC_ONLY) + field->bytesm1 = 1; /* SRC, 2 bytes */ + + if (l3_l4_src_dst & NIX_FLOW_KEY_TYPE_L4_DST_ONLY) { + /* Both SRC + DST */ + if (field->bytesm1 == 1) { + /* SRC + DST, 4 bytes */ + field->bytesm1 = 3; + } else { + /* Only DIP */ + field->hdr_offset = 2; /* DST off */ + field->bytesm1 = 1; /* DST, 2 bytes */ + } + } + /* Enum values for NPC_LID_LD and NPC_LID_LG are same, * so no need to change the ltype_match, just change * the lid for inner protocols -- cgit v1.2.3 From bbba125eade7916277ef694d562cc95a39e86487 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Mon, 12 Jun 2023 11:34:21 +0530 Subject: octeontx2-af: cn10k: Set NIX DWRR MTU for CN10KB silicon The DWRR MTU config added for SDP and RPM/LBK links on CN10K silicon is further extended on the CN10KB silicon variant and made configurable. Now there are 4 DWRR MTU configs to choose from while setting the transmit scheduler's RR_WEIGHT. Here we are reserving one config for each of RPM, SDP and LBK. NIXX_AF_DWRR_MTUX(0) ---> RPM NIXX_AF_DWRR_MTUX(1) ---> SDP NIXX_AF_DWRR_MTUX(2) ---> LBK PF/VF drivers can choose the DWRR_MTU to be used by setting SMQX_CFG[pkt_link_type] to one of the above. TLx_SCHEDULE[RR_WEIGHT] is to be configured as 'quantum / 2^DWRR_MTUX[MTU]'. DWRR_MTU of each link is exposed to PF/VF drivers via mailbox for RR_WEIGHT calculation. Signed-off-by: Sunil Goutham Signed-off-by: Geetha sowjanya Signed-off-by: Naveen Mamindlapalli Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/af/common.h | 7 ++++ drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 4 +- drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 2 + .../ethernet/marvell/octeontx2/af/rvu_devlink.c | 6 ++- .../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 44 ++++++++++++++++++---- .../net/ethernet/marvell/octeontx2/af/rvu_reg.h | 3 +- .../ethernet/marvell/octeontx2/nic/otx2_common.c | 18 ++++++++- .../ethernet/marvell/octeontx2/nic/otx2_common.h | 1 + 8 files changed, 73 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h index f5bf719a6ccf..2436c1ff9ba4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h @@ -145,6 +145,13 @@ enum nix_scheduler { #define TXSCH_TL1_DFLT_RR_PRIO (0x7ull) #define CN10K_MAX_DWRR_WEIGHT 16384 /* Weight is 14bit on CN10K */ +/* Don't change the order as on CN10K (except CN10KB) + * SMQX_CFG[SDP] value should be 1 for SDP flows. 
+ */ +#define SMQ_LINK_TYPE_RPM 0 +#define SMQ_LINK_TYPE_SDP 1 +#define SMQ_LINK_TYPE_LBK 2 + /* Min/Max packet sizes, excluding FCS */ #define NIC_HW_MIN_FRS 40 #define NIC_HW_MAX_FRS 9212 diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 671fcf86ed87..1794ef0f9ae0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1245,7 +1245,9 @@ struct nix_hw_info { u16 min_mtu; u32 rpm_dwrr_mtu; u32 sdp_dwrr_mtu; - u64 rsvd[16]; /* Add reserved fields for future expansion */ + u32 lbk_dwrr_mtu; + u32 rsvd32[1]; + u64 rsvd[15]; /* Add reserved fields for future expansion */ }; struct nix_bandprof_alloc_req { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index d655bf04a483..12e644bc239a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -346,6 +346,7 @@ struct hw_cap { bool per_pf_mbox_regs; /* PF mbox specified in per PF registers ? */ bool programmable_chans; /* Channels programmable ? */ bool ipolicer; + bool nix_multiple_dwrr_mtu; /* Multiple DWRR_MTU to choose from */ bool npc_hash_extract; /* Hash extract enabled ? */ bool npc_exact_match_enabled; /* Exact match supported ? 
*/ }; @@ -802,6 +803,7 @@ int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw, struct nix_cn10k_aq_enq_rsp *aq_rsp, u16 pcifunc, u8 ctype, u32 qidx); int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc); +int nix_get_dwrr_mtu_reg(struct rvu_hwinfo *hw, int smq_link_type); u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu); u32 convert_bytes_to_dwrr_mtu(u32 bytes); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 548549604c49..41df5ac23f92 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1413,7 +1413,8 @@ static int rvu_af_dl_dwrr_mtu_set(struct devlink *devlink, u32 id, u64 dwrr_mtu; dwrr_mtu = convert_bytes_to_dwrr_mtu(ctx->val.vu32); - rvu_write64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU, dwrr_mtu); + rvu_write64(rvu, BLKADDR_NIX0, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_RPM), dwrr_mtu); return 0; } @@ -1428,7 +1429,8 @@ static int rvu_af_dl_dwrr_mtu_get(struct devlink *devlink, u32 id, if (!rvu->hw->cap.nix_common_dwrr_mtu) return -EOPNOTSUPP; - dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU); + dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_RPM)); ctx->val.vu32 = convert_dwrr_mtu_to_bytes(dwrr_mtu); return 0; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index ee52b86c061f..f069d13dcb54 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -191,6 +191,18 @@ struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr) return NULL; } +int nix_get_dwrr_mtu_reg(struct rvu_hwinfo *hw, int smq_link_type) +{ + if (hw->cap.nix_multiple_dwrr_mtu) + return NIX_AF_DWRR_MTUX(smq_link_type); + + if (smq_link_type == SMQ_LINK_TYPE_SDP) + return NIX_AF_DWRR_SDP_MTU; + + /* Here it's same reg for RPM and 
LBK */ + return NIX_AF_DWRR_RPM_MTU; +} + u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu) { dwrr_mtu &= 0x1FULL; @@ -3191,10 +3203,16 @@ static int nix_setup_txschq(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr) } /* Setup a default value of 8192 as DWRR MTU */ - if (rvu->hw->cap.nix_common_dwrr_mtu) { - rvu_write64(rvu, blkaddr, NIX_AF_DWRR_RPM_MTU, + if (rvu->hw->cap.nix_common_dwrr_mtu || + rvu->hw->cap.nix_multiple_dwrr_mtu) { + rvu_write64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_RPM), convert_bytes_to_dwrr_mtu(8192)); - rvu_write64(rvu, blkaddr, NIX_AF_DWRR_SDP_MTU, + rvu_write64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_LBK), + convert_bytes_to_dwrr_mtu(8192)); + rvu_write64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_SDP), convert_bytes_to_dwrr_mtu(8192)); } @@ -3292,19 +3310,28 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req, rsp->min_mtu = NIC_HW_MIN_FRS; - if (!rvu->hw->cap.nix_common_dwrr_mtu) { + if (!rvu->hw->cap.nix_common_dwrr_mtu && + !rvu->hw->cap.nix_multiple_dwrr_mtu) { /* Return '1' on OTx2 */ rsp->rpm_dwrr_mtu = 1; rsp->sdp_dwrr_mtu = 1; + rsp->lbk_dwrr_mtu = 1; return 0; } - dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU); + /* Return DWRR_MTU for TLx_SCHEDULE[RR_WEIGHT] config */ + dwrr_mtu = rvu_read64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_RPM)); rsp->rpm_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu); - dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_SDP_MTU); + dwrr_mtu = rvu_read64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_SDP)); rsp->sdp_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu); + dwrr_mtu = rvu_read64(rvu, blkaddr, + nix_get_dwrr_mtu_reg(rvu->hw, SMQ_LINK_TYPE_LBK)); + rsp->lbk_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu); + return 0; } @@ -4371,8 +4398,11 @@ static void rvu_nix_setup_capabilities(struct rvu *rvu, int blkaddr) * Check if HW uses a common MTU for all DWRR quantum configs. 
* On OcteonTx2 this register field is '0'. */ - if (((hw_const >> 56) & 0x10) == 0x10) + if ((((hw_const >> 56) & 0x10) == 0x10) && !(hw_const & BIT_ULL(61))) hw->cap.nix_common_dwrr_mtu = true; + + if (hw_const & BIT_ULL(61)) + hw->cap.nix_multiple_dwrr_mtu = true; } static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 7007f0b8e659..b42e631e52d0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -272,7 +272,8 @@ #define NIX_AF_DEBUG_NPC_RESP_DATAX(a) (0x680 | (a) << 3) #define NIX_AF_SMQX_CFG(a) (0x700 | (a) << 16) #define NIX_AF_SQM_DBG_CTL_STATUS (0x750) -#define NIX_AF_DWRR_SDP_MTU (0x790) +#define NIX_AF_DWRR_SDP_MTU (0x790) /* All CN10K except CN10KB */ +#define NIX_AF_DWRR_MTUX(a) (0x790 | (a) << 16) /* Only for CN10KB */ #define NIX_AF_DWRR_RPM_MTU (0x7A0) #define NIX_AF_PSE_CHANNEL_LEVEL (0x800) #define NIX_AF_PSE_SHAPER_CFG (0x810) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index a79cb680bb23..77c8f650f7ac 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "otx2_reg.h" #include "otx2_common.h" @@ -642,6 +643,10 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool txschq_for req->regval[0] = ((u64)pfvf->tx_max_pktlen << 8) | OTX2_MIN_MTU; req->regval[0] |= (0x20ULL << 51) | (0x80ULL << 39) | (0x2ULL << 36); + /* Set link type for DWRR MTU selection on CN10K silicons */ + if (!is_dev_otx2(pfvf->pdev)) + req->regval[0] |= FIELD_PREP(GENMASK_ULL(58, 57), + (u64)hw->smq_link_type); req->num_regs++; /* MDQ config */ parent = schq_list[NIX_TXSCH_LVL_TL4][prio]; @@ -1824,6 +1829,17 @@ void 
otx2_set_cints_affinity(struct otx2_nic *pfvf) } } +static u32 get_dwrr_mtu(struct otx2_nic *pfvf, struct nix_hw_info *hw) +{ + if (is_otx2_lbkvf(pfvf->pdev)) { + pfvf->hw.smq_link_type = SMQ_LINK_TYPE_LBK; + return hw->lbk_dwrr_mtu; + } + + pfvf->hw.smq_link_type = SMQ_LINK_TYPE_RPM; + return hw->rpm_dwrr_mtu; +} + u16 otx2_get_max_mtu(struct otx2_nic *pfvf) { struct nix_hw_info *rsp; @@ -1853,7 +1869,7 @@ u16 otx2_get_max_mtu(struct otx2_nic *pfvf) max_mtu = rsp->max_mtu - 8 - OTX2_ETH_HLEN; /* Also save DWRR MTU, needed for DWRR weight calculation */ - pfvf->hw.dwrr_mtu = rsp->rpm_dwrr_mtu; + pfvf->hw.dwrr_mtu = get_dwrr_mtu(pfvf, rsp); if (!pfvf->hw.dwrr_mtu) pfvf->hw.dwrr_mtu = 1; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index a9ed15d1793a..ba8091131ec0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -227,6 +227,7 @@ struct otx2_hw { u16 txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC]; u16 matchall_ipolicer; u32 dwrr_mtu; + u8 smq_link_type; /* HW settings, coalescing etc */ u16 rx_chan_base; -- cgit v1.2.3 From b6a072a153277dc590703ada2fd1f53ecb7f8cb9 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Mon, 12 Jun 2023 11:34:22 +0530 Subject: octeontx2-af: Enable LBK links only when switch mode is on. Currently, all the TL3_TL2 nodes are being configured to enable switch LBK channel 63 in them. Instead enable them only when switch mode is enabled. Signed-off-by: Subbaraya Sundeep Signed-off-by: Naveen Mamindlapalli Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 2 ++ drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 11 +++++------ drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c | 18 ++++++++++++++++++ 3 files changed, 25 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 12e644bc239a..c07d826e36d1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -806,6 +806,8 @@ int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc); int nix_get_dwrr_mtu_reg(struct rvu_hwinfo *hw, int smq_link_type); u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu); u32 convert_bytes_to_dwrr_mtu(u32 bytes); +void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, u16 pcifunc, + struct nix_txsch *txsch, bool enable); /* NPC APIs */ void rvu_npc_freemem(struct rvu *rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index f069d13dcb54..8a89cc5e5e40 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2460,17 +2460,19 @@ static int nix_txschq_cfg_read(struct rvu *rvu, struct nix_hw *nix_hw, return 0; } -static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, - u16 pcifunc, struct nix_txsch *txsch) +void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, u16 pcifunc, + struct nix_txsch *txsch, bool enable) { struct rvu_hwinfo *hw = rvu->hw; int lbk_link_start, lbk_links; u8 pf = rvu_get_pf(pcifunc); int schq; + u64 cfg; if (!is_pf_cgxmapped(rvu, pf)) return; + cfg = enable ? 
(BIT_ULL(12) | RVU_SWITCH_LBK_CHAN) : 0; lbk_link_start = hw->cgx_links; for (schq = 0; schq < txsch->schq.max; schq++) { @@ -2484,8 +2486,7 @@ static void rvu_nix_tx_tl2_cfg(struct rvu *rvu, int blkaddr, rvu_write64(rvu, blkaddr, NIX_AF_TL3_TL2X_LINKX_CFG(schq, lbk_link_start + - lbk_links), - BIT_ULL(12) | RVU_SWITCH_LBK_CHAN); + lbk_links), cfg); } } @@ -2591,8 +2592,6 @@ int rvu_mbox_handler_nix_txschq_cfg(struct rvu *rvu, rvu_write64(rvu, blkaddr, reg, regval); } - rvu_nix_tx_tl2_cfg(rvu, blkaddr, pcifunc, - &nix_hw->txsch[NIX_TXSCH_LVL_TL2]); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c index 3392487f6b47..592b317f4637 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c @@ -8,6 +8,17 @@ #include #include "rvu.h" +static void rvu_switch_enable_lbk_link(struct rvu *rvu, u16 pcifunc, bool enable) +{ + struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); + struct nix_hw *nix_hw; + + nix_hw = get_nix_hw(rvu->hw, pfvf->nix_blkaddr); + /* Enable LBK links with channel 63 for TX MCAM rule */ + rvu_nix_tx_tl2_cfg(rvu, pfvf->nix_blkaddr, pcifunc, + &nix_hw->txsch[NIX_TXSCH_LVL_TL2], enable); +} + static int rvu_switch_install_rx_rule(struct rvu *rvu, u16 pcifunc, u16 chan_mask) { @@ -52,6 +63,8 @@ static int rvu_switch_install_tx_rule(struct rvu *rvu, u16 pcifunc, u16 entry) if (!test_bit(NIXLF_INITIALIZED, &pfvf->flags)) return 0; + rvu_switch_enable_lbk_link(rvu, pcifunc, true); + lbkid = pfvf->nix_blkaddr == BLKADDR_NIX0 ? 
0 : 1; ether_addr_copy(req.packet.dmac, pfvf->mac_addr); eth_broadcast_addr((u8 *)&req.mask.dmac); @@ -218,6 +231,9 @@ void rvu_switch_disable(struct rvu *rvu) "Reverting RX rule for PF%d failed(%d)\n", pf, err); + /* Disable LBK link */ + rvu_switch_enable_lbk_link(rvu, pcifunc, false); + rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL); for (vf = 0; vf < numvfs; vf++) { pcifunc = pf << 10 | ((vf + 1) & 0x3FF); @@ -226,6 +242,8 @@ void rvu_switch_disable(struct rvu *rvu) dev_err(rvu->dev, "Reverting RX rule for PF%dVF%d failed(%d)\n", pf, vf, err); + + rvu_switch_enable_lbk_link(rvu, pcifunc, false); } } -- cgit v1.2.3 From 4ed6387a61fcc96f46859349b7e7696db9988ed6 Mon Sep 17 00:00:00 2001 From: Nithin Dabilpuram Date: Mon, 12 Jun 2023 11:34:23 +0530 Subject: octeontx2-af: add option to toggle DROP_RE enable in rx cfg Add option to toggle DROP_RE bit in rx cfg mbox. This helps in modifying the config runtime as opposed to setting available via nix_lf_alloc() mbox at NIX LF init time. Signed-off-by: Nithin Dabilpuram Signed-off-by: Jerin Jacob Kollanukkaran Signed-off-by: Sunil Kovvuri Goutham Signed-off-by: Naveen Mamindlapalli Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 1 + drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 1794ef0f9ae0..eba307eee2b2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1157,6 +1157,7 @@ struct nix_rx_cfg { struct mbox_msghdr hdr; #define NIX_RX_OL3_VERIFY BIT(0) #define NIX_RX_OL4_VERIFY BIT(1) +#define NIX_RX_DROP_RE BIT(2) u8 len_verify; /* Outer L3/L4 len check */ #define NIX_RX_CSUM_OL4_VERIFY BIT(0) u8 csum_verify; /* Outer L4 checksum verification */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 8a89cc5e5e40..23149036be77 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4196,6 +4196,11 @@ int rvu_mbox_handler_nix_set_rx_cfg(struct rvu *rvu, struct nix_rx_cfg *req, else cfg &= ~BIT_ULL(40); + if (req->len_verify & NIX_RX_DROP_RE) + cfg |= BIT_ULL(32); + else + cfg &= ~BIT_ULL(32); + if (req->csum_verify & BIT(0)) cfg |= BIT_ULL(37); else -- cgit v1.2.3 From e18aab0470d8f6259be82282ffb3fdcfeaeff6c3 Mon Sep 17 00:00:00 2001 From: Naveen Mamindlapalli Date: Mon, 12 Jun 2023 11:34:24 +0530 Subject: octeontx2-af: Set XOFF on other child transmit schedulers during SMQ flush When multiple transmit scheduler queues feed a TL1 transmit link, the SMQ flush initiated on a low priority queue might get stuck when a high priority queue fully subscribes the transmit link. This in turn affects interface teardown. To avoid this, temporarily XOFF all TL1's other immediate child transmit scheduler queues and also clear any rate limit configuration on all the scheduler queues in SMQ(flush) hierarchy. 
Signed-off-by: Naveen Mamindlapalli Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 16 +++ .../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 130 ++++++++++++++++++++- 2 files changed, 144 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index c07d826e36d1..b5a7ee63508c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -285,6 +285,22 @@ struct nix_mark_format { u32 *cfg; }; +/* smq(flush) to tl1 cir/pir info */ +struct nix_smq_tree_ctx { + u64 cir_off; + u64 cir_val; + u64 pir_off; + u64 pir_val; +}; + +/* smq flush context */ +struct nix_smq_flush_ctx { + int smq; + u16 tl1_schq; + u16 tl2_schq; + struct nix_smq_tree_ctx smq_tree_ctx[NIX_TXSCH_LVL_CNT]; +}; + struct npc_pkind { struct rsrc_bmap rsrc; u32 *pfchan_map; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 23149036be77..601ef269aa29 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2114,9 +2114,121 @@ exit: return rc; } +static void nix_smq_flush_fill_ctx(struct rvu *rvu, int blkaddr, int smq, + struct nix_smq_flush_ctx *smq_flush_ctx) +{ + struct nix_smq_tree_ctx *smq_tree_ctx; + u64 parent_off, regval; + u16 schq; + int lvl; + + smq_flush_ctx->smq = smq; + + schq = smq; + for (lvl = NIX_TXSCH_LVL_SMQ; lvl <= NIX_TXSCH_LVL_TL1; lvl++) { + smq_tree_ctx = &smq_flush_ctx->smq_tree_ctx[lvl]; + if (lvl == NIX_TXSCH_LVL_TL1) { + smq_flush_ctx->tl1_schq = schq; + smq_tree_ctx->cir_off = NIX_AF_TL1X_CIR(schq); + smq_tree_ctx->pir_off = 0; + smq_tree_ctx->pir_val = 0; + parent_off = 0; + } else if (lvl == NIX_TXSCH_LVL_TL2) { + smq_flush_ctx->tl2_schq = schq; + smq_tree_ctx->cir_off = NIX_AF_TL2X_CIR(schq); 
+ smq_tree_ctx->pir_off = NIX_AF_TL2X_PIR(schq); + parent_off = NIX_AF_TL2X_PARENT(schq); + } else if (lvl == NIX_TXSCH_LVL_TL3) { + smq_tree_ctx->cir_off = NIX_AF_TL3X_CIR(schq); + smq_tree_ctx->pir_off = NIX_AF_TL3X_PIR(schq); + parent_off = NIX_AF_TL3X_PARENT(schq); + } else if (lvl == NIX_TXSCH_LVL_TL4) { + smq_tree_ctx->cir_off = NIX_AF_TL4X_CIR(schq); + smq_tree_ctx->pir_off = NIX_AF_TL4X_PIR(schq); + parent_off = NIX_AF_TL4X_PARENT(schq); + } else if (lvl == NIX_TXSCH_LVL_MDQ) { + smq_tree_ctx->cir_off = NIX_AF_MDQX_CIR(schq); + smq_tree_ctx->pir_off = NIX_AF_MDQX_PIR(schq); + parent_off = NIX_AF_MDQX_PARENT(schq); + } + /* save cir/pir register values */ + smq_tree_ctx->cir_val = rvu_read64(rvu, blkaddr, smq_tree_ctx->cir_off); + if (smq_tree_ctx->pir_off) + smq_tree_ctx->pir_val = rvu_read64(rvu, blkaddr, smq_tree_ctx->pir_off); + + /* get parent txsch node */ + if (parent_off) { + regval = rvu_read64(rvu, blkaddr, parent_off); + schq = (regval >> 16) & 0x1FF; + } + } +} + +static void nix_smq_flush_enadis_xoff(struct rvu *rvu, int blkaddr, + struct nix_smq_flush_ctx *smq_flush_ctx, bool enable) +{ + struct nix_txsch *txsch; + struct nix_hw *nix_hw; + u64 regoff; + int tl2; + + nix_hw = get_nix_hw(rvu->hw, blkaddr); + if (!nix_hw) + return; + + /* loop through all TL2s with matching PF_FUNC */ + txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL2]; + for (tl2 = 0; tl2 < txsch->schq.max; tl2++) { + /* skip the smq(flush) TL2 */ + if (tl2 == smq_flush_ctx->tl2_schq) + continue; + /* skip unused TL2s */ + if (TXSCH_MAP_FLAGS(txsch->pfvf_map[tl2]) & NIX_TXSCHQ_FREE) + continue; + /* skip if PF_FUNC doesn't match */ + if ((TXSCH_MAP_FUNC(txsch->pfvf_map[tl2]) & ~RVU_PFVF_FUNC_MASK) != + (TXSCH_MAP_FUNC(txsch->pfvf_map[smq_flush_ctx->tl2_schq] & + ~RVU_PFVF_FUNC_MASK))) + continue; + /* enable/disable XOFF */ + regoff = NIX_AF_TL2X_SW_XOFF(tl2); + if (enable) + rvu_write64(rvu, blkaddr, regoff, 0x1); + else + rvu_write64(rvu, blkaddr, regoff, 0x0); + } +} + +static void 
nix_smq_flush_enadis_rate(struct rvu *rvu, int blkaddr, + struct nix_smq_flush_ctx *smq_flush_ctx, bool enable) +{ + u64 cir_off, pir_off, cir_val, pir_val; + struct nix_smq_tree_ctx *smq_tree_ctx; + int lvl; + + for (lvl = NIX_TXSCH_LVL_SMQ; lvl <= NIX_TXSCH_LVL_TL1; lvl++) { + smq_tree_ctx = &smq_flush_ctx->smq_tree_ctx[lvl]; + cir_off = smq_tree_ctx->cir_off; + cir_val = smq_tree_ctx->cir_val; + pir_off = smq_tree_ctx->pir_off; + pir_val = smq_tree_ctx->pir_val; + + if (enable) { + rvu_write64(rvu, blkaddr, cir_off, cir_val); + if (lvl != NIX_TXSCH_LVL_TL1) + rvu_write64(rvu, blkaddr, pir_off, pir_val); + } else { + rvu_write64(rvu, blkaddr, cir_off, 0x0); + if (lvl != NIX_TXSCH_LVL_TL1) + rvu_write64(rvu, blkaddr, pir_off, 0x0); + } + } +} + static int nix_smq_flush(struct rvu *rvu, int blkaddr, int smq, u16 pcifunc, int nixlf) { + struct nix_smq_flush_ctx *smq_flush_ctx; int pf = rvu_get_pf(pcifunc); u8 cgx_id = 0, lmac_id = 0; int err, restore_tx_en = 0; @@ -2136,6 +2248,14 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr, lmac_id, true); } + /* XOFF all TL2s whose parent TL1 matches SMQ tree TL1 */ + smq_flush_ctx = kzalloc(sizeof(*smq_flush_ctx), GFP_KERNEL); + if (!smq_flush_ctx) + return -ENOMEM; + nix_smq_flush_fill_ctx(rvu, blkaddr, smq, smq_flush_ctx); + nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, true); + nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, false); + cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq)); /* Do SMQ flush and set enqueue xoff */ cfg |= BIT_ULL(50) | BIT_ULL(49); @@ -2150,8 +2270,14 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr, err = rvu_poll_reg(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), BIT_ULL(49), true); if (err) - dev_err(rvu->dev, - "NIXLF%d: SMQ%d flush failed\n", nixlf, smq); + dev_info(rvu->dev, + "NIXLF%d: SMQ%d flush failed, txlink might be busy\n", + nixlf, smq); + + /* clear XOFF on TL2s */ + nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, true); + 
nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, false); + kfree(smq_flush_ctx); rvu_cgx_enadis_rx_bp(rvu, pf, true); /* restore cgx tx state */ -- cgit v1.2.3 From 6b5f9a87e12d044f513a4f4c0e31ac7b5e988b66 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Mon, 12 Jun 2023 11:37:24 +0530 Subject: amd-xgbe: extend 10Mbps support to MAC version 21H MAC version 21H supports the 10Mbps speed. So, extend support to platforms that support it. Acked-by: Shyam Sundar S K Reviewed-by: Sridhar Samudrala Signed-off-by: Raju Rangoju Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 16e7fb2c0dae..6a716337f48b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -2782,9 +2782,9 @@ static bool xgbe_phy_valid_speed_baset_mode(struct xgbe_prv_data *pdata, switch (speed) { case SPEED_10: - /* Supported in ver >= 30H */ + /* Supported in ver 21H and ver >= 30H */ ver = XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER); - return (ver >= 0x30) ? 
true : false; + return (ver == 0x21 || ver >= 0x30); case SPEED_100: case SPEED_1000: return true; @@ -2806,9 +2806,10 @@ static bool xgbe_phy_valid_speed_sfp_mode(struct xgbe_prv_data *pdata, switch (speed) { case SPEED_10: - /* Supported in ver >= 30H */ + /* Supported in ver 21H and ver >= 30H */ ver = XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER); - return (ver >= 0x30) && (phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000); + return ((ver == 0x21 || ver >= 0x30) && + (phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000)); case SPEED_100: return (phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000); case SPEED_1000: @@ -3158,9 +3159,9 @@ static bool xgbe_phy_port_mode_mismatch(struct xgbe_prv_data *pdata) struct xgbe_phy_data *phy_data = pdata->phy_data; unsigned int ver; - /* 10 Mbps speed is not supported in ver < 30H */ + /* 10 Mbps speed is supported in ver 21H and ver >= 30H */ ver = XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER); - if (ver < 0x30 && (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10)) + if ((ver < 0x30 && ver != 0x21) && (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10)) return true; switch (phy_data->port_mode) { -- cgit v1.2.3 From 374283a1001277e4d07491387aac1fad5aa08d43 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 12 Jun 2023 10:18:50 +0300 Subject: net: ethernet: ti: am65-cpsw: Call of_node_put() on error path This code returns directly but it should instead call of_node_put() to drop some reference counts. 
Fixes: dab2b265dd23 ("net: ethernet: ti: am65-cpsw: Add support for SERDES configuration") Signed-off-by: Dan Carpenter Reviewed-by: Roger Quadros Link: https://lore.kernel.org/r/e3012f0c-1621-40e6-bf7d-03c276f6e07f@kili.mountain Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 11cbcd9e2c72..bebcfd5e6b57 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2068,7 +2068,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) /* Initialize the Serdes PHY for the port */ ret = am65_cpsw_init_serdes_phy(dev, port_np, port); if (ret) - return ret; + goto of_node_put; port->slave.mac_only = of_property_read_bool(port_np, "ti,mac-only"); -- cgit v1.2.3 From e0db883b6949bd0b5b221893e30afd5f838783b1 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 12 Jun 2023 17:31:00 +0200 Subject: mlxsw: spectrum_router: Extract a helper from mlxsw_sp_port_vlan_router_join() Split out of mlxsw_sp_port_vlan_router_join() the part that checks for RIF and dispatches to __mlxsw_sp_port_vlan_router_join(), leaving it as wrapper that just manages the router lock. The new function, mlxsw_sp_port_vlan_router_join_existing(), will be useful as an atom in later patches. 
Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 29 +++++++++++++++------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index f9328e8410f5..0edda06e92bb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8562,24 +8562,35 @@ __mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) mlxsw_sp_rif_subport_put(rif); } +static int +mlxsw_sp_port_vlan_router_join_existing(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, + struct net_device *l3_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp; + + lockdep_assert_held(&mlxsw_sp->router->lock); + + if (!mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev)) + return 0; + + return __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev, + extack); +} + int mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, struct net_device *l3_dev, struct netlink_ext_ack *extack) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_rif *rif; - int err = 0; + int err; mutex_lock(&mlxsw_sp->router->lock); - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); - if (!rif) - goto out; - - err = __mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan, l3_dev, - extack); -out: + err = mlxsw_sp_port_vlan_router_join_existing(mlxsw_sp_port_vlan, + l3_dev, extack); mutex_unlock(&mlxsw_sp->router->lock); + return err; } -- cgit v1.2.3 From 76962b802efe27a32425d2abe818e53167c71dbe Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 12 Jun 2023 17:31:01 +0200 Subject: mlxsw: spectrum_router: Add a helper specifically for joining a LAG Currently, joining a LAG very simply means that the LAG 
RIF should be joined by the subport representing untagged traffic. If the RIF does not exist, it does not have to be created: if the user wants there to be a RIF for the LAG device, they are supposed to add an IP address, and they are supposed to do it after the LAG becomes an mlxsw upper. We can also assume that the LAG has no uppers, otherwise the enslavement is not allowed. In the future, these ordering dependencies should be removed. That means that joining LAG will be a more complex operation, possibly involving a lazy RIF creation, and possibly joining / lazily creating RIFs for VLAN uppers of the LAG. It will be handy to have a dedicated function that handles all this. The new function mlxsw_sp_router_port_join_lag() is that. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 +- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 4 -- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 56 +++++++++++++++------- .../net/ethernet/mellanox/mlxsw/spectrum_router.h | 3 ++ 4 files changed, 45 insertions(+), 22 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 4609b13bda02..25a01dafde1b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4337,8 +4337,8 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port->default_vlan); /* Join a router interface configured on the LAG, if exists */ - err = mlxsw_sp_port_vlan_router_join(mlxsw_sp_port->default_vlan, - lag_dev, extack); + err = mlxsw_sp_router_port_join_lag(mlxsw_sp_port, lag_dev, + extack); if (err) goto err_router_join; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 0b57c8d0cce0..231e364cbb7c 100644 --- 
a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -755,10 +755,6 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, const struct net_device *macvlan_dev); -int -mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, - struct net_device *l3_dev, - struct netlink_ext_ack *extack); void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 0edda06e92bb..2c3dcbc2f9a6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8578,22 +8578,6 @@ mlxsw_sp_port_vlan_router_join_existing(struct mlxsw_sp_port_vlan *mlxsw_sp_port extack); } -int -mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, - struct net_device *l3_dev, - struct netlink_ext_ack *extack) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp; - int err; - - mutex_lock(&mlxsw_sp->router->lock); - err = mlxsw_sp_port_vlan_router_join_existing(mlxsw_sp_port_vlan, - l3_dev, extack); - mutex_unlock(&mlxsw_sp->router->lock); - - return err; -} - void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan) { @@ -9278,6 +9262,46 @@ mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, return err; } +static int +mlxsw_sp_port_vid_router_join_existing(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid, struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; + + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, + vid); + if (WARN_ON(!mlxsw_sp_port_vlan)) + return -EINVAL; + + return 
mlxsw_sp_port_vlan_router_join_existing(mlxsw_sp_port_vlan, + dev, extack); +} + +static int __mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev, + struct netlink_ext_ack *extack) +{ + u16 default_vid = MLXSW_SP_DEFAULT_VID; + + return mlxsw_sp_port_vid_router_join_existing(mlxsw_sp_port, + default_vid, lag_dev, + extack); +} + +int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev, + struct netlink_ext_ack *extack) +{ + int err; + + mutex_lock(&mlxsw_sp_port->mlxsw_sp->router->lock); + err = __mlxsw_sp_router_port_join_lag(mlxsw_sp_port, lag_dev, extack); + mutex_unlock(&mlxsw_sp_port->mlxsw_sp->router->lock); + + return err; +} + static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 5ff443f27136..5a0babc614b4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -170,5 +170,8 @@ int mlxsw_sp_ipip_ecn_encap_init(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_ipip_ecn_decap_init(struct mlxsw_sp *mlxsw_sp); struct net_device * mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev); +int mlxsw_sp_router_port_join_lag(struct mlxsw_sp_port *mlxsw_sp_port, + struct net_device *lag_dev, + struct netlink_ext_ack *extack); #endif /* _MLXSW_ROUTER_H_*/ -- cgit v1.2.3 From fb6ac45e86668456e5750d2a1c874ca2f5526171 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 12 Jun 2023 17:31:02 +0200 Subject: mlxsw: spectrum_router: Access rif->dev through a helper In order to abstract away deduction of netdevice from the corresponding RIF, introduce a helper, mlxsw_sp_rif_dev(), and use it throughout. This will make it possible to change the deduction path easily later on. 
Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 109 ++++++++++++--------- 1 file changed, 64 insertions(+), 45 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 2c3dcbc2f9a6..e9183c223575 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -71,6 +71,11 @@ struct mlxsw_sp_rif { bool counter_egress_valid; }; +static struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) +{ + return rif->dev; +} + struct mlxsw_sp_rif_params { struct net_device *dev; union { @@ -1560,6 +1565,7 @@ mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id, u16 ul_rif_id, bool enable) { struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config; + struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common); enum mlxsw_reg_ritr_loopback_ipip_options ipip_options; struct mlxsw_sp_rif *rif = &lb_rif->common; struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; @@ -1572,7 +1578,7 @@ mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id, case MLXSW_SP_L3_PROTO_IPV4: saddr4 = be32_to_cpu(lb_cf.saddr.addr4); mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF, - rif->rif_index, rif->vr_id, rif->dev->mtu); + rif->rif_index, rif->vr_id, dev->mtu); mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt, ipip_options, ul_vr_id, ul_rif_id, saddr4, @@ -1582,7 +1588,7 @@ mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id, case MLXSW_SP_L3_PROTO_IPV6: saddr6 = &lb_cf.saddr.addr6; mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF, - rif->rif_index, rif->vr_id, rif->dev->mtu); + rif->rif_index, rif->vr_id, dev->mtu); mlxsw_reg_ritr_loopback_ipip6_pack(ritr_pl, lb_cf.lb_ipipt, ipip_options, ul_vr_id, ul_rif_id, saddr6, @@ 
-2332,7 +2338,7 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, } dipn = htonl(dip); - dev = mlxsw_sp->router->rifs[rif]->dev; + dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]); n = neigh_lookup(&arp_tbl, &dipn, dev); if (!n) return; @@ -2360,7 +2366,7 @@ static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, return; } - dev = mlxsw_sp->router->rifs[rif]->dev; + dev = mlxsw_sp_rif_dev(mlxsw_sp->router->rifs[rif]); n = neigh_lookup(&nd_tbl, &dip, dev); if (!n) return; @@ -4368,6 +4374,7 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp_nexthop *nh; bool removing; @@ -4377,7 +4384,7 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, removing = false; break; case MLXSW_SP_NEXTHOP_TYPE_IPIP: - removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev); + removing = !mlxsw_sp_ipip_netdev_ul_up(dev); break; default: WARN_ON(1); @@ -7798,7 +7805,7 @@ mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, return true; if (rif && addr_list_empty && - !netif_is_l3_slave(rif->dev)) + !netif_is_l3_slave(mlxsw_sp_rif_dev(rif))) return true; /* It is possible we already removed the RIF ourselves * if it was assigned to a netdev that is now a bridge @@ -7895,7 +7902,8 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif) u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) { - u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev); + struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev); struct mlxsw_sp_vr *ul_vr; ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL); @@ -8072,12 +8080,7 @@ mlxsw_sp_router_hwstats_notify_schedule(struct net_device *dev) int mlxsw_sp_rif_dev_ifindex(const struct 
mlxsw_sp_rif *rif) { - return rif->dev->ifindex; -} - -static const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) -{ - return rif->dev; + return mlxsw_sp_rif_dev(rif)->ifindex; } bool mlxsw_sp_rif_has_dev(const struct mlxsw_sp_rif *rif) @@ -8096,7 +8099,7 @@ static void mlxsw_sp_rif_push_l3_stats(struct mlxsw_sp_rif *rif) struct rtnl_hw_stats64 stats = {}; if (!mlxsw_sp_router_port_l3_stats_fetch(rif, &stats)) - netdev_offload_xstats_push_delta(rif->dev, + netdev_offload_xstats_push_delta(mlxsw_sp_rif_dev(rif), NETDEV_OFFLOAD_XSTATS_TYPE_L3, &stats); } @@ -8198,6 +8201,7 @@ err_rif_index_alloc: static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); const struct mlxsw_sp_rif_ops *ops = rif->ops; struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_fid *fid = rif->fid; @@ -8210,11 +8214,10 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); vr = &mlxsw_sp->router->vrs[rif->vr_id]; - if (netdev_offload_xstats_enabled(rif->dev, - NETDEV_OFFLOAD_XSTATS_TYPE_L3)) { + if (netdev_offload_xstats_enabled(dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3)) { mlxsw_sp_rif_push_l3_stats(rif); mlxsw_sp_router_port_l3_stats_disable(rif); - mlxsw_sp_router_hwstats_notify_schedule(rif->dev); + mlxsw_sp_router_hwstats_notify_schedule(dev); } else { mlxsw_sp_rif_counters_free(rif); } @@ -8226,7 +8229,7 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) /* Loopback RIFs are not associated with a FID. 
*/ mlxsw_sp_fid_put(fid); mlxsw_sp->router->rifs[rif->rif_index] = NULL; - dev_put(rif->dev); + dev_put(dev); kfree(rif); mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); vr->rif_count--; @@ -9012,7 +9015,7 @@ mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { - struct net_device *dev = rif->dev; + struct net_device *dev = mlxsw_sp_rif_dev(rif); u8 old_mac_profile; u16 fid_index; int err; @@ -9348,15 +9351,16 @@ static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct netdev_nested_priv priv = { .data = (void *)rif, }; - if (!netif_is_macvlan_port(rif->dev)) + if (!netif_is_macvlan_port(dev)) return 0; - netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n"); - return netdev_walk_all_upper_dev_rcu(rif->dev, + netdev_warn(dev, "Router interface is deleted. 
Upper macvlans will not work\n"); + return netdev_walk_all_upper_dev_rcu(dev, __mlxsw_sp_rif_macvlan_flush, &priv); } @@ -9377,6 +9381,7 @@ static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif, static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_rif_subport *rif_subport; char ritr_pl[MLXSW_REG_RITR_LEN]; @@ -9384,8 +9389,8 @@ static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable) rif_subport = mlxsw_sp_rif_subport_rif(rif); mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF, - rif->rif_index, rif->vr_id, rif->dev->mtu); - mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr); + rif->rif_index, rif->vr_id, dev->mtu); + mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr); mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id); efid = mlxsw_sp_fid_index(rif->fid); mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag, @@ -9398,6 +9403,7 @@ static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable) static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); u8 mac_profile; int err; @@ -9411,7 +9417,7 @@ static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif, if (err) goto err_rif_subport_op; - err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), true); if (err) goto err_rif_fdb_op; @@ -9423,7 +9429,7 @@ static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif, return 0; err_fid_rif_set: - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), false); err_rif_fdb_op: mlxsw_sp_rif_subport_op(rif, false); @@ -9434,10 +9440,11 @@ err_rif_subport_op: static void mlxsw_sp_rif_subport_deconfigure(struct 
mlxsw_sp_rif *rif) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp_fid *fid = rif->fid; mlxsw_sp_fid_rif_unset(fid); - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(fid), false); mlxsw_sp_rif_macvlan_flush(rif); mlxsw_sp_rif_subport_op(rif, false); @@ -9463,12 +9470,13 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = { static int mlxsw_sp_rif_fid_op(struct mlxsw_sp_rif *rif, u16 fid, bool enable) { enum mlxsw_reg_ritr_if_type type = MLXSW_REG_RITR_FID_IF; + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; char ritr_pl[MLXSW_REG_RITR_LEN]; mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id, - rif->dev->mtu); - mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr); + dev->mtu); + mlxsw_reg_ritr_mac_pack(ritr_pl, dev->dev_addr); mlxsw_reg_ritr_if_mac_profile_id_set(ritr_pl, rif->mac_profile_id); mlxsw_reg_ritr_fid_if_fid_set(ritr_pl, fid); @@ -9483,6 +9491,7 @@ u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; u16 fid_index = mlxsw_sp_fid_index(rif->fid); u8 mac_profile; @@ -9508,7 +9517,7 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif, if (err) goto err_fid_bc_flood_set; - err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), true); if (err) goto err_rif_fdb_op; @@ -9520,7 +9529,7 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif, return 0; err_fid_rif_set: - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), false); err_rif_fdb_op: mlxsw_sp_fid_flood_set(rif->fid, 
MLXSW_SP_FLOOD_TYPE_BC, @@ -9537,12 +9546,13 @@ err_rif_fid_op: static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); u16 fid_index = mlxsw_sp_fid_index(rif->fid); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_fid *fid = rif->fid; mlxsw_sp_fid_rif_unset(fid); - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(fid), false); mlxsw_sp_rif_macvlan_flush(rif); mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, @@ -9557,7 +9567,9 @@ static struct mlxsw_sp_fid * mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { - return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex); + int rif_ifindex = mlxsw_sp_rif_dev_ifindex(rif); + + return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif_ifindex); } static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) @@ -9565,7 +9577,7 @@ static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) struct switchdev_notifier_fdb_info info = {}; struct net_device *dev; - dev = br_fdb_find_port(rif->dev, mac, 0); + dev = br_fdb_find_port(mlxsw_sp_rif_dev(rif), mac, 0); if (!dev) return; @@ -9588,17 +9600,18 @@ static struct mlxsw_sp_fid * mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct net_device *br_dev; u16 vid; int err; - if (is_vlan_dev(rif->dev)) { - vid = vlan_dev_vlan_id(rif->dev); - br_dev = vlan_dev_real_dev(rif->dev); + if (is_vlan_dev(dev)) { + vid = vlan_dev_vlan_id(dev); + br_dev = vlan_dev_real_dev(dev); if (WARN_ON(!netif_is_bridge_master(br_dev))) return ERR_PTR(-EINVAL); } else { - err = br_vlan_get_pvid(rif->dev, &vid); + err = br_vlan_get_pvid(dev, &vid); if (err < 0 || !vid) { NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID"); return ERR_PTR(-EINVAL); @@ -9610,12 +9623,13 @@ 
mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif, static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) { + struct net_device *rif_dev = mlxsw_sp_rif_dev(rif); struct switchdev_notifier_fdb_info info = {}; u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); struct net_device *br_dev; struct net_device *dev; - br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev; + br_dev = is_vlan_dev(rif_dev) ? vlan_dev_real_dev(rif_dev) : rif_dev; dev = br_fdb_find_port(br_dev, mac, vid); if (!dev) return; @@ -9629,11 +9643,12 @@ static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) static int mlxsw_sp_rif_vlan_op(struct mlxsw_sp_rif *rif, u16 vid, u16 efid, bool enable) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; char ritr_pl[MLXSW_REG_RITR_LEN]; mlxsw_reg_ritr_vlan_if_pack(ritr_pl, enable, rif->rif_index, rif->vr_id, - rif->dev->mtu, rif->dev->dev_addr, + dev->mtu, dev->dev_addr, rif->mac_profile_id, vid, efid); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); @@ -9642,6 +9657,7 @@ static int mlxsw_sp_rif_vlan_op(struct mlxsw_sp_rif *rif, u16 vid, u16 efid, static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid, struct netlink_ext_ack *extack) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; u8 mac_profile; @@ -9667,7 +9683,7 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid, if (err) goto err_fid_bc_flood_set; - err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), true); if (err) goto err_rif_fdb_op; @@ -9679,7 +9695,7 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif, u16 efid, return 0; err_fid_rif_set: - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, 
dev->dev_addr, mlxsw_sp_fid_index(rif->fid), false); err_rif_fdb_op: mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, @@ -9696,11 +9712,12 @@ err_rif_vlan_fid_op: static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif) { + struct net_device *dev = mlxsw_sp_rif_dev(rif); u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; mlxsw_sp_fid_rif_unset(rif->fid); - mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr, mlxsw_sp_fid_index(rif->fid), false); mlxsw_sp_rif_macvlan_flush(rif); mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, @@ -9767,7 +9784,8 @@ mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); - u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); + struct net_device *dev = mlxsw_sp_rif_dev(rif); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_vr *ul_vr; int err; @@ -9966,7 +9984,8 @@ mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif, struct netlink_ext_ack *extack) { struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); - u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); + struct net_device *dev = mlxsw_sp_rif_dev(rif); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev); struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_rif *ul_rif; int err; -- cgit v1.2.3 From 2019b5eeae2af862af41882b0863d61dfdaee25d Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 12 Jun 2023 17:31:03 +0200 Subject: mlxsw: spectrum_router: Access rif->dev from params in mlxsw_sp_rif_create() The previous patch added a helper to access a netdevice given a RIF. Using this helper in mlxsw_sp_rif_create() is unreasonable: the netdevice was given in RIF creation parameters. Just take it there. 
Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index e9183c223575..da582ef8efda 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8138,7 +8138,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, err = -ENOMEM; goto err_rif_alloc; } - dev_hold(rif->dev); + dev_hold(params->dev); mlxsw_sp->router->rifs[rif_index] = rif; rif->mlxsw_sp = mlxsw_sp; rif->ops = ops; @@ -8166,12 +8166,12 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, goto err_mr_rif_add; } - if (netdev_offload_xstats_enabled(rif->dev, + if (netdev_offload_xstats_enabled(params->dev, NETDEV_OFFLOAD_XSTATS_TYPE_L3)) { err = mlxsw_sp_router_port_l3_stats_enable(rif); if (err) goto err_stats_enable; - mlxsw_sp_router_hwstats_notify_schedule(rif->dev); + mlxsw_sp_router_hwstats_notify_schedule(params->dev); } else { mlxsw_sp_rif_counters_alloc(rif); } @@ -8189,7 +8189,7 @@ err_configure: mlxsw_sp_fid_put(fid); err_fid_get: mlxsw_sp->router->rifs[rif_index] = NULL; - dev_put(rif->dev); + dev_put(params->dev); kfree(rif); err_rif_alloc: mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); -- cgit v1.2.3 From 69f4ba177d6bb863925e586fa15bd0d454a875d7 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 12 Jun 2023 17:31:04 +0200 Subject: mlxsw: spectrum_router: Access nh->rif->dev through a helper In order to abstract away deduction of netdevice from the corresponding next hop, introduce a helper, mlxsw_sp_nexthop_dev(), and use it throughout. This will make it possible to change the deduction path easily later on. 
Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 23 +++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index da582ef8efda..d7013727da21 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2941,6 +2941,14 @@ struct mlxsw_sp_nexthop { bool counter_valid; }; +static struct net_device * +mlxsw_sp_nexthop_dev(const struct mlxsw_sp_nexthop *nh) +{ + if (nh->rif) + return mlxsw_sp_rif_dev(nh->rif); + return NULL; +} + enum mlxsw_sp_nexthop_group_type { MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4, MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6, @@ -4014,16 +4022,18 @@ mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp, { struct neighbour *n, *old_n = neigh_entry->key.n; struct mlxsw_sp_nexthop *nh; + struct net_device *dev; bool entry_connected; u8 nud_state, dead; int err; nh = list_first_entry(&neigh_entry->nexthop_list, struct mlxsw_sp_nexthop, neigh_list_node); + dev = mlxsw_sp_nexthop_dev(nh); - n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); + n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev); if (!n) { - n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); + n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); @@ -4110,21 +4120,23 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry; + struct net_device *dev; struct neighbour *n; u8 nud_state, dead; int err; if (!nh->nhgi->gateway || nh->neigh_entry) return 0; + dev = mlxsw_sp_nexthop_dev(nh); /* Take a reference of neigh here ensuring that neigh would * not be destructed before the nexthop entry is finished. 
* The reference is taken either in neigh_lookup() or * in neigh_create() in case n is not found. */ - n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); + n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, dev); if (!n) { - n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); + n = neigh_create(nh->neigh_tbl, &nh->gw_addr, dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); @@ -5516,9 +5528,10 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, for (i = 0; i < nh_grp->nhgi->count; i++) { struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i]; + struct net_device *dev = mlxsw_sp_nexthop_dev(nh); struct fib6_info *rt = mlxsw_sp_rt6->rt; - if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev && + if (dev && dev == rt->fib6_nh->fib_nh_dev && ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, &rt->fib6_nh->fib_nh_gw6)) return nh; -- cgit v1.2.3 From 532b6e2bbc190c899086da5be1fd83e69fb41c12 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 12 Jun 2023 17:31:05 +0200 Subject: mlxsw: spectrum_router: Access nhgi->rif through a helper To abstract away deduction of RIF from the corresponding next hop group info (NHGI), mlxsw currently uses a macro. In its current form, that macro is impossible to extend to more general computation. Therefore introduce a helper, mlxsw_sp_nhgi_rif(), and use it throughout. This will make it possible to change the deduction path easily later on. 
Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index d7013727da21..e05c47568ece 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2966,9 +2966,14 @@ struct mlxsw_sp_nexthop_group_info { is_resilient:1; struct list_head list; /* member in nh_res_grp_list */ struct mlxsw_sp_nexthop nexthops[]; -#define nh_rif nexthops[0].rif }; +static struct mlxsw_sp_rif * +mlxsw_sp_nhgi_rif(const struct mlxsw_sp_nexthop_group_info *nhgi) +{ + return nhgi->nexthops[0].rif; +} + struct mlxsw_sp_nexthop_group_vr_key { u16 vr_id; enum mlxsw_sp_l3proto proto; @@ -5510,7 +5515,7 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: return !!nh_group->nhgi->adj_index_valid; case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: - return !!nh_group->nhgi->nh_rif; + return !!mlxsw_sp_nhgi_rif(nh_group->nhgi); case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE: case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: @@ -5772,7 +5777,8 @@ static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; adjacency_index = nhgi->adj_index; ecmp_size = nhgi->ecmp_size; - } else if (!nhgi->adj_index_valid && nhgi->count && nhgi->nh_rif) { + } else if (!nhgi->adj_index_valid && nhgi->count && + mlxsw_sp_nhgi_rif(nhgi)) { trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; adjacency_index = mlxsw_sp->router->adj_trap_index; ecmp_size = 1; @@ -5791,7 +5797,7 @@ static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { - struct 
mlxsw_sp_rif *rif = fib_entry->nh_group->nhgi->nh_rif; + struct mlxsw_sp_rif *rif = mlxsw_sp_nhgi_rif(fib_entry->nh_group->nhgi); enum mlxsw_reg_ralue_trap_action trap_action; char ralue_pl[MLXSW_REG_RALUE_LEN]; u16 trap_id = 0; -- cgit v1.2.3 From 571c56911b45059dbdd2fb7912152e3412b658cf Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 12 Jun 2023 17:31:06 +0200 Subject: mlxsw: spectrum_router: Extract a helper to free a RIF Right now freeing the object that mlxsw uses to keep track of a RIF is as simple as calling a kfree. But later on as CRIF abstraction is brought in, it will involve severing the link between CRIF and its RIF as well. Better to have the logic encapsulated in a helper. Since a helper is being introduced, make it a full-fledged destructor and have it validate that the objects tracked at the RIF have been released. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index e05c47568ece..1e05ecd29c8d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -7903,6 +7903,13 @@ static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, return rif; } +static void mlxsw_sp_rif_free(struct mlxsw_sp_rif *rif) +{ + WARN_ON(!list_empty(&rif->neigh_list)); + WARN_ON(!list_empty(&rif->nexthop_list)); + kfree(rif); +} + struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 rif_index) { @@ -8209,7 +8216,7 @@ err_configure: err_fid_get: mlxsw_sp->router->rifs[rif_index] = NULL; dev_put(params->dev); - kfree(rif); + mlxsw_sp_rif_free(rif); err_rif_alloc: mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); err_rif_index_alloc: 
@@ -8249,7 +8256,7 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_fid_put(fid); mlxsw_sp->router->rifs[rif->rif_index] = NULL; dev_put(dev); - kfree(rif); + mlxsw_sp_rif_free(rif); mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); vr->rif_count--; mlxsw_sp_vr_put(mlxsw_sp, vr); @@ -9902,7 +9909,7 @@ mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, ul_rif_op_err: mlxsw_sp->router->rifs[rif_index] = NULL; - kfree(ul_rif); + mlxsw_sp_rif_free(ul_rif); err_rif_alloc: mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); return ERR_PTR(err); @@ -9917,7 +9924,7 @@ static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif) atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count); mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false); mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL; - kfree(ul_rif); + mlxsw_sp_rif_free(ul_rif); mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); } -- cgit v1.2.3 From 33d11c4e5ce922bc16493fc75ad3c20cc55ed88a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 12 Jun 2023 17:31:07 +0200 Subject: mlxsw: spectrum_router: Add a helper to check if netdev has addresses This function will be useful later as the driver will need to retroactively create RIFs for new uppers with addresses. Add another helper that assumes RCU lock, and restructure the code to skip the IPv6 branch not through conditioning on the addr_list_empty variable, but by directly returning the result value. This makes the skip more obvious than it previously was. 
Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 42 +++++++++++++++------- 1 file changed, 29 insertions(+), 13 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 1e05ecd29c8d..25dbddabd91e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -7794,28 +7794,44 @@ static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); } +static bool __mlxsw_sp_dev_addr_list_empty(const struct net_device *dev) +{ + struct inet6_dev *inet6_dev; + struct in_device *idev; + + idev = __in_dev_get_rcu(dev); + if (idev && idev->ifa_list) + return false; + + inet6_dev = __in6_dev_get(dev); + if (inet6_dev && !list_empty(&inet6_dev->addr_list)) + return false; + + return true; +} + +static bool mlxsw_sp_dev_addr_list_empty(const struct net_device *dev) +{ + bool addr_list_empty; + + rcu_read_lock(); + addr_list_empty = __mlxsw_sp_dev_addr_list_empty(dev); + rcu_read_unlock(); + + return addr_list_empty; +} + static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, unsigned long event) { - struct inet6_dev *inet6_dev; - bool addr_list_empty = true; - struct in_device *idev; + bool addr_list_empty; switch (event) { case NETDEV_UP: return rif == NULL; case NETDEV_DOWN: - rcu_read_lock(); - idev = __in_dev_get_rcu(dev); - if (idev && idev->ifa_list) - addr_list_empty = false; - - inet6_dev = __in6_dev_get(dev); - if (addr_list_empty && inet6_dev && - !list_empty(&inet6_dev->addr_list)) - addr_list_empty = false; - rcu_read_unlock(); + addr_list_empty = mlxsw_sp_dev_addr_list_empty(dev); /* macvlans do not have a RIF, but rather piggy back on the * RIF of their lower device. 
-- cgit v1.2.3 From 440273e763f575bfd801e00a8892a9abb82fa263 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 12 Jun 2023 17:31:08 +0200 Subject: mlxsw: spectrum_router: Extract a helper for RIF migration RIF configuration contains a number of parameters that cannot be changed after the RIF is created. For the IPIP loopbacks, this is currently worked around by creating a new RIF with the desired configuration changes applied, and updating next hops to the new RIF, and then destroying the old RIF. This operation will be useful as a reusable atom, so extract a helper to that effect. Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 25dbddabd91e..fdb812152e71 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1651,6 +1651,17 @@ static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *old_rif, struct mlxsw_sp_rif *new_rif); +static void mlxsw_sp_rif_migrate_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *old_rif, + struct mlxsw_sp_rif *new_rif, + bool migrate_nhs) +{ + if (migrate_nhs) + mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, old_rif, new_rif); + + mlxsw_sp_rif_destroy(old_rif); +} + static int mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, @@ -1668,12 +1679,8 @@ mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(new_lb_rif); ipip_entry->ol_lb = new_lb_rif; - if (keep_encap) - mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common, - &new_lb_rif->common); - - 
mlxsw_sp_rif_destroy(&old_lb_rif->common); - + mlxsw_sp_rif_migrate_destroy(mlxsw_sp, &old_lb_rif->common, + &new_lb_rif->common, keep_encap); return 0; } -- cgit v1.2.3 From d4a37bf0943d70ca78ae93da9b05e70632c2019c Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 12 Jun 2023 17:31:09 +0200 Subject: mlxsw: spectrum_router: Move IPIP init up mlxsw will need to keep track of certain devices that are not related to any of its front panel ports. This includes IPIP netdevices. To be able to query the list of supported IPIP types, router->ipip_ops_arr needs to be initialized. To that end, move the IPIP initialization up (and finalization correspondingly down). Signed-off-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index fdb812152e71..43e8f19c7a0a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -10643,14 +10643,14 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_router_init; - err = mlxsw_sp_rifs_init(mlxsw_sp); - if (err) - goto err_rifs_init; - err = mlxsw_sp->router_ops->ipips_init(mlxsw_sp); if (err) goto err_ipips_init; + err = mlxsw_sp_rifs_init(mlxsw_sp); + if (err) + goto err_rifs_init; + err = rhashtable_init(&mlxsw_sp->router->nexthop_ht, &mlxsw_sp_nexthop_ht_params); if (err) @@ -10776,10 +10776,10 @@ err_lpm_init: err_nexthop_group_ht_init: rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); err_nexthop_ht_init: - mlxsw_sp_ipips_fini(mlxsw_sp); -err_ipips_init: mlxsw_sp_rifs_fini(mlxsw_sp); err_rifs_init: + mlxsw_sp_ipips_fini(mlxsw_sp); +err_ipips_init: __mlxsw_sp_router_fini(mlxsw_sp); err_router_init: 
cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw); @@ -10812,8 +10812,8 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_lpm_fini(mlxsw_sp); rhashtable_destroy(&router->nexthop_group_ht); rhashtable_destroy(&router->nexthop_ht); - mlxsw_sp_ipips_fini(mlxsw_sp); mlxsw_sp_rifs_fini(mlxsw_sp); + mlxsw_sp_ipips_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); cancel_delayed_work_sync(&router->nh_grp_activity_dw); mutex_destroy(&router->lock); -- cgit v1.2.3 From 3bff63ee0303b12bf6727bd18e529bc7f59f6426 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Sun, 11 Jun 2023 18:19:36 +0100 Subject: net/mlx5: Rely on dev->link_active_reporting Use dev->link_active_reporting to determine whether Data Link Layer Link Active Reporting is available rather than re-retrieving the capability. Link: https://lore.kernel.org/r/alpine.DEB.2.21.2305310125370.59226@angie.orcam.me.uk Signed-off-by: Maciej W. Rozycki Signed-off-by: Bjorn Helgaas --- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 50022e7565f1..9ebebd963dab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -307,7 +307,6 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) unsigned long timeout; struct pci_dev *sdev; int cap, err; - u32 reg32; /* Check that all functions under the pci bridge are PFs of * this device otherwise fail this function. 
@@ -346,11 +345,8 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) return err; /* Check link */ - err = pci_read_config_dword(bridge, cap + PCI_EXP_LNKCAP, &reg32); - if (err) - return err; - if (!(reg32 & PCI_EXP_LNKCAP_DLLLARC)) { - mlx5_core_warn(dev, "No PCI link reporting capability (0x%08x)\n", reg32); + if (!bridge->link_active_reporting) { + mlx5_core_warn(dev, "No PCI link reporting capability\n"); msleep(1000); goto restore; } -- cgit v1.2.3 From 24b454bc354ab7b1aa918a4fe3d7696516f592d4 Mon Sep 17 00:00:00 2001 From: Jakub Buchocki Date: Mon, 12 Jun 2023 10:14:21 -0700 Subject: ice: Fix ice module unload Clearing the interrupt scheme before PFR reset, during the removal routine, could cause hardware errors and possibly lead to a system reboot, as the PF reset can cause an interrupt to be generated. Place the call for PFR reset inside ice_deinit_dev(), wait until reset and all pending transactions are done, then call ice_clear_interrupt_scheme(). This introduces a PFR reset to multiple error paths. Additionally, remove the call for the reset from ice_load() - it will be a part of ice_unload() now.
Error example: [ 75.229328] ice 0000:ca:00.1: Failed to read Tx Scheduler Tree - User Selection data from flash [ 77.571315] {1}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1 [ 77.571418] {1}[Hardware Error]: event severity: recoverable [ 77.571459] {1}[Hardware Error]: Error 0, type: recoverable [ 77.571500] {1}[Hardware Error]: section_type: PCIe error [ 77.571540] {1}[Hardware Error]: port_type: 4, root port [ 77.571580] {1}[Hardware Error]: version: 3.0 [ 77.571615] {1}[Hardware Error]: command: 0x0547, status: 0x4010 [ 77.571661] {1}[Hardware Error]: device_id: 0000:c9:02.0 [ 77.571703] {1}[Hardware Error]: slot: 25 [ 77.571736] {1}[Hardware Error]: secondary_bus: 0xca [ 77.571773] {1}[Hardware Error]: vendor_id: 0x8086, device_id: 0x347a [ 77.571821] {1}[Hardware Error]: class_code: 060400 [ 77.571858] {1}[Hardware Error]: bridge: secondary_status: 0x2800, control: 0x0013 [ 77.572490] pcieport 0000:c9:02.0: AER: aer_status: 0x00200000, aer_mask: 0x00100020 [ 77.572870] pcieport 0000:c9:02.0: [21] ACSViol (First) [ 77.573222] pcieport 0000:c9:02.0: AER: aer_layer=Transaction Layer, aer_agent=Receiver ID [ 77.573554] pcieport 0000:c9:02.0: AER: aer_uncor_severity: 0x00463010 [ 77.691273] {2}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 1 [ 77.691738] {2}[Hardware Error]: event severity: recoverable [ 77.691971] {2}[Hardware Error]: Error 0, type: recoverable [ 77.692192] {2}[Hardware Error]: section_type: PCIe error [ 77.692403] {2}[Hardware Error]: port_type: 4, root port [ 77.692616] {2}[Hardware Error]: version: 3.0 [ 77.692825] {2}[Hardware Error]: command: 0x0547, status: 0x4010 [ 77.693032] {2}[Hardware Error]: device_id: 0000:c9:02.0 [ 77.693238] {2}[Hardware Error]: slot: 25 [ 77.693440] {2}[Hardware Error]: secondary_bus: 0xca [ 77.693641] {2}[Hardware Error]: vendor_id: 0x8086, device_id: 0x347a [ 77.693853] {2}[Hardware Error]: class_code: 060400 [ 77.694054] {2}[Hardware Error]: bridge: 
secondary_status: 0x0800, control: 0x0013 [ 77.719115] pci 0000:ca:00.1: AER: can't recover (no error_detected callback) [ 77.719140] pcieport 0000:c9:02.0: AER: device recovery failed [ 77.719216] pcieport 0000:c9:02.0: AER: aer_status: 0x00200000, aer_mask: 0x00100020 [ 77.719390] pcieport 0000:c9:02.0: [21] ACSViol (First) [ 77.719557] pcieport 0000:c9:02.0: AER: aer_layer=Transaction Layer, aer_agent=Receiver ID [ 77.719723] pcieport 0000:c9:02.0: AER: aer_uncor_severity: 0x00463010 Fixes: 5b246e533d01 ("ice: split probe into smaller functions") Signed-off-by: Jakub Buchocki Reviewed-by: Przemek Kitszel Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230612171421.21570-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 03513d4871ab..42c318ceff61 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4802,9 +4802,13 @@ err_init_pf: static void ice_deinit_dev(struct ice_pf *pf) { ice_free_irq_msix_misc(pf); - ice_clear_interrupt_scheme(pf); ice_deinit_pf(pf); ice_deinit_hw(&pf->hw); + + /* Service task is already stopped, so call reset directly. 
*/ + ice_reset(&pf->hw, ICE_RESET_PFR); + pci_wait_for_pending_transaction(pf->pdev); + ice_clear_interrupt_scheme(pf); } static void ice_init_features(struct ice_pf *pf) @@ -5094,10 +5098,6 @@ int ice_load(struct ice_pf *pf) struct ice_vsi *vsi; int err; - err = ice_reset(&pf->hw, ICE_RESET_PFR); - if (err) - return err; - err = ice_init_dev(pf); if (err) return err; @@ -5354,12 +5354,6 @@ static void ice_remove(struct pci_dev *pdev) ice_setup_mc_magic_wake(pf); ice_set_wake(pf); - /* Issue a PFR as part of the prescribed driver unload flow. Do not - * do it via ice_schedule_reset() since there is no need to rebuild - * and the service task is already stopped. - */ - ice_reset(&pf->hw, ICE_RESET_PFR); - pci_wait_for_pending_transaction(pdev); pci_disable_device(pdev); } -- cgit v1.2.3 From cae4bc06b3e41bc6dfd7116a45c1006e50ffe153 Mon Sep 17 00:00:00 2001 From: Maulik Jodhani Date: Mon, 12 Jun 2023 23:43:40 -0600 Subject: net: macb: Add support for partial store and forward When the receive partial store and forward mode is activated, the receiver will only begin to forward the packet to the external AHB or AXI slave when enough packet data is stored in the packet buffer. The amount of packet data required to activate the forwarding process is programmable via watermark registers which are located at the same address as the partial store and forward enable bits. Adding support to read this rx-watermark value from device-tree, to program the watermark registers and enable partial store and forwarding. Signed-off-by: Maulik Jodhani Signed-off-by: Pranavi Somisetty Reviewed-by: Claudiu Beznea Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cadence/macb.h | 12 ++++++++++++ drivers/net/ethernet/cadence/macb_main.c | 27 +++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index b6d5bf8deb79..78c972bb1d96 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -82,6 +82,7 @@ #define GEM_NCFGR 0x0004 /* Network Config */ #define GEM_USRIO 0x000c /* User IO */ #define GEM_DMACFG 0x0010 /* DMA Configuration */ +#define GEM_PBUFRXCUT 0x0044 /* RX Partial Store and Forward */ #define GEM_JML 0x0048 /* Jumbo Max Length */ #define GEM_HS_MAC_CONFIG 0x0050 /* GEM high speed config */ #define GEM_HRB 0x0080 /* Hash Bottom */ @@ -347,6 +348,10 @@ #define GEM_ADDR64_SIZE 1 +/* Bitfields in PBUFRXCUT */ +#define GEM_ENCUTTHRU_OFFSET 31 /* Enable RX partial store and forward */ +#define GEM_ENCUTTHRU_SIZE 1 + /* Bitfields in NSR */ #define MACB_NSR_LINK_OFFSET 0 /* pcs_link_state */ #define MACB_NSR_LINK_SIZE 1 @@ -513,6 +518,8 @@ #define GEM_TX_PKT_BUFF_OFFSET 21 #define GEM_TX_PKT_BUFF_SIZE 1 +#define GEM_RX_PBUF_ADDR_OFFSET 22 +#define GEM_RX_PBUF_ADDR_SIZE 4 /* Bitfields in DCFG5. */ #define GEM_TSU_OFFSET 8 @@ -521,6 +528,8 @@ /* Bitfields in DCFG6. 
*/ #define GEM_PBUF_LSO_OFFSET 27 #define GEM_PBUF_LSO_SIZE 1 +#define GEM_PBUF_CUTTHRU_OFFSET 25 +#define GEM_PBUF_CUTTHRU_SIZE 1 #define GEM_DAW64_OFFSET 23 #define GEM_DAW64_SIZE 1 @@ -1290,6 +1299,9 @@ struct macb { u32 wol; + /* holds value of rx watermark value for pbuf_rxcutthru register */ + u32 rx_watermark; + struct macb_ptp_info *ptp_info; /* macb-ptp interface */ struct phy *sgmii_phy; /* for ZynqMP SGMII mode */ diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 50a4b04315e9..2e35e200fdcb 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -2635,6 +2635,9 @@ static void macb_reset_hw(struct macb *bp) macb_writel(bp, TSR, -1); macb_writel(bp, RSR, -1); + /* Disable RX partial store and forward and reset watermark value */ + gem_writel(bp, PBUFRXCUT, 0); + /* Disable all interrupts */ for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { queue_writel(queue, IDR, -1); @@ -2792,6 +2795,10 @@ static void macb_init_hw(struct macb *bp) bp->rx_frm_len_mask = MACB_RX_JFRMLEN_MASK; macb_configure_dma(bp); + + /* Enable RX partial store and forward and set watermark */ + if (bp->rx_watermark) + gem_writel(bp, PBUFRXCUT, (bp->rx_watermark | GEM_BIT(ENCUTTHRU))); } /* The hash address register is 64 bits long and takes up two @@ -4946,6 +4953,7 @@ static int macb_probe(struct platform_device *pdev) phy_interface_t interface; struct net_device *dev; struct resource *regs; + u32 wtrmrk_rst_val; void __iomem *mem; struct macb *bp; int err, val; @@ -5025,6 +5033,25 @@ static int macb_probe(struct platform_device *pdev) bp->usrio = macb_config->usrio; + /* By default we set to partial store and forward mode for zynqmp. + * Disable if not set in devicetree. 
+ */ + if (GEM_BFEXT(PBUF_CUTTHRU, gem_readl(bp, DCFG6))) { + err = of_property_read_u32(bp->pdev->dev.of_node, + "cdns,rx-watermark", + &bp->rx_watermark); + + if (!err) { + /* Disable partial store and forward in case of error or + * invalid watermark value + */ + wtrmrk_rst_val = (1 << (GEM_BFEXT(RX_PBUF_ADDR, gem_readl(bp, DCFG2)))) - 1; + if (bp->rx_watermark > wtrmrk_rst_val || !bp->rx_watermark) { + dev_info(&bp->pdev->dev, "Invalid watermark value\n"); + bp->rx_watermark = 0; + } + } + } spin_lock_init(&bp->lock); /* setup capabilities */ -- cgit v1.2.3 From ed3c9a2fcab3b60b0766eb5d7566fd3b10df9a8e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 13 Jun 2023 13:50:06 -0700 Subject: net: tls: make the offload check helper take skb not socket All callers of tls_is_sk_tx_device_offloaded() currently do an equivalent of: if (skb->sk && tls_is_sk_tx_device_offloaded(skb->sk)) Have the helper accept skb and do the skb->sk check locally. Two drivers have local static inlines with similar wrappers already. While at it change the ifdef condition to TLS_DEVICE. Only TLS_DEVICE selects SOCK_VALIDATE_XMIT, so the two are equivalent. This makes removing the duplicated IS_ENABLED() check in funeth more obviously correct. Signed-off-by: Jakub Kicinski Acked-by: Maxim Mikityanskiy Reviewed-by: Simon Horman Acked-by: Tariq Toukan Acked-by: Dimitris Michailidis Signed-off-by: David S.
Miller --- drivers/net/bonding/bond_main.c | 4 ++-- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 5 ----- drivers/net/ethernet/chelsio/cxgb4/sge.c | 2 +- drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 2 +- drivers/net/ethernet/fungible/funeth/funeth_tx.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h | 5 ----- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 4 ++-- include/net/tls.h | 8 +++++--- net/tls/tls_device.c | 4 ++-- 12 files changed, 17 insertions(+), 26 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 007cec23a92f..16405b84dc2f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -5442,7 +5442,7 @@ static netdev_tx_t bond_tls_device_xmit(struct bonding *bond, struct sk_buff *sk { struct net_device *tls_netdev = rcu_dereference(tls_get_ctx(skb->sk)->netdev); - /* tls_netdev might become NULL, even if tls_is_sk_tx_device_offloaded + /* tls_netdev might become NULL, even if tls_is_skb_tx_device_offloaded * was true, if tls_device_down is running in parallel, but it's OK, * because bond_get_slave_by_dev has a NULL check. 
*/ @@ -5461,7 +5461,7 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev return NETDEV_TX_OK; #if IS_ENABLED(CONFIG_TLS_DEVICE) - if (skb->sk && tls_is_sk_tx_device_offloaded(skb->sk)) + if (tls_is_skb_tx_device_offloaded(skb)) return bond_tls_device_xmit(bond, skb, dev); #endif diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index f0bc7396ce2b..2eb33a727bba 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -1175,7 +1175,7 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb, txq = netdev_pick_tx(dev, skb, sb_dev); if (xfrm_offload(skb) || is_ptp_enabled(skb, dev) || skb->encapsulation || - cxgb4_is_ktls_skb(skb) || + tls_is_skb_tx_device_offloaded(skb) || (proto != IPPROTO_TCP && proto != IPPROTO_UDP)) txq = txq % pi->nqsets; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 34546f5312ee..a9599ba26975 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -497,11 +497,6 @@ struct cxgb4_uld_info { #endif }; -static inline bool cxgb4_is_ktls_skb(struct sk_buff *skb) -{ - return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk); -} - void cxgb4_uld_enable(struct adapter *adap); void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p); int cxgb4_unregister_uld(enum cxgb4_uld type); diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 46809e2d94ee..98dd78551d89 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1530,7 +1530,7 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) #endif /* CHELSIO_IPSEC_INLINE */ #if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE) - if (cxgb4_is_ktls_skb(skb) && + if 
(tls_is_skb_tx_device_offloaded(skb) && (skb->len - skb_tcp_all_headers(skb))) return adap->uld[CXGB4_ULD_KTLS].tx_handler(skb, dev); #endif /* CHELSIO_TLS_DEVICE */ diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 1a5fdd755e9e..bcdc7fc2f427 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -1946,7 +1946,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev) tls_ctx = tls_get_ctx(skb->sk); tls_netdev = rcu_dereference_bh(tls_ctx->netdev); /* Don't quit on NULL: if tls_device_down is running in parallel, - * netdev might become NULL, even if tls_is_sk_tx_device_offloaded was + * netdev might become NULL, even if tls_is_skb_tx_device_offloaded was * true. Rather continue processing this packet. */ if (unlikely(tls_netdev && tls_netdev != dev)) diff --git a/drivers/net/ethernet/fungible/funeth/funeth_tx.c b/drivers/net/ethernet/fungible/funeth/funeth_tx.c index 706d81e39a54..8ddefd3ec15b 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_tx.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_tx.c @@ -348,8 +348,7 @@ netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev) unsigned int tls_len = 0; unsigned int ndesc; - if (IS_ENABLED(CONFIG_TLS_DEVICE) && skb->sk && - tls_is_sk_tx_device_offloaded(skb->sk)) { + if (tls_is_skb_tx_device_offloaded(skb)) { skb = fun_tls_tx(skb, q, &tls_len); if (unlikely(!skb)) goto dropped; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index c964644ee866..bac4717548c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -125,7 +125,7 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev, #ifdef CONFIG_MLX5_EN_TLS 
/* May send WQEs. */ - if (mlx5e_ktls_skb_offloaded(skb)) + if (tls_is_skb_tx_device_offloaded(skb)) if (unlikely(!mlx5e_ktls_handle_tx_skb(dev, sq, skb, &state->tls))) return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 0e4c0a093293..efb2cf74ad6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -846,7 +846,7 @@ bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, tls_ctx = tls_get_ctx(skb->sk); tls_netdev = rcu_dereference_bh(tls_ctx->netdev); /* Don't WARN on NULL: if tls_device_down is running in parallel, - * netdev might become NULL, even if tls_is_sk_tx_device_offloaded was + * netdev might become NULL, even if tls_is_skb_tx_device_offloaded was * true. Rather continue processing this packet. */ if (WARN_ON_ONCE(tls_netdev && tls_netdev != netdev)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h index 2dd78dd4ad65..f87b65c560ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -49,11 +49,6 @@ mlx5e_ktls_rx_pending_resync_list(struct mlx5e_channel *c, int budget) return budget && test_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &c->async_icosq.state); } -static inline bool mlx5e_ktls_skb_offloaded(struct sk_buff *skb) -{ - return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk); -} - static inline void mlx5e_ktls_handle_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg, struct mlx5e_accel_tx_tls_state *state) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index b7cce746b5c0..49f2f081ebb5 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ 
b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -598,7 +598,7 @@ nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, if (likely(!dp->ktls_tx)) return skb; - if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) + if (!tls_is_skb_tx_device_offloaded(skb)) return skb; datalen = skb->len - skb_tcp_all_headers(skb); @@ -666,7 +666,7 @@ void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle) if (!tls_handle) return; - if (WARN_ON_ONCE(!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))) + if (WARN_ON_ONCE(!tls_is_skb_tx_device_offloaded(skb))) return; datalen = skb->len - skb_tcp_all_headers(skb); diff --git a/include/net/tls.h b/include/net/tls.h index b7d0f1e3058b..5e71dd3df8ca 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -370,10 +370,12 @@ struct sk_buff * tls_validate_xmit_skb_sw(struct sock *sk, struct net_device *dev, struct sk_buff *skb); -static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) +static inline bool tls_is_skb_tx_device_offloaded(const struct sk_buff *skb) { -#ifdef CONFIG_SOCK_VALIDATE_XMIT - return sk_fullsock(sk) && +#ifdef CONFIG_TLS_DEVICE + struct sock *sk = skb->sk; + + return sk && sk_fullsock(sk) && (smp_load_acquire(&sk->sk_validate_xmit_skb) == &tls_validate_xmit_skb); #else diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index b4864d55900f..b82770f68807 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1219,7 +1219,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) tls_device_attach(ctx, sk, netdev); up_read(&device_offload_lock); - /* following this assignment tls_is_sk_tx_device_offloaded + /* following this assignment tls_is_skb_tx_device_offloaded * will return true and the context might be accessed * by the netdev's xmit function. */ @@ -1372,7 +1372,7 @@ static int tls_device_down(struct net_device *netdev) list_for_each_entry_safe(ctx, tmp, &list, list) { /* Stop offloaded TX and switch to the fallback. 
- * tls_is_sk_tx_device_offloaded will return false. + * tls_is_skb_tx_device_offloaded will return false. */ WRITE_ONCE(ctx->sk->sk_validate_xmit_skb, tls_validate_xmit_skb_sw); -- cgit v1.2.3 From e84a1e1e683f3558e30f437d7c99df35afb8b52c Mon Sep 17 00:00:00 2001 From: Íñigo Huguet Date: Tue, 13 Jun 2023 15:38:54 +0200 Subject: sfc: fix XDP queues mode with legacy IRQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In systems without MSI-X capabilities, xdp_txq_queues_mode is calculated in efx_allocate_msix_channels, but when enabling MSI-X fails, it was not changed to a proper default value. This was leading to the driver thinking that it has dedicated XDP queues, when it didn't. Fix it by setting xdp_txq_queues_mode to the correct value if the driver falls back to MSI or legacy IRQ mode. The correct value is EFX_XDP_TX_QUEUES_BORROWED because there are no XDP dedicated queues. The issue is easily visible if the kernel is started with pci=nomsi: a call trace is then shown. It is not shown only with sfc's modparam interrupt_mode=2. Call trace example: WARNING: CPU: 2 PID: 663 at drivers/net/ethernet/sfc/efx_channels.c:828 efx_set_xdp_channels+0x124/0x260 [sfc] [...skip...] Call Trace: efx_set_channels+0x5c/0xc0 [sfc] efx_probe_nic+0x9b/0x15a [sfc] efx_probe_all+0x10/0x1a2 [sfc] efx_pci_probe_main+0x12/0x156 [sfc] efx_pci_probe_post_io+0x18/0x103 [sfc] efx_pci_probe.cold+0x154/0x257 [sfc] local_pci_probe+0x42/0x80 Fixes: 6215b608a8c4 ("sfc: last resort fallback for lack of xdp tx queues") Reported-by: Yanghang Liu Signed-off-by: Íñigo Huguet Acked-by: Martin Habets Signed-off-by: David S.
Miller --- drivers/net/ethernet/sfc/efx_channels.c | 2 ++ drivers/net/ethernet/sfc/siena/efx_channels.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index fcea3ea809d7..41b33a75333c 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -301,6 +301,7 @@ int efx_probe_interrupts(struct efx_nic *efx) efx->tx_channel_offset = 0; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; rc = pci_enable_msi(efx->pci_dev); if (rc == 0) { efx_get_channel(efx, 0)->irq = efx->pci_dev->irq; @@ -322,6 +323,7 @@ int efx_probe_interrupts(struct efx_nic *efx) efx->tx_channel_offset = efx_separate_tx_channels ? 1 : 0; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; efx->legacy_irq = efx->pci_dev->irq; } diff --git a/drivers/net/ethernet/sfc/siena/efx_channels.c b/drivers/net/ethernet/sfc/siena/efx_channels.c index 06ed74994e36..1776f7f8a7a9 100644 --- a/drivers/net/ethernet/sfc/siena/efx_channels.c +++ b/drivers/net/ethernet/sfc/siena/efx_channels.c @@ -302,6 +302,7 @@ int efx_siena_probe_interrupts(struct efx_nic *efx) efx->tx_channel_offset = 0; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; rc = pci_enable_msi(efx->pci_dev); if (rc == 0) { efx_get_channel(efx, 0)->irq = efx->pci_dev->irq; @@ -323,6 +324,7 @@ int efx_siena_probe_interrupts(struct efx_nic *efx) efx->tx_channel_offset = efx_siena_separate_tx_channels ? 
1 : 0; efx->n_xdp_channels = 0; efx->xdp_channel_offset = efx->n_channels; + efx->xdp_txq_queues_mode = EFX_XDP_TX_QUEUES_BORROWED; efx->legacy_irq = efx->pci_dev->irq; } -- cgit v1.2.3 From 30134b7c47bd28fdb4db4d12aef824e0579cfee4 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Wed, 14 Jun 2023 11:17:14 +0200 Subject: net: ethernet: stmicro: stmmac: fix possible memory leak in __stmmac_open Fix a possible memory leak in __stmmac_open when stmmac_init_phy fails. It's also needed to free everything allocated by stmmac_setup_dma_desc and not just the dma_conf struct. Drop free_dma_desc_resources from __stmmac_open and correctly call free_dma_desc_resources on each user of __stmmac_open on error. Reported-by: Jose Abreu Fixes: ba39b344e924 ("net: ethernet: stmicro: stmmac: generate stmmac dma conf before open") Signed-off-by: Christian Marangi Cc: stable@vger.kernel.org Reviewed-by: Simon Horman Reviewed-by: Jose Abreu Link: https://lore.kernel.org/r/20230614091714.15912-1-ansuelsmth@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 52cab9de05f2..87510951f4e8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3873,7 +3873,6 @@ irq_error: stmmac_hw_teardown(dev); init_error: - free_dma_desc_resources(priv, &priv->dma_conf); phylink_disconnect_phy(priv->phylink); init_phy_error: pm_runtime_put(priv->device); @@ -3891,6 +3890,9 @@ static int stmmac_open(struct net_device *dev) return PTR_ERR(dma_conf); ret = __stmmac_open(dev, dma_conf); + if (ret) + free_dma_desc_resources(priv, dma_conf); + kfree(dma_conf); return ret; } @@ -5633,12 +5635,15 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) stmmac_release(dev); ret 
= __stmmac_open(dev, dma_conf); - kfree(dma_conf); if (ret) { + free_dma_desc_resources(priv, dma_conf); + kfree(dma_conf); netdev_err(priv->dev, "failed reopening the interface after MTU change\n"); return ret; } + kfree(dma_conf); + stmmac_set_rx_mode(dev); } -- cgit v1.2.3 From 9a36e2d44d122fe73a2a76ba73f1d50a65cf8210 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Thu, 15 Jun 2023 11:34:00 +0800 Subject: octeon_ep: Add missing check for ioremap Add check for ioremap() and return the error if it fails in order to guarantee the success of ioremap(). Fixes: 862cd659a6fb ("octeon_ep: Add driver framework and device initialization") Signed-off-by: Jiasheng Jiang Reviewed-by: Kalesh AP Link: https://lore.kernel.org/r/20230615033400.2971-1-jiasheng@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index e1853da280f9..43eb6e871351 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -981,6 +981,9 @@ int octep_device_setup(struct octep_device *oct) oct->mmio[i].hw_addr = ioremap(pci_resource_start(oct->pdev, i * 2), pci_resource_len(oct->pdev, i * 2)); + if (!oct->mmio[i].hw_addr) + goto unmap_prev; + oct->mmio[i].mapped = 1; } @@ -1015,7 +1018,9 @@ int octep_device_setup(struct octep_device *oct) return 0; unsupported_dev: - for (i = 0; i < OCTEP_MMIO_REGIONS; i++) + i = OCTEP_MMIO_REGIONS; +unmap_prev: + while (i--) iounmap(oct->mmio[i].hw_addr); kfree(oct->conf); -- cgit v1.2.3 From f7d625adeb7bc6a9ec83d32d9615889969d64484 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Mon, 12 Jun 2023 12:14:48 +0000 Subject: net: ena: Add dynamic recycling mechanism for rx buffers The current implementation allocates page-sized rx buffers. 
As traffic may consist of different types and sizes of packets, in various cases, buffers are not fully used. This change (Dynamic RX Buffers - DRB) uses only the part of the allocated rx page needed for the incoming packet, and returns the rest of the unused page to be used again as an rx buffer for future packets. A threshold of 2K for unused space has been set in order to declare whether the remainder of the page can be reused again as an rx buffer. As a page may be reused, dma_sync_single_for_cpu() is added in order to sync the memory to the CPU side after it was owned by the HW. In addition, when the rx page can no longer be reused, it is unmapped using dma_unmap_page(), which implicitly syncs and then unmaps the entire page. In case the kernel still handles the skbs pointing to the previous buffers from that rx page, it may access garbage pointers, caused by the implicit sync overwriting them. The implicit dma sync is removed by replacing dma_unmap_page() with dma_unmap_page_attrs() with the DMA_ATTR_SKIP_CPU_SYNC flag. The functionality is disabled for XDP traffic to avoid handling several descriptors per packet.
Signed-off-by: Arthur Kiyanovski Signed-off-by: Shay Agroskin Signed-off-by: David Arinzon Link: https://lore.kernel.org/r/20230612121448.28829-1-darinzon@amazon.com Signed-off-by: Jakub Kicinski --- .../device_drivers/ethernet/amazon/ena.rst | 32 +++++ drivers/net/ethernet/amazon/ena/ena_admin_defs.h | 6 +- drivers/net/ethernet/amazon/ena/ena_netdev.c | 136 ++++++++++++++------- drivers/net/ethernet/amazon/ena/ena_netdev.h | 4 + 4 files changed, 136 insertions(+), 42 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst index 8bcb173e0353..491492677632 100644 --- a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst +++ b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst @@ -205,6 +205,7 @@ Adaptive coalescing can be switched on/off through `ethtool(8)`'s More information about Adaptive Interrupt Moderation (DIM) can be found in Documentation/networking/net_dim.rst +.. _`RX copybreak`: RX copybreak ============ The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK @@ -315,3 +316,34 @@ Rx - The new SKB is updated with the necessary information (protocol, checksum hw verify result, etc), and then passed to the network stack, using the NAPI interface function :code:`napi_gro_receive()`. + +Dynamic RX Buffers (DRB) +------------------------ + +Each RX descriptor in the RX ring is a single memory page (which is either 4KB +or 16KB long depending on system's configurations). +To reduce the memory allocations required when dealing with a high rate of small +packets, the driver tries to reuse the remaining RX descriptor's space if more +than 2KB of this page remain unused. + +A simple example of this mechanism is the following sequence of events: + +:: + + 1. 
Driver allocates page-sized RX buffer and passes it to hardware + +----------------------+ + |4KB RX Buffer | + +----------------------+ + + 2. A 300Bytes packet is received on this buffer + + 3. The driver increases the ref count on this page and returns it back to + HW as an RX buffer of size 4KB - 300Bytes = 3796 Bytes + +----+--------------------+ + |****|3796 Bytes RX Buffer| + +----+--------------------+ + +This mechanism isn't used when an XDP program is loaded, or when the +RX packet is less than rx_copybreak bytes (in which case the packet is +copied out of the RX buffer into the linear part of a new skb allocated +for it and the RX buffer remains the same size, see `RX copybreak`_). diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h index 466ad9470d1f..6de0d590be34 100644 --- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h @@ -869,7 +869,9 @@ struct ena_admin_host_info { * 2 : interrupt_moderation * 3 : rx_buf_mirroring * 4 : rss_configurable_function_key - * 31:5 : reserved + * 5 : reserved + * 6 : rx_page_reuse + * 31:7 : reserved */ u32 driver_supported_features; }; @@ -1184,6 +1186,8 @@ struct ena_admin_ena_mmio_req_read_less_resp { #define ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK BIT(3) #define ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_SHIFT 4 #define ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK BIT(4) +#define ENA_ADMIN_HOST_INFO_RX_PAGE_REUSE_SHIFT 6 +#define ENA_ADMIN_HOST_INFO_RX_PAGE_REUSE_MASK BIT(6) /* aenq_common_desc */ #define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index e6a6efaeb87c..d19593fae226 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1023,7 +1023,7 @@ static int ena_alloc_rx_buffer(struct ena_ring *rx_ring, int tailroom; 
/* restore page offset value in case it has been changed by device */ - rx_info->page_offset = headroom; + rx_info->buf_offset = headroom; /* if previous allocated page is not used */ if (unlikely(rx_info->page)) @@ -1040,6 +1040,8 @@ static int ena_alloc_rx_buffer(struct ena_ring *rx_ring, tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); rx_info->page = page; + rx_info->dma_addr = dma; + rx_info->page_offset = 0; ena_buf = &rx_info->ena_buf; ena_buf->paddr = dma + headroom; ena_buf->len = ENA_PAGE_SIZE - headroom - tailroom; @@ -1047,14 +1049,12 @@ static int ena_alloc_rx_buffer(struct ena_ring *rx_ring, return 0; } -static void ena_unmap_rx_buff(struct ena_ring *rx_ring, - struct ena_rx_buffer *rx_info) +static void ena_unmap_rx_buff_attrs(struct ena_ring *rx_ring, + struct ena_rx_buffer *rx_info, + unsigned long attrs) { - struct ena_com_buf *ena_buf = &rx_info->ena_buf; - - dma_unmap_page(rx_ring->dev, ena_buf->paddr - rx_ring->rx_headroom, - ENA_PAGE_SIZE, - DMA_BIDIRECTIONAL); + dma_unmap_page_attrs(rx_ring->dev, rx_info->dma_addr, ENA_PAGE_SIZE, + DMA_BIDIRECTIONAL, attrs); } static void ena_free_rx_page(struct ena_ring *rx_ring, @@ -1068,7 +1068,7 @@ static void ena_free_rx_page(struct ena_ring *rx_ring, return; } - ena_unmap_rx_buff(rx_ring, rx_info); + ena_unmap_rx_buff_attrs(rx_ring, rx_info, 0); __free_page(page); rx_info->page = NULL; @@ -1406,14 +1406,14 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) return tx_pkts; } -static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag) +static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag, u16 len) { struct sk_buff *skb; if (!first_frag) - skb = napi_alloc_skb(rx_ring->napi, rx_ring->rx_copybreak); + skb = napi_alloc_skb(rx_ring->napi, len); else - skb = napi_build_skb(first_frag, ENA_PAGE_SIZE); + skb = napi_build_skb(first_frag, len); if (unlikely(!skb)) { ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1, @@ -1422,24 
+1422,47 @@ static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag) netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, "Failed to allocate skb. first_frag %s\n", first_frag ? "provided" : "not provided"); - return NULL; } return skb; } +static bool ena_try_rx_buf_page_reuse(struct ena_rx_buffer *rx_info, u16 buf_len, + u16 len, int pkt_offset) +{ + struct ena_com_buf *ena_buf = &rx_info->ena_buf; + + /* More than ENA_MIN_RX_BUF_SIZE left in the reused buffer + * for data + headroom + tailroom. + */ + if (SKB_DATA_ALIGN(len + pkt_offset) + ENA_MIN_RX_BUF_SIZE <= ena_buf->len) { + page_ref_inc(rx_info->page); + rx_info->page_offset += buf_len; + ena_buf->paddr += buf_len; + ena_buf->len -= buf_len; + return true; + } + + return false; +} + static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, struct ena_com_rx_buf_info *ena_bufs, u32 descs, u16 *next_to_clean) { + int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + bool is_xdp_loaded = ena_xdp_present_ring(rx_ring); struct ena_rx_buffer *rx_info; struct ena_adapter *adapter; + int page_offset, pkt_offset; + dma_addr_t pre_reuse_paddr; u16 len, req_id, buf = 0; + bool reuse_rx_buf_page; struct sk_buff *skb; - void *page_addr; - u32 page_offset; - void *data_addr; + void *buf_addr; + int buf_offset; + u16 buf_len; len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; @@ -1459,34 +1482,30 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, "rx_info %p page %p\n", rx_info, rx_info->page); - /* save virt address of first buffer */ - page_addr = page_address(rx_info->page); + buf_offset = rx_info->buf_offset; + pkt_offset = buf_offset - rx_ring->rx_headroom; page_offset = rx_info->page_offset; - data_addr = page_addr + page_offset; - - prefetch(data_addr); + buf_addr = page_address(rx_info->page) + page_offset; if (len <= rx_ring->rx_copybreak) { - skb = ena_alloc_skb(rx_ring, NULL); + skb = ena_alloc_skb(rx_ring, NULL, len); if (unlikely(!skb)) return NULL; - 
netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, - "RX allocated small packet. len %d. data_len %d\n", - skb->len, skb->data_len); - /* sync this buffer for CPU use */ dma_sync_single_for_cpu(rx_ring->dev, - dma_unmap_addr(&rx_info->ena_buf, paddr), + dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset, len, DMA_FROM_DEVICE); - skb_copy_to_linear_data(skb, data_addr, len); + skb_copy_to_linear_data(skb, buf_addr + buf_offset, len); dma_sync_single_for_device(rx_ring->dev, - dma_unmap_addr(&rx_info->ena_buf, paddr), + dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset, len, DMA_FROM_DEVICE); skb_put(skb, len); + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "RX allocated small packet. len %d.\n", skb->len); skb->protocol = eth_type_trans(skb, rx_ring->netdev); rx_ring->free_ids[*next_to_clean] = req_id; *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs, @@ -1494,14 +1513,28 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, return skb; } - ena_unmap_rx_buff(rx_ring, rx_info); + buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom); + + pre_reuse_paddr = dma_unmap_addr(&rx_info->ena_buf, paddr); + + /* If XDP isn't loaded try to reuse part of the RX buffer */ + reuse_rx_buf_page = !is_xdp_loaded && + ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset); - skb = ena_alloc_skb(rx_ring, page_addr); + dma_sync_single_for_cpu(rx_ring->dev, + pre_reuse_paddr + pkt_offset, + len, + DMA_FROM_DEVICE); + + if (!reuse_rx_buf_page) + ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC); + + skb = ena_alloc_skb(rx_ring, buf_addr, buf_len); if (unlikely(!skb)) return NULL; /* Populate skb's linear part */ - skb_reserve(skb, page_offset); + skb_reserve(skb, buf_offset); skb_put(skb, len); skb->protocol = eth_type_trans(skb, rx_ring->netdev); @@ -1510,7 +1543,8 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, "RX skb updated. len %d. 
data_len %d\n", skb->len, skb->data_len); - rx_info->page = NULL; + if (!reuse_rx_buf_page) + rx_info->page = NULL; rx_ring->free_ids[*next_to_clean] = req_id; *next_to_clean = @@ -1525,10 +1559,28 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, rx_info = &rx_ring->rx_buffer_info[req_id]; - ena_unmap_rx_buff(rx_ring, rx_info); + /* rx_info->buf_offset includes rx_ring->rx_headroom */ + buf_offset = rx_info->buf_offset; + pkt_offset = buf_offset - rx_ring->rx_headroom; + buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom); + page_offset = rx_info->page_offset; + + pre_reuse_paddr = dma_unmap_addr(&rx_info->ena_buf, paddr); + + reuse_rx_buf_page = !is_xdp_loaded && + ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset); + + dma_sync_single_for_cpu(rx_ring->dev, + pre_reuse_paddr + pkt_offset, + len, + DMA_FROM_DEVICE); + + if (!reuse_rx_buf_page) + ena_unmap_rx_buff_attrs(rx_ring, rx_info, + DMA_ATTR_SKIP_CPU_SYNC); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, - rx_info->page_offset, len, ENA_PAGE_SIZE); + page_offset + buf_offset, len, buf_len); } while (1); @@ -1626,7 +1678,7 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; xdp_prepare_buff(xdp, page_address(rx_info->page), - rx_info->page_offset, + rx_info->buf_offset, rx_ring->ena_bufs[0].len, false); /* If for some reason we received a bigger packet than * we expect, then we simply drop it @@ -1638,7 +1690,7 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) /* The xdp program might expand the headers */ if (ret == ENA_XDP_PASS) { - rx_info->page_offset = xdp->data - xdp->data_hard_start; + rx_info->buf_offset = xdp->data - xdp->data_hard_start; rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data; } @@ -1693,7 +1745,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, /* First descriptor might have an offset 
set by the device */ rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; - rx_info->page_offset += ena_rx_ctx.pkt_offset; + rx_info->buf_offset += ena_rx_ctx.pkt_offset; netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n", @@ -1723,8 +1775,9 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, * from RX side. */ if (xdp_verdict & ENA_XDP_FORWARDED) { - ena_unmap_rx_buff(rx_ring, - &rx_ring->rx_buffer_info[req_id]); + ena_unmap_rx_buff_attrs(rx_ring, + &rx_ring->rx_buffer_info[req_id], + 0); rx_ring->rx_buffer_info[req_id].page = NULL; } } @@ -3233,7 +3286,8 @@ static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pd ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK | ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK | ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK | - ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK; + ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK | + ENA_ADMIN_HOST_INFO_RX_PAGE_REUSE_MASK; rc = ena_com_set_host_attributes(ena_dev); if (rc) { diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 5a0d4ee76172..248b715b4d68 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -51,6 +51,8 @@ #define ENA_DEFAULT_RING_SIZE (1024) #define ENA_MIN_RING_SIZE (256) +#define ENA_MIN_RX_BUF_SIZE (2048) + #define ENA_MIN_NUM_IO_QUEUES (1) #define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2) @@ -175,7 +177,9 @@ struct ena_tx_buffer { struct ena_rx_buffer { struct sk_buff *skb; struct page *page; + dma_addr_t dma_addr; u32 page_offset; + u32 buf_offset; struct ena_com_buf ena_buf; } ____cacheline_aligned; -- cgit v1.2.3 From d9ffa069e006fa2873b94fbf2387546942d4f85b Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 13 Jun 2023 16:46:39 +1000 Subject: sunvnet: fix sparc64 build error after gso code 
split After merging the net-next tree, today's linux-next build (sparc64 defconfig) failed like this: drivers/net/ethernet/sun/sunvnet_common.c: In function 'vnet_handle_offloads': drivers/net/ethernet/sun/sunvnet_common.c:1277:16: error: implicit declaration of function 'skb_gso_segment'; did you mean 'skb_gso_reset'? [-Werror=implicit-function-declaration] 1277 | segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO); | ^~~~~~~~~~~~~~~ | skb_gso_reset drivers/net/ethernet/sun/sunvnet_common.c:1277:14: warning: assignment to 'struct sk_buff *' from 'int' makes pointer from integer without a cast [-Wint-conversion] 1277 | segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO); | ^ Fixes: d457a0e329b0 ("net: move gso declarations and functions to their own files") Signed-off-by: Stephen Rothwell Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230613164639.164b2991@canb.auug.org.au Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sun/sunvnet_common.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index a6211b95ed17..3525d5c0d694 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -25,6 +25,7 @@ #endif #include +#include #include #include -- cgit v1.2.3 From 8f72fb1578a910571b3f25457e3b7855edfac6cf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 14 Jun 2023 20:52:31 -0700 Subject: eth: fs_enet: fix print format for resource size Randy reported that linux-next build warns on PowerPC: drivers/net/ethernet/freescale/fs_enet/mii-fec.c: In function 'fs_enet_mdio_probe': drivers/net/ethernet/freescale/fs_enet/mii-fec.c:130:50: warning: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'resource_size_t' {aka 'long long unsigned int'} [-Wformat=] 130 | snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", res.start); | ~^ ~~~~~~~~~ | | | | | 
resource_size_t {aka long long unsigned int} | unsigned int | %llx Use the right print format. Link: https://lore.kernel.org/all/8f9f8d38-d9c7-9f1b-feb0-103d76902d14@infradead.org/ Reported-by: Randy Dunlap Acked-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/20230615035231.2184880-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fs_enet/mii-fec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c index d37d7a19a759..59a8f0bd0f5c 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mii-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mii-fec.c @@ -127,7 +127,7 @@ static int fs_enet_mdio_probe(struct platform_device *ofdev) if (ret) goto out_res; - snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", res.start); + snprintf(new_bus->id, MII_BUS_ID_SIZE, "%pap", &res.start); fec->fecp = ioremap(res.start, resource_size(&res)); if (!fec->fecp) { -- cgit v1.2.3 From c45a6d1a23c50b97afd7d767b86e28ea5722e7b7 Mon Sep 17 00:00:00 2001 From: Piotr Gardocki Date: Wed, 14 Jun 2023 16:53:01 +0200 Subject: i40e: remove unnecessary check for old MAC == new MAC The check has been moved to core. The ndo_set_mac_address callback is not being called with new MAC address equal to the old one anymore. 
Signed-off-by: Piotr Gardocki Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_main.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b847bd105b16..29ad1797adce 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1788,12 +1788,6 @@ static int i40e_set_mac(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - if (ether_addr_equal(netdev->dev_addr, addr->sa_data)) { - netdev_info(netdev, "already using mac address %pM\n", - addr->sa_data); - return 0; - } - if (test_bit(__I40E_DOWN, pf->state) || test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) return -EADDRNOTAVAIL; -- cgit v1.2.3 From 96868cca7971a5a3887717fdacd44b281fb87cc9 Mon Sep 17 00:00:00 2001 From: Piotr Gardocki Date: Wed, 14 Jun 2023 16:53:02 +0200 Subject: ice: remove unnecessary check for old MAC == new MAC The check has been moved to core. The ndo_set_mac_address callback is not being called with new MAC address equal to the old one anymore. 
Signed-off-by: Piotr Gardocki Reviewed-by: Simon Horman Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index a0283b5bf65f..65bf399a0efc 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5624,11 +5624,6 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) if (!is_valid_ether_addr(mac)) return -EADDRNOTAVAIL; - if (ether_addr_equal(netdev->dev_addr, mac)) { - netdev_dbg(netdev, "already using mac %pM\n", mac); - return 0; - } - if (test_bit(ICE_DOWN, pf->state) || ice_is_reset_in_progress(pf->state)) { netdev_err(netdev, "can't set mac %pM. device not ready\n", -- cgit v1.2.3 From 18da174d865a87d47d2f33f5b0a322efcf067728 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 15 Jun 2023 00:20:35 +0800 Subject: net: ethernet: litex: add support for 64 bit stats Implement 64 bit per cpu stats to fix the overflow of netdev->stats on 32 bit platforms. To simplify the code, we use net core pcpu_sw_netstats infrastructure. One small drawback is some memory overhead because litex uses just one queue, but we allocate the counters per cpu. 
Signed-off-by: Jisheng Zhang Reviewed-by: Simon Horman Acked-by: Gabriel Somlo Link: https://lore.kernel.org/r/20230614162035.300-1-jszhang@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/litex/litex_liteeth.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c index 35f24e0f0934..ffa96059079c 100644 --- a/drivers/net/ethernet/litex/litex_liteeth.c +++ b/drivers/net/ethernet/litex/litex_liteeth.c @@ -78,8 +78,7 @@ static int liteeth_rx(struct net_device *netdev) memcpy_fromio(data, priv->rx_base + rx_slot * priv->slot_size, len); skb->protocol = eth_type_trans(skb, netdev); - netdev->stats.rx_packets++; - netdev->stats.rx_bytes += len; + dev_sw_netstats_rx_add(netdev, len); return netif_rx(skb); @@ -185,8 +184,7 @@ static netdev_tx_t liteeth_start_xmit(struct sk_buff *skb, litex_write16(priv->base + LITEETH_READER_LENGTH, skb->len); litex_write8(priv->base + LITEETH_READER_START, 1); - netdev->stats.tx_bytes += skb->len; - netdev->stats.tx_packets++; + dev_sw_netstats_tx_add(netdev, 1, skb->len); priv->tx_slot = (priv->tx_slot + 1) % priv->num_tx_slots; dev_kfree_skb_any(skb); @@ -194,9 +192,17 @@ static netdev_tx_t liteeth_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } +static void +liteeth_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) +{ + netdev_stats_to_stats64(stats, &netdev->stats); + dev_fetch_sw_netstats(stats, netdev->tstats); +} + static const struct net_device_ops liteeth_netdev_ops = { .ndo_open = liteeth_open, .ndo_stop = liteeth_stop, + .ndo_get_stats64 = liteeth_get_stats64, .ndo_start_xmit = liteeth_start_xmit, }; @@ -242,6 +248,11 @@ static int liteeth_probe(struct platform_device *pdev) priv->netdev = netdev; priv->dev = &pdev->dev; + netdev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev, + struct pcpu_sw_netstats); + if 
(!netdev->tstats) + return -ENOMEM; + irq = platform_get_irq(pdev, 0); if (irq < 0) return irq; -- cgit v1.2.3 From c08afcdcf95288c627267bb20002e8baaf3394e1 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 15 Jun 2023 22:52:43 +0100 Subject: sfc: do not try to call tc functions when CONFIG_SFC_SRIOV=n Functions efx_tc_netdev_event and efx_tc_netevent_event do not exist in that case as object files tc_bindings.o and tc_encap_actions.o are not built, so the calls to them from ef100_netdev_event and ef100_netevent_event cause link errors. Wrap the corresponding header files (tc_bindings.h, tc_encap_actions.h) with #if IS_ENABLED(CONFIG_SFC_SRIOV), and add an #else with static inline stubs for these two functions. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202306102026.ISK5JfUQ-lkp@intel.com/ Fixes: 7e5e7d800011 ("sfc: neighbour lookup for TC encap action offload") Signed-off-by: Edward Cree Reviewed-by: Martin Habets Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/tc_bindings.h | 12 ++++++++++++ drivers/net/ethernet/sfc/tc_encap_actions.h | 11 +++++++++++ 2 files changed, 23 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/tc_bindings.h b/drivers/net/ethernet/sfc/tc_bindings.h index 095ddeb59eb3..a326d23d322b 100644 --- a/drivers/net/ethernet/sfc/tc_bindings.h +++ b/drivers/net/ethernet/sfc/tc_bindings.h @@ -12,6 +12,7 @@ #define EFX_TC_BINDINGS_H #include "net_driver.h" +#if IS_ENABLED(CONFIG_SFC_SRIOV) #include struct efx_rep; @@ -28,4 +29,15 @@ int efx_tc_indr_setup_cb(struct net_device *net_dev, struct Qdisc *sch, void (*cleanup)(struct flow_block_cb *block_cb)); int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event, struct net_device *net_dev); + +#else /* CONFIG_SFC_SRIOV */ + +static inline int efx_tc_netdev_event(struct efx_nic *efx, unsigned long event, + struct net_device *net_dev) +{ + return NOTIFY_DONE; +} + +#endif /* CONFIG_SFC_SRIOV */ + #endif /* 
EFX_TC_BINDINGS_H */ diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.h b/drivers/net/ethernet/sfc/tc_encap_actions.h index 4d755fb92daf..c3c7904ad7ff 100644 --- a/drivers/net/ethernet/sfc/tc_encap_actions.h +++ b/drivers/net/ethernet/sfc/tc_encap_actions.h @@ -12,6 +12,7 @@ #define EFX_TC_ENCAP_ACTIONS_H #include "net_driver.h" +#if IS_ENABLED(CONFIG_SFC_SRIOV) #include #include @@ -100,4 +101,14 @@ void efx_tc_unregister_egdev(struct efx_nic *efx, struct net_device *net_dev); int efx_tc_netevent_event(struct efx_nic *efx, unsigned long event, void *ptr); +#else /* CONFIG_SFC_SRIOV */ + +static inline int efx_tc_netevent_event(struct efx_nic *efx, + unsigned long event, void *ptr) +{ + return NOTIFY_DONE; +} + +#endif /* CONFIG_SFC_SRIOV */ + #endif /* EFX_TC_ENCAP_ACTIONS_H */ -- cgit v1.2.3 From 92501fa6e4217aa0b85b092f91b2649b3c214a75 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Sat, 20 May 2023 11:33:14 +0300 Subject: net/mlx5: Ack on sync_reset_request only if PF can do reset_now Verify at reset_request stage that PF is capable to do reset_now. In case PF is not capable, notify the firmware that the sync reset can not happen and so firmware will abort the sync reset at early stage and will not send reset_now event to any PF. 
Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 57 +++++++++++++++++----- 1 file changed, 44 insertions(+), 13 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 50022e7565f1..952cc340b510 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -276,6 +276,44 @@ static void mlx5_fw_live_patch_event(struct work_struct *work) mlx5_core_err(dev, "Failed to reload FW tracer\n"); } +static int mlx5_check_dev_ids(struct mlx5_core_dev *dev, u16 dev_id) +{ + struct pci_bus *bridge_bus = dev->pdev->bus; + struct pci_dev *sdev; + u16 sdev_id; + int err; + + /* Check that all functions under the pci bridge are PFs of + * this device otherwise fail this function. + */ + list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { + err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id); + if (err) + return err; + if (sdev_id != dev_id) { + mlx5_core_warn(dev, "unrecognized dev_id (0x%x)\n", sdev_id); + return -EPERM; + } + } + return 0; +} + +static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev) +{ + u16 dev_id; + int err; + + if (!MLX5_CAP_GEN(dev, fast_teardown)) { + mlx5_core_warn(dev, "fast teardown is not supported by firmware\n"); + return -EOPNOTSUPP; + } + + err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); + if (err) + return false; + return (!mlx5_check_dev_ids(dev, dev_id)); +} + static void mlx5_sync_reset_request_event(struct work_struct *work) { struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, @@ -283,7 +321,8 @@ static void mlx5_sync_reset_request_event(struct work_struct *work) struct mlx5_core_dev *dev = fw_reset->dev; int err; - if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags)) { + if 
(test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) || + !mlx5_is_reset_now_capable(dev)) { err = mlx5_fw_reset_set_reset_sync_nack(dev); mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s", err ? "Failed" : "Sent"); @@ -303,26 +342,18 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) { struct pci_bus *bridge_bus = dev->pdev->bus; struct pci_dev *bridge = bridge_bus->self; - u16 reg16, dev_id, sdev_id; unsigned long timeout; struct pci_dev *sdev; + u16 reg16, dev_id; int cap, err; u32 reg32; - /* Check that all functions under the pci bridge are PFs of - * this device otherwise fail this function. - */ err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); if (err) return err; - list_for_each_entry(sdev, &bridge_bus->devices, bus_list) { - err = pci_read_config_word(sdev, PCI_DEVICE_ID, &sdev_id); - if (err) - return err; - if (sdev_id != dev_id) - return -EPERM; - } - + err = mlx5_check_dev_ids(dev, dev_id); + if (err) + return err; cap = pci_find_capability(bridge, PCI_CAP_ID_EXP); if (!cap) return -EOPNOTSUPP; -- cgit v1.2.3 From 8bb42ed4210e342631f63d32f7ed87b722968da6 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 17 May 2023 11:53:50 +0300 Subject: net/mlx5: Expose timeout for sync reset unload stage Expose a new timeout in the Default Timeouts Register to be used on sync reset flow running on smart NIC. In this flow the driver should know how much time to wait from getting unload request till firmware will ask the PF to continue to next stage of the flow. 
Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h | 1 + include/linux/mlx5/mlx5_ifc.h | 4 +++- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c index 696e45e2bd06..a87d0178ebf3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -24,7 +24,8 @@ static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = { [MLX5_TO_TEARDOWN_MS] = 3000, [MLX5_TO_FSM_REACTIVATE_MS] = 5000, [MLX5_TO_RECLAIM_PAGES_MS] = 5000, - [MLX5_TO_RECLAIM_VFS_PAGES_MS] = 120000 + [MLX5_TO_RECLAIM_VFS_PAGES_MS] = 120000, + [MLX5_TO_RESET_UNLOAD_MS] = 300000 }; static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_types type) @@ -146,6 +147,7 @@ static int tout_query_dtor(struct mlx5_core_dev *dev) MLX5_TIMEOUT_FILL(fsm_reactivate_to, out, dev, MLX5_TO_FSM_REACTIVATE_MS, 0); MLX5_TIMEOUT_FILL(reclaim_pages_to, out, dev, MLX5_TO_RECLAIM_PAGES_MS, 0); MLX5_TIMEOUT_FILL(reclaim_vfs_pages_to, out, dev, MLX5_TO_RECLAIM_VFS_PAGES_MS, 0); + MLX5_TIMEOUT_FILL(reset_unload_to, out, dev, MLX5_TO_RESET_UNLOAD_MS, 0); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h index bc9e9aeda847..99e0a05526fe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h @@ -26,6 +26,7 @@ enum mlx5_timeouts_types { MLX5_TO_FSM_REACTIVATE_MS, MLX5_TO_RECLAIM_PAGES_MS, MLX5_TO_RECLAIM_VFS_PAGES_MS, + MLX5_TO_RESET_UNLOAD_MS, MAX_TIMEOUT_TYPES }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1f4f62cb9f34..14892e795808 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ 
b/include/linux/mlx5/mlx5_ifc.h @@ -3117,7 +3117,9 @@ struct mlx5_ifc_dtor_reg_bits { struct mlx5_ifc_default_timeout_bits reclaim_vfs_pages_to; - u8 reserved_at_1c0[0x40]; + struct mlx5_ifc_default_timeout_bits reset_unload_to; + + u8 reserved_at_1c0[0x20]; }; enum { -- cgit v1.2.3 From 6f8551f8d9e44894ea9ca0748b5523767d7aeacb Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 14 Jun 2023 06:47:58 +0300 Subject: net/mlx5: Check DTOR entry value is not zero The Default Timeout Register (DTOR) provides timeout values to driver for flows that are device dependent. Zero value for DTOR entry is not valid and should not be used. In case of reading zero value from DTOR, the driver should use the hard coded SW default value instead. Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c index a87d0178ebf3..e223e0e46433 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -119,7 +119,8 @@ u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type) #define MLX5_TIMEOUT_FILL(fld, reg_out, dev, to_type, to_extra) \ ({ \ u64 fw_to = MLX5_TIMEOUT_QUERY(fld, reg_out); \ - tout_set(dev, fw_to + (to_extra), to_type); \ + if (fw_to) \ + tout_set(dev, fw_to + (to_extra), to_type); \ fw_to; \ }) -- cgit v1.2.3 From 7a9770f1bfeaeddf5afabd3244e2c4c4966be37d Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 17 May 2023 16:07:40 +0300 Subject: net/mlx5: Handle sync reset unload event Added a new event handler to firmware sync reset, which is used to support firmware sync reset flow on smart NIC. Adding this new stage to the flow enables the firmware to ensure host PFs unload before ECPFs unload, to avoid race of PFs recovery. 
If firmware sends sync_reset_unload event to driver the driver should unload and close all HW resources of the function. Once the driver finishes unloading part, it can't get any more events from firmware as event queues are closed, so it polls the reset state field to know when to continue to next stage of the sync reset flow. Added capability bit for supporting sync_reset_unload event. Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 103 ++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 + include/linux/mlx5/device.h | 1 + include/linux/mlx5/mlx5_ifc.h | 3 +- 4 files changed, 104 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 952cc340b510..7af2b14ab5d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -21,6 +21,7 @@ struct mlx5_fw_reset { struct workqueue_struct *wq; struct work_struct fw_live_patch_work; struct work_struct reset_request_work; + struct work_struct reset_unload_work; struct work_struct reset_reload_work; struct work_struct reset_now_work; struct work_struct reset_abort_work; @@ -30,6 +31,26 @@ struct mlx5_fw_reset { int ret; }; +enum { + MLX5_FW_RST_STATE_IDLE = 0, + MLX5_FW_RST_STATE_TOGGLE_REQ = 4, +}; + +enum { + MLX5_RST_STATE_BIT_NUM = 12, + MLX5_RST_ACK_BIT_NUM = 22, +}; + +static u8 mlx5_get_fw_rst_state(struct mlx5_core_dev *dev) +{ + return (ioread32be(&dev->iseg->initializing) >> MLX5_RST_STATE_BIT_NUM) & 0xF; +} + +static void mlx5_set_fw_rst_ack(struct mlx5_core_dev *dev) +{ + iowrite32be(BIT(MLX5_RST_ACK_BIT_NUM), &dev->iseg->initializing); +} + static int mlx5_fw_reset_enable_remote_dev_reset_set(struct devlink *devlink, u32 id, struct devlink_param_gset_ctx *ctx) { @@ -155,7 +176,7 @@ int 
mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false); } -static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) +static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unloaded) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; @@ -163,7 +184,8 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { complete(&fw_reset->done); } else { - mlx5_unload_one(dev, false); + if (!unloaded) + mlx5_unload_one(dev, false); if (mlx5_health_wait_pci_up(dev)) mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); else @@ -204,7 +226,7 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work) mlx5_sync_reset_clear_reset_requested(dev, false); mlx5_enter_error_state(dev, true); - mlx5_fw_reset_complete_reload(dev); + mlx5_fw_reset_complete_reload(dev, false); } #define MLX5_RESET_POLL_INTERVAL (HZ / 10) @@ -458,7 +480,70 @@ static void mlx5_sync_reset_now_event(struct work_struct *work) mlx5_enter_error_state(dev, true); done: fw_reset->ret = err; - mlx5_fw_reset_complete_reload(dev); + mlx5_fw_reset_complete_reload(dev, false); +} + +static void mlx5_sync_reset_unload_event(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset; + struct mlx5_core_dev *dev; + unsigned long timeout; + bool reset_action; + u8 rst_state; + int err; + + fw_reset = container_of(work, struct mlx5_fw_reset, reset_unload_work); + dev = fw_reset->dev; + + if (mlx5_sync_reset_clear_reset_requested(dev, false)) + return; + + mlx5_core_warn(dev, "Sync Reset Unload. 
Function is forced down.\n"); + + err = mlx5_cmd_fast_teardown_hca(dev); + if (err) + mlx5_core_warn(dev, "Fast teardown failed, unloading, err %d\n", err); + else + mlx5_enter_error_state(dev, true); + + if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) + mlx5_unload_one_devl_locked(dev, false); + else + mlx5_unload_one(dev, false); + + mlx5_set_fw_rst_ack(dev); + mlx5_core_warn(dev, "Sync Reset Unload done, device reset expected\n"); + + reset_action = false; + timeout = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, RESET_UNLOAD)); + do { + rst_state = mlx5_get_fw_rst_state(dev); + if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ || + rst_state == MLX5_FW_RST_STATE_IDLE) { + reset_action = true; + break; + } + msleep(20); + } while (!time_after(jiffies, timeout)); + + if (!reset_action) { + mlx5_core_err(dev, "Got timeout waiting for sync reset action, state = %u\n", + rst_state); + fw_reset->ret = -ETIMEDOUT; + goto done; + } + + mlx5_core_warn(dev, "Sync Reset, got reset action. 
rst_state = %u\n", rst_state); + if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ) { + err = mlx5_pci_link_toggle(dev); + if (err) { + mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, err %d\n", err); + fw_reset->ret = err; + } + } + +done: + mlx5_fw_reset_complete_reload(dev, true); } static void mlx5_sync_reset_abort_event(struct work_struct *work) @@ -483,6 +568,9 @@ static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct case MLX5_SYNC_RST_STATE_RESET_REQUEST: queue_work(fw_reset->wq, &fw_reset->reset_request_work); break; + case MLX5_SYNC_RST_STATE_RESET_UNLOAD: + queue_work(fw_reset->wq, &fw_reset->reset_unload_work); + break; case MLX5_SYNC_RST_STATE_RESET_NOW: queue_work(fw_reset->wq, &fw_reset->reset_now_work); break; @@ -517,10 +605,13 @@ static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long acti int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) { unsigned long pci_sync_update_timeout = mlx5_tout_ms(dev, PCI_SYNC_UPDATE); - unsigned long timeout = msecs_to_jiffies(pci_sync_update_timeout); struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + unsigned long timeout; int err; + if (MLX5_CAP_GEN(dev, pci_sync_for_fw_update_with_driver_unload)) + pci_sync_update_timeout += mlx5_tout_ms(dev, RESET_UNLOAD); + timeout = msecs_to_jiffies(pci_sync_update_timeout); if (!wait_for_completion_timeout(&fw_reset->done, timeout)) { mlx5_core_warn(dev, "FW sync reset timeout after %lu seconds\n", pci_sync_update_timeout / 1000); @@ -557,6 +648,7 @@ void mlx5_drain_fw_reset(struct mlx5_core_dev *dev) set_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, &fw_reset->reset_flags); cancel_work_sync(&fw_reset->fw_live_patch_work); cancel_work_sync(&fw_reset->reset_request_work); + cancel_work_sync(&fw_reset->reset_unload_work); cancel_work_sync(&fw_reset->reset_reload_work); cancel_work_sync(&fw_reset->reset_now_work); cancel_work_sync(&fw_reset->reset_abort_work); @@ -595,6 +687,7 @@ int mlx5_fw_reset_init(struct mlx5_core_dev 
*dev) INIT_WORK(&fw_reset->fw_live_patch_work, mlx5_fw_live_patch_event); INIT_WORK(&fw_reset->reset_request_work, mlx5_sync_reset_request_event); + INIT_WORK(&fw_reset->reset_unload_work, mlx5_sync_reset_unload_event); INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work); INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event); INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 6fa314f8e5ee..88dbea6631d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -619,6 +619,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_event)) MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_event, 1); + if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_with_driver_unload)) + MLX5_SET(cmd_hca_cap, set_hca_cap, + pci_sync_for_fw_update_with_driver_unload, 1); if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports)) MLX5_SET(cmd_hca_cap, diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index c0af74efd3cb..80cc12a9a531 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -716,6 +716,7 @@ enum sync_rst_state_type { MLX5_SYNC_RST_STATE_RESET_REQUEST = 0x0, MLX5_SYNC_RST_STATE_RESET_NOW = 0x1, MLX5_SYNC_RST_STATE_RESET_ABORT = 0x2, + MLX5_SYNC_RST_STATE_RESET_UNLOAD = 0x3, }; struct mlx5_eqe_sync_fw_update { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 14892e795808..d61dcb5d7cd5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1755,7 +1755,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_328[0x2]; u8 relaxed_ordering_read[0x1]; u8 log_max_pd[0x5]; - u8 reserved_at_330[0x7]; + u8 reserved_at_330[0x6]; + u8 pci_sync_for_fw_update_with_driver_unload[0x1]; u8 vnic_env_cnt_steering_fail[0x1]; 
u8 reserved_at_338[0x1]; u8 q_counter_aggregation[0x1]; -- cgit v1.2.3 From f405787a0abaf14e332aa6d1d924e75970332e68 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 1 Jun 2023 09:34:35 +0200 Subject: net/mlx5: Create eswitch debugfs root directory Following patch in series uses the new directory for bridge FDB debugfs. The new directory is intended for all future eswitch-specific debugfs files. Signed-off-by: Vlad Buslov Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++++ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2af9c4646bc7..5aaedbf71783 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -31,6 +31,7 @@ */ #include +#include #include #include #include @@ -1765,6 +1766,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->manager_vport = mlx5_eswitch_manager_vport(dev); esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev); + esw->debugfs_root = debugfs_create_dir("esw", mlx5_debugfs_get_dev_root(dev)); esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { err = -ENOMEM; @@ -1818,6 +1820,7 @@ reps_err: abort: if (esw->work_queue) destroy_workqueue(esw->work_queue); + debugfs_remove_recursive(esw->debugfs_root); kfree(esw); unregister_param: devl_params_unregister(priv_to_devlink(dev), mlx5_eswitch_params, @@ -1844,6 +1847,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) mutex_destroy(&esw->offloads.decap_tbl_lock); esw_offloads_cleanup(esw); mlx5_esw_vports_cleanup(esw); + debugfs_remove_recursive(esw->debugfs_root); kfree(esw); devl_params_unregister(priv_to_devlink(esw->dev), mlx5_eswitch_params, ARRAY_SIZE(mlx5_eswitch_params)); diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 266b60fefe25..bcbab06759c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -304,6 +304,8 @@ enum { MLX5_ESW_FDB_CREATED = BIT(0), }; +struct dentry; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -312,6 +314,7 @@ struct mlx5_eswitch { struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct esw_mc_addr mc_promisc; /* end of legacy */ + struct dentry *debugfs_root; struct workqueue_struct *work_queue; struct xarray vports; u32 flags; -- cgit v1.2.3 From ade19f0d6a3a395e7936227811acbf897ee186fc Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 26 May 2023 08:55:15 +0200 Subject: net/mlx5: Bridge, pass net device when linking vport to bridge The following patch requires access to additional data in the bridge net_device. Pass the whole structure down the stack instead of adding the necessary fields as function arguments one-by-one.
Signed-off-by: Vlad Buslov Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/rep/bridge.c | 9 +++--- .../net/ethernet/mellanox/mlx5/core/esw/bridge.c | 35 ++++++++++++---------- .../net/ethernet/mellanox/mlx5/core/esw/bridge.h | 10 ++++--- 3 files changed, 29 insertions(+), 25 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index fd191925ab4b..560800246573 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -136,7 +136,6 @@ static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr struct mlx5_eswitch *esw = br_offloads->esw; u16 vport_num, esw_owner_vhca_id; struct netlink_ext_ack *extack; - int ifindex = upper->ifindex; int err = 0; if (!netif_is_bridge_master(upper)) @@ -150,15 +149,15 @@ static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr if (mlx5_esw_bridge_is_local(dev, rep, esw)) err = info->linking ? - mlx5_esw_bridge_vport_link(ifindex, vport_num, esw_owner_vhca_id, + mlx5_esw_bridge_vport_link(upper, vport_num, esw_owner_vhca_id, br_offloads, extack) : - mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id, + mlx5_esw_bridge_vport_unlink(upper, vport_num, esw_owner_vhca_id, br_offloads, extack); else if (mlx5_esw_bridge_dev_same_hw(rep, esw)) err = info->linking ? 
- mlx5_esw_bridge_vport_peer_link(ifindex, vport_num, esw_owner_vhca_id, + mlx5_esw_bridge_vport_peer_link(upper, vport_num, esw_owner_vhca_id, br_offloads, extack) : - mlx5_esw_bridge_vport_peer_unlink(ifindex, vport_num, esw_owner_vhca_id, + mlx5_esw_bridge_vport_peer_unlink(upper, vport_num, esw_owner_vhca_id, br_offloads, extack); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index bea7cc645461..eaa9b328abd5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -834,7 +834,7 @@ mlx5_esw_bridge_egress_miss_flow_create(struct mlx5_flow_table *egress_ft, return handle; } -static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex, +static struct mlx5_esw_bridge *mlx5_esw_bridge_create(struct net_device *br_netdev, struct mlx5_esw_bridge_offloads *br_offloads) { struct mlx5_esw_bridge *bridge; @@ -858,7 +858,7 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex, goto err_mdb_ht; INIT_LIST_HEAD(&bridge->fdb_list); - bridge->ifindex = ifindex; + bridge->ifindex = br_netdev->ifindex; bridge->refcnt = 1; bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME); bridge->vlan_proto = ETH_P_8021Q; @@ -898,14 +898,14 @@ static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads, } static struct mlx5_esw_bridge * -mlx5_esw_bridge_lookup(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads) +mlx5_esw_bridge_lookup(struct net_device *br_netdev, struct mlx5_esw_bridge_offloads *br_offloads) { struct mlx5_esw_bridge *bridge; ASSERT_RTNL(); list_for_each_entry(bridge, &br_offloads->bridges, list) { - if (bridge->ifindex == ifindex) { + if (bridge->ifindex == br_netdev->ifindex) { mlx5_esw_bridge_get(bridge); return bridge; } @@ -918,7 +918,7 @@ mlx5_esw_bridge_lookup(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads return ERR_PTR(err); } - bridge = 
mlx5_esw_bridge_create(ifindex, br_offloads); + bridge = mlx5_esw_bridge_create(br_netdev, br_offloads); if (IS_ERR(bridge) && list_empty(&br_offloads->bridges)) mlx5_esw_bridge_ingress_table_cleanup(br_offloads); return bridge; @@ -1601,15 +1601,15 @@ static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_off return 0; } -static int mlx5_esw_bridge_vport_link_with_flags(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, - u16 flags, +static int mlx5_esw_bridge_vport_link_with_flags(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, u16 flags, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack) { struct mlx5_esw_bridge *bridge; int err; - bridge = mlx5_esw_bridge_lookup(ifindex, br_offloads); + bridge = mlx5_esw_bridge_lookup(br_netdev, br_offloads); if (IS_ERR(bridge)) { NL_SET_ERR_MSG_MOD(extack, "Error checking for existing bridge with same ifindex"); return PTR_ERR(bridge); @@ -1627,15 +1627,16 @@ err_vport: return err; } -int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_link(struct net_device *br_netdev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack) { - return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id, 0, + return mlx5_esw_bridge_vport_link_with_flags(br_netdev, vport_num, esw_owner_vhca_id, 0, br_offloads, extack); } -int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_unlink(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack) { @@ -1647,7 +1648,7 @@ int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_ NL_SET_ERR_MSG_MOD(extack, "Port is not attached to any bridge"); return -EINVAL; } - if (port->bridge->ifindex != ifindex) { + if (port->bridge->ifindex 
!= br_netdev->ifindex) { NL_SET_ERR_MSG_MOD(extack, "Port is attached to another bridge"); return -EINVAL; } @@ -1658,23 +1659,25 @@ int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_ return err; } -int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_peer_link(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack) { if (!MLX5_CAP_ESW(br_offloads->esw->dev, merged_eswitch)) return 0; - return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id, + return mlx5_esw_bridge_vport_link_with_flags(br_netdev, vport_num, esw_owner_vhca_id, MLX5_ESW_BRIDGE_PORT_FLAG_PEER, br_offloads, extack); } -int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_peer_unlink(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack) { - return mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id, br_offloads, + return mlx5_esw_bridge_vport_unlink(br_netdev, vport_num, esw_owner_vhca_id, br_offloads, extack); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h index a9dd18c73d6a..2f7ad3bdba5e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h @@ -43,16 +43,18 @@ struct mlx5_esw_bridge_offloads { struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw); void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw); -int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_link(struct net_device *br_netdev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack 
*extack); -int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_unlink(struct net_device *br_netdev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack); -int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_peer_link(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack); -int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, +int mlx5_esw_bridge_vport_peer_unlink(struct net_device *br_netdev, u16 vport_num, + u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct netlink_ext_ack *extack); void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, -- cgit v1.2.3 From 791eb78285e8b81bc09bfc6bd928b981eaefb082 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 25 May 2023 15:19:00 +0200 Subject: net/mlx5: Bridge, expose FDB state via debugfs For debugging purposes expose offloaded FDB state (flags, counters, etc.) via debugfs inside 'esw' root directory. 
Example debugfs file output: $ cat mlx5/0000\:08\:00.0/esw/bridge/bridge1/fdb DEV MAC VLAN PACKETS BYTES LASTUSE FLAGS enp8s0f0_1 e4:0a:05:08:00:06 2 2 204 4295567112 0x0 enp8s0f0_0 e4:0a:05:08:00:03 2 3 278 4295567112 0x0 Signed-off-by: Vlad Buslov Reviewed-by: Tariq Toukan Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 3 +- .../net/ethernet/mellanox/mlx5/core/esw/bridge.c | 4 + .../net/ethernet/mellanox/mlx5/core/esw/bridge.h | 2 + .../mellanox/mlx5/core/esw/bridge_debugfs.c | 89 ++++++++++++++++++++++ .../ethernet/mellanox/mlx5/core/esw/bridge_priv.h | 6 ++ 5 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index ddf1e352f51d..35f00700a4d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -75,7 +75,8 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \ esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o -mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o esw/bridge_mcast.o en/rep/bridge.o +mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o esw/bridge_mcast.o esw/bridge_debugfs.o \ + en/rep/bridge.o mlx5_core-$(CONFIG_THERMAL) += thermal.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index eaa9b328abd5..f4fe1daa4afd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -863,6 +863,7 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(struct net_device *br_netd bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME); bridge->vlan_proto = ETH_P_8021Q; 
list_add(&bridge->list, &br_offloads->bridges); + mlx5_esw_bridge_debugfs_init(br_netdev, bridge); return bridge; @@ -886,6 +887,7 @@ static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads, if (--bridge->refcnt) return; + mlx5_esw_bridge_debugfs_cleanup(bridge); mlx5_esw_bridge_egress_table_cleanup(bridge); mlx5_esw_bridge_mcast_disable(bridge); list_del(&bridge->list); @@ -1904,6 +1906,7 @@ struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw) xa_init(&br_offloads->ports); br_offloads->esw = esw; esw->br_offloads = br_offloads; + mlx5_esw_bridge_debugfs_offloads_init(br_offloads); return br_offloads; } @@ -1919,6 +1922,7 @@ void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw) mlx5_esw_bridge_flush(br_offloads); WARN_ON(!xa_empty(&br_offloads->ports)); + mlx5_esw_bridge_debugfs_offloads_cleanup(br_offloads); esw->br_offloads = NULL; kvfree(br_offloads); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h index 2f7ad3bdba5e..c2c7c70d99eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h @@ -10,6 +10,7 @@ #include #include "eswitch.h" +struct dentry; struct mlx5_flow_table; struct mlx5_flow_group; @@ -17,6 +18,7 @@ struct mlx5_esw_bridge_offloads { struct mlx5_eswitch *esw; struct list_head bridges; struct xarray ports; + struct dentry *debugfs_root; struct notifier_block netdev_nb; struct notifier_block nb_blk; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c new file mode 100644 index 000000000000..b6a45eff28f5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include +#include "bridge.h" +#include "bridge_priv.h" + +static void *mlx5_esw_bridge_debugfs_start(struct seq_file *seq, loff_t *pos); +static void *mlx5_esw_bridge_debugfs_next(struct seq_file *seq, void *v, loff_t *pos); +static void mlx5_esw_bridge_debugfs_stop(struct seq_file *seq, void *v); +static int mlx5_esw_bridge_debugfs_show(struct seq_file *seq, void *v); + +static const struct seq_operations mlx5_esw_bridge_debugfs_sops = { + .start = mlx5_esw_bridge_debugfs_start, + .next = mlx5_esw_bridge_debugfs_next, + .stop = mlx5_esw_bridge_debugfs_stop, + .show = mlx5_esw_bridge_debugfs_show, +}; +DEFINE_SEQ_ATTRIBUTE(mlx5_esw_bridge_debugfs); + +static void *mlx5_esw_bridge_debugfs_start(struct seq_file *seq, loff_t *pos) +{ + struct mlx5_esw_bridge *bridge = seq->private; + + rtnl_lock(); + return *pos ? seq_list_start(&bridge->fdb_list, *pos - 1) : SEQ_START_TOKEN; +} + +static void *mlx5_esw_bridge_debugfs_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct mlx5_esw_bridge *bridge = seq->private; + + return seq_list_next(v == SEQ_START_TOKEN ? 
&bridge->fdb_list : v, &bridge->fdb_list, pos); +} + +static void mlx5_esw_bridge_debugfs_stop(struct seq_file *seq, void *v) +{ + rtnl_unlock(); +} + +static int mlx5_esw_bridge_debugfs_show(struct seq_file *seq, void *v) +{ + struct mlx5_esw_bridge_fdb_entry *entry; + u64 packets, bytes, lastuse; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "%-16s %-17s %4s %20s %20s %20s %5s\n", + "DEV", "MAC", "VLAN", "PACKETS", "BYTES", "LASTUSE", "FLAGS"); + return 0; + } + + entry = list_entry(v, struct mlx5_esw_bridge_fdb_entry, list); + mlx5_fc_query_cached_raw(entry->ingress_counter, &bytes, &packets, &lastuse); + seq_printf(seq, "%-16s %-17pM %4d %20llu %20llu %20llu %#5x\n", + entry->dev->name, entry->key.addr, entry->key.vid, packets, bytes, lastuse, + entry->flags); + return 0; +} + +void mlx5_esw_bridge_debugfs_init(struct net_device *br_netdev, struct mlx5_esw_bridge *bridge) +{ + if (!bridge->br_offloads->debugfs_root) + return; + + bridge->debugfs_dir = debugfs_create_dir(br_netdev->name, + bridge->br_offloads->debugfs_root); + debugfs_create_file("fdb", 0444, bridge->debugfs_dir, bridge, + &mlx5_esw_bridge_debugfs_fops); +} + +void mlx5_esw_bridge_debugfs_cleanup(struct mlx5_esw_bridge *bridge) +{ + debugfs_remove_recursive(bridge->debugfs_dir); + bridge->debugfs_dir = NULL; +} + +void mlx5_esw_bridge_debugfs_offloads_init(struct mlx5_esw_bridge_offloads *br_offloads) +{ + if (!br_offloads->esw->debugfs_root) + return; + + br_offloads->debugfs_root = debugfs_create_dir("bridge", br_offloads->esw->debugfs_root); +} + +void mlx5_esw_bridge_debugfs_offloads_cleanup(struct mlx5_esw_bridge_offloads *br_offloads) +{ + debugfs_remove_recursive(br_offloads->debugfs_root); + br_offloads->debugfs_root = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h index c9595801bdb4..4911cc32161b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h @@ -199,6 +199,7 @@ struct mlx5_esw_bridge { int refcnt; struct list_head list; struct mlx5_esw_bridge_offloads *br_offloads; + struct dentry *debugfs_dir; struct list_head fdb_list; struct rhashtable fdb_ht; @@ -241,4 +242,9 @@ void mlx5_esw_bridge_port_mdb_vlan_flush(struct mlx5_esw_bridge_port *port, struct mlx5_esw_bridge_vlan *vlan); void mlx5_esw_bridge_mdb_flush(struct mlx5_esw_bridge *bridge); +void mlx5_esw_bridge_debugfs_offloads_init(struct mlx5_esw_bridge_offloads *br_offloads); +void mlx5_esw_bridge_debugfs_offloads_cleanup(struct mlx5_esw_bridge_offloads *br_offloads); +void mlx5_esw_bridge_debugfs_init(struct net_device *br_netdev, struct mlx5_esw_bridge *bridge); +void mlx5_esw_bridge_debugfs_cleanup(struct mlx5_esw_bridge *bridge); + #endif /* _MLX5_ESW_BRIDGE_PRIVATE_ */ -- cgit v1.2.3 From 8a955da230d39932869e7a6835143be9889b0a45 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Wed, 7 Jun 2023 14:12:10 -0700 Subject: net/mlx5: E-Switch, remove redundant else statements These else statement blocks are redundant since the if block already jumps to the function abort label. 
Signed-off-by: Saeed Mahameed Reviewed-by: Rahul Rameshbabu --- drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index fabe49a35a5c..255bc8b749f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -285,9 +285,8 @@ static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); goto out; - } else { - esw->fdb_table.legacy.vepa_uplink_rule = flow_rule; } + esw->fdb_table.legacy.vepa_uplink_rule = flow_rule; /* Star rule to forward all traffic to uplink vport */ memset(&dest, 0, sizeof(dest)); @@ -299,9 +298,8 @@ static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); goto out; - } else { - esw->fdb_table.legacy.vepa_star_rule = flow_rule; } + esw->fdb_table.legacy.vepa_star_rule = flow_rule; out: kvfree(spec); -- cgit v1.2.3 From 559f4c32ebff40a25199b5178d58c9283ac5eb9c Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 23 Apr 2023 14:29:26 +0300 Subject: net/mlx5e: Remove mlx5e_dbg() and msglvl support The msglvl support was implemented using the mlx5e_dbg() macro which is rarely used in the driver, and is not very useful when you can just use dynamic debug instead. Remove mlx5e_dbg() and convert its usages to netdev_dbg(). 
Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 10 ----- .../ethernet/mellanox/mlx5/core/en/port_buffer.c | 44 +++++++++++----------- drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 8 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 26 ++++++------- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 18 ++------- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 5 +-- 6 files changed, 45 insertions(+), 66 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ceabe57c511a..b1807bfb815f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -165,15 +165,6 @@ struct page_pool; #define MLX5E_MAX_KLM_PER_WQE(mdev) \ MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev)) -#define MLX5E_MSG_LEVEL NETIF_MSG_LINK - -#define mlx5e_dbg(mlevel, priv, format, ...) 
\ -do { \ - if (NETIF_MSG_##mlevel & (priv)->msglevel) \ - netdev_warn(priv->netdev, format, \ - ##__VA_ARGS__); \ -} while (0) - #define mlx5e_state_dereference(priv, p) \ rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock)) @@ -880,7 +871,6 @@ struct mlx5e_priv { #endif /* priv data path fields - end */ - u32 msglevel; unsigned long state; struct mutex state_lock; /* Protects Interface state */ struct mlx5e_rq drop_rq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 7e8e96cc5cd0..8e25f4ef5ccc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -65,12 +65,13 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, MLX5_GET(bufferx_reg, buffer, xoff_threshold) * port_buff_cell_sz; total_used += port_buffer->buffer[i].size; - mlx5e_dbg(HW, priv, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", i, - port_buffer->buffer[i].size, - port_buffer->buffer[i].xon, - port_buffer->buffer[i].xoff, - port_buffer->buffer[i].epsb, - port_buffer->buffer[i].lossy); + netdev_dbg(priv->netdev, "buffer %d: size=%d, xon=%d, xoff=%d, epsb=%d, lossy=%d\n", + i, + port_buffer->buffer[i].size, + port_buffer->buffer[i].xon, + port_buffer->buffer[i].xoff, + port_buffer->buffer[i].epsb, + port_buffer->buffer[i].lossy); } port_buffer->internal_buffers_size = 0; @@ -87,11 +88,11 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, port_buffer->internal_buffers_size - port_buffer->headroom_size; - mlx5e_dbg(HW, priv, - "total buffer size=%u, headroom buffer size=%u, internal buffers size=%u, spare buffer size=%u\n", - port_buffer->port_buffer_size, port_buffer->headroom_size, - port_buffer->internal_buffers_size, - port_buffer->spare_buffer_size); + netdev_dbg(priv->netdev, + "total buffer size=%u, headroom buffer size=%u, internal buffers size=%u, spare buffer size=%u\n", + 
port_buffer->port_buffer_size, port_buffer->headroom_size, + port_buffer->internal_buffers_size, + port_buffer->spare_buffer_size); out: kfree(out); return err; @@ -352,7 +353,7 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu) xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100; - mlx5e_dbg(HW, priv, "%s: xoff=%d\n", __func__, xoff); + netdev_dbg(priv->netdev, "%s: xoff=%d\n", __func__, xoff); return xoff; } @@ -484,6 +485,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, u8 *prio2buffer) { u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; + struct net_device *netdev = priv->netdev; struct mlx5e_port_buffer port_buffer; u32 xoff = calculate_xoff(priv, mtu); bool update_prio2buffer = false; @@ -495,7 +497,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, int err; int i; - mlx5e_dbg(HW, priv, "%s: change=%x\n", __func__, change); + netdev_dbg(netdev, "%s: change=%x\n", __func__, change); max_mtu = max_t(unsigned int, priv->netdev->max_mtu, MINIMUM_MAX_MTU); err = mlx5e_port_query_buffer(priv, &port_buffer); @@ -510,8 +512,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, } if (change & MLX5E_PORT_BUFFER_PFC) { - mlx5e_dbg(HW, priv, "%s: requested PFC per priority bitmask: 0x%x\n", - __func__, pfc->pfc_en); + netdev_dbg(netdev, "%s: requested PFC per priority bitmask: 0x%x\n", + __func__, pfc->pfc_en); err = mlx5e_port_query_priority2buffer(priv->mdev, buffer); if (err) return err; @@ -526,8 +528,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) { update_prio2buffer = true; for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) - mlx5e_dbg(HW, priv, "%s: requested to map prio[%d] to buffer %d\n", - __func__, i, prio2buffer[i]); + netdev_dbg(priv->netdev, "%s: requested to map prio[%d] to buffer %d\n", + __func__, i, prio2buffer[i]); err = fill_pfc_en(priv->mdev, &curr_pfc_en); if (err) @@ -541,10 +543,10 @@ int 
mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (change & MLX5E_PORT_BUFFER_SIZE) { for (i = 0; i < MLX5E_MAX_NETWORK_BUFFER; i++) { - mlx5e_dbg(HW, priv, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]); + netdev_dbg(priv->netdev, "%s: buffer[%d]=%d\n", __func__, i, buffer_size[i]); if (!port_buffer.buffer[i].lossy && !buffer_size[i]) { - mlx5e_dbg(HW, priv, "%s: lossless buffer[%d] size cannot be zero\n", - __func__, i); + netdev_dbg(priv->netdev, "%s: lossless buffer[%d] size cannot be zero\n", + __func__, i); return -EINVAL; } @@ -552,7 +554,7 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, total_used += buffer_size[i]; } - mlx5e_dbg(HW, priv, "%s: total buffer requested=%d\n", __func__, total_used); + netdev_dbg(priv->netdev, "%s: total buffer requested=%d\n", __func__, total_used); if (total_used > port_buffer.headroom_size && (total_used - port_buffer.headroom_size) > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index bed0c2d043e7..933a7772a7a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -570,10 +570,10 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, if (IS_ERR(rule)) { err = PTR_ERR(rule); priv->channel_stats[arfs_rule->rxq]->rq.arfs_err++; - mlx5e_dbg(HW, priv, - "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n", - __func__, arfs_rule->filter_id, arfs_rule->rxq, - tuple->ip_proto, err); + netdev_dbg(priv->netdev, + "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n", + __func__, arfs_rule->filter_id, arfs_rule->rxq, + tuple->ip_proto, err); } out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index ebee52a8361a..8705cffc747f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ 
-275,10 +275,10 @@ static int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa)); for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - mlx5e_dbg(HW, priv, "%s: prio_%d <=> tc_%d\n", - __func__, i, ets->prio_tc[i]); - mlx5e_dbg(HW, priv, "%s: tc_%d <=> tx_bw_%d%%, group_%d\n", - __func__, i, tc_tx_bw[i], tc_group[i]); + netdev_dbg(priv->netdev, "%s: prio_%d <=> tc_%d\n", + __func__, i, ets->prio_tc[i]); + netdev_dbg(priv->netdev, "%s: tc_%d <=> tx_bw_%d%%, group_%d\n", + __func__, i, tc_tx_bw[i], tc_group[i]); } return err; @@ -399,9 +399,9 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, } if (!ret) { - mlx5e_dbg(HW, priv, - "%s: PFC per priority bit mask: 0x%x\n", - __func__, pfc->pfc_en); + netdev_dbg(dev, + "%s: PFC per priority bit mask: 0x%x\n", + __func__, pfc->pfc_en); } return ret; } @@ -611,8 +611,8 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, } for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - mlx5e_dbg(HW, priv, "%s: tc_%d <=> max_bw %d Gbps\n", - __func__, i, max_bw_value[i]); + netdev_dbg(netdev, "%s: tc_%d <=> max_bw %d Gbps\n", + __func__, i, max_bw_value[i]); } return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); @@ -640,10 +640,10 @@ static u8 mlx5e_dcbnl_setall(struct net_device *netdev) ets.tc_rx_bw[i] = cee_cfg->pg_bw_pct[i]; ets.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS; ets.prio_tc[i] = cee_cfg->prio_to_pg_map[i]; - mlx5e_dbg(HW, priv, - "%s: Priority group %d: tx_bw %d, rx_bw %d, prio_tc %d\n", - __func__, i, ets.tc_tx_bw[i], ets.tc_rx_bw[i], - ets.prio_tc[i]); + netdev_dbg(netdev, + "%s: Priority group %d: tx_bw %d, rx_bw %d, prio_tc %d\n", + __func__, i, ets.tc_tx_bw[i], ets.tc_rx_bw[i], + ets.prio_tc[i]); } err = mlx5e_dbcnl_validate_ets(netdev, &ets, true); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 1f5a2110d31f..27861b68ced5 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1689,16 +1689,6 @@ static int mlx5e_set_fecparam(struct net_device *netdev, return 0; } -static u32 mlx5e_get_msglevel(struct net_device *dev) -{ - return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel; -} - -static void mlx5e_set_msglevel(struct net_device *dev, u32 val) -{ - ((struct mlx5e_priv *)netdev_priv(dev))->msglevel = val; -} - static int mlx5e_set_phys_id(struct net_device *dev, enum ethtool_phys_id_state state) { @@ -1952,9 +1942,9 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val if (err) return err; - mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n", - MLX5E_GET_PFLAG(&priv->channels.params, - MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF"); + netdev_dbg(priv->netdev, "MLX5E: RxCqeCmprss was turned %s\n", + MLX5E_GET_PFLAG(&priv->channels.params, + MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF"); return 0; } @@ -2444,8 +2434,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_priv_flags = mlx5e_get_priv_flags, .set_priv_flags = mlx5e_set_priv_flags, .self_test = mlx5e_self_test, - .get_msglevel = mlx5e_get_msglevel, - .set_msglevel = mlx5e_set_msglevel, .get_fec_stats = mlx5e_get_fec_stats, .get_fecparam = mlx5e_get_fecparam, .set_fecparam = mlx5e_set_fecparam, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a7c526ee5024..c564ac86ff8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2401,7 +2401,7 @@ static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu) /* Asymmetric dynamic memory allocation. * Freed in mlx5e_priv_arrays_free, not on channel closure. 
*/ - mlx5e_dbg(DRV, priv, "Creating channel stats %d\n", ix); + netdev_dbg(priv->netdev, "Creating channel stats %d\n", ix); priv->channel_stats[ix] = kvzalloc_node(sizeof(**priv->channel_stats), GFP_KERNEL, cpu_to_node(cpu)); if (!priv->channel_stats[ix]) @@ -2779,7 +2779,7 @@ int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS)) num_txqs += ntc; - mlx5e_dbg(DRV, priv, "Setting num_txqs %d\n", num_txqs); + netdev_dbg(priv->netdev, "Setting num_txqs %d\n", num_txqs); err = netif_set_real_num_tx_queues(priv->netdev, num_txqs); if (err) netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err); @@ -5585,7 +5585,6 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, /* priv init */ priv->mdev = mdev; priv->netdev = netdev; - priv->msglevel = MLX5E_MSG_LEVEL; priv->max_nch = nch; priv->max_opened_tc = 1; -- cgit v1.2.3 From c8013a1f714f6d9f2d8d673177a824c6b9653218 Mon Sep 17 00:00:00 2001 From: Or Har-Toov Date: Thu, 23 Mar 2023 18:11:50 +0200 Subject: net/mlx5e: Add local loopback counter to vport stats Add counter for number of unicast, multicast and broadcast packets/ octets that were loopback. Signed-off-by: Or Har-Toov Reviewed-by: Avihai Horon Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/counters.rst | 10 ++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 23 +++++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst index 6b2d1fe74ecf..a395df9c2751 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst @@ -797,6 +797,16 @@ Counters on the NIC port that is connected to a eSwitch. 
RoCE/UD/RC traffic) [#accel]_. - Acceleration + * - `vport_loopback_packets` + - Unicast, multicast and broadcast packets that were loop-back (received + and transmitted), IB/Eth [#accel]_. + - Acceleration + + * - `vport_loopback_bytes` + - Unicast, multicast and broadcast bytes that were loop-back (received + and transmitted), IB/Eth [#accel]_. + - Acceleration + * - `rx_steer_missed_packets` - Number of packets that was received by the NIC, however was discarded because it did not match any flow in the NIC flow table. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index f1d9596905c6..25a6c596300d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -748,11 +748,22 @@ static const struct counter_desc vport_stats_desc[] = { VPORT_COUNTER_OFF(transmitted_ib_multicast.octets) }, }; +static const struct counter_desc vport_loopback_stats_desc[] = { + { "vport_loopback_packets", + VPORT_COUNTER_OFF(local_loopback.packets) }, + { "vport_loopback_bytes", + VPORT_COUNTER_OFF(local_loopback.octets) }, +}; + #define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) +#define NUM_VPORT_LOOPBACK_COUNTERS(dev) \ + (MLX5_CAP_GEN(dev, vport_counter_local_loopback) ? 
\ + ARRAY_SIZE(vport_loopback_stats_desc) : 0) static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(vport) { - return NUM_VPORT_COUNTERS; + return NUM_VPORT_COUNTERS + + NUM_VPORT_LOOPBACK_COUNTERS(priv->mdev); } static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport) @@ -761,6 +772,11 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(vport) for (i = 0; i < NUM_VPORT_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_stats_desc[i].format); + + for (i = 0; i < NUM_VPORT_LOOPBACK_COUNTERS(priv->mdev); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vport_loopback_stats_desc[i].format); + return idx; } @@ -771,6 +787,11 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport) for (i = 0; i < NUM_VPORT_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, vport_stats_desc, i); + + for (i = 0; i < NUM_VPORT_LOOPBACK_COUNTERS(priv->mdev); i++) + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, + vport_loopback_stats_desc, i); + return idx; } -- cgit v1.2.3 From b3bd68925ebb20942d448405351cf43cac9676a7 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 13 Jun 2023 22:26:43 +0300 Subject: net/mlx5: Fix the macro for accessing EC VF vports The last value is not set correctly. This results in representors not being created for all EC VFs when the base value is higher than 0. 
Fixes: a7719b29a821 ("net/mlx5: Add management of EC VF vports") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index bcbab06759c4..7064609f4998 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -668,6 +668,7 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); index, \ vport, \ MLX5_CAP_GEN_2((esw->dev), ec_vf_vport_base), \ + MLX5_CAP_GEN_2((esw->dev), ec_vf_vport_base) +\ (last) - 1) struct mlx5_eswitch *mlx5_devlink_eswitch_get(struct devlink *devlink); -- cgit v1.2.3 From 8bbe544e03809514e441994b4b849fdbeadd0068 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 13 Jun 2023 22:30:49 +0300 Subject: net/mlx5: DR, update query of HCA caps for EC VFs This change is needed to use EC VFs with metadata based steering. There was an assumption that vport was equal to function ID. That's not the case for EC VF functions. Adjust to function ID and set the ec_vf_function bit accordingly. 
Fixes: 9ac0b128248e ("net/mlx5: Update vport caps query/set for EC VFs") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 7 +++++++ drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 6 ------ 3 files changed, 10 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 60673f98de2b..c4be257c043d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -358,4 +358,11 @@ static inline bool mlx5_core_is_ec_vf_vport(const struct mlx5_core_dev *dev, u16 return (vport_num >= base_vport && vport_num < max_vport); } + +static inline int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vport, bool ec_vf_func) +{ + return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev) + : vport; +} + #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 1aa525e509f1..7491911ebcb5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -34,6 +34,7 @@ int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport, u16 vport_number, u16 *gvmi) { + bool ec_vf_func = other_vport ? 
mlx5_core_is_ec_vf_vport(mdev, vport_number) : false; u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; int out_size; void *out; @@ -46,7 +47,8 @@ int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport, MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, other_function, other_vport); - MLX5_SET(query_hca_cap_in, in, function_id, vport_number); + MLX5_SET(query_hca_cap_in, in, function_id, mlx5_vport_to_func_id(mdev, vport_number, ec_vf_func)); + MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); MLX5_SET(query_hca_cap_in, in, op_mod, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | HCA_CAP_OPMOD_GET_CUR); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 6d3984dd5b21..5a31fb47ffa5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1161,12 +1161,6 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); -static int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vport, bool ec_vf_func) -{ - return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev) - : vport; -} - int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out, u16 opmod) { -- cgit v1.2.3 From 2bd3b292955fe0a7eb2f768f36a767eddaedd6da Mon Sep 17 00:00:00 2001 From: Juhee Kang Date: Mon, 5 Jun 2023 16:51:36 +0900 Subject: net/mlx5: Add header file for events Separate the event API defined in the generic mlx5.h header into a dedicated header. And remove the TODO comment in commit 69c1280b1f3b ("net/mlx5: Device events, Use async events chain"). 
Signed-off-by: Juhee Kang Reviewed-by: Larysa Zaremba Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/events.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/health.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c | 2 +- .../net/ethernet/mellanox/mlx5/core/lag/mpesw.c | 2 +- .../net/ethernet/mellanox/mlx5/core/lib/events.h | 40 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h | 34 ------------------ 7 files changed, 45 insertions(+), 38 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/events.h (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 25a6c596300d..4d77055abd4b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -30,7 +30,7 @@ * SOFTWARE. */ -#include "lib/mlx5.h" +#include "lib/events.h" #include "en.h" #include "en_accel/ktls.h" #include "en_accel/en_accel.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 718cf09c28ce..3ec892d51f57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -5,7 +5,7 @@ #include "mlx5_core.h" #include "lib/eq.h" -#include "lib/mlx5.h" +#include "lib/events.h" struct mlx5_event_nb { struct mlx5_nb nb; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 210100a4064a..187cb2c464f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -39,6 +39,7 @@ #include "mlx5_core.h" #include "lib/eq.h" #include "lib/mlx5.h" +#include "lib/events.h" #include "lib/pci_vsc.h" #include "lib/tout.h" #include "diag/fw_tracer.h" diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index 976caa8e6922..b1aa494c76ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -7,7 +7,7 @@ #include "lag/mp.h" #include "mlx5_core.h" #include "eswitch.h" -#include "lib/mlx5.h" +#include "lib/events.h" static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 0e869a76dfe4..4bf15391525c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -6,7 +6,7 @@ #include "lag/lag.h" #include "eswitch.h" #include "esw/acl/ofld.h" -#include "lib/mlx5.h" +#include "lib/events.h" static void mlx5_mpesw_metadata_cleanup(struct mlx5_lag *ldev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/events.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/events.h new file mode 100644 index 000000000000..a0f7faea317b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/events.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __LIB_EVENTS_H__ +#define __LIB_EVENTS_H__ + +#include "mlx5_core.h" + +#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF +#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF + +enum port_module_event_status_type { + MLX5_MODULE_STATUS_PLUGGED = 0x1, + MLX5_MODULE_STATUS_UNPLUGGED = 0x2, + MLX5_MODULE_STATUS_ERROR = 0x3, + MLX5_MODULE_STATUS_DISABLED = 0x4, + MLX5_MODULE_STATUS_NUM, +}; + +enum port_module_event_error_type { + MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0, + MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX = 0x1, + MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2, + MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3, + MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4, + MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5, + MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, + MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7, + MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc, + MLX5_MODULE_EVENT_ERROR_NUM, +}; + +struct mlx5_pme_stats { + u64 status_counters[MLX5_MODULE_STATUS_NUM]; + u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; +}; + +void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); +int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index ccf12f7db6f0..2b5826a785c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -45,40 +45,6 @@ int mlx5_crdump_enable(struct mlx5_core_dev *dev); void mlx5_crdump_disable(struct mlx5_core_dev *dev); int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data); -/* TODO move to lib/events.h */ - -#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF -#define PORT_MODULE_EVENT_ERROR_TYPE_MASK 0xF - -enum port_module_event_status_type { - MLX5_MODULE_STATUS_PLUGGED = 0x1, - MLX5_MODULE_STATUS_UNPLUGGED = 0x2, - MLX5_MODULE_STATUS_ERROR = 0x3, - 
MLX5_MODULE_STATUS_DISABLED = 0x4, - MLX5_MODULE_STATUS_NUM, -}; - -enum port_module_event_error_type { - MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED = 0x0, - MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX = 0x1, - MLX5_MODULE_EVENT_ERROR_BUS_STUCK = 0x2, - MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT = 0x3, - MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST = 0x4, - MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER = 0x5, - MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, - MLX5_MODULE_EVENT_ERROR_BAD_CABLE = 0x7, - MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED = 0xc, - MLX5_MODULE_EVENT_ERROR_NUM, -}; - -struct mlx5_pme_stats { - u64 status_counters[MLX5_MODULE_STATUS_NUM]; - u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; -}; - -void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); -int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); - static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) { return devlink_net(priv_to_devlink(dev)); -- cgit v1.2.3 From 5f2cf757f9c56255470c23a2a4a5574a34edad4b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 2 Jun 2023 15:34:00 +0200 Subject: net/mlx5: Remove unused ecpu field from struct mlx5_sf_table "ecpu" field in struct mlx5_sf_table is not used anywhere. Remove it. Signed-off-by: Jiri Pirko Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 9c02e5ea797c..6a3fa30b2bf2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -28,7 +28,6 @@ struct mlx5_sf_table { struct mutex sf_state_lock; /* Serializes sf state among user cmds & vhca event handler. 
*/ struct notifier_block esw_nb; struct notifier_block vhca_nb; - u8 ecpu: 1; }; static struct mlx5_sf * -- cgit v1.2.3 From 4e7401fc8c8d048a813e0221a706be84182a76c1 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 14 Jun 2023 12:00:05 +0300 Subject: net/mlx5e: XDP, Allow growing tail for XDP multi buffer The cited commits missed passing frag_size to __xdp_rxq_info_reg, which is required by bpf_xdp_adjust_tail to support growing the tail pointer in fragmented packets. Pass the missing parameter when the current RQ mode allows XDP multi buffer. Fixes: ea5d49bdae8b ("net/mlx5e: Add XDP multi buffer support to the non-linear legacy RQ") Fixes: 9cb9482ef10e ("net/mlx5e: Use fragments of the same size in non-linear legacy RQ with XDP") Signed-off-by: Maxim Mikityanskiy Cc: Tariq Toukan Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 8 ++++++-- drivers/net/ethernet/mellanox/mlx5/core/en/params.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 ++++--- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 9c94807097cb..5ce28ff7685f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -732,7 +732,8 @@ static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params, static int mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, - struct mlx5e_rq_frags_info *info) + struct mlx5e_rq_frags_info *info, + u32 *xdp_frag_size) { u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); int frag_size_max = DEFAULT_FRAG_SIZE; @@ -845,6 +846,8 @@ out: info->log_num_frags = order_base_2(info->num_frags); + *xdp_frag_size = info->num_frags > 1 && params->xdp_prog ? 
PAGE_SIZE : 0; + return 0; } @@ -989,7 +992,8 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, } default: /* MLX5_WQ_TYPE_CYCLIC */ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - err = mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info); + err = mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info, + &param->xdp_frag_size); if (err) return err; ndsegs = param->frags_info.num_frags; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index a5d20f6d6d9c..6800949dafbc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -24,6 +24,7 @@ struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; struct mlx5_wq_param wq; struct mlx5e_rq_frags_info frags_info; + u32 xdp_frag_size; }; struct mlx5e_sq_param { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a7c526ee5024..a5bdf78955d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -641,7 +641,7 @@ static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq) } static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_rq *rq) + u32 xdp_frag_size, struct mlx5e_rq *rq) { struct mlx5_core_dev *mdev = c->mdev; int err; @@ -665,7 +665,8 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param if (err) return err; - return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id); + return __xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id, + xdp_frag_size); } static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, @@ -2240,7 +2241,7 @@ static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param { int err; - err = mlx5e_init_rxq_rq(c, params, &c->rq); + err = mlx5e_init_rxq_rq(c, params, 
rq_params->xdp_frag_size, &c->rq); if (err) return err; -- cgit v1.2.3 From 62a522d3354d81a86dd56feeb40e5ced36d72737 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 14 Jun 2023 12:00:06 +0300 Subject: net/mlx5e: xsk: Set napi_id to support busy polling on XSK RQ The cited commit missed setting napi_id on XSK RQs, it only affected regular RQs. Add the missing part to support socket busy polling on XSK RQs. Fixes: a2740f529da2 ("net/mlx5e: xsk: Set napi_id to support busy polling") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index ed279f450976..36826b582484 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -86,7 +86,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, if (err) return err; - return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0); + return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, c->napi.napi_id); } static int mlx5e_open_xsk_rq(struct mlx5e_channel *c, struct mlx5e_params *params, -- cgit v1.2.3 From 0ab999d4a1bfe8251538be1e7e495c50433475a8 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 3 May 2023 15:10:05 +0300 Subject: net/mlx5: Fix driver load with single msix vector When a PCI device has just one msix vector available, we want to share this vector between async and completion events. Current code fails to do that assuming it will always have at least one dedicated vector for completion events. Fix this by detecting when the pool contains just a single vector. 
Fixes: 3354822cde5a ("net/mlx5: Use dynamic msix vectors allocation") Signed-off-by: Eli Cohen Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 843da89a9035..33b9359de53d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -565,15 +565,21 @@ void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs) int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, struct mlx5_irq **irqs, struct cpu_rmap **rmap) { + struct mlx5_irq_table *table = mlx5_irq_table_get(dev); + struct mlx5_irq_pool *pool = table->pcif_pool; struct irq_affinity_desc af_desc; struct mlx5_irq *irq; + int offset = 1; int i; + if (!pool->xa_num_irqs.max) + offset = 0; + af_desc.is_managed = false; for (i = 0; i < nirqs; i++) { cpumask_clear(&af_desc.mask); cpumask_set_cpu(cpus[i], &af_desc.mask); - irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap); + irq = mlx5_irq_request(dev, i + offset, &af_desc, rmap); if (IS_ERR(irq)) break; irqs[i] = irq; -- cgit v1.2.3 From b100573ab76ee5b0cb8f9129b568d9e7da795f76 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Tue, 11 Apr 2023 11:17:35 +0300 Subject: net/mlx5e: TC, Add null pointer check for hardware miss support The cited commits add hardware miss support to tc action. But if the rules can't be offloaded, the pointers are null and system will panic when accessing them. Fix it by checking null pointer. 
Fixes: 08fe94ec5f77 ("net/mlx5e: TC, Remove special handling of CT action") Fixes: 6702782845a5 ("net/mlx5e: TC, Set CT miss to the specific ct action instance") Signed-off-by: Chris Mi Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index ead38ef69483..a254e728ac95 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -2021,6 +2021,8 @@ void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr) { + if (!attr->ct_attr.ft) /* no ct action, return */ + return; if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */ return; -- cgit v1.2.3 From fb7be476ab7e797b18c6f0047041635c41b3b5a4 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 6 Apr 2023 09:38:09 +0300 Subject: net/mlx5e: TC, Cleanup ct resources for nic flow The cited commit removes special handling of CT action. But it removes too much. Pre ct/ct_nat tables and some other resources are not destroyed due to the cited commit. Fix it by adding it back. 
Fixes: 08fe94ec5f77 ("net/mlx5e: TC, Remove special handling of CT action") Signed-off-by: Chris Mi Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 8a5a8703f0a3..b9b1da751a3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1439,6 +1439,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, mlx5e_hairpin_flow_del(priv, flow); free_flow_post_acts(flow); + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr); kvfree(attr->parse_attr); kfree(flow->attr); -- cgit v1.2.3 From 87cd0649176c0588daf2cad53058143f808b0905 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Mon, 12 Jun 2023 01:05:48 +0300 Subject: net/mlx5: DR, Support SW created encap actions for FW table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some cases, steering might need to use SW-created action in FW table, which results in wrong packet reformat being used: mlx5_core 0000:81:00.1: mlx5_cmd_check:756:(pid 1154): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad resource(0x5), syndrome (0xf2ff71) This patch adds support for usage of SW-created packet reformat (encap) actions in FW tables, and adds clear error flow for attempt to use SW-created modify header on FW tables. 
Fixes: 6a48faeeca10 ("net/mlx5: Add direct rule fs_cmd implementation") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Erez Shitrit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 50 ++++++++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 7 +++ .../mellanox/mlx5/core/steering/dr_action.c | 5 +++ .../ethernet/mellanox/mlx5/core/steering/fs_dr.c | 27 +++++++++++- .../ethernet/mellanox/mlx5/core/steering/fs_dr.h | 7 +++ .../ethernet/mellanox/mlx5/core/steering/mlx5dr.h | 2 + 6 files changed, 83 insertions(+), 15 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 144e59480686..ec83e6483d1a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -511,10 +511,11 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, struct mlx5_flow_rule *dst; void *in_flow_context, *vlan; void *in_match_value; + int reformat_id = 0; unsigned int inlen; int dst_cnt_size; + u32 *in, action; void *in_dests; - u32 *in; int err; if (mlx5_set_extended_dest(dev, fte, &extended_dest)) @@ -553,22 +554,42 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, extended_destination, extended_dest); - if (extended_dest) { - u32 action; - action = fte->action.action & - ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - MLX5_SET(flow_context, in_flow_context, action, action); - } else { - MLX5_SET(flow_context, in_flow_context, action, - fte->action.action); - if (fte->action.pkt_reformat) - MLX5_SET(flow_context, in_flow_context, packet_reformat_id, - fte->action.pkt_reformat->id); + action = fte->action.action; + if (extended_dest) + action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + + MLX5_SET(flow_context, in_flow_context, action, action); + + if (!extended_dest && fte->action.pkt_reformat) { + struct mlx5_pkt_reformat 
*pkt_reformat = fte->action.pkt_reformat; + + if (pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_SW) { + reformat_id = mlx5_fs_dr_action_get_pkt_reformat_id(pkt_reformat); + if (reformat_id < 0) { + mlx5_core_err(dev, + "Unsupported SW-owned pkt_reformat type (%d) in FW-owned table\n", + pkt_reformat->reformat_type); + err = reformat_id; + goto err_out; + } + } else { + reformat_id = fte->action.pkt_reformat->id; + } } - if (fte->action.modify_hdr) + + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, (u32)reformat_id); + + if (fte->action.modify_hdr) { + if (fte->action.modify_hdr->owner == MLX5_FLOW_RESOURCE_OWNER_SW) { + mlx5_core_err(dev, "Can't use SW-owned modify_hdr in FW-owned table\n"); + err = -EOPNOTSUPP; + goto err_out; + } + MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->action.modify_hdr->id); + } MLX5_SET(flow_context, in_flow_context, encrypt_decrypt_type, fte->action.crypto.type); @@ -885,6 +906,8 @@ static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns, pkt_reformat->id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id); + pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_FW; + kfree(in); return err; } @@ -969,6 +992,7 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns, err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); modify_hdr->id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id); + modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_FW; kfree(in); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index f137a0611b77..b043190e50a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -54,8 +54,14 @@ struct mlx5_flow_definer { u32 id; }; +enum mlx5_flow_resource_owner { + MLX5_FLOW_RESOURCE_OWNER_FW, + MLX5_FLOW_RESOURCE_OWNER_SW, +}; + struct mlx5_modify_hdr { enum mlx5_flow_namespace_type 
ns_type; + enum mlx5_flow_resource_owner owner; union { struct mlx5_fs_dr_action action; u32 id; @@ -65,6 +71,7 @@ struct mlx5_modify_hdr { struct mlx5_pkt_reformat { enum mlx5_flow_namespace_type ns_type; int reformat_type; /* from mlx5_ifc */ + enum mlx5_flow_resource_owner owner; union { struct mlx5_fs_dr_action action; u32 id; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 0eb9a8d7f282..57e22c5170df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -2129,6 +2129,11 @@ mlx5dr_action_create_aso(struct mlx5dr_domain *dmn, u32 obj_id, return action; } +u32 mlx5dr_action_get_pkt_reformat_id(struct mlx5dr_action *action) +{ + return action->reformat->id; +} + int mlx5dr_action_destroy(struct mlx5dr_action *action) { if (WARN_ON_ONCE(refcount_read(&action->refcount) > 1)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 984653756779..cc215beb7436 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -331,8 +331,16 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, } if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { - bool is_decap = fte->action.pkt_reformat->reformat_type == - MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + bool is_decap; + + if (fte->action.pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_FW) { + err = -EINVAL; + mlx5dr_err(domain, "FW-owned reformat can't be used in SW rule\n"); + goto free_actions; + } + + is_decap = fte->action.pkt_reformat->reformat_type == + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; if (is_decap) actions[num_actions++] = @@ -661,6 +669,7 @@ static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns return -EINVAL; } + 
pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_SW; pkt_reformat->action.dr_action = action; return 0; @@ -691,6 +700,7 @@ static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns, return -EINVAL; } + modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_SW; modify_hdr->action.dr_action = action; return 0; @@ -816,6 +826,19 @@ static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns, return steering_caps; } +int mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat) +{ + switch (pkt_reformat->reformat_type) { + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_NVGRE: + case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: + case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: + case MLX5_REFORMAT_TYPE_INSERT_HDR: + return mlx5dr_action_get_pkt_reformat_id(pkt_reformat->action.dr_action); + } + return -EOPNOTSUPP; +} + bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) { return mlx5dr_is_supported(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h index d168622063d5..99a3b2eff6b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.h @@ -38,6 +38,8 @@ struct mlx5_fs_dr_table { bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev); +int mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat); + const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void); #else @@ -47,6 +49,11 @@ static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_dr_cmds(void) return NULL; } +static inline u32 mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat) +{ + return 0; +} + static inline bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) { return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index 9afd268a2573..d1c04f43d86d 100644 
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -150,6 +150,8 @@ mlx5dr_action_create_dest_match_range(struct mlx5dr_domain *dmn, int mlx5dr_action_destroy(struct mlx5dr_action *action); +u32 mlx5dr_action_get_pkt_reformat_id(struct mlx5dr_action *action); + int mlx5dr_definer_get(struct mlx5dr_domain *dmn, u16 format_id, u8 *dw_selectors, u8 *byte_selectors, u8 *match_mask, u32 *definer_id); -- cgit v1.2.3 From ef4c5afc783dc3d47640270a9b94713229c697e8 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 4 Jun 2023 21:07:04 +0300 Subject: net/mlx5: DR, Fix wrong action data allocation in decap action When TUNNEL_L3_TO_L2 decap action was created, a pointer to a local variable was passed as its HW action data, resulting in attempt to free invalid address: BUG: KASAN: invalid-free in mlx5dr_action_destroy+0x318/0x410 [mlx5_core] Fixes: 4781df92f4da ("net/mlx5: DR, Move STEv0 modify header logic") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 57e22c5170df..0f783e7906cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -1421,9 +1421,13 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn, } case DR_ACTION_TYP_TNL_L3_TO_L2: { - u8 hw_actions[DR_ACTION_CACHE_LINE_SIZE] = {}; + u8 *hw_actions; int ret; + hw_actions = kzalloc(DR_ACTION_CACHE_LINE_SIZE, GFP_KERNEL); + if (!hw_actions) + return -ENOMEM; + ret = mlx5dr_ste_set_action_decap_l3_list(dmn->ste_ctx, data, data_sz, hw_actions, @@ -1431,6 +1435,7 @@ 
dr_action_create_reformat_action(struct mlx5dr_domain *dmn, &action->rewrite->num_of_actions); if (ret) { mlx5dr_dbg(dmn, "Failed creating decap l3 action list\n"); + kfree(hw_actions); return ret; } @@ -1440,6 +1445,7 @@ dr_action_create_reformat_action(struct mlx5dr_domain *dmn, ret = mlx5dr_ste_alloc_modify_hdr(action); if (ret) { mlx5dr_dbg(dmn, "Failed preparing reformat data\n"); + kfree(hw_actions); return ret; } return 0; -- cgit v1.2.3 From 314ded538e5f22e7610b1bf621402024a180ec80 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 8 Jun 2023 12:00:54 -0700 Subject: net/mlx5: Free IRQ rmap and notifier on kernel shutdown The kernel IRQ system needs the irq affinity notifier to be clear before attempting to free the irq, see WARN_ON log below. On a normal driver unload we don't have this issue since we do the complete cleanup of the irq resources. To fix this, put the important resources cleanup in a helper function and use it in both normal driver unload and shutdown flows. 
[ 4497.498434] ------------[ cut here ]------------ [ 4497.498726] WARNING: CPU: 0 PID: 9 at kernel/irq/manage.c:2034 free_irq+0x295/0x340 [ 4497.499193] Modules linked in: [ 4497.499386] CPU: 0 PID: 9 Comm: kworker/0:1 Tainted: G W 6.4.0-rc4+ #10 [ 4497.499876] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014 [ 4497.500518] Workqueue: events do_poweroff [ 4497.500849] RIP: 0010:free_irq+0x295/0x340 [ 4497.501132] Code: 85 c0 0f 84 1d ff ff ff 48 89 ef ff d0 0f 1f 00 e9 10 ff ff ff 0f 0b e9 72 ff ff ff 49 8d 7f 28 ff d0 0f 1f 00 e9 df fd ff ff <0f> 0b 48 c7 80 c0 008 [ 4497.502269] RSP: 0018:ffffc90000053da0 EFLAGS: 00010282 [ 4497.502589] RAX: ffff888100949600 RBX: ffff88810330b948 RCX: 0000000000000000 [ 4497.503035] RDX: ffff888100949600 RSI: ffff888100400490 RDI: 0000000000000023 [ 4497.503472] RBP: ffff88810330c7e0 R08: ffff8881004005d0 R09: ffffffff8273a260 [ 4497.503923] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8881009ae000 [ 4497.504359] R13: ffff8881009ae148 R14: 0000000000000000 R15: ffff888100949600 [ 4497.504804] FS: 0000000000000000(0000) GS:ffff88813bc00000(0000) knlGS:0000000000000000 [ 4497.505302] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4497.505671] CR2: 00007fce98806298 CR3: 000000000262e005 CR4: 0000000000370ef0 [ 4497.506104] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 4497.506540] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 4497.507002] Call Trace: [ 4497.507158] [ 4497.507299] ? free_irq+0x295/0x340 [ 4497.507522] ? __warn+0x7c/0x130 [ 4497.507740] ? free_irq+0x295/0x340 [ 4497.507963] ? report_bug+0x171/0x1a0 [ 4497.508197] ? handle_bug+0x3c/0x70 [ 4497.508417] ? exc_invalid_op+0x17/0x70 [ 4497.508662] ? asm_exc_invalid_op+0x1a/0x20 [ 4497.508926] ? 
free_irq+0x295/0x340 [ 4497.509146] mlx5_irq_pool_free_irqs+0x48/0x90 [ 4497.509421] mlx5_irq_table_free_irqs+0x38/0x50 [ 4497.509714] mlx5_core_eq_free_irqs+0x27/0x40 [ 4497.509984] shutdown+0x7b/0x100 [ 4497.510184] pci_device_shutdown+0x30/0x60 [ 4497.510440] device_shutdown+0x14d/0x240 [ 4497.510698] kernel_power_off+0x30/0x70 [ 4497.510938] process_one_work+0x1e6/0x3e0 [ 4497.511183] worker_thread+0x49/0x3b0 [ 4497.511407] ? __pfx_worker_thread+0x10/0x10 [ 4497.511679] kthread+0xe0/0x110 [ 4497.511879] ? __pfx_kthread+0x10/0x10 [ 4497.512114] ret_from_fork+0x29/0x50 [ 4497.512342] Fixes: 9c2d08010963 ("net/mlx5: Free irqs only on shutdown callback") Signed-off-by: Saeed Mahameed Reviewed-by: Shay Drory --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 25 +++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 33b9359de53d..98412bd5a696 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -126,14 +126,22 @@ out: return ret; } -static void irq_release(struct mlx5_irq *irq) +/* mlx5_system_free_irq - Free an IRQ + * @irq: IRQ to free + * + * Free the IRQ and other resources such as rmap from the system. + * BUT doesn't free or remove reference from mlx5. + * This function is very important for the shutdown flow, where we need to + * cleanup system resources but keep mlx5 objects alive, + * see mlx5_irq_table_free_irqs(). + */ +static void mlx5_system_free_irq(struct mlx5_irq *irq) { struct mlx5_irq_pool *pool = irq->pool; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; #endif - xa_erase(&pool->irqs, irq->pool_index); /* free_irq requires that affinity_hint and rmap will be cleared before * calling it. 
To satisfy this requirement, we call * irq_cpu_rmap_remove() to remove the notifier @@ -145,10 +153,18 @@ static void irq_release(struct mlx5_irq *irq) irq_cpu_rmap_remove(rmap, irq->map.virq); #endif - free_cpumask_var(irq->mask); free_irq(irq->map.virq, &irq->nh); if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev)) pci_msix_free_irq(pool->dev->pdev, irq->map); +} + +static void irq_release(struct mlx5_irq *irq) +{ + struct mlx5_irq_pool *pool = irq->pool; + + xa_erase(&pool->irqs, irq->pool_index); + mlx5_system_free_irq(irq); + free_cpumask_var(irq->mask); kfree(irq); } @@ -705,7 +721,8 @@ static void mlx5_irq_pool_free_irqs(struct mlx5_irq_pool *pool) unsigned long index; xa_for_each(&pool->irqs, index, irq) - free_irq(irq->map.virq, &irq->nh); + mlx5_system_free_irq(irq); + } static void mlx5_irq_pools_free_irqs(struct mlx5_irq_table *table) -- cgit v1.2.3 From cf5bb02320d4c4cf4e827efc0314b6ca4082799c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 5 Jun 2023 11:09:49 +0300 Subject: net/mlx5e: Don't delay release of hardware objects XFRM core provides two callbacks to release resources, one is .xdo_dev_policy_delete() and another is .xdo_dev_policy_free(). This separation allows delayed release so "ip xfrm policy free" commands won't starve. Unfortunately, mlx5 command interface can't run in .xdo_dev_policy_free() callbacks as the latter runs in ATOMIC context. 
BUG: scheduling while atomic: swapper/7/0/0x00000100 Modules linked in: act_mirred act_tunnel_key cls_flower sch_ingress vxlan mlx5_vdpa vringh vhost_iotlb vdpa rpcrdma rdma_ucm ib_iser libiscsi ib_umad scsi_transport_iscsi rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay mlx5_core zram zsmalloc fuse CPU: 7 PID: 0 Comm: swapper/7 Not tainted 6.3.0+ #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x33/0x50 __schedule_bug+0x4e/0x60 __schedule+0x5d5/0x780 ? __mod_timer+0x286/0x3d0 schedule+0x50/0x90 schedule_timeout+0x7c/0xf0 ? __bpf_trace_tick_stop+0x10/0x10 __wait_for_common+0x88/0x190 ? usleep_range_state+0x90/0x90 cmd_exec+0x42e/0xb40 [mlx5_core] mlx5_cmd_do+0x1e/0x40 [mlx5_core] mlx5_cmd_exec+0x18/0x30 [mlx5_core] mlx5_cmd_delete_fte+0xa8/0xd0 [mlx5_core] del_hw_fte+0x60/0x120 [mlx5_core] mlx5_del_flow_rules+0xec/0x270 [mlx5_core] ? 
default_send_IPI_single_phys+0x26/0x30 mlx5e_accel_ipsec_fs_del_pol+0x1a/0x60 [mlx5_core] mlx5e_xfrm_free_policy+0x15/0x20 [mlx5_core] xfrm_policy_destroy+0x5a/0xb0 xfrm4_dst_destroy+0x7b/0x100 dst_destroy+0x37/0x120 rcu_core+0x2d6/0x540 __do_softirq+0xcd/0x273 irq_exit_rcu+0x82/0xb0 sysvec_apic_timer_interrupt+0x72/0x90 asm_sysvec_apic_timer_interrupt+0x16/0x20 RIP: 0010:default_idle+0x13/0x20 Code: c0 08 00 00 00 4d 29 c8 4c 01 c7 4c 29 c2 e9 72 ff ff ff cc cc cc cc 8b 05 7a 4d ee 00 85 c0 7e 07 0f 00 2d 2f 98 2e 00 fb f4 c3 66 66 2e 0f 1f 84 00 00 00 00 00 65 48 8b 04 25 40 b4 02 00 RSP: 0018:ffff888100843ee0 EFLAGS: 00000242 RAX: 0000000000000001 RBX: ffff888100812b00 RCX: 4000000000000000 RDX: 0000000000000001 RSI: 0000000000000083 RDI: 000000000002d2ec RBP: 0000000000000007 R08: 00000021daeded59 R09: 0000000000000001 R10: 0000000000000000 R11: 000000000000000f R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 default_idle_call+0x30/0xb0 do_idle+0x1c1/0x1d0 cpu_startup_entry+0x19/0x20 start_secondary+0xfe/0x120 secondary_startup_64_no_verify+0xf3/0xfb bad: scheduling from the idle thread! 
Fixes: a5b8ca9471d3 ("net/mlx5e: Add XFRM policy offload logic") Signed-off-by: Leon Romanovsky Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 55b38544422f..d1c801723d35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -1040,11 +1040,17 @@ err_fs: return err; } -static void mlx5e_xfrm_free_policy(struct xfrm_policy *x) +static void mlx5e_xfrm_del_policy(struct xfrm_policy *x) { struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x); mlx5e_accel_ipsec_fs_del_pol(pol_entry); +} + +static void mlx5e_xfrm_free_policy(struct xfrm_policy *x) +{ + struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x); + kfree(pol_entry); } @@ -1065,6 +1071,7 @@ static const struct xfrmdev_ops mlx5e_ipsec_packet_xfrmdev_ops = { .xdo_dev_state_update_curlft = mlx5e_xfrm_update_curlft, .xdo_dev_policy_add = mlx5e_xfrm_add_policy, + .xdo_dev_policy_delete = mlx5e_xfrm_del_policy, .xdo_dev_policy_free = mlx5e_xfrm_free_policy, }; -- cgit v1.2.3 From fef06678931ff67b158d337b581e5cf5ca40a3a3 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Mon, 5 Jun 2023 11:09:50 +0300 Subject: net/mlx5e: Fix ESN update kernel panic Previously during mlx5e_ipsec_handle_event the driver tried to execute an operation that could sleep, while holding a spinlock, which caused the kernel panic mentioned below. Move the function call that can sleep outside of the spinlock context. Call Trace: dump_stack_lvl+0x49/0x6c __schedule_bug.cold+0x42/0x4e schedule_debug.constprop.0+0xe0/0x118 __schedule+0x59/0x58a ? __mod_timer+0x2a1/0x3ef schedule+0x5e/0xd4 schedule_timeout+0x99/0x164 ? 
__pfx_process_timeout+0x10/0x10 __wait_for_common+0x90/0x1da ? __pfx_schedule_timeout+0x10/0x10 wait_func+0x34/0x142 [mlx5_core] mlx5_cmd_invoke+0x1f3/0x313 [mlx5_core] cmd_exec+0x1fe/0x325 [mlx5_core] mlx5_cmd_do+0x22/0x50 [mlx5_core] mlx5_cmd_exec+0x1c/0x40 [mlx5_core] mlx5_modify_ipsec_obj+0xb2/0x17f [mlx5_core] mlx5e_ipsec_update_esn_state+0x69/0xf0 [mlx5_core] ? wake_affine+0x62/0x1f8 mlx5e_ipsec_handle_event+0xb1/0xc0 [mlx5_core] process_one_work+0x1e2/0x3e6 ? __pfx_worker_thread+0x10/0x10 worker_thread+0x54/0x3ad ? __pfx_worker_thread+0x10/0x10 kthread+0xda/0x101 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x29/0x37 BUG: workqueue leaked lock or atomic: kworker/u256:4/0x7fffffff/189754#012 last function: mlx5e_ipsec_handle_event [mlx5_core] CPU: 66 PID: 189754 Comm: kworker/u256:4 Kdump: loaded Tainted: G W 6.2.0-2596.20230309201517_5.el8uek.rc1.x86_64 #2 Hardware name: Oracle Corporation ORACLE SERVER X9-2/ASMMBX9-2, BIOS 61070300 08/17/2022 Workqueue: mlx5e_ipsec: eth%d mlx5e_ipsec_handle_event [mlx5_core] Call Trace: dump_stack_lvl+0x49/0x6c process_one_work.cold+0x2b/0x3c ? __pfx_worker_thread+0x10/0x10 worker_thread+0x54/0x3ad ? __pfx_worker_thread+0x10/0x10 kthread+0xda/0x101 ? 
__pfx_kthread+0x10/0x10 ret_from_fork+0x29/0x37 BUG: scheduling while atomic: kworker/u256:4/189754/0x00000000 Fixes: cee137a63431 ("net/mlx5e: Handle ESN update events") Signed-off-by: Patrisious Haddad Signed-off-by: Leon Romanovsky Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index df90e19066bc..ca16cb9807ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -305,7 +305,17 @@ static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry, } mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs); + + /* It is safe to execute the modify below unlocked since the only flows + * that could affect this HW object, are create, destroy and this work. + * + * Creation flow can't co-exist with this modify work, the destruction + * flow would cancel this work, and this work is a single entity that + * can't conflict with itself. 
+ */ + spin_unlock_bh(&sa_entry->x->lock); mlx5_accel_esp_modify_xfrm(sa_entry, &attrs); + spin_lock_bh(&sa_entry->x->lock); data.data_offset_condition_operand = MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET; @@ -431,7 +441,7 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work) aso = sa_entry->ipsec->aso; attrs = &sa_entry->attrs; - spin_lock(&sa_entry->x->lock); + spin_lock_bh(&sa_entry->x->lock); ret = mlx5e_ipsec_aso_query(sa_entry, NULL); if (ret) goto unlock; @@ -447,7 +457,7 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work) mlx5e_ipsec_handle_limits(sa_entry); unlock: - spin_unlock(&sa_entry->x->lock); + spin_unlock_bh(&sa_entry->x->lock); kfree(work); } -- cgit v1.2.3 From c75b94255aaa45d9e531df2763baa67020bb6fa9 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 5 Jun 2023 11:09:51 +0300 Subject: net/mlx5e: Drop XFRM state lock when modifying flow steering XFRM state which is changed to be XFRM_STATE_EXPIRED doesn't really need to hold lock while modifying flow steering rules to drop traffic. That state can be deleted only and as such mlx5e_ipsec_handle_tx_limit() work will be canceled anyway and won't run in parallel. 
Fixes: b2f7b01d36a9 ("net/mlx5e: Simulate missing IPsec TX limits hardware functionality") Signed-off-by: Leon Romanovsky Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index d1c801723d35..891d39b4bfd4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -61,16 +61,19 @@ static void mlx5e_ipsec_handle_tx_limit(struct work_struct *_work) struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry; struct xfrm_state *x = sa_entry->x; - spin_lock(&x->lock); + if (sa_entry->attrs.drop) + return; + + spin_lock_bh(&x->lock); xfrm_state_check_expire(x); if (x->km.state == XFRM_STATE_EXPIRED) { sa_entry->attrs.drop = true; - mlx5e_accel_ipsec_fs_modify(sa_entry); - } - spin_unlock(&x->lock); + spin_unlock_bh(&x->lock); - if (sa_entry->attrs.drop) + mlx5e_accel_ipsec_fs_modify(sa_entry); return; + } + spin_unlock_bh(&x->lock); queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork, MLX5_IPSEC_RESCHED); -- cgit v1.2.3 From a128f9d4c1227dfcf7f2328070760cb7ed1ec08d Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 5 Jun 2023 11:09:52 +0300 Subject: net/mlx5e: Fix scheduling of IPsec ASO query while in atomic ASO query can be scheduled in atomic context as such it can't use usleep. Use udelay as recommended in Documentation/timers/timers-howto.rst. 
Fixes: 76e463f6508b ("net/mlx5e: Overcome slow response for first IPsec ASO WQE") Signed-off-by: Leon Romanovsky Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index ca16cb9807ea..a3554bde3e07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -606,7 +606,8 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry, do { ret = mlx5_aso_poll_cq(aso->aso, false); if (ret) - usleep_range(2, 10); + /* We are in atomic context */ + udelay(10); } while (ret && time_is_after_jiffies(expires)); spin_unlock_bh(&aso->lock); return ret; -- cgit v1.2.3 From 4aaf2c52834b7f95acdf9fb0211a1b60adbf421b Mon Sep 17 00:00:00 2001 From: Íñigo Huguet Date: Thu, 15 Jun 2023 10:49:29 +0200 Subject: sfc: use budget for TX completions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When running workloads heavily unbalanced towards TX (high TX, low RX traffic), the sfc driver can hold the CPU for too long. Although in many cases this is not enough to be visible, it can affect performance and system responsiveness. A way to reproduce it is to use a debug kernel and run some parallel netperf TX tests. In some systems, this will lead to this message being logged: kernel:watchdog: BUG: soft lockup - CPU#12 stuck for 22s! The reason is that the sfc driver doesn't account any NAPI budget for the TX completion event work. With high-TX/low-RX traffic, this causes the CPU to be held for a long time in the NAPI poll. 
The documentation says "drivers can process completions for any number of Tx packets but should only process up to budget number of Rx packets". However, many drivers do limit the number of TX completions that they process in a single NAPI poll. In the same way, this patch adds a limit for the TX work in sfc. With the patch applied, the watchdog warning never appears. Tested with netperf in different combinations: single process / parallel processes, TCP / UDP and different sizes of UDP messages. Repeated the tests before and after the patch, without any noticeable difference in network or CPU performance. Test hardware: Intel(R) Xeon(R) CPU E5-1620 v4 @ 3.50GHz (4 cores, 2 threads/core) Solarflare Communications XtremeScale X2522-25G Network Adapter Fixes: 5227ecccea2d ("sfc: remove tx and MCDI handling from NAPI budget consideration") Fixes: d19a53721863 ("sfc_ef100: TX path for EF100 NICs") Reported-by: Fei Liu Signed-off-by: Íñigo Huguet Acked-by: Martin Habets Link: https://lore.kernel.org/r/20230615084929.10506-1-ihuguet@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/ef10.c | 25 ++++++++++++++++++------- drivers/net/ethernet/sfc/ef100_nic.c | 7 ++++++- drivers/net/ethernet/sfc/ef100_tx.c | 4 ++-- drivers/net/ethernet/sfc/ef100_tx.h | 2 +- drivers/net/ethernet/sfc/tx_common.c | 4 +++- drivers/net/ethernet/sfc/tx_common.h | 2 +- 6 files changed, 31 insertions(+), 13 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index d30459dbfe8f..b63e47af6365 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2950,7 +2950,7 @@ static u32 efx_ef10_extract_event_ts(efx_qword_t *event) return tstamp; } -static void +static int efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) { struct efx_nic *efx = channel->efx; @@ -2958,13 +2958,14 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) unsigned 
int tx_ev_desc_ptr; unsigned int tx_ev_q_label; unsigned int tx_ev_type; + int work_done; u64 ts_part; if (unlikely(READ_ONCE(efx->reset_pending))) - return; + return 0; if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_TX_DROP_EVENT))) - return; + return 0; /* Get the transmit queue */ tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL); @@ -2973,8 +2974,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) if (!tx_queue->timestamping) { /* Transmit completion */ tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, ESF_DZ_TX_DESCR_INDX); - efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask); - return; + return efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask); } /* Transmit timestamps are only available for 8XXX series. They result @@ -3000,6 +3000,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) * fields in the event. */ tx_ev_type = EFX_QWORD_FIELD(*event, ESF_EZ_TX_SOFT1); + work_done = 0; switch (tx_ev_type) { case TX_TIMESTAMP_EVENT_TX_EV_COMPLETION: @@ -3016,6 +3017,7 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) tx_queue->completed_timestamp_major = ts_part; efx_xmit_done_single(tx_queue); + work_done = 1; break; default: @@ -3026,6 +3028,8 @@ efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event) EFX_QWORD_VAL(*event)); break; } + + return work_done; } static void @@ -3081,13 +3085,16 @@ static void efx_ef10_handle_driver_generated_event(struct efx_channel *channel, } } +#define EFX_NAPI_MAX_TX 512 + static int efx_ef10_ev_process(struct efx_channel *channel, int quota) { struct efx_nic *efx = channel->efx; efx_qword_t event, *p_event; unsigned int read_ptr; - int ev_code; + int spent_tx = 0; int spent = 0; + int ev_code; if (quota <= 0) return spent; @@ -3126,7 +3133,11 @@ static int efx_ef10_ev_process(struct efx_channel *channel, int quota) } break; case ESE_DZ_EV_CODE_TX_EV: - efx_ef10_handle_tx_event(channel, &event); + spent_tx += 
efx_ef10_handle_tx_event(channel, &event); + if (spent_tx >= EFX_NAPI_MAX_TX) { + spent = quota; + goto out; + } break; case ESE_DZ_EV_CODE_DRIVER_EV: efx_ef10_handle_driver_event(channel, &event); diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 4dc643b0d2db..7adde9639c8a 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -253,6 +253,8 @@ static void ef100_ev_read_ack(struct efx_channel *channel) efx_reg(channel->efx, ER_GZ_EVQ_INT_PRIME)); } +#define EFX_NAPI_MAX_TX 512 + static int ef100_ev_process(struct efx_channel *channel, int quota) { struct efx_nic *efx = channel->efx; @@ -260,6 +262,7 @@ static int ef100_ev_process(struct efx_channel *channel, int quota) bool evq_phase, old_evq_phase; unsigned int read_ptr; efx_qword_t *p_event; + int spent_tx = 0; int spent = 0; bool ev_phase; int ev_type; @@ -295,7 +298,9 @@ static int ef100_ev_process(struct efx_channel *channel, int quota) efx_mcdi_process_event(channel, p_event); break; case ESE_GZ_EF100_EV_TX_COMPLETION: - ef100_ev_tx(channel, p_event); + spent_tx += ef100_ev_tx(channel, p_event); + if (spent_tx >= EFX_NAPI_MAX_TX) + spent = quota; break; case ESE_GZ_EF100_EV_DRIVER: netif_info(efx, drv, efx->net_dev, diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c index 29ffaf35559d..849e5555bd12 100644 --- a/drivers/net/ethernet/sfc/ef100_tx.c +++ b/drivers/net/ethernet/sfc/ef100_tx.c @@ -346,7 +346,7 @@ void ef100_tx_write(struct efx_tx_queue *tx_queue) ef100_tx_push_buffers(tx_queue); } -void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event) +int ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event) { unsigned int tx_done = EFX_QWORD_FIELD(*p_event, ESF_GZ_EV_TXCMPL_NUM_DESC); @@ -357,7 +357,7 @@ void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event) unsigned int tx_index = (tx_queue->read_count + tx_done - 1) & tx_queue->ptr_mask; 
- efx_xmit_done(tx_queue, tx_index); + return efx_xmit_done(tx_queue, tx_index); } /* Add a socket buffer to a TX queue diff --git a/drivers/net/ethernet/sfc/ef100_tx.h b/drivers/net/ethernet/sfc/ef100_tx.h index e9e11540fcde..d9a0819c5a72 100644 --- a/drivers/net/ethernet/sfc/ef100_tx.h +++ b/drivers/net/ethernet/sfc/ef100_tx.h @@ -20,7 +20,7 @@ void ef100_tx_init(struct efx_tx_queue *tx_queue); void ef100_tx_write(struct efx_tx_queue *tx_queue); unsigned int ef100_tx_max_skb_descs(struct efx_nic *efx); -void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event); +int ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event); netdev_tx_t ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb); int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb, diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c index 67e789b96c43..755aa92bf823 100644 --- a/drivers/net/ethernet/sfc/tx_common.c +++ b/drivers/net/ethernet/sfc/tx_common.c @@ -249,7 +249,7 @@ void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue) } } -void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) +int efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) { unsigned int fill_level, pkts_compl = 0, bytes_compl = 0; unsigned int efv_pkts_compl = 0; @@ -279,6 +279,8 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) } efx_xmit_done_check_empty(tx_queue); + + return pkts_compl + efv_pkts_compl; } /* Remove buffers put into a tx_queue for the current packet. 
diff --git a/drivers/net/ethernet/sfc/tx_common.h b/drivers/net/ethernet/sfc/tx_common.h index d87aecbc7bf1..1e9f42938aac 100644 --- a/drivers/net/ethernet/sfc/tx_common.h +++ b/drivers/net/ethernet/sfc/tx_common.h @@ -28,7 +28,7 @@ static inline bool efx_tx_buffer_in_use(struct efx_tx_buffer *buffer) } void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue); -void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); +int efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index); void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, unsigned int insert_count); -- cgit v1.2.3 From 92717c2356cb62c89e8a3dc37cbbab2502562524 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 14 Jun 2023 23:06:56 +0200 Subject: net: qca_spi: Avoid high load if QCA7000 is not available In case the QCA7000 is not available via SPI (e.g. in reset), the driver will cause a high load. The reason for this is that the synchronization is never finished and schedule() is never called. Since the synchronization is not timing critical, it's safe to drop this from the scheduling condition. Signed-off-by: Stefan Wahren Fixes: 291ab06ecf67 ("net: qualcomm: new Ethernet over SPI driver for QCA7000") Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qualcomm/qca_spi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index c865a4be05ee..4a1b94e5a8ea 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -582,8 +582,7 @@ qcaspi_spi_thread(void *data) while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); if ((qca->intr_req == qca->intr_svc) && - (qca->txr.skb[qca->txr.head] == NULL) && - (qca->sync == QCASPI_SYNC_READY)) + !qca->txr.skb[qca->txr.head]) schedule(); set_current_state(TASK_RUNNING); -- cgit v1.2.3 From 67ac72a599d833ff7d9b210186a66d46c13f0a18 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 12 Jun 2023 14:14:57 -0700 Subject: net/mlx5: Add .getmaxphase ptp_clock_info callback Implement .getmaxphase callback of ptp_clock_info in mlx5 driver. No longer do a range check in .adjphase callback implementation. Handled by the ptp stack. Cc: Saeed Mahameed Signed-off-by: Rahul Rameshbabu Acked-by: Richard Cochran Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx5/core/lib/clock.c | 31 +++++++++++----------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 932fbc843c69..973babfaff25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -93,17 +93,23 @@ static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev) return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify); } -static bool mlx5_is_mtutc_time_adj_cap(struct mlx5_core_dev *mdev, s64 delta) +static s32 mlx5_ptp_getmaxphase(struct ptp_clock_info *ptp) { - s64 min = MLX5_MTUTC_OPERATION_ADJUST_TIME_MIN; - s64 max = MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX; + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; - if (MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range)) { - min = MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MIN; - max = MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX; - } + mdev = container_of(clock, struct mlx5_core_dev, clock); + + return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ? 
+ MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX : + MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX; +} + +static bool mlx5_is_mtutc_time_adj_cap(struct mlx5_core_dev *mdev, s64 delta) +{ + s64 max = mlx5_ptp_getmaxphase(&mdev->clock.ptp_info); - if (delta < min || delta > max) + if (delta < -max || delta > max) return false; return true; @@ -351,14 +357,6 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta) { - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); - struct mlx5_core_dev *mdev; - - mdev = container_of(clock, struct mlx5_core_dev, clock); - - if (!mlx5_is_mtutc_time_adj_cap(mdev, delta)) - return -ERANGE; - return mlx5_ptp_adjtime(ptp, delta); } @@ -734,6 +732,7 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = { .pps = 0, .adjfine = mlx5_ptp_adjfine, .adjphase = mlx5_ptp_adjphase, + .getmaxphase = mlx5_ptp_getmaxphase, .adjtime = mlx5_ptp_adjtime, .gettimex64 = mlx5_ptp_gettimex, .settime64 = mlx5_ptp_settime, -- cgit v1.2.3 From 9a43827e876c9a071826cc81783aa2222b020f1d Mon Sep 17 00:00:00 2001 From: Josua Mayer Date: Fri, 16 Jun 2023 14:14:14 +0300 Subject: net: dpaa2-mac: add 25gbase-r support Layerscape MACs support 25Gbps network speed with dpmac "CAUI" mode. Add the mappings between DPMAC_ETH_IF_* and PHY_INTERFACE_MODE_*, as well as the 25000 mac capability. Tested on SolidRun LX2162a Clearfog, serdes 1 protocol 18. Signed-off-by: Josua Mayer Reviewed-by: Russell King (Oracle) Signed-off-by: David S.
Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index b1871e6c4006..00e50bd30189 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -54,6 +54,9 @@ static int phy_mode(enum dpmac_eth_if eth_if, phy_interface_t *if_mode) case DPMAC_ETH_IF_XFI: *if_mode = PHY_INTERFACE_MODE_10GBASER; break; + case DPMAC_ETH_IF_CAUI: + *if_mode = PHY_INTERFACE_MODE_25GBASER; + break; default: return -EINVAL; } @@ -79,6 +82,8 @@ static enum dpmac_eth_if dpmac_eth_if_mode(phy_interface_t if_mode) return DPMAC_ETH_IF_XFI; case PHY_INTERFACE_MODE_1000BASEX: return DPMAC_ETH_IF_1000BASEX; + case PHY_INTERFACE_MODE_25GBASER: + return DPMAC_ETH_IF_CAUI; default: return DPMAC_ETH_IF_MII; } @@ -418,7 +423,7 @@ int dpaa2_mac_connect(struct dpaa2_mac *mac) mac->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE | MAC_10FD | MAC_100FD | MAC_1000FD | MAC_2500FD | MAC_5000FD | - MAC_10000FD; + MAC_10000FD | MAC_25000FD; dpaa2_mac_set_supported_interfaces(mac); -- cgit v1.2.3 From 857922b16bb893d26d5ecd83acf9f20cb28eaea2 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 16 Jun 2023 21:18:32 +0200 Subject: net: fec: allow to build without PAGE_POOL_STATS Commit 6970ef27ff7f ("net: fec: add xdp and page pool statistics") selected CONFIG_PAGE_POOL_STATS from the FEC driver symbol, making it impossible to build without the page pool statistics when this driver is enabled. The help text of those statistics mentions increased overhead. Allow the user to choose between usefulness of the statistics and the added overhead. 
Signed-off-by: Lucas Stach Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230616191832.2944130-1-l.stach@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/Kconfig | 2 +- drivers/net/ethernet/freescale/fec_main.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index 1c78f66a89da..75401d2a5fb4 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -29,7 +29,7 @@ config FEC select CRC32 select PHYLIB select PAGE_POOL - select PAGE_POOL_STATS + imply PAGE_POOL_STATS imply NET_SELFTESTS help Say Y here if you want to use the built-in 10/100 Fast ethernet diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 4d37a811ae15..8fbe47703d47 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2789,6 +2789,7 @@ static void fec_enet_get_xdp_stats(struct fec_enet_private *fep, u64 *data) static void fec_enet_page_pool_stats(struct fec_enet_private *fep, u64 *data) { +#ifdef CONFIG_PAGE_POOL_STATS struct page_pool_stats stats = {}; struct fec_enet_priv_rx_q *rxq; int i; @@ -2803,6 +2804,7 @@ static void fec_enet_page_pool_stats(struct fec_enet_private *fep, u64 *data) } page_pool_ethtool_stats_get(data, &stats); +#endif } static void fec_enet_get_ethtool_stats(struct net_device *dev, -- cgit v1.2.3 From 7580e0a78eb29e7bb1a772eba4088250bbb70d41 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Fri, 16 Jun 2023 17:45:49 +0100 Subject: be2net: Extend xmit workaround to BE3 chip We have seen a bug where the NIC incorrectly changes the length in the IP header of a padded packet to include the padding bytes. The driver already has a workaround for this so do the workaround for this NIC too. This resolves the issue. 
The NIC in question identifies itself as follows: [ 8.828494] be2net 0000:02:00.0: FW version is 10.7.110.31 [ 8.834759] be2net 0000:02:00.0: Emulex OneConnect(be3): PF FLEX10 port 1 02:00.0 Ethernet controller: Emulex Corporation OneConnect 10Gb NIC (be3) (rev 01) Fixes: ca34fe38f06d ("be2net: fix wrong usage of adapter->generation") Signed-off-by: Ross Lagerwall Link: https://lore.kernel.org/r/20230616164549.2863037-1-ross.lagerwall@citrix.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/emulex/benet/be_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 7e408bcc88de..0defd519ba62 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1135,8 +1135,8 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ? VLAN_ETH_HLEN : ETH_HLEN; if (skb->len <= 60 && - (lancer_chip(adapter) || skb_vlan_tag_present(skb)) && - is_ipv4_pkt(skb)) { + (lancer_chip(adapter) || BE3_chip(adapter) || + skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) { ip = (struct iphdr *)ip_hdr(skb); pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len)); } -- cgit v1.2.3 From 365eb32e4b456064aea4db61adc0a65b8a09bc80 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Mon, 19 Jun 2023 11:36:38 +0530 Subject: octeontx2-pf: TC flower offload support for rxqueue mapping TC rule support to offload rx queue mapping rules. Eg: tc filter add dev eth2 ingress protocol ip flower \ dst_ip 192.168.8.100 \ action skbedit queue_mapping 4 skip_sw action mirred ingress redirect dev eth5 Packets destined to 192.168.8.100 will be forwarded to rx queue 4 of eth5 interface. 
tc filter add dev eth2 ingress protocol ip flower \ dst_ip 192.168.8.100 \ action skbedit queue_mapping 9 skip_sw Packets destined to 192.168.8.100 will be forwarded to rx queue 9 of eth2 interface. Signed-off-by: Ratheesh Kannoth Link: https://lore.kernel.org/r/20230619060638.1032304-1-rkannoth@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 231c3f0efb60..8a13df592af6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -396,8 +396,12 @@ static int otx2_tc_parse_actions(struct otx2_nic *nic, return -EOPNOTSUPP; } req->vf = priv->pcifunc & RVU_PFVF_FUNC_MASK; - req->op = NIX_RX_ACTION_DEFAULT; - return 0; + + /* if op is already set; avoid overwriting the same */ + if (!req->op) + req->op = NIX_RX_ACTION_DEFAULT; + break; + case FLOW_ACTION_VLAN_POP: req->vtag0_valid = true; /* use RX_VTAG_TYPE7 which is initialized to strip vlan tag */ @@ -433,6 +437,12 @@ static int otx2_tc_parse_actions(struct otx2_nic *nic, case FLOW_ACTION_MARK: mark = act->mark; break; + + case FLOW_ACTION_RX_QUEUE_MAPPING: + req->op = NIX_RX_ACTIONOP_UCAST; + req->index = act->rx_queue; + break; + default: return -EOPNOTSUPP; } -- cgit v1.2.3 From 40cba83370c2c97fd970cb0e273e76f99f0f2db6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 19 Jun 2023 11:12:09 +0200 Subject: sfc: add CONFIG_INET dependency for TC offload The driver now fails to link when CONFIG_INET is disabled, so add an explicit Kconfig dependency: ld.lld: error: undefined symbol: ip_route_output_flow >>> referenced by tc_encap_actions.c >>> drivers/net/ethernet/sfc/tc_encap_actions.o:(efx_tc_flower_create_encap_md) in archive vmlinux.a ld.lld: error: undefined symbol:
ip_send_check >>> referenced by tc_encap_actions.c >>> drivers/net/ethernet/sfc/tc_encap_actions.o:(efx_gen_encap_header) in archive vmlinux.a >>> referenced by tc_encap_actions.c >>> drivers/net/ethernet/sfc/tc_encap_actions.o:(efx_gen_encap_header) in archive vmlinux.a ld.lld: error: undefined symbol: arp_tbl >>> referenced by tc_encap_actions.c >>> drivers/net/ethernet/sfc/tc_encap_actions.o:(efx_tc_netevent_event) in archive vmlinux.a >>> referenced by tc_encap_actions.c >>> drivers/net/ethernet/sfc/tc_encap_actions.o:(efx_tc_netevent_event) in archive vmlinux.a Fixes: a1e82162af0b8 ("sfc: generate encap headers for TC offload") Reviewed-by: Edward Cree Reviewed-by: Simon Horman Closes: https://lore.kernel.org/oe-kbuild-all/202306151656.yttECVTP-lkp@intel.com/ Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230619091215.2731541-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index 4af36ba8906b..3eb55dcfa8a6 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -50,6 +50,7 @@ config SFC_MCDI_MON config SFC_SRIOV bool "Solarflare SFC9100-family SR-IOV support" depends on SFC && PCI_IOV + depends on INET default y help This enables support for the Single Root I/O Virtualization -- cgit v1.2.3 From f61d2d5cf142436cd1a02ddc78425e91116b8b0d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 19 Jun 2023 11:12:10 +0200 Subject: sfc: fix uninitialized variable use The new efx_bind_neigh() function contains a broken code path when IPV6 is disabled: drivers/net/ethernet/sfc/tc_encap_actions.c:144:7: error: variable 'n' is used uninitialized whenever 'if' condition is true [-Werror,-Wsometimes-uninitialized] if (encap->type & EFX_ENCAP_FLAG_IPV6) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/sfc/tc_encap_actions.c:184:8: 
note: uninitialized use occurs here if (!n) { ^ drivers/net/ethernet/sfc/tc_encap_actions.c:144:3: note: remove the 'if' if its condition is always false if (encap->type & EFX_ENCAP_FLAG_IPV6) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/sfc/tc_encap_actions.c:141:22: note: initialize the variable 'n' to silence this warning struct neighbour *n; ^ = NULL Change it to use the existing error handling path here. Fixes: 7e5e7d800011a ("sfc: neighbour lookup for TC encap action offload") Suggested-by: Edward Cree Signed-off-by: Arnd Bergmann Reviewed-by: Edward Cree Link: https://lore.kernel.org/r/20230619091215.2731541-2-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/tc_encap_actions.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c index aac259528e73..7e8bcdb222ad 100644 --- a/drivers/net/ethernet/sfc/tc_encap_actions.c +++ b/drivers/net/ethernet/sfc/tc_encap_actions.c @@ -164,6 +164,7 @@ static int efx_bind_neigh(struct efx_nic *efx, */ rc = -EOPNOTSUPP; NL_SET_ERR_MSG_MOD(extack, "No IPv6 support (neigh bind)"); + goto out_free; #endif } else { rt = ip_route_output_key(net, &flow4); -- cgit v1.2.3 From 9fc68f23a6d3729ccbeb6ca7da6de0bc399f9ddb Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 19 Jun 2023 11:23:49 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: shrink clock code with devres We can use a devm action to completely drop the remove callback and use stmmac_pltfr_remove() directly for remove. We can also drop one of the goto labels. 
Signed-off-by: Bartosz Golaszewski Reviewed-by: Andrew Halaney Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 24 ++++++++++------------ 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index c801838fae2a..2da0738eed24 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -586,6 +586,11 @@ static int ethqos_clks_config(void *priv, bool enabled) return ret; } +static void ethqos_clks_disable(void *data) +{ + ethqos_clks_config(data, false); +} + static int qcom_ethqos_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -636,6 +641,10 @@ static int qcom_ethqos_probe(struct platform_device *pdev) if (ret) goto err_mem; + ret = devm_add_action_or_reset(&pdev->dev, ethqos_clks_disable, ethqos); + if (ret) + goto err_mem; + ethqos->speed = SPEED_1000; ethqos_update_rgmii_clk(ethqos, SPEED_1000); ethqos_set_func_clk_en(ethqos); @@ -653,27 +662,16 @@ static int qcom_ethqos_probe(struct platform_device *pdev) ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) - goto err_clk; + goto err_mem; return ret; -err_clk: - ethqos_clks_config(ethqos, false); - err_mem: stmmac_remove_config_dt(pdev, plat_dat); return ret; } -static void qcom_ethqos_remove(struct platform_device *pdev) -{ - struct qcom_ethqos *ethqos = get_stmmac_bsp_priv(&pdev->dev); - - stmmac_pltfr_remove(pdev); - ethqos_clks_config(ethqos, false); -} - static const struct of_device_id qcom_ethqos_match[] = { { .compatible = "qcom,qcs404-ethqos", .data = &emac_v2_3_0_data}, { .compatible = "qcom,sc8280xp-ethqos", .data = &emac_v3_0_0_data}, @@ -684,7 +682,7 @@ MODULE_DEVICE_TABLE(of, qcom_ethqos_match); static struct platform_driver qcom_ethqos_driver = { .probe = qcom_ethqos_probe, - 
.remove_new = qcom_ethqos_remove, + .remove_new = stmmac_pltfr_remove, .driver = { .name = "qcom-ethqos", .pm = &stmmac_pltfr_pm_ops, -- cgit v1.2.3 From 9bc580609139cfc1f559cdc4bfb2f4862b38503d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 19 Jun 2023 11:23:50 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: rename a label in probe() The err_mem label's name is unclear. It actually should be reached on any error after stmmac_probe_config_dt() succeeds. Name it after the cleanup action that needs to be called before exiting. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andrew Halaney Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 2da0738eed24..16e856861558 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -615,14 +615,14 @@ static int qcom_ethqos_probe(struct platform_device *pdev) ethqos = devm_kzalloc(&pdev->dev, sizeof(*ethqos), GFP_KERNEL); if (!ethqos) { ret = -ENOMEM; - goto err_mem; + goto out_config_dt; } ethqos->pdev = pdev; ethqos->rgmii_base = devm_platform_ioremap_resource_byname(pdev, "rgmii"); if (IS_ERR(ethqos->rgmii_base)) { ret = PTR_ERR(ethqos->rgmii_base); - goto err_mem; + goto out_config_dt; } data = of_device_get_match_data(&pdev->dev); @@ -634,16 +634,16 @@ static int qcom_ethqos_probe(struct platform_device *pdev) ethqos->rgmii_clk = devm_clk_get(&pdev->dev, "rgmii"); if (IS_ERR(ethqos->rgmii_clk)) { ret = PTR_ERR(ethqos->rgmii_clk); - goto err_mem; + goto out_config_dt; } ret = ethqos_clks_config(ethqos, true); if (ret) - goto err_mem; + goto out_config_dt; ret = devm_add_action_or_reset(&pdev->dev, ethqos_clks_disable, ethqos); if (ret) - goto err_mem; + goto 
out_config_dt; ethqos->speed = SPEED_1000; ethqos_update_rgmii_clk(ethqos, SPEED_1000); @@ -662,11 +662,11 @@ static int qcom_ethqos_probe(struct platform_device *pdev) ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) - goto err_mem; + goto out_config_dt; return ret; -err_mem: +out_config_dt: stmmac_remove_config_dt(pdev, plat_dat); return ret; -- cgit v1.2.3 From 7b5e64a9382528d5e3db0fe714b03090a4b1433b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 19 Jun 2023 11:23:51 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: tweak the order of local variables Make sure we follow the reverse-xmas tree convention. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andrew Halaney Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 16e856861558..28d2514a8795 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -594,9 +594,9 @@ static void ethqos_clks_disable(void *data) static int qcom_ethqos_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; + const struct ethqos_emac_driver_data *data; struct plat_stmmacenet_data *plat_dat; struct stmmac_resources stmmac_res; - const struct ethqos_emac_driver_data *data; struct qcom_ethqos *ethqos; int ret; -- cgit v1.2.3 From 302555a0ae3362b38eddd1df9b8d4b176050fc92 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 19 Jun 2023 11:23:52 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: use a helper variable for &pdev->dev Shrink code and avoid line breaks by using a helper variable for &pdev->dev. 
Signed-off-by: Bartosz Golaszewski Reviewed-by: Andrew Halaney Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 49 ++++++++++++---------- 1 file changed, 26 insertions(+), 23 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 28d2514a8795..f0776ddea3ab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -123,25 +123,26 @@ static void rgmii_updatel(struct qcom_ethqos *ethqos, static void rgmii_dump(void *priv) { struct qcom_ethqos *ethqos = priv; + struct device *dev = &ethqos->pdev->dev; - dev_dbg(&ethqos->pdev->dev, "Rgmii register dump\n"); - dev_dbg(&ethqos->pdev->dev, "RGMII_IO_MACRO_CONFIG: %x\n", + dev_dbg(dev, "Rgmii register dump\n"); + dev_dbg(dev, "RGMII_IO_MACRO_CONFIG: %x\n", rgmii_readl(ethqos, RGMII_IO_MACRO_CONFIG)); - dev_dbg(&ethqos->pdev->dev, "SDCC_HC_REG_DLL_CONFIG: %x\n", + dev_dbg(dev, "SDCC_HC_REG_DLL_CONFIG: %x\n", rgmii_readl(ethqos, SDCC_HC_REG_DLL_CONFIG)); - dev_dbg(&ethqos->pdev->dev, "SDCC_HC_REG_DDR_CONFIG: %x\n", + dev_dbg(dev, "SDCC_HC_REG_DDR_CONFIG: %x\n", rgmii_readl(ethqos, SDCC_HC_REG_DDR_CONFIG)); - dev_dbg(&ethqos->pdev->dev, "SDCC_HC_REG_DLL_CONFIG2: %x\n", + dev_dbg(dev, "SDCC_HC_REG_DLL_CONFIG2: %x\n", rgmii_readl(ethqos, SDCC_HC_REG_DLL_CONFIG2)); - dev_dbg(&ethqos->pdev->dev, "SDC4_STATUS: %x\n", + dev_dbg(dev, "SDC4_STATUS: %x\n", rgmii_readl(ethqos, SDC4_STATUS)); - dev_dbg(&ethqos->pdev->dev, "SDCC_USR_CTL: %x\n", + dev_dbg(dev, "SDCC_USR_CTL: %x\n", rgmii_readl(ethqos, SDCC_USR_CTL)); - dev_dbg(&ethqos->pdev->dev, "RGMII_IO_MACRO_CONFIG2: %x\n", + dev_dbg(dev, "RGMII_IO_MACRO_CONFIG2: %x\n", rgmii_readl(ethqos, RGMII_IO_MACRO_CONFIG2)); - dev_dbg(&ethqos->pdev->dev, "RGMII_IO_MACRO_DEBUG1: %x\n", + dev_dbg(dev, "RGMII_IO_MACRO_DEBUG1: %x\n", rgmii_readl(ethqos, RGMII_IO_MACRO_DEBUG1)); -
dev_dbg(&ethqos->pdev->dev, "EMAC_SYSTEM_LOW_POWER_DEBUG: %x\n", + dev_dbg(dev, "EMAC_SYSTEM_LOW_POWER_DEBUG: %x\n", rgmii_readl(ethqos, EMAC_SYSTEM_LOW_POWER_DEBUG)); } @@ -242,6 +243,7 @@ static const struct ethqos_emac_driver_data emac_v3_0_0_data = { static int ethqos_dll_configure(struct qcom_ethqos *ethqos) { + struct device *dev = &ethqos->pdev->dev; unsigned int val; int retry = 1000; @@ -279,7 +281,7 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos) retry--; } while (retry > 0); if (!retry) - dev_err(&ethqos->pdev->dev, "Clear CK_OUT_EN timedout\n"); + dev_err(dev, "Clear CK_OUT_EN timedout\n"); /* Set CK_OUT_EN */ rgmii_updatel(ethqos, SDCC_DLL_CONFIG_CK_OUT_EN, @@ -296,7 +298,7 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos) retry--; } while (retry > 0); if (!retry) - dev_err(&ethqos->pdev->dev, "Set CK_OUT_EN timedout\n"); + dev_err(dev, "Set CK_OUT_EN timedout\n"); /* Set DDR_CAL_EN */ rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DDR_CAL_EN, @@ -322,12 +324,13 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos) static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) { + struct device *dev = &ethqos->pdev->dev; int phase_shift; int phy_mode; int loopback; /* Determine if the PHY adds a 2 ns TX delay or the MAC handles it */ - phy_mode = device_get_phy_mode(&ethqos->pdev->dev); + phy_mode = device_get_phy_mode(dev); if (phy_mode == PHY_INTERFACE_MODE_RGMII_ID || phy_mode == PHY_INTERFACE_MODE_RGMII_TXID) phase_shift = 0; @@ -468,8 +471,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) loopback, RGMII_IO_MACRO_CONFIG); break; default: - dev_err(&ethqos->pdev->dev, - "Invalid speed %d\n", ethqos->speed); + dev_err(dev, "Invalid speed %d\n", ethqos->speed); return -EINVAL; } @@ -478,6 +480,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) static int ethqos_configure(struct qcom_ethqos *ethqos) { + struct device *dev = &ethqos->pdev->dev; volatile unsigned int dll_lock; unsigned int i, retry = 1000; @@
-540,8 +543,7 @@ static int ethqos_configure(struct qcom_ethqos *ethqos) retry--; } while (retry > 0); if (!retry) - dev_err(&ethqos->pdev->dev, - "Timeout while waiting for DLL lock\n"); + dev_err(dev, "Timeout while waiting for DLL lock\n"); } if (ethqos->speed == SPEED_1000) @@ -597,6 +599,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) const struct ethqos_emac_driver_data *data; struct plat_stmmacenet_data *plat_dat; struct stmmac_resources stmmac_res; + struct device *dev = &pdev->dev; struct qcom_ethqos *ethqos; int ret; @@ -606,13 +609,13 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) { - dev_err(&pdev->dev, "dt configuration failed\n"); + dev_err(dev, "dt configuration failed\n"); return PTR_ERR(plat_dat); } plat_dat->clks_config = ethqos_clks_config; - ethqos = devm_kzalloc(&pdev->dev, sizeof(*ethqos), GFP_KERNEL); + ethqos = devm_kzalloc(dev, sizeof(*ethqos), GFP_KERNEL); if (!ethqos) { ret = -ENOMEM; goto out_config_dt; @@ -625,13 +628,13 @@ static int qcom_ethqos_probe(struct platform_device *pdev) goto out_config_dt; } - data = of_device_get_match_data(&pdev->dev); + data = of_device_get_match_data(dev); ethqos->por = data->por; ethqos->num_por = data->num_por; ethqos->rgmii_config_loopback_en = data->rgmii_config_loopback_en; ethqos->has_emac3 = data->has_emac3; - ethqos->rgmii_clk = devm_clk_get(&pdev->dev, "rgmii"); + ethqos->rgmii_clk = devm_clk_get(dev, "rgmii"); if (IS_ERR(ethqos->rgmii_clk)) { ret = PTR_ERR(ethqos->rgmii_clk); goto out_config_dt; @@ -641,7 +644,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) if (ret) goto out_config_dt; - ret = devm_add_action_or_reset(&pdev->dev, ethqos_clks_disable, ethqos); + ret = devm_add_action_or_reset(dev, ethqos_clks_disable, ethqos); if (ret) goto out_config_dt; @@ -660,7 +663,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) if (of_device_is_compatible(np,
"qcom,qcs404-ethqos")) plat_dat->rx_clk_runs_in_lpi = 1; - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + ret = stmmac_dvr_probe(dev, plat_dat, &stmmac_res); if (ret) goto out_config_dt; -- cgit v1.2.3 From ee8dacca2fd3ff437b787f83fb569197a89894fd Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 19 Jun 2023 11:23:53 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: add missing include device_get_phy_mode() is declared in linux/property.h but this header is not included. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andrew Halaney Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index f0776ddea3ab..b66d64d138cb 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "stmmac.h" #include "stmmac_platform.h" -- cgit v1.2.3 From 97f73bc59e1620c70635be68ab7ee91779bdf03e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 19 Jun 2023 11:23:54 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: add a newline between headers Typically we use a newline between global and local headers so add it here as well. 
Signed-off-by: Bartosz Golaszewski Reviewed-by: Andrew Halaney Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index b66d64d138cb..e3a9b785334d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -7,6 +7,7 @@ #include #include #include + #include "stmmac.h" #include "stmmac_platform.h" -- cgit v1.2.3 From f2b1758554eb026939407ed03e38dd5d43978cb4 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 19 Jun 2023 11:23:55 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: remove stray space There's an unnecessary space in the rgmii_updatel() function, remove it. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andrew Halaney Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index e3a9b785334d..ec3bbd199501 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -117,7 +117,7 @@ static void rgmii_updatel(struct qcom_ethqos *ethqos, { unsigned int temp; - temp = rgmii_readl(ethqos, offset); + temp = rgmii_readl(ethqos, offset); temp = (temp & ~(mask)) | val; rgmii_writel(ethqos, temp, offset); } -- cgit v1.2.3 From 0dec3b48aa4edb653ba8ed8a62970bc0698f5bc1 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 19 Jun 2023 11:23:56 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: add support for the optional serdes phy On sa8775p platforms, there's a SGMII SerDes PHY between the MAC and external PHY that we need to 
enable and configure. Signed-off-by: Bartosz Golaszewski Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 37 ++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index ec3bbd199501..042733b5e80b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "stmmac.h" @@ -93,6 +94,7 @@ struct qcom_ethqos { unsigned int rgmii_clk_rate; struct clk *rgmii_clk; + struct phy *serdes_phy; unsigned int speed; const struct ethqos_emac_por *por; @@ -565,6 +567,30 @@ static void ethqos_fix_mac_speed(void *priv, unsigned int speed) ethqos_configure(ethqos); } +static int qcom_ethqos_serdes_powerup(struct net_device *ndev, void *priv) +{ + struct qcom_ethqos *ethqos = priv; + int ret; + + ret = phy_init(ethqos->serdes_phy); + if (ret) + return ret; + + ret = phy_power_on(ethqos->serdes_phy); + if (ret) + return ret; + + return phy_set_speed(ethqos->serdes_phy, ethqos->speed); +} + +static void qcom_ethqos_serdes_powerdown(struct net_device *ndev, void *priv) +{ + struct qcom_ethqos *ethqos = priv; + + phy_power_off(ethqos->serdes_phy); + phy_exit(ethqos->serdes_phy); +} + static int ethqos_clks_config(void *priv, bool enabled) { struct qcom_ethqos *ethqos = priv; @@ -650,6 +676,12 @@ static int qcom_ethqos_probe(struct platform_device *pdev) if (ret) goto out_config_dt; + ethqos->serdes_phy = devm_phy_optional_get(dev, "serdes"); + if (IS_ERR(ethqos->serdes_phy)) { + ret = PTR_ERR(ethqos->serdes_phy); + goto out_config_dt; + } + ethqos->speed = SPEED_1000; ethqos_update_rgmii_clk(ethqos, SPEED_1000); ethqos_set_func_clk_en(ethqos); @@ -665,6 +697,11 @@ static int qcom_ethqos_probe(struct platform_device *pdev) if (of_device_is_compatible(np, 
"qcom,qcs404-ethqos")) plat_dat->rx_clk_runs_in_lpi = 1; + if (ethqos->serdes_phy) { + plat_dat->serdes_powerup = qcom_ethqos_serdes_powerup; + plat_dat->serdes_powerdown = qcom_ethqos_serdes_powerdown; + } + ret = stmmac_dvr_probe(dev, plat_dat, &stmmac_res); if (ret) goto out_config_dt; -- cgit v1.2.3 From feeb27165c46c1956c9ee002d306a2ed196fa5f0 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 19 Jun 2023 11:23:57 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: add support for the phyaux clock On sa8775p, the EMAC revision is 4 and we use SGMII instead of RGMII. There's no "rgmii" clock but there's a fourth clock under a different name: "phyaux". Add a new field to the chip data struct that specifies the link clock name. Default to "rgmii" for backward compatibility. Signed-off-by: Bartosz Golaszewski Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 31 +++++++++++----------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 042733b5e80b..a739e1d5c046 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -85,6 +85,7 @@ struct ethqos_emac_driver_data { unsigned int num_por; bool rgmii_config_loopback_en; bool has_emac3; + const char *link_clk_name; struct dwmac4_addrs dwmac4_addrs; }; @@ -92,8 +93,8 @@ struct qcom_ethqos { struct platform_device *pdev; void __iomem *rgmii_base; - unsigned int rgmii_clk_rate; - struct clk *rgmii_clk; + unsigned int link_clk_rate; + struct clk *link_clk; struct phy *serdes_phy; unsigned int speed; @@ -156,23 +157,23 @@ static void rgmii_dump(void *priv) #define RGMII_ID_MODE_10_LOW_SVS_CLK_FREQ (5 * 1000 * 1000UL) static void -ethqos_update_rgmii_clk(struct qcom_ethqos *ethqos, unsigned int speed) +ethqos_update_link_clk(struct qcom_ethqos 
*ethqos, unsigned int speed) { switch (speed) { case SPEED_1000: - ethqos->rgmii_clk_rate = RGMII_1000_NOM_CLK_FREQ; + ethqos->link_clk_rate = RGMII_1000_NOM_CLK_FREQ; break; case SPEED_100: - ethqos->rgmii_clk_rate = RGMII_ID_MODE_100_LOW_SVS_CLK_FREQ; + ethqos->link_clk_rate = RGMII_ID_MODE_100_LOW_SVS_CLK_FREQ; break; case SPEED_10: - ethqos->rgmii_clk_rate = RGMII_ID_MODE_10_LOW_SVS_CLK_FREQ; + ethqos->link_clk_rate = RGMII_ID_MODE_10_LOW_SVS_CLK_FREQ; break; } - clk_set_rate(ethqos->rgmii_clk, ethqos->rgmii_clk_rate); + clk_set_rate(ethqos->link_clk, ethqos->link_clk_rate); } static void ethqos_set_func_clk_en(struct qcom_ethqos *ethqos) @@ -563,7 +564,7 @@ static void ethqos_fix_mac_speed(void *priv, unsigned int speed) struct qcom_ethqos *ethqos = priv; ethqos->speed = speed; - ethqos_update_rgmii_clk(ethqos, speed); + ethqos_update_link_clk(ethqos, speed); ethqos_configure(ethqos); } @@ -597,9 +598,9 @@ static int ethqos_clks_config(void *priv, bool enabled) int ret = 0; if (enabled) { - ret = clk_prepare_enable(ethqos->rgmii_clk); + ret = clk_prepare_enable(ethqos->link_clk); if (ret) { - dev_err(&ethqos->pdev->dev, "rgmii_clk enable failed\n"); + dev_err(&ethqos->pdev->dev, "link_clk enable failed\n"); return ret; } @@ -610,7 +611,7 @@ static int ethqos_clks_config(void *priv, bool enabled) */ ethqos_set_func_clk_en(ethqos); } else { - clk_disable_unprepare(ethqos->rgmii_clk); + clk_disable_unprepare(ethqos->link_clk); } return ret; @@ -662,9 +663,9 @@ static int qcom_ethqos_probe(struct platform_device *pdev) ethqos->rgmii_config_loopback_en = data->rgmii_config_loopback_en; ethqos->has_emac3 = data->has_emac3; - ethqos->rgmii_clk = devm_clk_get(dev, "rgmii"); - if (IS_ERR(ethqos->rgmii_clk)) { - ret = PTR_ERR(ethqos->rgmii_clk); + ethqos->link_clk = devm_clk_get(dev, data->link_clk_name ?: "rgmii"); + if (IS_ERR(ethqos->link_clk)) { + ret = PTR_ERR(ethqos->link_clk); goto out_config_dt; } @@ -683,7 +684,7 @@ static int qcom_ethqos_probe(struct platform_device 
*pdev) } ethqos->speed = SPEED_1000; - ethqos_update_rgmii_clk(ethqos, SPEED_1000); + ethqos_update_link_clk(ethqos, SPEED_1000); ethqos_set_func_clk_en(ethqos); plat_dat->bsp_priv = ethqos; -- cgit v1.2.3 From 25c4a0769443d77c74fe73c80a978e28b08dc976 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 19 Jun 2023 11:23:58 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: prepare the driver for more PHY modes In preparation for supporting SGMII, let's make the code a bit more generic. Add a new callback for MAC configuration so that we can assign a different variant of it in the future. Signed-off-by: Bartosz Golaszewski Reviewed-by: Andrew Halaney Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 31 ++++++++++++++++++---- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index a739e1d5c046..0ececc951528 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -92,11 +92,13 @@ struct ethqos_emac_driver_data { struct qcom_ethqos { struct platform_device *pdev; void __iomem *rgmii_base; + int (*configure_func)(struct qcom_ethqos *ethqos); unsigned int link_clk_rate; struct clk *link_clk; struct phy *serdes_phy; unsigned int speed; + int phy_mode; const struct ethqos_emac_por *por; unsigned int num_por; @@ -331,13 +333,11 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) { struct device *dev = &ethqos->pdev->dev; int phase_shift; - int phy_mode; int loopback; /* Determine if the PHY adds a 2 ns TX delay or the MAC handles it */ - phy_mode = device_get_phy_mode(dev); - if (phy_mode == PHY_INTERFACE_MODE_RGMII_ID || - phy_mode == PHY_INTERFACE_MODE_RGMII_TXID) + if (ethqos->phy_mode == PHY_INTERFACE_MODE_RGMII_ID || + ethqos->phy_mode == PHY_INTERFACE_MODE_RGMII_TXID) phase_shift = 
0; else phase_shift = RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN; @@ -483,7 +483,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) return 0; } -static int ethqos_configure(struct qcom_ethqos *ethqos) +static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos) { struct device *dev = &ethqos->pdev->dev; volatile unsigned int dll_lock; @@ -559,6 +559,11 @@ static int ethqos_configure(struct qcom_ethqos *ethqos) return 0; } +static int ethqos_configure(struct qcom_ethqos *ethqos) +{ + return ethqos->configure_func(ethqos); +} + static void ethqos_fix_mac_speed(void *priv, unsigned int speed) { struct qcom_ethqos *ethqos = priv; @@ -650,6 +655,22 @@ static int qcom_ethqos_probe(struct platform_device *pdev) goto out_config_dt; } + ethqos->phy_mode = device_get_phy_mode(dev); + switch (ethqos->phy_mode) { + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: + ethqos->configure_func = ethqos_configure_rgmii; + break; + case -ENODEV: + ret = -ENODEV; + goto out_config_dt; + default: + ret = -EINVAL; + goto out_config_dt; + } + ethqos->pdev = pdev; ethqos->rgmii_base = devm_platform_ioremap_resource_byname(pdev, "rgmii"); if (IS_ERR(ethqos->rgmii_base)) { -- cgit v1.2.3 From 463120c31c58bbca0237dd6ae73d20f77609c749 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 19 Jun 2023 11:23:59 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: add support for SGMII On sa8775p the MAC is connected to the external PHY over SGMII so add support for it to the driver. 
Signed-off-by: Bartosz Golaszewski Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 37 ++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 0ececc951528..bdf59a179f87 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -75,6 +75,10 @@ #define RGMII_CONFIG2_DATA_DIVIDE_CLK_SEL BIT(6) #define RGMII_CONFIG2_TX_CLK_PHASE_SHIFT_EN BIT(5) +/* MAC_CTRL_REG bits */ +#define ETHQOS_MAC_CTRL_SPEED_MODE BIT(14) +#define ETHQOS_MAC_CTRL_PORT_SEL BIT(15) + struct ethqos_emac_por { unsigned int offset; unsigned int value; @@ -92,6 +96,7 @@ struct ethqos_emac_driver_data { struct qcom_ethqos { struct platform_device *pdev; void __iomem *rgmii_base; + void __iomem *mac_base; int (*configure_func)(struct qcom_ethqos *ethqos); unsigned int link_clk_rate; @@ -559,6 +564,33 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos) return 0; } +static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos) +{ + int val; + + val = readl(ethqos->mac_base + MAC_CTRL_REG); + + switch (ethqos->speed) { + case SPEED_1000: + val &= ~ETHQOS_MAC_CTRL_PORT_SEL; + rgmii_updatel(ethqos, RGMII_CONFIG2_RGMII_CLK_SEL_CFG, + RGMII_CONFIG2_RGMII_CLK_SEL_CFG, + RGMII_IO_MACRO_CONFIG2); + break; + case SPEED_100: + val |= ETHQOS_MAC_CTRL_PORT_SEL | ETHQOS_MAC_CTRL_SPEED_MODE; + break; + case SPEED_10: + val |= ETHQOS_MAC_CTRL_PORT_SEL; + val &= ~ETHQOS_MAC_CTRL_SPEED_MODE; + break; + } + + writel(val, ethqos->mac_base + MAC_CTRL_REG); + + return val; +} + static int ethqos_configure(struct qcom_ethqos *ethqos) { return ethqos->configure_func(ethqos); @@ -663,6 +695,9 @@ static int qcom_ethqos_probe(struct platform_device *pdev) case PHY_INTERFACE_MODE_RGMII_TXID: ethqos->configure_func = ethqos_configure_rgmii; 
break; + case PHY_INTERFACE_MODE_SGMII: + ethqos->configure_func = ethqos_configure_sgmii; + break; case -ENODEV: ret = -ENODEV; goto out_config_dt; @@ -678,6 +713,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev) goto out_config_dt; } + ethqos->mac_base = stmmac_res.addr; + data = of_device_get_match_data(dev); ethqos->por = data->por; ethqos->num_por = data->num_por; -- cgit v1.2.3 From aa571b6275fb60da443c490ebeef021a6897d332 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 19 Jun 2023 11:24:00 +0200 Subject: net: stmmac: add new switch to struct plat_stmmacenet_data On some platforms, the PCS can be integrated in the MAC so the driver will not see any PCS link activity. Add a switch that allows the platform drivers to let the core code know. Signed-off-by: Bartosz Golaszewski Reviewed-by: Jose Abreu Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- include/linux/stmmac.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5c645b6d5660..10e8a5606ba6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5798,7 +5798,7 @@ static void stmmac_common_interrupt(struct stmmac_priv *priv) } /* PCS link status */ - if (priv->hw->pcs) { + if (priv->hw->pcs && !priv->plat->has_integrated_pcs) { if (priv->xstats.pcs_link) netif_carrier_on(priv->dev); else diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 225751a8fd8e..06090538fe2d 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -293,5 +293,6 @@ struct plat_stmmacenet_data { bool sph_disable; bool serdes_up_after_phy_linkup; const struct dwmac4_addrs *dwmac4_addrs; + bool has_integrated_pcs; }; #endif -- cgit v1.2.3 From 8c4d92e82d500a65e7dba101ea38e4f3499dc428 Mon Sep 17 00:00:00 2001 From: 
Bartosz Golaszewski Date: Mon, 19 Jun 2023 11:24:02 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: add support for emac4 on sa8775p platforms sa8775p uses EMAC version 4, add the relevant defines, rename the has_emac3 switch to has_emac_ge_3 (has emac greater-or-equal than 3) and add the new compatible. Signed-off-by: Bartosz Golaszewski Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 65 +++++++++++++++++----- 1 file changed, 51 insertions(+), 14 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index bdf59a179f87..fa0fc53c56a3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -88,8 +88,9 @@ struct ethqos_emac_driver_data { const struct ethqos_emac_por *por; unsigned int num_por; bool rgmii_config_loopback_en; - bool has_emac3; + bool has_emac_ge_3; const char *link_clk_name; + bool has_integrated_pcs; struct dwmac4_addrs dwmac4_addrs; }; @@ -108,7 +109,7 @@ struct qcom_ethqos { const struct ethqos_emac_por *por; unsigned int num_por; bool rgmii_config_loopback_en; - bool has_emac3; + bool has_emac_ge_3; }; static int rgmii_readl(struct qcom_ethqos *ethqos, unsigned int offset) @@ -202,7 +203,7 @@ static const struct ethqos_emac_driver_data emac_v2_3_0_data = { .por = emac_v2_3_0_por, .num_por = ARRAY_SIZE(emac_v2_3_0_por), .rgmii_config_loopback_en = true, - .has_emac3 = false, + .has_emac_ge_3 = false, }; static const struct ethqos_emac_por emac_v2_1_0_por[] = { @@ -218,7 +219,7 @@ static const struct ethqos_emac_driver_data emac_v2_1_0_data = { .por = emac_v2_1_0_por, .num_por = ARRAY_SIZE(emac_v2_1_0_por), .rgmii_config_loopback_en = false, - .has_emac3 = false, + .has_emac_ge_3 = false, }; static const struct ethqos_emac_por emac_v3_0_0_por[] = { @@ -234,7 +235,41 @@ static const struct ethqos_emac_driver_data 
emac_v3_0_0_data = { .por = emac_v3_0_0_por, .num_por = ARRAY_SIZE(emac_v3_0_0_por), .rgmii_config_loopback_en = false, - .has_emac3 = true, + .has_emac_ge_3 = true, + .dwmac4_addrs = { + .dma_chan = 0x00008100, + .dma_chan_offset = 0x1000, + .mtl_chan = 0x00008000, + .mtl_chan_offset = 0x1000, + .mtl_ets_ctrl = 0x00008010, + .mtl_ets_ctrl_offset = 0x1000, + .mtl_txq_weight = 0x00008018, + .mtl_txq_weight_offset = 0x1000, + .mtl_send_slp_cred = 0x0000801c, + .mtl_send_slp_cred_offset = 0x1000, + .mtl_high_cred = 0x00008020, + .mtl_high_cred_offset = 0x1000, + .mtl_low_cred = 0x00008024, + .mtl_low_cred_offset = 0x1000, + }, +}; + +static const struct ethqos_emac_por emac_v4_0_0_por[] = { + { .offset = RGMII_IO_MACRO_CONFIG, .value = 0x40c01343 }, + { .offset = SDCC_HC_REG_DLL_CONFIG, .value = 0x2004642c }, + { .offset = SDCC_HC_REG_DDR_CONFIG, .value = 0x80040800 }, + { .offset = SDCC_HC_REG_DLL_CONFIG2, .value = 0x00200000 }, + { .offset = SDCC_USR_CTL, .value = 0x00010800 }, + { .offset = RGMII_IO_MACRO_CONFIG2, .value = 0x00002060 }, +}; + +static const struct ethqos_emac_driver_data emac_v4_0_0_data = { + .por = emac_v4_0_0_por, + .num_por = ARRAY_SIZE(emac_v3_0_0_por), + .rgmii_config_loopback_en = false, + .has_emac_ge_3 = true, + .link_clk_name = "phyaux", + .has_integrated_pcs = true, .dwmac4_addrs = { .dma_chan = 0x00008100, .dma_chan_offset = 0x1000, @@ -275,7 +310,7 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos) rgmii_updatel(ethqos, SDCC_DLL_CONFIG_DLL_EN, SDCC_DLL_CONFIG_DLL_EN, SDCC_HC_REG_DLL_CONFIG); - if (!ethqos->has_emac3) { + if (!ethqos->has_emac_ge_3) { rgmii_updatel(ethqos, SDCC_DLL_MCLK_GATING_EN, 0, SDCC_HC_REG_DLL_CONFIG); @@ -316,7 +351,7 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos) rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DDR_CAL_EN, SDCC_DLL_CONFIG2_DDR_CAL_EN, SDCC_HC_REG_DLL_CONFIG2); - if (!ethqos->has_emac3) { + if (!ethqos->has_emac_ge_3) { rgmii_updatel(ethqos, SDCC_DLL_CONFIG2_DLL_CLOCK_DIS, 0, 
SDCC_HC_REG_DLL_CONFIG2); @@ -386,7 +421,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) /* PRG_RCLK_DLY = TCXO period * TCXO_CYCLES_CNT / 2 * RX delay ns, * in practice this becomes PRG_RCLK_DLY = 52 * 4 / 2 * RX delay ns */ - if (ethqos->has_emac3) { + if (ethqos->has_emac_ge_3) { /* 0.9 ns */ rgmii_updatel(ethqos, SDCC_DDR_CONFIG_PRG_RCLK_DLY, 115, SDCC_HC_REG_DDR_CONFIG); @@ -421,7 +456,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) rgmii_updatel(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, 0, RGMII_IO_MACRO_CONFIG2); - if (ethqos->has_emac3) + if (ethqos->has_emac_ge_3) rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, RGMII_CONFIG2_RX_PROG_SWAP, RGMII_IO_MACRO_CONFIG2); @@ -461,7 +496,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos) RGMII_IO_MACRO_CONFIG); rgmii_updatel(ethqos, RGMII_CONFIG2_RSVD_CONFIG15, 0, RGMII_IO_MACRO_CONFIG2); - if (ethqos->has_emac3) + if (ethqos->has_emac_ge_3) rgmii_updatel(ethqos, RGMII_CONFIG2_RX_PROG_SWAP, RGMII_CONFIG2_RX_PROG_SWAP, RGMII_IO_MACRO_CONFIG2); @@ -510,7 +545,7 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos) rgmii_updatel(ethqos, SDCC_DLL_CONFIG_PDN, SDCC_DLL_CONFIG_PDN, SDCC_HC_REG_DLL_CONFIG); - if (ethqos->has_emac3) { + if (ethqos->has_emac_ge_3) { if (ethqos->speed == SPEED_1000) { rgmii_writel(ethqos, 0x1800000, SDCC_TEST_CTL); rgmii_writel(ethqos, 0x2C010800, SDCC_USR_CTL); @@ -540,7 +575,7 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos) SDCC_HC_REG_DLL_CONFIG); /* Set USR_CTL bit 26 with mask of 3 bits */ - if (!ethqos->has_emac3) + if (!ethqos->has_emac_ge_3) rgmii_updatel(ethqos, GENMASK(26, 24), BIT(26), SDCC_USR_CTL); @@ -719,7 +754,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) ethqos->por = data->por; ethqos->num_por = data->num_por; ethqos->rgmii_config_loopback_en = data->rgmii_config_loopback_en; - ethqos->has_emac3 = data->has_emac3; + ethqos->has_emac_ge_3 = data->has_emac_ge_3; ethqos->link_clk 
= devm_clk_get(dev, data->link_clk_name ?: "rgmii"); if (IS_ERR(ethqos->link_clk)) { @@ -749,12 +784,13 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->fix_mac_speed = ethqos_fix_mac_speed; plat_dat->dump_debug_regs = rgmii_dump; plat_dat->has_gmac4 = 1; - if (ethqos->has_emac3) + if (ethqos->has_emac_ge_3) plat_dat->dwmac4_addrs = &data->dwmac4_addrs; plat_dat->pmt = 1; plat_dat->tso_en = of_property_read_bool(np, "snps,tso"); if (of_device_is_compatible(np, "qcom,qcs404-ethqos")) plat_dat->rx_clk_runs_in_lpi = 1; + plat_dat->has_integrated_pcs = data->has_integrated_pcs; if (ethqos->serdes_phy) { plat_dat->serdes_powerup = qcom_ethqos_serdes_powerup; @@ -775,6 +811,7 @@ out_config_dt: static const struct of_device_id qcom_ethqos_match[] = { { .compatible = "qcom,qcs404-ethqos", .data = &emac_v2_3_0_data}, + { .compatible = "qcom,sa8775p-ethqos", .data = &emac_v4_0_0_data}, { .compatible = "qcom,sc8280xp-ethqos", .data = &emac_v3_0_0_data}, { .compatible = "qcom,sm8150-ethqos", .data = &emac_v2_1_0_data}, { } -- cgit v1.2.3 From 7ad7b7023fcb1efdd71406ff7670ef6130de65a6 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 20 Jun 2023 16:48:55 +0200 Subject: bnxt_en: Link representors to PCI device Link VF representors to parent PCI device to benefit from systemd defined naming scheme. Without this change the representor is visible as ethN. 
Signed-off-by: Ivan Vecera Reviewed-by: Simon Horman Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/20230620144855.288443-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index 2f1a1f2d2157..1467b94a6427 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -468,6 +468,7 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep, struct net_device *pf_dev = bp->dev; u16 max_mtu; + SET_NETDEV_DEV(dev, &bp->pdev->dev); dev->netdev_ops = &bnxt_vf_rep_netdev_ops; dev->ethtool_ops = &bnxt_vf_rep_ethtool_ops; /* Just inherit all the featues of the parent PF as the VF-R -- cgit v1.2.3 From 9c50e2b150c8ee0eee5f8154e2ad168cdd748877 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 7 Jun 2023 14:32:29 -0700 Subject: igc: Fix race condition in PTP tx code Currently, the igc driver supports timestamping only one tx packet at a time. During the transmission flow, the skb that requires hardware timestamping is saved in adapter->ptp_tx_skb. Once hardware has the timestamp, an interrupt is delivered, and adapter->ptp_tx_work is scheduled. In igc_ptp_tx_work(), we read the timestamp register, update adapter->ptp_tx_skb, and notify the network stack. While the thread executing the transmission flow (the user process running in kernel mode) and the thread executing ptp_tx_work don't access adapter->ptp_tx_skb concurrently, there are two other places where adapter->ptp_tx_skb is accessed: igc_ptp_tx_hang() and igc_ptp_suspend(). igc_ptp_tx_hang() is executed by the adapter->watchdog_task worker thread which runs periodically so it is possible we have two threads accessing ptp_tx_skb at the same time. 
Consider the following scenario: right after __IGC_PTP_TX_IN_PROGRESS is set in igc_xmit_frame_ring(), igc_ptp_tx_hang() is executed. Since adapter->ptp_tx_start hasn't been written yet, this is considered a timeout and adapter->ptp_tx_skb is cleaned up. This patch fixes the issue described above by adding the ptp_tx_lock to protect access to ptp_tx_skb and ptp_tx_start fields from igc_adapter. Since igc_xmit_frame_ring() called in atomic context by the networking stack, ptp_tx_lock is defined as a spinlock, and the irq safe variants of lock/unlock are used. With the introduction of the ptp_tx_lock, the __IGC_PTP_TX_IN_PROGRESS flag doesn't provide much of a use anymore so this patch gets rid of it. Fixes: 2c344ae24501 ("igc: Add support for TX timestamping") Signed-off-by: Andre Guedes Signed-off-by: Vinicius Costa Gomes Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 5 ++- drivers/net/ethernet/intel/igc/igc_main.c | 9 +++-- drivers/net/ethernet/intel/igc/igc_ptp.c | 57 +++++++++++++++++-------------- 3 files changed, 41 insertions(+), 30 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 34aebf00a512..7da0657ea48f 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -229,6 +229,10 @@ struct igc_adapter { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; struct work_struct ptp_tx_work; + /* Access to ptp_tx_skb and ptp_tx_start are protected by the + * ptp_tx_lock. 
+ */ + spinlock_t ptp_tx_lock; struct sk_buff *ptp_tx_skb; struct hwtstamp_config tstamp_config; unsigned long ptp_tx_start; @@ -401,7 +405,6 @@ enum igc_state_t { __IGC_TESTING, __IGC_RESETTING, __IGC_DOWN, - __IGC_PTP_TX_IN_PROGRESS, }; enum igc_tx_flags { diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index fa764190f270..9fcb263bd3a7 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1590,9 +1590,10 @@ done: * the other timer registers before skipping the * timestamping request. */ - if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && - !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS, - &adapter->state)) { + unsigned long flags; + + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && !adapter->ptp_tx_skb) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGC_TX_FLAGS_TSTAMP; @@ -1601,6 +1602,8 @@ done: } else { adapter->tx_hwtstamp_skipped++; } + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } if (skb_vlan_tag_present(skb)) { diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 4e10ced736db..56128e55f5c0 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -603,6 +603,7 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, return 0; } +/* Requires adapter->ptp_tx_lock held by caller. */ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; @@ -610,7 +611,6 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) dev_kfree_skb_any(adapter->ptp_tx_skb); adapter->ptp_tx_skb = NULL; adapter->tx_hwtstamp_timeouts++; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); /* Clear the tx valid bit in TSYNCTXCTL register to enable interrupt. 
*/ rd32(IGC_TXSTMPH); netdev_warn(adapter->netdev, "Tx timestamp timeout\n"); @@ -618,20 +618,20 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) void igc_ptp_tx_hang(struct igc_adapter *adapter) { - bool timeout = time_is_before_jiffies(adapter->ptp_tx_start + - IGC_PTP_TX_TIMEOUT); + unsigned long flags; - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) - return; + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); - /* If we haven't received a timestamp within the timeout, it is - * reasonable to assume that it will never occur, so we can unlock the - * timestamp bit when this occurs. - */ - if (timeout) { - cancel_work_sync(&adapter->ptp_tx_work); - igc_ptp_tx_timeout(adapter); - } + if (!adapter->ptp_tx_skb) + goto unlock; + + if (time_is_after_jiffies(adapter->ptp_tx_start + IGC_PTP_TX_TIMEOUT)) + goto unlock; + + igc_ptp_tx_timeout(adapter); + +unlock: + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } /** @@ -641,6 +641,8 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter) * If we were asked to do hardware stamping and such a time stamp is * available, then it must have been for this skb here because we only * allow only one such packet into the queue. + * + * Context: Expects adapter->ptp_tx_lock to be held by caller. */ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) { @@ -676,13 +678,7 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) shhwtstamps.hwtstamp = ktime_add_ns(shhwtstamps.hwtstamp, adjust); - /* Clear the lock early before calling skb_tstamp_tx so that - * applications are not woken up before the lock bit is clear. We use - * a copy of the skb pointer to ensure other threads can't change it - * while we're notifying the stack. 
- */ adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); /* Notify the stack and free the skb after we've unlocked */ skb_tstamp_tx(skb, &shhwtstamps); @@ -693,24 +689,33 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) * igc_ptp_tx_work * @work: pointer to work struct * - * This work function polls the TSYNCTXCTL valid bit to determine when a - * timestamp has been taken for the current stored skb. + * This work function checks the TSYNCTXCTL valid bit to determine when + * a timestamp has been taken for the current stored skb. */ static void igc_ptp_tx_work(struct work_struct *work) { struct igc_adapter *adapter = container_of(work, struct igc_adapter, ptp_tx_work); struct igc_hw *hw = &adapter->hw; + unsigned long flags; u32 tsynctxctl; - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) - return; + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + + if (!adapter->ptp_tx_skb) + goto unlock; tsynctxctl = rd32(IGC_TSYNCTXCTL); - if (WARN_ON_ONCE(!(tsynctxctl & IGC_TSYNCTXCTL_TXTT_0))) - return; + tsynctxctl &= IGC_TSYNCTXCTL_TXTT_0; + if (!tsynctxctl) { + WARN_ONCE(1, "Received a TSTAMP interrupt but no TSTAMP is ready.\n"); + goto unlock; + } igc_ptp_tx_hwtstamp(adapter); + +unlock: + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } /** @@ -959,6 +964,7 @@ void igc_ptp_init(struct igc_adapter *adapter) return; } + spin_lock_init(&adapter->ptp_tx_lock); spin_lock_init(&adapter->tmreg_lock); INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work); @@ -1023,7 +1029,6 @@ void igc_ptp_suspend(struct igc_adapter *adapter) cancel_work_sync(&adapter->ptp_tx_work); dev_kfree_skb_any(adapter->ptp_tx_skb); adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); if (pci_device_is_present(adapter->pdev)) { igc_ptp_time_save(adapter); -- cgit v1.2.3 From ce58c7cc8b9910f2bc1d038d7ba60c3f011b2cb2 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 7 Jun 2023 14:32:30 
-0700 Subject: igc: Check if hardware TX timestamping is enabled earlier Before requesting a packet transmission to be hardware timestamped, check if the user has TX timestamping enabled. Fixes an issue that if a packet was internally forwarded to the NIC, and it had the SKBTX_HW_TSTAMP flag set, the driver would mark that timestamp as skipped. In reality, that timestamp was "not for us", as TX timestamp could never be enabled in the NIC. Checking if the TX timestamping is enabled earlier has a secondary effect that when TX timestamping is disabled, there's no need to check for timestamp timeouts. We should only take care to free any pending timestamp when TX timestamping is disabled, as that skb would never be released otherwise. Fixes: 2c344ae24501 ("igc: Add support for TX timestamping") Suggested-by: Vladimir Oltean Signed-off-by: Vinicius Costa Gomes Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_main.c | 5 ++-- drivers/net/ethernet/intel/igc/igc_ptp.c | 42 ++++++++++++++++++++++++++++--- 3 files changed, 43 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 7da0657ea48f..66fb67c17e4f 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -581,6 +581,7 @@ enum igc_ring_flags_t { IGC_RING_FLAG_TX_CTX_IDX, IGC_RING_FLAG_TX_DETECT_HANG, IGC_RING_FLAG_AF_XDP_ZC, + IGC_RING_FLAG_TX_HWTSTAMP, }; #define ring_uses_large_buffer(ring) \ diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 9fcb263bd3a7..eb50bfd5b867 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1585,7 +1585,8 @@ done: } } - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && + 
skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { /* FIXME: add support for retrieving timestamps from * the other timer registers before skipping the * timestamping request. @@ -1593,7 +1594,7 @@ done: unsigned long flags; spin_lock_irqsave(&adapter->ptp_tx_lock, flags); - if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && !adapter->ptp_tx_skb) { + if (!adapter->ptp_tx_skb) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGC_TX_FLAGS_TSTAMP; diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 56128e55f5c0..42f622ceb64b 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -536,9 +536,36 @@ static void igc_ptp_enable_rx_timestamp(struct igc_adapter *adapter) wr32(IGC_TSYNCRXCTL, val); } +static void igc_ptp_clear_tx_tstamp(struct igc_adapter *adapter) +{ + unsigned long flags; + + cancel_work_sync(&adapter->ptp_tx_work); + + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); +} + static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; + int i; + + /* Clear the flags first to avoid new packets to be enqueued + * for TX timestamping. + */ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + clear_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags); + } + + /* Now we can clean the pending TX timestamp requests. */ + igc_ptp_clear_tx_tstamp(adapter); wr32(IGC_TSYNCTXCTL, 0); } @@ -546,12 +573,23 @@ static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter) static void igc_ptp_enable_tx_timestamp(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; + int i; wr32(IGC_TSYNCTXCTL, IGC_TSYNCTXCTL_ENABLED | IGC_TSYNCTXCTL_TXSYNSIG); /* Read TXSTMP registers to discard any timestamp previously stored. 
*/ rd32(IGC_TXSTMPL); rd32(IGC_TXSTMPH); + + /* The hardware is ready to accept TX timestamp requests, + * notify the transmit path. + */ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + set_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags); + } + } /** @@ -1026,9 +1064,7 @@ void igc_ptp_suspend(struct igc_adapter *adapter) if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) return; - cancel_work_sync(&adapter->ptp_tx_work); - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; + igc_ptp_clear_tx_tstamp(adapter); if (pci_device_is_present(adapter->pdev)) { igc_ptp_time_save(adapter); -- cgit v1.2.3 From afa141583d82725f682b2fa762cb36a07f58b3f3 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 7 Jun 2023 14:32:31 -0700 Subject: igc: Retrieve TX timestamp during interrupt handling When the interrupt is handled, the TXTT_0 bit in the TSYNCTXCTL register should already be set and the timestamp value already loaded in the appropriate register. This simplifies the handling, and reduces the latency for retrieving the TX timestamp, which increases the number of TX timestamps that can be handled in a given time period. As the "work" function doesn't run in a workqueue anymore, rename it to something more sensible, an event handler.
Using ntpperf[1] we can see the following performance improvements: Before: $ sudo ./ntpperf -i enp3s0 -m 10:22:22:22:22:21 -d 192.168.1.3 -s 172.18.0.0/16 -I -H -o -37 | responses | TX timestamp offset (ns) rate clients | lost invalid basic xleave | min mean max stddev 1000 100 0.00% 0.00% 0.00% 100.00% -56 +9 +52 19 1500 150 0.00% 0.00% 0.00% 100.00% -40 +30 +75 22 2250 225 0.00% 0.00% 0.00% 100.00% -11 +29 +72 15 3375 337 0.00% 0.00% 0.00% 100.00% -18 +40 +88 22 5062 506 0.00% 0.00% 0.00% 100.00% -19 +23 +77 15 7593 759 0.00% 0.00% 0.00% 100.00% +7 +47 +5168 43 11389 1138 0.00% 0.00% 0.00% 100.00% -11 +41 +5240 39 17083 1708 0.00% 0.00% 0.00% 100.00% +19 +60 +5288 50 25624 2562 0.00% 0.00% 0.00% 100.00% +1 +56 +5368 58 38436 3843 0.00% 0.00% 0.00% 100.00% -84 +12 +8847 66 57654 5765 0.00% 0.00% 100.00% 0.00% 86481 8648 0.00% 0.00% 100.00% 0.00% 129721 12972 0.00% 0.00% 100.00% 0.00% 194581 16384 0.00% 0.00% 100.00% 0.00% 291871 16384 27.35% 0.00% 72.65% 0.00% 437806 16384 50.05% 0.00% 49.95% 0.00% After: $ sudo ./ntpperf -i enp3s0 -m 10:22:22:22:22:21 -d 192.168.1.3 -s 172.18.0.0/16 -I -H -o -37 | responses | TX timestamp offset (ns) rate clients | lost invalid basic xleave | min mean max stddev 1000 100 0.00% 0.00% 0.00% 100.00% -44 +0 +61 19 1500 150 0.00% 0.00% 0.00% 100.00% -6 +39 +81 16 2250 225 0.00% 0.00% 0.00% 100.00% -22 +25 +69 15 3375 337 0.00% 0.00% 0.00% 100.00% -28 +15 +56 14 5062 506 0.00% 0.00% 0.00% 100.00% +7 +78 +143 27 7593 759 0.00% 0.00% 0.00% 100.00% -54 +24 +144 47 11389 1138 0.00% 0.00% 0.00% 100.00% -90 -33 +28 21 17083 1708 0.00% 0.00% 0.00% 100.00% -50 -2 +35 14 25624 2562 0.00% 0.00% 0.00% 100.00% -62 +7 +66 23 38436 3843 0.00% 0.00% 0.00% 100.00% -33 +30 +5395 36 57654 5765 0.00% 0.00% 100.00% 0.00% 86481 8648 0.00% 0.00% 100.00% 0.00% 129721 12972 0.00% 0.00% 100.00% 0.00% 194581 16384 19.50% 0.00% 80.50% 0.00% 291871 16384 35.81% 0.00% 64.19% 0.00% 437806 16384 55.40% 0.00% 44.60% 0.00% [1] https://github.com/mlichvar/ntpperf 
Signed-off-by: Vinicius Costa Gomes Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 2 +- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- drivers/net/ethernet/intel/igc/igc_ptp.c | 15 +++++---------- 3 files changed, 7 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 66fb67c17e4f..a00738bf6b19 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -228,7 +228,6 @@ struct igc_adapter { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; - struct work_struct ptp_tx_work; /* Access to ptp_tx_skb and ptp_tx_start are protected by the * ptp_tx_lock. */ @@ -638,6 +637,7 @@ int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); void igc_ptp_tx_hang(struct igc_adapter *adapter); void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts); +void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter); #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index eb50bfd5b867..eb4f0e562f60 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5223,7 +5223,7 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) if (tsicr & IGC_TSICR_TXTS) { /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); + igc_ptp_tx_tstamp_event(adapter); ack |= IGC_TSICR_TXTS; } diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 42f622ceb64b..cf963a12a92f 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -540,8 +540,6 @@ static void igc_ptp_clear_tx_tstamp(struct igc_adapter 
*adapter) { unsigned long flags; - cancel_work_sync(&adapter->ptp_tx_work); - spin_lock_irqsave(&adapter->ptp_tx_lock, flags); dev_kfree_skb_any(adapter->ptp_tx_skb); @@ -724,16 +722,14 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) } /** - * igc_ptp_tx_work - * @work: pointer to work struct + * igc_ptp_tx_tstamp_event + * @adapter: board private structure * - * This work function checks the TSYNCTXCTL valid bit to determine when - * a timestamp has been taken for the current stored skb. + * Called when a TX timestamp interrupt happens to retrieve the + * timestamp and send it up to the socket. */ -static void igc_ptp_tx_work(struct work_struct *work) +void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter) { - struct igc_adapter *adapter = container_of(work, struct igc_adapter, - ptp_tx_work); struct igc_hw *hw = &adapter->hw; unsigned long flags; u32 tsynctxctl; @@ -1004,7 +1000,6 @@ void igc_ptp_init(struct igc_adapter *adapter) spin_lock_init(&adapter->ptp_tx_lock); spin_lock_init(&adapter->tmreg_lock); - INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work); adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; -- cgit v1.2.3 From c789ad7cbebcac5d5f417296c140a1252c689524 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 7 Jun 2023 14:32:32 -0700 Subject: igc: Work around HW bug causing missing timestamps There's a hardware issue that can cause missing timestamps. The bug is that the interrupt is only cleared if the IGC_TXSTMPH_0 register is read. The bug can cause a race condition if a timestamp is captured at the wrong time, and we will miss that timestamp. To reduce the time window in which the problem can happen, in case no timestamp was ready, we read the "previous" value of the timestamp registers and compare it with the "current" one; if it didn't change, we can be reasonably sure that no timestamp was captured.
If they are different, we use the new value as the captured timestamp. The HW bug is not easy to reproduce, got to reproduce it when smashing the NIC with timestamping requests from multiple applications (e.g. multiple ntpperf instances + ptp4l), after 10s of minutes. This workaround has more impact when multiple timestamp registers are used, and the IGC_TXSTMPH_0 register always need to be read, so the interrupt is cleared. Fixes: 2c344ae24501 ("igc: Add support for TX timestamping") Signed-off-by: Vinicius Costa Gomes Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ptp.c | 48 ++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index cf963a12a92f..32ef112f8291 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -685,14 +685,49 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) struct sk_buff *skb = adapter->ptp_tx_skb; struct skb_shared_hwtstamps shhwtstamps; struct igc_hw *hw = &adapter->hw; + u32 tsynctxctl; int adjust = 0; u64 regval; if (WARN_ON_ONCE(!skb)) return; - regval = rd32(IGC_TXSTMPL); - regval |= (u64)rd32(IGC_TXSTMPH) << 32; + tsynctxctl = rd32(IGC_TSYNCTXCTL); + tsynctxctl &= IGC_TSYNCTXCTL_TXTT_0; + if (tsynctxctl) { + regval = rd32(IGC_TXSTMPL); + regval |= (u64)rd32(IGC_TXSTMPH) << 32; + } else { + /* There's a bug in the hardware that could cause + * missing interrupts for TX timestamping. The issue + * is that for new interrupts to be triggered, the + * IGC_TXSTMPH_0 register must be read. + * + * To avoid discarding a valid timestamp that just + * happened at the "wrong" time, we need to confirm + * that there was no timestamp captured, we do that by + * assuming that no two timestamps in sequence have + * the same nanosecond value. 
+ * + * So, we read the "low" register, read the "high" + * register (to latch a new timestamp) and read the + * "low" register again, if "old" and "new" versions + * of the "low" register are different, a valid + * timestamp was captured, we can read the "high" + * register again. + */ + u32 txstmpl_old, txstmpl_new; + + txstmpl_old = rd32(IGC_TXSTMPL); + rd32(IGC_TXSTMPH); + txstmpl_new = rd32(IGC_TXSTMPL); + + if (txstmpl_old == txstmpl_new) + return; + + regval = txstmpl_new; + regval |= (u64)rd32(IGC_TXSTMPH) << 32; + } if (igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval)) return; @@ -730,22 +765,13 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) */ void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter) { - struct igc_hw *hw = &adapter->hw; unsigned long flags; - u32 tsynctxctl; spin_lock_irqsave(&adapter->ptp_tx_lock, flags); if (!adapter->ptp_tx_skb) goto unlock; - tsynctxctl = rd32(IGC_TSYNCTXCTL); - tsynctxctl &= IGC_TSYNCTXCTL_TXTT_0; - if (!tsynctxctl) { - WARN_ONCE(1, "Received a TSTAMP interrupt but no TSTAMP is ready.\n"); - goto unlock; - } - igc_ptp_tx_hwtstamp(adapter); unlock: -- cgit v1.2.3 From 61f723e6f3d20f3276c678cd346de5ea86b8e5d3 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Mon, 19 Jun 2023 04:06:35 -0400 Subject: iavf: fix err handling for MAC replace Defer removal of current primary MAC until a replacement is successfully added. The previous implementation would leave the filter list with no primary MAC. This was found while reading the code. The patch takes advantage of the fact that there can only be a single primary MAC filter at any time ([1] by Piotr). Piotr has also applied some review suggestions during our internal patch submittal process.
[1] https://lore.kernel.org/netdev/20230614145302.902301-2-piotrx.gardocki@intel.com/ Reviewed-by: Michal Swiatkowski Tested-by: Rafal Romanowski Signed-off-by: Piotr Gardocki Signed-off-by: Przemek Kitszel Reviewed-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 42 +++++++++++++---------------- 1 file changed, 19 insertions(+), 23 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 4a66873882d1..f8e6f3cd7b38 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1007,40 +1007,36 @@ int iavf_replace_primary_mac(struct iavf_adapter *adapter, const u8 *new_mac) { struct iavf_hw *hw = &adapter->hw; - struct iavf_mac_filter *f; + struct iavf_mac_filter *new_f; + struct iavf_mac_filter *old_f; spin_lock_bh(&adapter->mac_vlan_list_lock); - list_for_each_entry(f, &adapter->mac_filter_list, list) { - f->is_primary = false; + new_f = iavf_add_filter(adapter, new_mac); + if (!new_f) { + spin_unlock_bh(&adapter->mac_vlan_list_lock); + return -ENOMEM; } - f = iavf_find_filter(adapter, hw->mac.addr); - if (f) { - f->remove = true; + old_f = iavf_find_filter(adapter, hw->mac.addr); + if (old_f) { + old_f->is_primary = false; + old_f->remove = true; adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER; } - - f = iavf_add_filter(adapter, new_mac); - - if (f) { - /* Always send the request to add if changing primary MAC - * even if filter is already present on the list - */ - f->is_primary = true; - f->add = true; - adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; - ether_addr_copy(hw->mac.addr, new_mac); - } + /* Always send the request to add if changing primary MAC, + * even if filter is already present on the list + */ + new_f->is_primary = true; + new_f->add = true; + adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; + ether_addr_copy(hw->mac.addr, new_mac); 
spin_unlock_bh(&adapter->mac_vlan_list_lock); /* schedule the watchdog task to immediately process the request */ - if (f) { - mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); - return 0; - } - return -ENOMEM; + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); + return 0; } /** -- cgit v1.2.3 From b855bcdeb89777ff255bedf8f1330aac9b26b405 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Thu, 15 Jun 2023 07:03:08 -0400 Subject: iavf: remove some unused functions and pointless wrappers Remove iavf_aq_get_rss_lut(), iavf_aq_get_rss_key(), iavf_vf_reset(). Remove some "OS specific memory free for shared code" wrappers ;) Signed-off-by: Przemek Kitszel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_alloc.h | 3 +- drivers/net/ethernet/intel/iavf/iavf_common.c | 45 ------------------------ drivers/net/ethernet/intel/iavf/iavf_main.c | 22 ++++-------- drivers/net/ethernet/intel/iavf/iavf_osdep.h | 9 ----- drivers/net/ethernet/intel/iavf/iavf_prototype.h | 5 --- 5 files changed, 8 insertions(+), 76 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/iavf/iavf_alloc.h b/drivers/net/ethernet/intel/iavf/iavf_alloc.h index 2711573c14ec..162ea70685a6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_alloc.h +++ b/drivers/net/ethernet/intel/iavf/iavf_alloc.h @@ -28,7 +28,6 @@ enum iavf_status iavf_free_dma_mem(struct iavf_hw *hw, struct iavf_dma_mem *mem); enum iavf_status iavf_allocate_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem, u32 size); -enum iavf_status iavf_free_virt_mem(struct iavf_hw *hw, - struct iavf_virt_mem *mem); +void iavf_free_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem); #endif /* _IAVF_ALLOC_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c index dd11dbbd5551..1afd761d8052 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_common.c +++ b/drivers/net/ethernet/intel/iavf/iavf_common.c @@ -35,7 +35,6 @@ 
enum iavf_status iavf_set_mac_type(struct iavf_hw *hw) status = IAVF_ERR_DEVICE_NOT_SUPPORTED; } - hw_dbg(hw, "found mac: %d, returns: %d\n", hw->mac.type, status); return status; } @@ -397,23 +396,6 @@ static enum iavf_status iavf_aq_get_set_rss_lut(struct iavf_hw *hw, return status; } -/** - * iavf_aq_get_rss_lut - * @hw: pointer to the hardware structure - * @vsi_id: vsi fw index - * @pf_lut: for PF table set true, for VSI table set false - * @lut: pointer to the lut buffer provided by the caller - * @lut_size: size of the lut buffer - * - * get the RSS lookup table, PF or VSI type - **/ -enum iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 vsi_id, - bool pf_lut, u8 *lut, u16 lut_size) -{ - return iavf_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size, - false); -} - /** * iavf_aq_set_rss_lut * @hw: pointer to the hardware structure @@ -472,19 +454,6 @@ iavf_status iavf_aq_get_set_rss_key(struct iavf_hw *hw, u16 vsi_id, return status; } -/** - * iavf_aq_get_rss_key - * @hw: pointer to the hw struct - * @vsi_id: vsi fw index - * @key: pointer to key info struct - * - **/ -enum iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 vsi_id, - struct iavf_aqc_get_set_rss_key_data *key) -{ - return iavf_aq_get_set_rss_key(hw, vsi_id, key, false); -} - /** * iavf_aq_set_rss_key * @hw: pointer to the hw struct @@ -828,17 +797,3 @@ void iavf_vf_parse_hw_config(struct iavf_hw *hw, vsi_res++; } } - -/** - * iavf_vf_reset - * @hw: pointer to the hardware structure - * - * Send a VF_RESET message to the PF. Does not wait for response from PF - * as none will be forthcoming. Immediately after calling this function, - * the admin queue should be shut down and (optionally) reinitialized. 
- **/ -enum iavf_status iavf_vf_reset(struct iavf_hw *hw) -{ - return iavf_aq_send_msg_to_pf(hw, VIRTCHNL_OP_RESET_VF, - 0, NULL, 0, NULL); -} diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index f8e6f3cd7b38..f83720a72cc0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -192,12 +192,11 @@ enum iavf_status iavf_allocate_dma_mem_d(struct iavf_hw *hw, } /** - * iavf_free_dma_mem_d - OS specific memory free for shared code + * iavf_free_dma_mem - wrapper for DMA memory freeing * @hw: pointer to the HW structure * @mem: ptr to mem struct to free **/ -enum iavf_status iavf_free_dma_mem_d(struct iavf_hw *hw, - struct iavf_dma_mem *mem) +enum iavf_status iavf_free_dma_mem(struct iavf_hw *hw, struct iavf_dma_mem *mem) { struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back; @@ -209,13 +208,13 @@ enum iavf_status iavf_free_dma_mem_d(struct iavf_hw *hw, } /** - * iavf_allocate_virt_mem_d - OS specific memory alloc for shared code + * iavf_allocate_virt_mem - virt memory alloc wrapper * @hw: pointer to the HW structure * @mem: ptr to mem struct to fill out * @size: size of memory requested **/ -enum iavf_status iavf_allocate_virt_mem_d(struct iavf_hw *hw, - struct iavf_virt_mem *mem, u32 size) +enum iavf_status iavf_allocate_virt_mem(struct iavf_hw *hw, + struct iavf_virt_mem *mem, u32 size) { if (!mem) return IAVF_ERR_PARAM; @@ -230,20 +229,13 @@ enum iavf_status iavf_allocate_virt_mem_d(struct iavf_hw *hw, } /** - * iavf_free_virt_mem_d - OS specific memory free for shared code + * iavf_free_virt_mem - virt memory free wrapper * @hw: pointer to the HW structure * @mem: ptr to mem struct to free **/ -enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw, - struct iavf_virt_mem *mem) +void iavf_free_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem) { - if (!mem) - return IAVF_ERR_PARAM; - - /* it's ok to kfree a NULL pointer */ kfree(mem->va); 
- - return 0; } /** diff --git a/drivers/net/ethernet/intel/iavf/iavf_osdep.h b/drivers/net/ethernet/intel/iavf/iavf_osdep.h index a452ce90679a..77d33deaabb5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_osdep.h +++ b/drivers/net/ethernet/intel/iavf/iavf_osdep.h @@ -13,12 +13,6 @@ /* get readq/writeq support for 32 bit kernels, use the low-first version */ #include -/* File to be the magic between shared code and - * actual OS primitives - */ - -#define hw_dbg(hw, S, A...) do {} while (0) - #define wr32(a, reg, value) writel((value), ((a)->hw_addr + (reg))) #define rd32(a, reg) readl((a)->hw_addr + (reg)) @@ -35,14 +29,11 @@ struct iavf_dma_mem { #define iavf_allocate_dma_mem(h, m, unused, s, a) \ iavf_allocate_dma_mem_d(h, m, s, a) -#define iavf_free_dma_mem(h, m) iavf_free_dma_mem_d(h, m) struct iavf_virt_mem { void *va; u32 size; }; -#define iavf_allocate_virt_mem(h, m, s) iavf_allocate_virt_mem_d(h, m, s) -#define iavf_free_virt_mem(h, m) iavf_free_virt_mem_d(h, m) #define iavf_debug(h, m, s, ...) 
\ do { \ diff --git a/drivers/net/ethernet/intel/iavf/iavf_prototype.h b/drivers/net/ethernet/intel/iavf/iavf_prototype.h index edebfbbcffdc..940cb4203fbe 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_prototype.h +++ b/drivers/net/ethernet/intel/iavf/iavf_prototype.h @@ -40,12 +40,8 @@ enum iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading); const char *iavf_aq_str(struct iavf_hw *hw, enum iavf_admin_queue_err aq_err); const char *iavf_stat_str(struct iavf_hw *hw, enum iavf_status stat_err); -enum iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 seid, - bool pf_lut, u8 *lut, u16 lut_size); enum iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 seid, bool pf_lut, u8 *lut, u16 lut_size); -enum iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 seid, - struct iavf_aqc_get_set_rss_key_data *key); enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 seid, struct iavf_aqc_get_set_rss_key_data *key); @@ -60,7 +56,6 @@ static inline struct iavf_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype) void iavf_vf_parse_hw_config(struct iavf_hw *hw, struct virtchnl_vf_resource *msg); -enum iavf_status iavf_vf_reset(struct iavf_hw *hw); enum iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw, enum virtchnl_ops v_opcode, enum iavf_status v_retval, -- cgit v1.2.3 From a4aadf0f5905661cd25c366b96cc1c840f05b756 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Wed, 21 Jun 2023 08:54:05 -0700 Subject: iavf: make functions static where possible Make all possible functions static. Move iavf_force_wb() up to avoid forward declaration. 
Suggested-by: Maciej Fijalkowski Reviewed-by: Maciej Fijalkowski Signed-off-by: Przemek Kitszel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf.h | 10 ------- drivers/net/ethernet/intel/iavf/iavf_main.c | 14 +++++----- drivers/net/ethernet/intel/iavf/iavf_txrx.c | 43 ++++++++++++++--------------- drivers/net/ethernet/intel/iavf/iavf_txrx.h | 4 --- 4 files changed, 28 insertions(+), 43 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 39d0fe76a38f..f80f2735e688 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -523,9 +523,6 @@ void iavf_schedule_request_stats(struct iavf_adapter *adapter); void iavf_reset(struct iavf_adapter *adapter); void iavf_set_ethtool_ops(struct net_device *netdev); void iavf_update_stats(struct iavf_adapter *adapter); -void iavf_reset_interrupt_capability(struct iavf_adapter *adapter); -int iavf_init_interrupt_scheme(struct iavf_adapter *adapter); -void iavf_irq_enable_queues(struct iavf_adapter *adapter); void iavf_free_all_tx_resources(struct iavf_adapter *adapter); void iavf_free_all_rx_resources(struct iavf_adapter *adapter); @@ -579,17 +576,10 @@ void iavf_enable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_disable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_enable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_disable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid); -int iavf_replace_primary_mac(struct iavf_adapter *adapter, - const u8 *new_mac); -void -iavf_set_vlan_offload_features(struct iavf_adapter *adapter, - netdev_features_t prev_features, - netdev_features_t features); void iavf_add_fdir_filter(struct iavf_adapter *adapter); void iavf_del_fdir_filter(struct iavf_adapter *adapter); void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter); void iavf_del_adv_rss_cfg(struct iavf_adapter 
*adapter); struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, const u8 *macaddr); -int iavf_lock_timeout(struct mutex *lock, unsigned int msecs); #endif /* _IAVF_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index f83720a72cc0..a483eb185c99 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -245,7 +245,7 @@ void iavf_free_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem) * * Returns 0 on success, negative on failure **/ -int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) +static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) { unsigned int wait, delay = 10; @@ -354,7 +354,7 @@ static void iavf_irq_disable(struct iavf_adapter *adapter) * iavf_irq_enable_queues - Enable interrupt for all queues * @adapter: board private structure **/ -void iavf_irq_enable_queues(struct iavf_adapter *adapter) +static void iavf_irq_enable_queues(struct iavf_adapter *adapter) { struct iavf_hw *hw = &adapter->hw; int i; @@ -995,8 +995,8 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, * * Do not call this with mac_vlan_list_lock! 
**/ -int iavf_replace_primary_mac(struct iavf_adapter *adapter, - const u8 *new_mac) +static int iavf_replace_primary_mac(struct iavf_adapter *adapter, + const u8 *new_mac) { struct iavf_hw *hw = &adapter->hw; struct iavf_mac_filter *new_f; @@ -1851,7 +1851,7 @@ static void iavf_free_q_vectors(struct iavf_adapter *adapter) * @adapter: board private structure * **/ -void iavf_reset_interrupt_capability(struct iavf_adapter *adapter) +static void iavf_reset_interrupt_capability(struct iavf_adapter *adapter) { if (!adapter->msix_entries) return; @@ -1866,7 +1866,7 @@ void iavf_reset_interrupt_capability(struct iavf_adapter *adapter) * @adapter: board private structure to initialize * **/ -int iavf_init_interrupt_scheme(struct iavf_adapter *adapter) +static int iavf_init_interrupt_scheme(struct iavf_adapter *adapter) { int err; @@ -2164,7 +2164,7 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) * the watchdog if any changes are requested to expedite the request via * virtchnl. 
**/ -void +static void iavf_set_vlan_offload_features(struct iavf_adapter *adapter, netdev_features_t prev_features, netdev_features_t features) diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index e989feda133c..8c5f6096b002 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -54,7 +54,7 @@ static void iavf_unmap_and_free_tx_resource(struct iavf_ring *ring, * iavf_clean_tx_ring - Free any empty Tx buffers * @tx_ring: ring to be cleaned **/ -void iavf_clean_tx_ring(struct iavf_ring *tx_ring) +static void iavf_clean_tx_ring(struct iavf_ring *tx_ring) { unsigned long bi_size; u16 i; @@ -110,7 +110,7 @@ void iavf_free_tx_resources(struct iavf_ring *tx_ring) * Since there is no access to the ring head register * in XL710, we need to use our local copies **/ -u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw) +static u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw) { u32 head, tail; @@ -127,6 +127,24 @@ u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw) return 0; } +/** + * iavf_force_wb - Issue SW Interrupt so HW does a wb + * @vsi: the VSI we care about + * @q_vector: the vector on which to force writeback + **/ +static void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector) +{ + u32 val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK | + IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ + IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | + IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK + /* allow 00 to be written to the index */; + + wr32(&vsi->back->hw, + IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx), + val); +} + /** * iavf_detect_recover_hung - Function to detect and recover hung_queues * @vsi: pointer to vsi struct with tx queues @@ -352,25 +370,6 @@ static void iavf_enable_wb_on_itr(struct iavf_vsi *vsi, q_vector->arm_wb_state = true; } -/** - * iavf_force_wb - Issue SW Interrupt so HW does a wb - * @vsi: the VSI we care about - * 
@q_vector: the vector on which to force writeback - * - **/ -void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector) -{ - u32 val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK | - IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */ - IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK | - IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK - /* allow 00 to be written to the index */; - - wr32(&vsi->back->hw, - IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx), - val); -} - static inline bool iavf_container_is_rx(struct iavf_q_vector *q_vector, struct iavf_ring_container *rc) { @@ -687,7 +686,7 @@ err: * iavf_clean_rx_ring - Free Rx buffers * @rx_ring: ring to be cleaned **/ -void iavf_clean_rx_ring(struct iavf_ring *rx_ring) +static void iavf_clean_rx_ring(struct iavf_ring *rx_ring) { unsigned long bi_size; u16 i; diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h index 2624bf6d009e..7e6ee32d19b6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h @@ -442,15 +442,11 @@ static inline unsigned int iavf_rx_pg_order(struct iavf_ring *ring) bool iavf_alloc_rx_buffers(struct iavf_ring *rxr, u16 cleaned_count); netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev); -void iavf_clean_tx_ring(struct iavf_ring *tx_ring); -void iavf_clean_rx_ring(struct iavf_ring *rx_ring); int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring); int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring); void iavf_free_tx_resources(struct iavf_ring *tx_ring); void iavf_free_rx_resources(struct iavf_ring *rx_ring); int iavf_napi_poll(struct napi_struct *napi, int budget); -void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector); -u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw); void iavf_detect_recover_hung(struct iavf_vsi *vsi); int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size); bool __iavf_chk_linearize(struct sk_buff *skb); -- cgit v1.2.3 From 
a734c43caa4d9a08da521be1a2135cadf1510e75 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 13 Jun 2023 13:40:53 -0700 Subject: ice: reduce initial wait for control queue messages The ice_sq_send_cmd() function is used to send messages to the control queues used to communicate with firmware, virtual functions, and even some hardware. When sending a control queue message, the driver is designed to synchronously wait for a response from the queue. Currently it waits between checks for 100 to 150 microseconds. Commit f86d6f9c49f6 ("ice: sleep, don't busy-wait, for ICE_CTL_Q_SQ_CMD_TIMEOUT") did recently change the behavior from an unnecessary delay into a sleep which is a significant improvement over the old behavior of polling using udelay. Because of the nature of PCIe transactions, the hardware won't be informed about a new message until the write to the tail register posts. This is only guaranteed to occur at the next register read. In ice_sq_send_cmd(), this happens at the ice_sq_done() call. Because of this, the driver essentially forces a minimum of one full wait time regardless of how fast the response is. For the hardware-based sideband queue, this is especially slow. It is expected that the hardware will respond within 2 or 3 microseconds, an order of magnitude faster than the 100-150 microsecond sleep. Allow such fast completions to occur without delay by introducing a small 5 microsecond delay first before entering the sleeping timeout loop. Ensure the tail write has been posted by using ice_flush(hw) first. While at it, let's also remove the ICE_CTL_Q_SQ_CMD_USEC macro as it obscures the sleep time in the inner loop. It was likely introduced to avoid "magic numbers", but in practice sleep and delay values are easier to read and understand when using actual numbers instead of a named constant. 
This change should allow the fast hardware based control queue messages to complete quickly without delay, while slower firmware queue response times will sleep while waiting for the response. Signed-off-by: Jacob Keller Reviewed-by: Michal Schmidt Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_controlq.c | 9 +++++++-- drivers/net/ethernet/intel/ice/ice_controlq.h | 1 - 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index d2faf1baad2f..385fd88831db 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -1056,14 +1056,19 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq, if (cq->sq.next_to_use == cq->sq.count) cq->sq.next_to_use = 0; wr32(hw, cq->sq.tail, cq->sq.next_to_use); + ice_flush(hw); + + /* Wait a short time before initial ice_sq_done() check, to allow + * hardware time for completion. 
+ */ + udelay(5); timeout = jiffies + ICE_CTL_Q_SQ_CMD_TIMEOUT; do { if (ice_sq_done(hw, cq)) break; - usleep_range(ICE_CTL_Q_SQ_CMD_USEC, - ICE_CTL_Q_SQ_CMD_USEC * 3 / 2); + usleep_range(100, 150); } while (time_before(jiffies, timeout)); /* if ready, copy the desc back to temp */ diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index 950b7f4a7a05..8f2fd1613a95 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -35,7 +35,6 @@ enum ice_ctl_q { /* Control Queue timeout settings - max delay 1s */ #define ICE_CTL_Q_SQ_CMD_TIMEOUT HZ /* Wait max 1s */ -#define ICE_CTL_Q_SQ_CMD_USEC 100 /* Check every 100usec */ #define ICE_CTL_Q_ADMIN_INIT_TIMEOUT 10 /* Count 10 times */ #define ICE_CTL_Q_ADMIN_INIT_MSEC 100 /* Check every 100msec */ -- cgit v1.2.3 From 469748429ac81f0a6a344637fc9d3b1d16a9f3d8 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 15 Jun 2023 13:33:26 +0200 Subject: ice: allow hot-swapping XDP programs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently ice driver's .ndo_bpf callback brings interface down and up independently of XDP resources' presence. This is only needed when either these resources have to be configured or removed. It means that if one is switching XDP programs on-the-fly with running traffic, packets will be dropped. To avoid this, compare early on ice_xdp_setup_prog() state of incoming bpf_prog pointer vs the bpf_prog pointer that is already assigned to VSI. Do the swap in case VSI has bpf_prog and incoming one are non-NULL. Lastly, while at it, put old bpf_prog *after* the update of Rx ring's bpf_prog pointer. In theory previous code could expose us to a state where Rx ring's bpf_prog would still be referring to old_prog that got released with earlier bpf_prog_put(). 
Signed-off-by: Maciej Fijalkowski Acked-by: Toke Høiland-Jørgensen Reviewed-by: Alexander Lobakin Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 65bf399a0efc..5dd88611141e 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2633,11 +2633,11 @@ static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog) int i; old_prog = xchg(&vsi->xdp_prog, prog); - if (old_prog) - bpf_prog_put(old_prog); - ice_for_each_rxq(vsi, i) WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog); + + if (old_prog) + bpf_prog_put(old_prog); } /** @@ -2922,6 +2922,12 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } } + /* hot swap progs and avoid toggling link */ + if (ice_is_xdp_ena_vsi(vsi) == !!prog) { + ice_vsi_assign_bpf_prog(vsi, prog); + return 0; + } + /* need to stop netdev while setting up the program for Rx rings */ if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { ret = ice_down(vsi); @@ -2954,13 +2960,6 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, xdp_ring_err = ice_realloc_zc_buf(vsi, false); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Rx resources failed"); - } else { - /* safe to call even when prog == vsi->xdp_prog as - * dev_xdp_install in net/core/dev.c incremented prog's - * refcount so corresponding bpf_prog_put won't cause - * underflow - */ - ice_vsi_assign_bpf_prog(vsi, prog); } if (if_running) -- cgit v1.2.3 From f98277479ad85ff1398e11c1e944ba97c3917393 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Wed, 31 May 2023 14:36:42 +0200 Subject: ice: clean up freeing SR-IOV VFs The check for existing VFs was redundant since very 
inception of SR-IOV sysfs interface in the kernel, see commit 1789382a72a5 ("PCI: SRIOV control and status via sysfs"). Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Signed-off-by: Przemek Kitszel Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sriov.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 2ea6d24977a6..1f66914c7a20 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -905,14 +905,13 @@ err_unroll_intr: */ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs) { - int pre_existing_vfs = pci_num_vf(pf->pdev); struct device *dev = ice_pf_to_dev(pf); int err; - if (pre_existing_vfs && pre_existing_vfs != num_vfs) + if (!num_vfs) { ice_free_vfs(pf); - else if (pre_existing_vfs && pre_existing_vfs == num_vfs) return 0; + } if (num_vfs > pf->vfs.num_supported) { dev_err(dev, "Can't enable %d VFs, max VFs supported is %d\n", -- cgit v1.2.3 From ad667d626825383b626ad6ed38d6205618abb115 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Wed, 31 May 2023 14:38:40 +0200 Subject: ice: remove null checks before devm_kfree() calls We all know they are redundant. 
Reviewed-by: Michal Swiatkowski Reviewed-by: Michal Wilczynski Reviewed-by: Simon Horman Signed-off-by: Przemek Kitszel Tested-by: Arpana Arland (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 6 ++-- drivers/net/ethernet/intel/ice/ice_controlq.c | 3 +- drivers/net/ethernet/intel/ice/ice_flow.c | 23 ++------------- drivers/net/ethernet/intel/ice/ice_lib.c | 42 +++++++++------------------ drivers/net/ethernet/intel/ice/ice_sched.c | 11 ++----- drivers/net/ethernet/intel/ice/ice_switch.c | 19 ++++-------- 6 files changed, 29 insertions(+), 75 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index eb2dc0983776..6acb40f3c202 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -814,8 +814,7 @@ static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw) devm_kfree(ice_hw_to_dev(hw), lst_itr); } } - if (recps[i].root_buf) - devm_kfree(ice_hw_to_dev(hw), recps[i].root_buf); + devm_kfree(ice_hw_to_dev(hw), recps[i].root_buf); } ice_rm_all_sw_replay_rule_info(hw); devm_kfree(ice_hw_to_dev(hw), sw->recp_list); @@ -1011,8 +1010,7 @@ static int ice_cfg_fw_log(struct ice_hw *hw, bool enable) } out: - if (data) - devm_kfree(ice_hw_to_dev(hw), data); + devm_kfree(ice_hw_to_dev(hw), data); return status; } diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index 385fd88831db..e7d2474c431c 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -339,8 +339,7 @@ do { \ } \ } \ /* free the buffer info list */ \ - if ((qi)->ring.cmd_buf) \ - devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf); \ + devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf); \ /* free DMA head */ \ devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head); \ } while (0) diff --git 
a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c index ef103e47a8dc..85cca572c22a 100644 --- a/drivers/net/ethernet/intel/ice/ice_flow.c +++ b/drivers/net/ethernet/intel/ice/ice_flow.c @@ -1303,23 +1303,6 @@ ice_flow_find_prof_id(struct ice_hw *hw, enum ice_block blk, u64 prof_id) return NULL; } -/** - * ice_dealloc_flow_entry - Deallocate flow entry memory - * @hw: pointer to the HW struct - * @entry: flow entry to be removed - */ -static void -ice_dealloc_flow_entry(struct ice_hw *hw, struct ice_flow_entry *entry) -{ - if (!entry) - return; - - if (entry->entry) - devm_kfree(ice_hw_to_dev(hw), entry->entry); - - devm_kfree(ice_hw_to_dev(hw), entry); -} - /** * ice_flow_rem_entry_sync - Remove a flow entry * @hw: pointer to the HW struct @@ -1335,7 +1318,8 @@ ice_flow_rem_entry_sync(struct ice_hw *hw, enum ice_block __always_unused blk, list_del(&entry->l_entry); - ice_dealloc_flow_entry(hw, entry); + devm_kfree(ice_hw_to_dev(hw), entry->entry); + devm_kfree(ice_hw_to_dev(hw), entry); return 0; } @@ -1662,8 +1646,7 @@ ice_flow_add_entry(struct ice_hw *hw, enum ice_block blk, u64 prof_id, out: if (status && e) { - if (e->entry) - devm_kfree(ice_hw_to_dev(hw), e->entry); + devm_kfree(ice_hw_to_dev(hw), e->entry); devm_kfree(ice_hw_to_dev(hw), e); } diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 5ddb95d1073a..00e3afd507a4 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -321,31 +321,19 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi) dev = ice_pf_to_dev(pf); - if (vsi->af_xdp_zc_qps) { - bitmap_free(vsi->af_xdp_zc_qps); - vsi->af_xdp_zc_qps = NULL; - } + bitmap_free(vsi->af_xdp_zc_qps); + vsi->af_xdp_zc_qps = NULL; /* free the ring and vector containers */ - if (vsi->q_vectors) { - devm_kfree(dev, vsi->q_vectors); - vsi->q_vectors = NULL; - } - if (vsi->tx_rings) { - devm_kfree(dev, vsi->tx_rings); - 
vsi->tx_rings = NULL; - } - if (vsi->rx_rings) { - devm_kfree(dev, vsi->rx_rings); - vsi->rx_rings = NULL; - } - if (vsi->txq_map) { - devm_kfree(dev, vsi->txq_map); - vsi->txq_map = NULL; - } - if (vsi->rxq_map) { - devm_kfree(dev, vsi->rxq_map); - vsi->rxq_map = NULL; - } + devm_kfree(dev, vsi->q_vectors); + vsi->q_vectors = NULL; + devm_kfree(dev, vsi->tx_rings); + vsi->tx_rings = NULL; + devm_kfree(dev, vsi->rx_rings); + vsi->rx_rings = NULL; + devm_kfree(dev, vsi->txq_map); + vsi->txq_map = NULL; + devm_kfree(dev, vsi->rxq_map); + vsi->rxq_map = NULL; } /** @@ -902,10 +890,8 @@ static void ice_rss_clean(struct ice_vsi *vsi) dev = ice_pf_to_dev(pf); - if (vsi->rss_hkey_user) - devm_kfree(dev, vsi->rss_hkey_user); - if (vsi->rss_lut_user) - devm_kfree(dev, vsi->rss_lut_user); + devm_kfree(dev, vsi->rss_hkey_user); + devm_kfree(dev, vsi->rss_lut_user); ice_vsi_clean_rss_flow_fld(vsi); /* remove RSS replay list */ diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index b7682de0ae05..b664d60fd037 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -358,10 +358,7 @@ void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node) node->sibling; } - /* leaf nodes have no children */ - if (node->children) - devm_kfree(ice_hw_to_dev(hw), node->children); - + devm_kfree(ice_hw_to_dev(hw), node->children); kfree(node->name); xa_erase(&pi->sched_node_ids, node->id); devm_kfree(ice_hw_to_dev(hw), node); @@ -859,10 +856,8 @@ void ice_sched_cleanup_all(struct ice_hw *hw) if (!hw) return; - if (hw->layer_info) { - devm_kfree(ice_hw_to_dev(hw), hw->layer_info); - hw->layer_info = NULL; - } + devm_kfree(ice_hw_to_dev(hw), hw->layer_info); + hw->layer_info = NULL; ice_sched_clear_port(hw->port_info); diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 2ea9e1ae5517..6db4ca7978cb 100644 --- 
a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -1636,21 +1636,16 @@ ice_save_vsi_ctx(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi) */ static void ice_clear_vsi_q_ctx(struct ice_hw *hw, u16 vsi_handle) { - struct ice_vsi_ctx *vsi; + struct ice_vsi_ctx *vsi = ice_get_vsi_ctx(hw, vsi_handle); u8 i; - vsi = ice_get_vsi_ctx(hw, vsi_handle); if (!vsi) return; ice_for_each_traffic_class(i) { - if (vsi->lan_q_ctx[i]) { - devm_kfree(ice_hw_to_dev(hw), vsi->lan_q_ctx[i]); - vsi->lan_q_ctx[i] = NULL; - } - if (vsi->rdma_q_ctx[i]) { - devm_kfree(ice_hw_to_dev(hw), vsi->rdma_q_ctx[i]); - vsi->rdma_q_ctx[i] = NULL; - } + devm_kfree(ice_hw_to_dev(hw), vsi->lan_q_ctx[i]); + vsi->lan_q_ctx[i] = NULL; + devm_kfree(ice_hw_to_dev(hw), vsi->rdma_q_ctx[i]); + vsi->rdma_q_ctx[i] = NULL; } } @@ -5468,9 +5463,7 @@ err_unroll: devm_kfree(ice_hw_to_dev(hw), fvit); } - if (rm->root_buf) - devm_kfree(ice_hw_to_dev(hw), rm->root_buf); - + devm_kfree(ice_hw_to_dev(hw), rm->root_buf); kfree(rm); err_free_lkup_exts: -- cgit v1.2.3 From 1dacc49782e67d4316b46329e416c24473c0369c Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 11 Jun 2023 22:44:13 +0200 Subject: ice: Remove managed memory usage in ice_get_fw_log_cfg() There is no need to use managed memory allocation here. The memory is released at the end of the function. Use kzalloc()/kfree() to simplify the code. 
Signed-off-by: Christophe JAILLET Reviewed-by: Pavan Chebbi Reviewed-by: Jacob Keller Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 6acb40f3c202..e16d4c83ed5f 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -833,7 +833,7 @@ static int ice_get_fw_log_cfg(struct ice_hw *hw) u16 size; size = sizeof(*config) * ICE_AQC_FW_LOG_ID_MAX; - config = devm_kzalloc(ice_hw_to_dev(hw), size, GFP_KERNEL); + config = kzalloc(size, GFP_KERNEL); if (!config) return -ENOMEM; @@ -856,7 +856,7 @@ static int ice_get_fw_log_cfg(struct ice_hw *hw) } } - devm_kfree(ice_hw_to_dev(hw), config); + kfree(config); return status; } -- cgit v1.2.3 From b7a0345723385c3cc0438cf4266ccc110dc7b583 Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 13 Jun 2023 13:35:52 +0200 Subject: ice: use ice_down_up() where applicable ice_change_mtu() is currently using a separate ice_down() and ice_up() calls to reflect changed MTU. ice_down_up() serves this purpose, so do the refactoring here. 
Signed-off-by: Maciej Fijalkowski Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 5dd88611141e..93979ab18bc1 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7412,21 +7412,9 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) } netdev->mtu = (unsigned int)new_mtu; - - /* if VSI is up, bring it down and then back up */ - if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { - err = ice_down(vsi); - if (err) { - netdev_err(netdev, "change MTU if_down err %d\n", err); - return err; - } - - err = ice_up(vsi); - if (err) { - netdev_err(netdev, "change MTU if_up err %d\n", err); - return err; - } - } + err = ice_down_up(vsi); + if (err) + return err; netdev_dbg(netdev, "changed MTU to %d\n", new_mtu); set_bit(ICE_FLAG_MTU_CHANGED, pf->flags); -- cgit v1.2.3 From c4fc88ad2a765224a648db8ab35f125e120fe41b Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Jun 2023 15:55:37 +0200 Subject: net: stmmac: fix double serdes powerdown Commit 49725ffc15fc ("net: stmmac: power up/down serdes in stmmac_open/release") correctly added a call to the serdes_powerdown() callback to stmmac_release() but did not remove the one from stmmac_remove() which leads to a doubled call to serdes_powerdown(). This can lead to all kinds of problems: in the case of the qcom ethqos driver, it caused an unbalanced regulator disable splat. 
Fixes: 49725ffc15fc ("net: stmmac: power up/down serdes in stmmac_open/release") Signed-off-by: Bartosz Golaszewski Reviewed-by: Jiri Pirko Acked-by: Junxiao Chang Reviewed-by: Andrew Halaney Tested-by: Andrew Halaney Link: https://lore.kernel.org/r/20230621135537.376649-1-brgl@bgdev.pl Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 87510951f4e8..b74946bbee3c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -7457,12 +7457,6 @@ void stmmac_dvr_remove(struct device *dev) netif_carrier_off(ndev); unregister_netdev(ndev); - /* Serdes power down needs to happen after VLAN filter - * is deleted that is triggered by unregister_netdev(). - */ - if (priv->plat->serdes_powerdown) - priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv); - #ifdef CONFIG_DEBUG_FS stmmac_exit_fs(ndev); #endif -- cgit v1.2.3 From b028813ac97370e61351b1190c1860a1bd24fe56 Mon Sep 17 00:00:00 2001 From: Yueh-Shun Li Date: Thu, 22 Jun 2023 01:26:25 +0000 Subject: i40e, xsk: fix comment typo Spell "transmission" properly. Found by searching for keyword "tranm". 
Signed-off-by: Yueh-Shun Li Link: https://lore.kernel.org/r/20230622012627.15050-3-shamrocklee@posteo.net Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index cd7b52fb6b46..05ec1181471e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -582,7 +582,7 @@ static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring, * @vsi: Current VSI * @tx_ring: XDP Tx ring * - * Returns true if cleanup/tranmission is done. + * Returns true if cleanup/transmission is done. **/ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring) { -- cgit v1.2.3 From febf2aaf05641f3258cc30e072aff65cffc7c82c Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 16 Jun 2023 13:06:32 +0100 Subject: net: phylink: pass neg_mode into phylink_mii_c22_pcs_config() Convert fman_dtsec, xilinx_axienet and pcs-lynx to pass the neg_mode into phylink_mii_c22_pcs_config(). Where appropriate, drivers are updated to have neg_mode passed into their pcs_config() and pcs_link_up() functions. For other drivers, we just hoist the call to phylink_pcs_neg_mode() to their pcs_config() method out of phylink_mii_c22_pcs_config(). 
Signed-off-by: Russell King (Oracle) Link: https://lore.kernel.org/r/E1qA8Do-00EaFM-Ra@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fman/fman_dtsec.c | 7 ++++--- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 6 ++++-- drivers/net/pcs/pcs-lynx.c | 18 ++++++++++++------ drivers/net/phy/phylink.c | 9 ++++----- include/linux/phylink.h | 5 +++-- 5 files changed, 27 insertions(+), 18 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c index d528ca681b6f..3088da7adf0f 100644 --- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c +++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c @@ -763,15 +763,15 @@ static void dtsec_pcs_get_state(struct phylink_pcs *pcs, phylink_mii_c22_pcs_get_state(dtsec->tbidev, state); } -static int dtsec_pcs_config(struct phylink_pcs *pcs, unsigned int mode, +static int dtsec_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) { struct fman_mac *dtsec = pcs_to_dtsec(pcs); - return phylink_mii_c22_pcs_config(dtsec->tbidev, mode, interface, - advertising); + return phylink_mii_c22_pcs_config(dtsec->tbidev, interface, + advertising, neg_mode); } static void dtsec_pcs_an_restart(struct phylink_pcs *pcs) @@ -1447,6 +1447,7 @@ int dtsec_initialization(struct mac_device *mac_dev, goto _return_fm_mac_free; } dtsec->pcs.ops = &dtsec_pcs_ops; + dtsec->pcs.neg_mode = true; dtsec->pcs.poll = true; supported = mac_dev->phylink_config.supported_interfaces; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 3e310b55bce2..ae7b9af7b7d7 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1631,7 +1631,7 @@ static void axienet_pcs_an_restart(struct phylink_pcs *pcs) 
phylink_mii_c22_pcs_an_restart(pcs_phy); } -static int axienet_pcs_config(struct phylink_pcs *pcs, unsigned int mode, +static int axienet_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -1653,7 +1653,8 @@ static int axienet_pcs_config(struct phylink_pcs *pcs, unsigned int mode, } } - ret = phylink_mii_c22_pcs_config(pcs_phy, mode, interface, advertising); + ret = phylink_mii_c22_pcs_config(pcs_phy, interface, advertising, + neg_mode); if (ret < 0) netdev_warn(ndev, "Failed to configure PCS: %d\n", ret); @@ -2129,6 +2130,7 @@ static int axienet_probe(struct platform_device *pdev) } of_node_put(np); lp->pcs.ops = &axienet_pcs_ops; + lp->pcs.neg_mode = true; lp->pcs.poll = true; } diff --git a/drivers/net/pcs/pcs-lynx.c b/drivers/net/pcs/pcs-lynx.c index fca48ebf0b81..25bd4b45eb7b 100644 --- a/drivers/net/pcs/pcs-lynx.c +++ b/drivers/net/pcs/pcs-lynx.c @@ -112,9 +112,10 @@ static void lynx_pcs_get_state(struct phylink_pcs *pcs, state->link, state->an_complete); } -static int lynx_pcs_config_giga(struct mdio_device *pcs, unsigned int mode, +static int lynx_pcs_config_giga(struct mdio_device *pcs, phy_interface_t interface, - const unsigned long *advertising) + const unsigned long *advertising, + unsigned int neg_mode) { int link_timer_ns; u32 link_timer; @@ -132,8 +133,9 @@ static int lynx_pcs_config_giga(struct mdio_device *pcs, unsigned int mode, if (interface == PHY_INTERFACE_MODE_1000BASEX) { if_mode = 0; } else { + /* SGMII and QSGMII */ if_mode = IF_MODE_SGMII_EN; - if (mode == MLO_AN_INBAND) + if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) if_mode |= IF_MODE_USE_SGMII_AN; } @@ -143,7 +145,8 @@ static int lynx_pcs_config_giga(struct mdio_device *pcs, unsigned int mode, if (err) return err; - return phylink_mii_c22_pcs_config(pcs, mode, interface, advertising); + return phylink_mii_c22_pcs_config(pcs, interface, advertising, + neg_mode); } static int 
lynx_pcs_config_usxgmii(struct mdio_device *pcs, unsigned int mode, @@ -170,13 +173,16 @@ static int lynx_pcs_config(struct phylink_pcs *pcs, unsigned int mode, bool permit) { struct lynx_pcs *lynx = phylink_pcs_to_lynx(pcs); + unsigned int neg_mode; + + neg_mode = phylink_pcs_neg_mode(mode, ifmode, advertising); switch (ifmode) { case PHY_INTERFACE_MODE_1000BASEX: case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_QSGMII: - return lynx_pcs_config_giga(lynx->mdio, mode, ifmode, - advertising); + return lynx_pcs_config_giga(lynx->mdio, ifmode, advertising, + neg_mode); case PHY_INTERFACE_MODE_2500BASEX: if (phylink_autoneg_inband(mode)) { dev_err(&lynx->mdio->dev, diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 567fd22a8924..d0aaa5cad853 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -3545,20 +3545,20 @@ EXPORT_SYMBOL_GPL(phylink_mii_c22_pcs_encode_advertisement); /** * phylink_mii_c22_pcs_config() - configure clause 22 PCS * @pcs: a pointer to a &struct mdio_device. - * @mode: link autonegotiation mode * @interface: the PHY interface mode being configured * @advertising: the ethtool advertisement mask + * @neg_mode: PCS negotiation mode * * Configure a Clause 22 PCS PHY with the appropriate negotiation * parameters for the @mode, @interface and @advertising parameters. * Returns negative error number on failure, zero if the advertisement * has not changed, or positive if there is a change. 
*/ -int phylink_mii_c22_pcs_config(struct mdio_device *pcs, unsigned int mode, +int phylink_mii_c22_pcs_config(struct mdio_device *pcs, phy_interface_t interface, - const unsigned long *advertising) + const unsigned long *advertising, + unsigned int neg_mode) { - unsigned int neg_mode; bool changed = 0; u16 bmcr; int ret, adv; @@ -3572,7 +3572,6 @@ int phylink_mii_c22_pcs_config(struct mdio_device *pcs, unsigned int mode, changed = ret; } - neg_mode = phylink_pcs_neg_mode(mode, interface, advertising); if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) bmcr = BMCR_ANENABLE; else diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 2b322d7fa51a..516240f1e950 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -743,9 +743,10 @@ void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs, struct phylink_link_state *state); int phylink_mii_c22_pcs_encode_advertisement(phy_interface_t interface, const unsigned long *advertising); -int phylink_mii_c22_pcs_config(struct mdio_device *pcs, unsigned int mode, +int phylink_mii_c22_pcs_config(struct mdio_device *pcs, phy_interface_t interface, - const unsigned long *advertising); + const unsigned long *advertising, + unsigned int neg_mode); void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs); void phylink_resolve_c73(struct phylink_link_state *state); -- cgit v1.2.3 From a0e93cfdac4c91b73f79a4bfbfcf74b0911c1ad3 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 16 Jun 2023 13:06:53 +0100 Subject: net: lan966x: update PCS driver to use neg_mode Update lan966x's embedded PCS driver to use neg_mode rather than the mode argument. As there is no pcs_link_up() method, this only affects the pcs_config() method. 
Signed-off-by: Russell King (Oracle) Link: https://lore.kernel.org/r/E1qA8E9-00EaFl-GN@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan966x/lan966x_main.c | 1 + drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index f6931dfb3e68..fbb0bb4594cd 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -818,6 +818,7 @@ static int lan966x_probe_port(struct lan966x *lan966x, u32 p, port->phylink_config.type = PHYLINK_NETDEV; port->phylink_pcs.poll = true; port->phylink_pcs.ops = &lan966x_phylink_pcs_ops; + port->phylink_pcs.neg_mode = true; port->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD | MAC_2500FD; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c b/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c index c5f9803e6e63..1d63903f9006 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_phylink.c @@ -95,8 +95,7 @@ static void lan966x_pcs_get_state(struct phylink_pcs *pcs, lan966x_port_status_get(port, state); } -static int lan966x_pcs_config(struct phylink_pcs *pcs, - unsigned int mode, +static int lan966x_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -107,8 +106,8 @@ static int lan966x_pcs_config(struct phylink_pcs *pcs, config = port->config; config.portmode = interface; - config.inband = phylink_autoneg_inband(mode); - config.autoneg = phylink_test(advertising, Autoneg); + config.inband = neg_mode & PHYLINK_PCS_NEG_INBAND; + config.autoneg = neg_mode == 
PHYLINK_PCS_NEG_INBAND_ENABLED; config.advertising = advertising; ret = lan966x_port_pcs_set(port, &config); -- cgit v1.2.3 From 140d1002e2a30db0df58d18c07df3f72dc0659fa Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 16 Jun 2023 13:06:58 +0100 Subject: net: mvneta: update PCS driver to use neg_mode Update mvneta's embedded PCS driver to use neg_mode rather than the mode argument. As there is no pcs_link_up() method, this only affects the pcs_config() method. Signed-off-by: Russell King (Oracle) Link: https://lore.kernel.org/r/E1qA8EE-00EaFr-Kx@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvneta.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index e2abc00d0472..ff5647bcdfca 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4002,8 +4002,8 @@ static void mvneta_pcs_get_state(struct phylink_pcs *pcs, state->pause |= MLO_PAUSE_TX; } -static int mvneta_pcs_config(struct phylink_pcs *pcs, - unsigned int mode, phy_interface_t interface, +static int mvneta_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, + phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) { @@ -4016,7 +4016,7 @@ static int mvneta_pcs_config(struct phylink_pcs *pcs, MVNETA_GMAC_AN_FLOW_CTRL_EN | MVNETA_GMAC_AN_DUPLEX_EN; - if (phylink_autoneg_inband(mode)) { + if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) { mask |= MVNETA_GMAC_CONFIG_MII_SPEED | MVNETA_GMAC_CONFIG_GMII_SPEED | MVNETA_GMAC_CONFIG_FULL_DUPLEX; @@ -5518,6 +5518,7 @@ static int mvneta_probe(struct platform_device *pdev) clk_prepare_enable(pp->clk_bus); pp->phylink_pcs.ops = &mvneta_phylink_pcs_ops; + pp->phylink_pcs.neg_mode = true; pp->phylink_config.dev = &dev->dev; pp->phylink_config.type = PHYLINK_NETDEV; -- cgit v1.2.3 From 
d5b16264fffe1e6a9ccad7b1cf311ea2fd5e2e79 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 16 Jun 2023 13:07:03 +0100 Subject: net: mvpp2: update PCS driver to use neg_mode Update mvpp2's embedded PCS drivers to use neg_mode rather than the mode argument, remembering to update the ACPI path as well. As there are no pcs_link_up() methods, this only affects the two pcs_config() methods. Signed-off-by: Russell King (Oracle) Link: https://lore.kernel.org/r/E1qA8EJ-00EaFx-P6@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index adc953611913..1fec84b4c068 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -6168,8 +6168,7 @@ static void mvpp2_xlg_pcs_get_state(struct phylink_pcs *pcs, state->pause |= MLO_PAUSE_RX; } -static int mvpp2_xlg_pcs_config(struct phylink_pcs *pcs, - unsigned int mode, +static int mvpp2_xlg_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -6232,7 +6231,7 @@ static void mvpp2_gmac_pcs_get_state(struct phylink_pcs *pcs, state->pause |= MLO_PAUSE_TX; } -static int mvpp2_gmac_pcs_config(struct phylink_pcs *pcs, unsigned int mode, +static int mvpp2_gmac_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -6246,7 +6245,7 @@ static int mvpp2_gmac_pcs_config(struct phylink_pcs *pcs, unsigned int mode, MVPP2_GMAC_FLOW_CTRL_AUTONEG | MVPP2_GMAC_AN_DUPLEX_EN; - if (phylink_autoneg_inband(mode)) { + if (neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED) { mask |= MVPP2_GMAC_CONFIG_MII_SPEED | MVPP2_GMAC_CONFIG_GMII_SPEED | 
MVPP2_GMAC_CONFIG_FULL_DUPLEX; @@ -6649,8 +6648,9 @@ static void mvpp2_acpi_start(struct mvpp2_port *port) mvpp2_mac_prepare(&port->phylink_config, MLO_AN_INBAND, port->phy_interface); mvpp2_mac_config(&port->phylink_config, MLO_AN_INBAND, &state); - pcs->ops->pcs_config(pcs, MLO_AN_INBAND, port->phy_interface, - state.advertising, false); + pcs->ops->pcs_config(pcs, PHYLINK_PCS_NEG_INBAND_ENABLED, + port->phy_interface, state.advertising, + false); mvpp2_mac_finish(&port->phylink_config, MLO_AN_INBAND, port->phy_interface); mvpp2_mac_link_up(&port->phylink_config, NULL, @@ -6896,7 +6896,9 @@ static int mvpp2_port_probe(struct platform_device *pdev, dev->dev.of_node = port_node; port->pcs_gmac.ops = &mvpp2_phylink_gmac_pcs_ops; + port->pcs_gmac.neg_mode = true; port->pcs_xlg.ops = &mvpp2_phylink_xlg_pcs_ops; + port->pcs_xlg.neg_mode = true; if (!mvpp2_use_acpi_compat_mode(port_fwnode)) { port->phylink_config.dev = &dev->dev; -- cgit v1.2.3 From d5a05299306227d73b0febba9cecedf88931c507 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 16 Jun 2023 13:07:08 +0100 Subject: net: prestera: update PCS driver to use neg_mode Update prestera's embedded PCS driver to use neg_mode rather than the mode argument. As there is no pcs_link_up() method, this only affects the pcs_config() method. 
Acked-by: Elad Nachman Signed-off-by: Russell King (Oracle) Link: https://lore.kernel.org/r/E1qA8EO-00EaG3-TR@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/prestera/prestera_main.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 9d504142e51a..4fb886c57cd7 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -300,8 +300,7 @@ static void prestera_pcs_get_state(struct phylink_pcs *pcs, } } -static int prestera_pcs_config(struct phylink_pcs *pcs, - unsigned int mode, +static int prestera_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -316,30 +315,25 @@ static int prestera_pcs_config(struct phylink_pcs *pcs, cfg_mac.admin = true; cfg_mac.fec = PRESTERA_PORT_FEC_OFF; + cfg_mac.inband = neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED; switch (interface) { case PHY_INTERFACE_MODE_10GBASER: cfg_mac.speed = SPEED_10000; - cfg_mac.inband = 0; cfg_mac.mode = PRESTERA_MAC_MODE_SR_LR; break; case PHY_INTERFACE_MODE_2500BASEX: cfg_mac.speed = SPEED_2500; cfg_mac.duplex = DUPLEX_FULL; - cfg_mac.inband = test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, - advertising); cfg_mac.mode = PRESTERA_MAC_MODE_SGMII; break; case PHY_INTERFACE_MODE_SGMII: - cfg_mac.inband = 1; cfg_mac.mode = PRESTERA_MAC_MODE_SGMII; break; case PHY_INTERFACE_MODE_1000BASEX: default: cfg_mac.speed = SPEED_1000; cfg_mac.duplex = DUPLEX_FULL; - cfg_mac.inband = test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, - advertising); cfg_mac.mode = PRESTERA_MAC_MODE_1000BASE_X; break; } @@ -401,6 +395,7 @@ static int prestera_port_sfp_bind(struct prestera_port *port) continue; port->phylink_pcs.ops = &prestera_pcs_ops; + port->phylink_pcs.neg_mode = 
true; port->phy_config.dev = &port->dev->dev; port->phy_config.type = PHYLINK_NETDEV; -- cgit v1.2.3 From 6e5bb3da9842950a161625b4ab2743d1d5c64715 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 16 Jun 2023 13:07:19 +0100 Subject: net: sparx5: update PCS driver to use neg_mode Update Sparx5's embedded PCS driver to use neg_mode rather than the mode argument. As there is no pcs_link_up() method, this only affects the pcs_config() method. Signed-off-by: Russell King (Oracle) Link: https://lore.kernel.org/r/E1qA8EZ-00EaGF-6F@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 1 + drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index a7edf524eedb..dc9af480bfea 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -281,6 +281,7 @@ static int sparx5_create_port(struct sparx5 *sparx5, spx5_port->custom_etype = 0x8880; /* Vitesse */ spx5_port->phylink_pcs.poll = true; spx5_port->phylink_pcs.ops = &sparx5_phylink_pcs_ops; + spx5_port->phylink_pcs.neg_mode = true; spx5_port->is_mrouter = false; INIT_LIST_HEAD(&spx5_port->tc_templates); sparx5->ports[config->portno] = spx5_port; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c index bb97d27a1da4..f8562c1a894d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c @@ -91,8 +91,7 @@ static void sparx5_pcs_get_state(struct phylink_pcs *pcs, state->pause = status.pause; } -static int sparx5_pcs_config(struct phylink_pcs *pcs, - unsigned int mode, +static int sparx5_pcs_config(struct phylink_pcs *pcs, unsigned int 
neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -104,8 +103,9 @@ static int sparx5_pcs_config(struct phylink_pcs *pcs, conf = port->conf; conf.power_down = false; conf.portmode = interface; - conf.inband = phylink_autoneg_inband(mode); - conf.autoneg = phylink_test(advertising, Autoneg); + conf.inband = neg_mode == PHYLINK_PCS_NEG_INBAND_DISABLED || + neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED; + conf.autoneg = neg_mode == PHYLINK_PCS_NEG_INBAND_ENABLED; conf.pause_adv = 0; if (phylink_test(advertising, Pause)) conf.pause_adv |= ADVERTISE_1000XPAUSE; -- cgit v1.2.3 From f40df95d375dc9c96da541a2c4ac0ce1e630309d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 16 Jun 2023 13:07:34 +0100 Subject: net: macb: update PCS driver to use neg_mode Update macb's embedded PCS drivers to use neg_mode, even though it makes no use of it or the "mode" argument. This makes the driver consistent with converted drivers. Signed-off-by: Russell King (Oracle) Link: https://lore.kernel.org/r/E1qA8Eo-00EaGX-KJ@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 2e35e200fdcb..f6a0f12a6d52 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -563,7 +563,7 @@ static void macb_set_tx_clk(struct macb *bp, int speed) netdev_err(bp->dev, "adjusting tx_clk failed.\n"); } -static void macb_usx_pcs_link_up(struct phylink_pcs *pcs, unsigned int mode, +static void macb_usx_pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, phy_interface_t interface, int speed, int duplex) { @@ -596,7 +596,7 @@ static void macb_usx_pcs_get_state(struct phylink_pcs *pcs, } static int macb_usx_pcs_config(struct phylink_pcs *pcs, - unsigned int mode, + 
unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -621,7 +621,7 @@ static void macb_pcs_an_restart(struct phylink_pcs *pcs) } static int macb_pcs_config(struct phylink_pcs *pcs, - unsigned int mode, + unsigned int neg_mode, phy_interface_t interface, const unsigned long *advertising, bool permit_pause_to_mac) @@ -862,7 +862,9 @@ static int macb_mii_probe(struct net_device *dev) struct macb *bp = netdev_priv(dev); bp->phylink_sgmii_pcs.ops = &macb_phylink_pcs_ops; + bp->phylink_sgmii_pcs.neg_mode = true; bp->phylink_usx_pcs.ops = &macb_phylink_usx_pcs_ops; + bp->phylink_usx_pcs.neg_mode = true; bp->phylink_config.dev = &dev->dev; bp->phylink_config.type = PHYLINK_NETDEV; -- cgit v1.2.3 From 2a441a3dbe84be61be502142a2fb8ea633fcc528 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Tue, 20 Jun 2023 14:25:19 +0800 Subject: net: txgbe: remove unused buffer in txgbe_calc_eeprom_checksum Half a year passed since commit 049fe5365324c ("net: txgbe: Add operations to interact with firmware") was submitted, the buffer in txgbe_calc_eeprom_checksum was not used. So remove it and the related branch codes. 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202306200242.FXsHokaJ-lkp@intel.com/ Reviewed-by: Jiawen Wu Signed-off-by: Zhengchao Shao Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230620062519.1575298-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c | 32 +++++++++------------------ 1 file changed, 11 insertions(+), 21 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c index ebc46f3be056..12405d71c5ee 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c @@ -160,34 +160,24 @@ int txgbe_read_pba_string(struct wx *wx, u8 *pba_num, u32 pba_num_size) static int txgbe_calc_eeprom_checksum(struct wx *wx, u16 *checksum) { u16 *eeprom_ptrs = NULL; - u32 buffer_size = 0; - u16 *buffer = NULL; u16 *local_buffer; int status; u16 i; wx_init_eeprom_params(wx); - if (!buffer) { - eeprom_ptrs = kvmalloc_array(TXGBE_EEPROM_LAST_WORD, sizeof(u16), - GFP_KERNEL); - if (!eeprom_ptrs) - return -ENOMEM; - /* Read pointer area */ - status = wx_read_ee_hostif_buffer(wx, 0, - TXGBE_EEPROM_LAST_WORD, - eeprom_ptrs); - if (status != 0) { - wx_err(wx, "Failed to read EEPROM image\n"); - kvfree(eeprom_ptrs); - return status; - } - local_buffer = eeprom_ptrs; - } else { - if (buffer_size < TXGBE_EEPROM_LAST_WORD) - return -EFAULT; - local_buffer = buffer; + eeprom_ptrs = kvmalloc_array(TXGBE_EEPROM_LAST_WORD, sizeof(u16), + GFP_KERNEL); + if (!eeprom_ptrs) + return -ENOMEM; + /* Read pointer area */ + status = wx_read_ee_hostif_buffer(wx, 0, TXGBE_EEPROM_LAST_WORD, eeprom_ptrs); + if (status != 0) { + wx_err(wx, "Failed to read EEPROM image\n"); + kvfree(eeprom_ptrs); + return status; } + local_buffer = eeprom_ptrs; for (i = 0; i < TXGBE_EEPROM_LAST_WORD; i++) if (i != wx->eeprom.sw_region_offset + TXGBE_EEPROM_CHECKSUM) -- cgit 
v1.2.3 From 004d25060c78fc31f66da0fa439c544dda1ac9d5 Mon Sep 17 00:00:00 2001 From: Ying Hsu Date: Tue, 20 Jun 2023 10:47:32 -0700 Subject: igb: Fix igb_down hung on surprise removal In a setup where a Thunderbolt hub connects to Ethernet and a display through USB Type-C, users may experience a hung task timeout when they remove the cable between the PC and the Thunderbolt hub. This is because the igb_down function is called multiple times when the Thunderbolt hub is unplugged. For example, the igb_io_error_detected triggers the first call, and the igb_remove triggers the second call. The second call to igb_down will block at napi_synchronize. Here's the call trace: __schedule+0x3b0/0xddb ? __mod_timer+0x164/0x5d3 schedule+0x44/0xa8 schedule_timeout+0xb2/0x2a4 ? run_local_timers+0x4e/0x4e msleep+0x31/0x38 igb_down+0x12c/0x22a [igb 6615058754948bfde0bf01429257eb59f13030d4] __igb_close+0x6f/0x9c [igb 6615058754948bfde0bf01429257eb59f13030d4] igb_close+0x23/0x2b [igb 6615058754948bfde0bf01429257eb59f13030d4] __dev_close_many+0x95/0xec dev_close_many+0x6e/0x103 unregister_netdevice_many+0x105/0x5b1 unregister_netdevice_queue+0xc2/0x10d unregister_netdev+0x1c/0x23 igb_remove+0xa7/0x11c [igb 6615058754948bfde0bf01429257eb59f13030d4] pci_device_remove+0x3f/0x9c device_release_driver_internal+0xfe/0x1b4 pci_stop_bus_device+0x5b/0x7f pci_stop_bus_device+0x30/0x7f pci_stop_bus_device+0x30/0x7f pci_stop_and_remove_bus_device+0x12/0x19 pciehp_unconfigure_device+0x76/0xe9 pciehp_disable_slot+0x6e/0x131 pciehp_handle_presence_or_link_change+0x7a/0x3f7 pciehp_ist+0xbe/0x194 irq_thread_fn+0x22/0x4d ? irq_thread+0x1fd/0x1fd irq_thread+0x17b/0x1fd ? irq_forced_thread_fn+0x5f/0x5f kthread+0x142/0x153 ? __irq_get_irqchip_state+0x46/0x46 ? 
kthread_associate_blkcg+0x71/0x71 ret_from_fork+0x1f/0x30 In this case, igb_io_error_detected detaches the network interface and requests a PCIE slot reset, however, the PCIE reset callback is not being invoked and thus the Ethernet connection breaks down. As the PCIE error in this case is a non-fatal one, requesting a slot reset can be avoided. This patch fixes the task hung issue and preserves Ethernet connection by ignoring non-fatal PCIE errors. Signed-off-by: Ying Hsu Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230620174732.4145155-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igb/igb_main.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 9fcac96022d7..9a2561409b06 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -9587,6 +9587,11 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); + if (state == pci_channel_io_normal) { + dev_warn(&pdev->dev, "Non-correctable non-fatal error reported.\n"); + return PCI_ERS_RESULT_CAN_RECOVER; + } + netif_device_detach(netdev); if (state == pci_channel_io_perm_failure) -- cgit v1.2.3 From 9a14f2e3dab106df7f27d1730cc540247317d4b9 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 21 Jun 2023 13:15:04 +0100 Subject: sfc: keep alive neighbour entries while a TC encap action is using them When processing counter updates, if any action set using the newly incremented counter includes an encap action, prod the corresponding neighbouring entry to indicate to the neighbour cache that the entry is still in use and passing traffic. 
Signed-off-by: Edward Cree Link: https://lore.kernel.org/r/20230621121504.17004-1-edward.cree@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/tc.c | 20 +++++++++++- drivers/net/ethernet/sfc/tc.h | 1 + drivers/net/ethernet/sfc/tc_counters.c | 58 ++++++++++++++++++++++++++++++++++ drivers/net/ethernet/sfc/tc_counters.h | 3 ++ 4 files changed, 81 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 77acdb60381e..15ebd3973922 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -110,8 +110,13 @@ static void efx_tc_free_action_set(struct efx_nic *efx, */ list_del(&act->list); } - if (act->count) + if (act->count) { + spin_lock_bh(&act->count->cnt->lock); + if (!list_empty(&act->count_user)) + list_del(&act->count_user); + spin_unlock_bh(&act->count->cnt->lock); efx_tc_flower_put_counter_index(efx, act->count); + } if (act->encap_md) { list_del(&act->encap_user); efx_tc_flower_release_encap_md(efx, act->encap_md); @@ -796,6 +801,7 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, goto release; } act->count = ctr; + INIT_LIST_HEAD(&act->count_user); } if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) { @@ -1083,6 +1089,7 @@ static int efx_tc_flower_replace(struct efx_nic *efx, goto release; } act->count = ctr; + INIT_LIST_HEAD(&act->count_user); } switch (fa->id) { @@ -1120,6 +1127,17 @@ static int efx_tc_flower_replace(struct efx_nic *efx, list_add_tail(&act->encap_user, &encap->users); act->dest_mport = encap->dest_mport; act->deliver = 1; + if (act->count && !WARN_ON(!act->count->cnt)) { + /* This counter is used by an encap + * action, which needs a reference back + * so it can prod neighbouring whenever + * traffic is seen. 
+ */ + spin_lock_bh(&act->count->cnt->lock); + list_add_tail(&act->count_user, + &act->count->cnt->users); + spin_unlock_bh(&act->count->cnt->lock); + } rc = efx_mae_alloc_action_set(efx, act); if (rc) { NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (encap)"); diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h index 607429f8bb28..1549c3df43bb 100644 --- a/drivers/net/ethernet/sfc/tc.h +++ b/drivers/net/ethernet/sfc/tc.h @@ -38,6 +38,7 @@ struct efx_tc_action_set { struct efx_tc_encap_action *encap_md; /* entry in tc_encap_ht table */ struct list_head encap_user; /* entry on encap_md->users list */ struct efx_tc_action_set_list *user; /* Only populated if encap_md */ + struct list_head count_user; /* entry on counter->users list, if encap */ u32 dest_mport; u32 fw_id; /* index of this entry in firmware actions table */ struct list_head list; diff --git a/drivers/net/ethernet/sfc/tc_counters.c b/drivers/net/ethernet/sfc/tc_counters.c index d1a91d54c6bb..979f49058a0c 100644 --- a/drivers/net/ethernet/sfc/tc_counters.c +++ b/drivers/net/ethernet/sfc/tc_counters.c @@ -9,6 +9,7 @@ */ #include "tc_counters.h" +#include "tc_encap_actions.h" #include "mae_counter_format.h" #include "mae.h" #include "rx_common.h" @@ -31,6 +32,15 @@ static void efx_tc_counter_free(void *ptr, void *__unused) { struct efx_tc_counter *cnt = ptr; + WARN_ON(!list_empty(&cnt->users)); + /* We'd like to synchronize_rcu() here, but unfortunately we aren't + * removing the element from the hashtable (it's not clear that's a + * safe thing to do in an rhashtable_free_and_destroy free_fn), so + * threads could still be obtaining new pointers to *cnt if they can + * race against this function at all. 
+ */ + flush_work(&cnt->work); + EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock)); kfree(cnt); } @@ -74,6 +84,49 @@ void efx_tc_fini_counters(struct efx_nic *efx) rhashtable_free_and_destroy(&efx->tc->counter_ht, efx_tc_counter_free, NULL); } +static void efx_tc_counter_work(struct work_struct *work) +{ + struct efx_tc_counter *cnt = container_of(work, struct efx_tc_counter, work); + struct efx_tc_encap_action *encap; + struct efx_tc_action_set *act; + unsigned long touched; + struct neighbour *n; + + spin_lock_bh(&cnt->lock); + touched = READ_ONCE(cnt->touched); + + list_for_each_entry(act, &cnt->users, count_user) { + encap = act->encap_md; + if (!encap) + continue; + if (!encap->neigh) /* can't happen */ + continue; + if (time_after_eq(encap->neigh->used, touched)) + continue; + encap->neigh->used = touched; + /* We have passed traffic using this ARP entry, so + * indicate to the ARP cache that it's still active + */ + if (encap->neigh->dst_ip) + n = neigh_lookup(&arp_tbl, &encap->neigh->dst_ip, + encap->neigh->egdev); + else +#if IS_ENABLED(CONFIG_IPV6) + n = neigh_lookup(ipv6_stub->nd_tbl, + &encap->neigh->dst_ip6, + encap->neigh->egdev); +#else + n = NULL; +#endif + if (!n) + continue; + + neigh_event_send(n, NULL); + neigh_release(n); + } + spin_unlock_bh(&cnt->lock); +} + /* Counter allocation */ static struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx, @@ -87,12 +140,14 @@ static struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx return ERR_PTR(-ENOMEM); spin_lock_init(&cnt->lock); + INIT_WORK(&cnt->work, efx_tc_counter_work); cnt->touched = jiffies; cnt->type = type; rc = efx_mae_allocate_counter(efx, cnt); if (rc) goto fail1; + INIT_LIST_HEAD(&cnt->users); rc = rhashtable_insert_fast(&efx->tc->counter_ht, &cnt->linkage, efx_tc_counter_ht_params); if (rc) @@ -126,6 +181,7 @@ static void efx_tc_flower_release_counter(struct efx_nic *efx, netif_warn(efx, hw, efx->net_dev, "Failed to free MAE counter %u, rc 
%d\n", cnt->fw_id, rc); + WARN_ON(!list_empty(&cnt->users)); /* This doesn't protect counter updates coming in arbitrarily long * after we deleted the counter. The RCU just ensures that we won't * free the counter while another thread has a pointer to it. @@ -133,6 +189,7 @@ static void efx_tc_flower_release_counter(struct efx_nic *efx, * is handled by the generation count. */ synchronize_rcu(); + flush_work(&cnt->work); EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock)); kfree(cnt); } @@ -302,6 +359,7 @@ static void efx_tc_counter_update(struct efx_nic *efx, cnt->touched = jiffies; } spin_unlock_bh(&cnt->lock); + schedule_work(&cnt->work); out: rcu_read_unlock(); } diff --git a/drivers/net/ethernet/sfc/tc_counters.h b/drivers/net/ethernet/sfc/tc_counters.h index 8fc7c4bbb29c..41e57f34b763 100644 --- a/drivers/net/ethernet/sfc/tc_counters.h +++ b/drivers/net/ethernet/sfc/tc_counters.h @@ -32,6 +32,9 @@ struct efx_tc_counter { u64 old_packets, old_bytes; /* Values last time passed to userspace */ /* jiffies of the last time we saw packets increase */ unsigned long touched; + struct work_struct work; /* For notifying encap actions */ + /* owners of corresponding count actions */ + struct list_head users; }; struct efx_tc_counter_index { -- cgit v1.2.3 From 9b476494da1aad70f4f083e853eb817bcb292d08 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 21 Jun 2023 20:33:07 +0800 Subject: net: hns3: refine the tcam key convert handle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The result of expression '(k ^ ~v) & k' is exactly the same with 'k & v', so simplify it. 
(k ^ ~v) & k == k & v The truth table (in non table form): k == 0, v == 0: (k ^ ~v) & k == (0 ^ ~0) & 0 == (0 ^ 1) & 0 == 1 & 0 == 0 k & v == 0 & 0 == 0 k == 0, v == 1: (k ^ ~v) & k == (0 ^ ~1) & 0 == (0 ^ 0) & 0 == 0 & 0 == 0 k & v == 0 & 1 == 0 k == 1, v == 0: (k ^ ~v) & k == (1 ^ ~0) & 1 == (1 ^ 1) & 1 == 0 & 1 == 0 k & v == 1 & 0 == 0 k == 1, v == 1: (k ^ ~v) & k == (1 ^ ~1) & 1 == (1 ^ 0) & 1 == 1 & 1 == 1 k & v == 1 & 1 == 1 Signed-off-by: Jian Shen Signed-off-by: Hao Lan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 81aa6b0facf5..6a43d1515585 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -835,15 +835,10 @@ struct hclge_vf_vlan_cfg { * Then for input key(k) and mask(v), we can calculate the value by * the formulae: * x = (~k) & v - * y = (k ^ ~v) & k + * y = k & v */ -#define calc_x(x, k, v) (x = ~(k) & (v)) -#define calc_y(y, k, v) \ - do { \ - const typeof(k) _k_ = (k); \ - const typeof(v) _v_ = (v); \ - (y) = (_k_ ^ ~_v_) & (_k_); \ - } while (0) +#define calc_x(x, k, v) ((x) = ~(k) & (v)) +#define calc_y(y, k, v) ((y) = (k) & (v)) #define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f)) #define HCLGE_STATS_READ(p, offset) (*(u64 *)((u8 *)(p) + (offset))) -- cgit v1.2.3 From 1cf3d5567f273a8746d1bade00633a93204f80f0 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Wed, 21 Jun 2023 20:33:08 +0800 Subject: net: hns3: fix strncpy() not using dest-buf length as length issue Now, strncpy() in hns3_dbg_fill_content() use src-length as copy-length, it may result in dest-buf overflow. This patch is to fix intel compile warning for csky-linux-gcc (GCC) 12.1.0 compiler.
The warning reports as below: hclge_debugfs.c:92:25: warning: 'strncpy' specified bound depends on the length of the source argument [-Wstringop-truncation] strncpy(pos, items[i].name, strlen(items[i].name)); hclge_debugfs.c:90:25: warning: 'strncpy' output truncated before terminating nul copying as many bytes from a string as its length [-Wstringop-truncation] strncpy(pos, result[i], strlen(result[i])); strncpy() use src-length as copy-length, it may result in dest-buf overflow. So,this patch add some values check to avoid this issue. Signed-off-by: Hao Chen Reported-by: kernel test robot Closes: https://lore.kernel.org/lkml/202207170606.7WtHs9yS-lkp@intel.com/T/ Signed-off-by: Hao Lan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 31 +++++++++++++++++----- .../ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 29 ++++++++++++++++---- 2 files changed, 48 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index d385ffc21876..32bb14303473 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -438,19 +438,36 @@ static void hns3_dbg_fill_content(char *content, u16 len, const struct hns3_dbg_item *items, const char **result, u16 size) { +#define HNS3_DBG_LINE_END_LEN 2 char *pos = content; + u16 item_len; u16 i; + if (!len) { + return; + } else if (len <= HNS3_DBG_LINE_END_LEN) { + *pos++ = '\0'; + return; + } + memset(content, ' ', len); - for (i = 0; i < size; i++) { - if (result) - strncpy(pos, result[i], strlen(result[i])); - else - strncpy(pos, items[i].name, strlen(items[i].name)); + len -= HNS3_DBG_LINE_END_LEN; - pos += strlen(items[i].name) + items[i].interval; + for (i = 0; i < size; i++) { + item_len = strlen(items[i].name) + items[i].interval; + if (len < item_len) + break; + + if (result) { + if (item_len < 
strlen(result[i])) + break; + strscpy(pos, result[i], strlen(result[i])); + } else { + strscpy(pos, items[i].name, strlen(items[i].name)); + } + pos += item_len; + len -= item_len; } - *pos++ = '\n'; *pos++ = '\0'; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index a0b46e7d863e..233c132dc513 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -88,16 +88,35 @@ static void hclge_dbg_fill_content(char *content, u16 len, const struct hclge_dbg_item *items, const char **result, u16 size) { +#define HCLGE_DBG_LINE_END_LEN 2 char *pos = content; + u16 item_len; u16 i; + if (!len) { + return; + } else if (len <= HCLGE_DBG_LINE_END_LEN) { + *pos++ = '\0'; + return; + } + memset(content, ' ', len); + len -= HCLGE_DBG_LINE_END_LEN; + for (i = 0; i < size; i++) { - if (result) - strncpy(pos, result[i], strlen(result[i])); - else - strncpy(pos, items[i].name, strlen(items[i].name)); - pos += strlen(items[i].name) + items[i].interval; + item_len = strlen(items[i].name) + items[i].interval; + if (len < item_len) + break; + + if (result) { + if (item_len < strlen(result[i])) + break; + strscpy(pos, result[i], strlen(result[i])); + } else { + strscpy(pos, items[i].name, strlen(items[i].name)); + } + pos += item_len; + len -= item_len; } *pos++ = '\n'; *pos++ = '\0'; -- cgit v1.2.3 From ed1c6f35b73ec9249c07ebbd300423155c7baac3 Mon Sep 17 00:00:00 2001 From: Peiyang Wang Date: Wed, 21 Jun 2023 20:33:09 +0800 Subject: net: hns3: clear hns unused parameter alarm Several functions in the hns3 driver have unused parameters. The compiler will warn about them when building with -Wunused-parameter option of hns3. 
Signed-off-by: Peiyang Wang Signed-off-by: Hao Lan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 3 +- .../hisilicon/hns3/hns3_common/hclge_comm_rss.c | 3 +- .../hisilicon/hns3/hns3_common/hclge_comm_rss.h | 3 +- drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 5 +-- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 7 ++-- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 47 +++++++++------------- .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 14 +++---- 8 files changed, 33 insertions(+), 51 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 9c9c72dc57e0..b99d75260d59 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -647,8 +647,7 @@ struct hnae3_ae_ops { int (*rm_mc_addr)(struct hnae3_handle *handle, const unsigned char *addr); void (*set_tso_stats)(struct hnae3_handle *handle, int enable); - void (*update_stats)(struct hnae3_handle *handle, - struct net_device_stats *net_stats); + void (*update_stats)(struct hnae3_handle *handle); void (*get_stats)(struct hnae3_handle *handle, u64 *data); void (*get_mac_stats)(struct hnae3_handle *handle, struct hns3_mac_stats *mac_stats); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c index ae2736549526..b4ae2160aff4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.c @@ -305,8 +305,7 @@ int hclge_comm_set_rss_indir_table(struct hnae3_ae_dev *ae_dev, return 0; } -int hclge_comm_set_rss_input_tuple(struct hnae3_handle *nic, - struct hclge_comm_hw *hw, bool is_pf, +int hclge_comm_set_rss_input_tuple(struct hclge_comm_hw *hw, struct hclge_comm_rss_cfg *rss_cfg) { struct 
hclge_comm_rss_input_tuple_cmd *req; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h index 92af3d2980d3..cdafa63fe38b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_rss.h @@ -112,8 +112,7 @@ int hclge_comm_init_rss_tuple_cmd(struct hclge_comm_rss_cfg *rss_cfg, struct hnae3_ae_dev *ae_dev, struct hclge_comm_rss_input_tuple_cmd *req); u64 hclge_comm_convert_rss_tuple(u8 tuple_sets); -int hclge_comm_set_rss_input_tuple(struct hnae3_handle *nic, - struct hclge_comm_hw *hw, bool is_pf, +int hclge_comm_set_rss_input_tuple(struct hclge_comm_hw *hw, struct hclge_comm_rss_cfg *rss_cfg); int hclge_comm_set_rss_indir_table(struct hnae3_ae_dev *ae_dev, struct hclge_comm_hw *hw, const u16 *indir); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 32bb14303473..6546cfe7f7cc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -958,8 +958,7 @@ static const struct hns3_dbg_item tx_bd_info_items[] = { { "MSS_HW_CSUM", 0 }, }; -static void hns3_dump_tx_bd_info(struct hns3_nic_priv *priv, - struct hns3_desc *desc, char **result, int idx) +static void hns3_dump_tx_bd_info(struct hns3_desc *desc, char **result, int idx) { unsigned int j = 0; @@ -1008,7 +1007,7 @@ static int hns3_dbg_tx_bd_info(struct hns3_dbg_data *d, char *buf, int len) for (i = 0; i < ring->desc_num; i++) { desc = &ring->desc[i]; - hns3_dump_tx_bd_info(priv, desc, result, i); + hns3_dump_tx_bd_info(desc, result, i); hns3_dbg_fill_content(content, sizeof(content), tx_bd_info_items, (const char **)result, ARRAY_SIZE(tx_bd_info_items)); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index b676496ec6d7..9f6890059666 100644 --- 
a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2538,7 +2538,7 @@ static void hns3_nic_get_stats64(struct net_device *netdev, if (test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) return; - handle->ae_algo->ops->update_stats(handle, &netdev->stats); + handle->ae_algo->ops->update_stats(handle); memset(&ring_total_stats, 0, sizeof(ring_total_stats)); for (idx = 0; idx < queue_num; idx++) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index 51d1278b18f6..407d30ee55d2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -228,7 +228,7 @@ static u32 hns3_lb_check_rx_ring(struct hns3_nic_priv *priv, u32 budget) } static void hns3_lb_clear_tx_ring(struct hns3_nic_priv *priv, u32 start_ringid, - u32 end_ringid, u32 budget) + u32 end_ringid) { u32 i; @@ -295,8 +295,7 @@ static int hns3_lp_run_test(struct net_device *ndev, enum hnae3_loop mode) out: hns3_lb_clear_tx_ring(priv, HNS3_NIC_LB_TEST_RING_ID, - HNS3_NIC_LB_TEST_RING_ID, - HNS3_NIC_LB_TEST_PKT_NUM); + HNS3_NIC_LB_TEST_RING_ID); kfree_skb(skb); return ret_val; @@ -618,7 +617,7 @@ static void hns3_get_stats(struct net_device *netdev, return; } - h->ae_algo->ops->update_stats(h, &netdev->stats); + h->ae_algo->ops->update_stats(h); /* get per-queue stats */ p = hns3_get_stats_tqps(h, p); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 2689b108f7df..bf675c15fbb9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -689,8 +689,7 @@ static void hclge_update_stats_for_all(struct hclge_dev *hdev) "Update MAC stats fail, status = %d.\n", status); } -static void hclge_update_stats(struct hnae3_handle *handle, - struct net_device_stats *net_stats) +static void 
hclge_update_stats(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; @@ -824,7 +823,7 @@ static void hclge_get_mac_stat(struct hnae3_handle *handle, struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - hclge_update_stats(handle, NULL); + hclge_update_stats(handle); mac_stats->tx_pause_cnt = hdev->mac_stats.mac_tx_mac_pause_num; mac_stats->rx_pause_cnt = hdev->mac_stats.mac_rx_mac_pause_num; @@ -4965,9 +4964,7 @@ int hclge_rss_init_hw(struct hclge_dev *hdev) if (ret) return ret; - ret = hclge_comm_set_rss_input_tuple(&hdev->vport[0].nic, - &hdev->hw.hw, true, - &hdev->rss_cfg); + ret = hclge_comm_set_rss_input_tuple(&hdev->hw.hw, &hdev->rss_cfg); if (ret) return ret; @@ -6243,8 +6240,7 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev, return hclge_fd_check_ext_tuple(hdev, fs, unused_tuple); } -static void hclge_fd_get_tcpip4_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, +static void hclge_fd_get_tcpip4_tuple(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule, u8 ip_proto) { rule->tuples.src_ip[IPV4_INDEX] = @@ -6273,8 +6269,7 @@ static void hclge_fd_get_tcpip4_tuple(struct hclge_dev *hdev, rule->tuples_mask.ip_proto = 0xFF; } -static void hclge_fd_get_ip4_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, +static void hclge_fd_get_ip4_tuple(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule) { rule->tuples.src_ip[IPV4_INDEX] = @@ -6297,8 +6292,7 @@ static void hclge_fd_get_ip4_tuple(struct hclge_dev *hdev, rule->tuples_mask.ether_proto = 0xFFFF; } -static void hclge_fd_get_tcpip6_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, +static void hclge_fd_get_tcpip6_tuple(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule, u8 ip_proto) { be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.tcp_ip6_spec.ip6src, @@ -6327,8 +6321,7 @@ static void hclge_fd_get_tcpip6_tuple(struct hclge_dev 
*hdev, rule->tuples_mask.ip_proto = 0xFF; } -static void hclge_fd_get_ip6_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, +static void hclge_fd_get_ip6_tuple(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule) { be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.usr_ip6_spec.ip6src, @@ -6351,8 +6344,7 @@ static void hclge_fd_get_ip6_tuple(struct hclge_dev *hdev, rule->tuples_mask.ether_proto = 0xFFFF; } -static void hclge_fd_get_ether_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, +static void hclge_fd_get_ether_tuple(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule) { ether_addr_copy(rule->tuples.src_mac, fs->h_u.ether_spec.h_source); @@ -6388,8 +6380,7 @@ static void hclge_fd_get_user_def_tuple(struct hclge_fd_user_def_info *info, rule->ep.user_def = *info; } -static int hclge_fd_get_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, +static int hclge_fd_get_tuple(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule, struct hclge_fd_user_def_info *info) { @@ -6397,31 +6388,31 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev, switch (flow_type) { case SCTP_V4_FLOW: - hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_SCTP); + hclge_fd_get_tcpip4_tuple(fs, rule, IPPROTO_SCTP); break; case TCP_V4_FLOW: - hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_TCP); + hclge_fd_get_tcpip4_tuple(fs, rule, IPPROTO_TCP); break; case UDP_V4_FLOW: - hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_UDP); + hclge_fd_get_tcpip4_tuple(fs, rule, IPPROTO_UDP); break; case IP_USER_FLOW: - hclge_fd_get_ip4_tuple(hdev, fs, rule); + hclge_fd_get_ip4_tuple(fs, rule); break; case SCTP_V6_FLOW: - hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_SCTP); + hclge_fd_get_tcpip6_tuple(fs, rule, IPPROTO_SCTP); break; case TCP_V6_FLOW: - hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_TCP); + hclge_fd_get_tcpip6_tuple(fs, rule, IPPROTO_TCP); break; case UDP_V6_FLOW: - hclge_fd_get_tcpip6_tuple(hdev, fs, rule, 
IPPROTO_UDP); + hclge_fd_get_tcpip6_tuple(fs, rule, IPPROTO_UDP); break; case IPV6_USER_FLOW: - hclge_fd_get_ip6_tuple(hdev, fs, rule); + hclge_fd_get_ip6_tuple(fs, rule); break; case ETHER_FLOW: - hclge_fd_get_ether_tuple(hdev, fs, rule); + hclge_fd_get_ether_tuple(fs, rule); break; default: return -EOPNOTSUPP; @@ -6578,7 +6569,7 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle, if (!rule) return -ENOMEM; - ret = hclge_fd_get_tuple(hdev, fs, rule, &info); + ret = hclge_fd_get_tuple(fs, rule, &info); if (ret) { kfree(rule); return ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index dd08989a4c7c..34f02ca8d1d2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -121,8 +121,7 @@ static struct hclgevf_dev *hclgevf_ae_get_hdev(struct hnae3_handle *handle) return container_of(handle, struct hclgevf_dev, nic); } -static void hclgevf_update_stats(struct hnae3_handle *handle, - struct net_device_stats *net_stats) +static void hclgevf_update_stats(struct hnae3_handle *handle) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); int status; @@ -1645,8 +1644,7 @@ err_reset: hclgevf_reset_err_handle(hdev); } -static enum hnae3_reset_type hclgevf_get_reset_level(struct hclgevf_dev *hdev, - unsigned long *addr) +static enum hnae3_reset_type hclgevf_get_reset_level(unsigned long *addr) { enum hnae3_reset_type rst_level = HNAE3_NONE_RESET; @@ -1685,8 +1683,7 @@ static void hclgevf_reset_event(struct pci_dev *pdev, if (hdev->default_reset_request) hdev->reset_level = - hclgevf_get_reset_level(hdev, - &hdev->default_reset_request); + hclgevf_get_reset_level(&hdev->default_reset_request); else hdev->reset_level = HNAE3_VF_FUNC_RESET; @@ -1828,7 +1825,7 @@ static void hclgevf_reset_service_task(struct hclgevf_dev *hdev) hdev->last_reset_time = jiffies; hdev->reset_type = - 
hclgevf_get_reset_level(hdev, &hdev->reset_pending); + hclgevf_get_reset_level(&hdev->reset_pending); if (hdev->reset_type != HNAE3_NONE_RESET) hclgevf_reset(hdev); } else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED, @@ -2160,8 +2157,7 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) if (ret) return ret; - ret = hclge_comm_set_rss_input_tuple(&hdev->nic, &hdev->hw.hw, - false, rss_cfg); + ret = hclge_comm_set_rss_input_tuple(&hdev->hw.hw, rss_cfg); if (ret) return ret; } -- cgit v1.2.3 From da744fd1362cd8ccf71043c62825cb88cb946886 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 14 Jun 2023 16:26:07 +0300 Subject: net/mlx5: Fix UAF in mlx5_eswitch_cleanup() mlx5_eswitch_cleanup() is using esw right after freeing it for releasing devlink_param. Fix it by releasing the devlink_param before freeing the esw, and adjust the create function accordingly. Fixes: 3f90840305e2 ("net/mlx5: Move esw multiport devlink param to eswitch code") Reported-by: Dan Carpenter Signed-off-by: Shay Drory Reviewed-by: Automatic Verification Reviewed-by: Gal Pressman Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 5aaedbf71783..b4e465856127 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1751,16 +1751,14 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) if (!MLX5_VPORT_MANAGER(dev) && !MLX5_ESWITCH_MANAGER(dev)) return 0; + esw = kzalloc(sizeof(*esw), GFP_KERNEL); + if (!esw) + return -ENOMEM; + err = devl_params_register(priv_to_devlink(dev), mlx5_eswitch_params, ARRAY_SIZE(mlx5_eswitch_params)); if (err) - return err; - - esw = kzalloc(sizeof(*esw), GFP_KERNEL); - if (!esw) { - err = -ENOMEM; - goto 
unregister_param; - } + goto free_esw; esw->dev = dev; esw->manager_vport = mlx5_eswitch_manager_vport(dev); @@ -1821,10 +1819,10 @@ abort: if (esw->work_queue) destroy_workqueue(esw->work_queue); debugfs_remove_recursive(esw->debugfs_root); - kfree(esw); -unregister_param: devl_params_unregister(priv_to_devlink(dev), mlx5_eswitch_params, ARRAY_SIZE(mlx5_eswitch_params)); +free_esw: + kfree(esw); return err; } @@ -1848,9 +1846,9 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw_offloads_cleanup(esw); mlx5_esw_vports_cleanup(esw); debugfs_remove_recursive(esw->debugfs_root); - kfree(esw); devl_params_unregister(priv_to_devlink(esw->dev), mlx5_eswitch_params, ARRAY_SIZE(mlx5_eswitch_params)); + kfree(esw); } /* Vport Administration */ -- cgit v1.2.3 From 690ad62fc6e445cc371e625fe2016e62c3793a0f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 20 Jun 2023 16:43:07 +0300 Subject: net/mlx5: Fix error code in mlx5_is_reset_now_capable() The mlx5_is_reset_now_capable() function returns bool, not negative error codes. So if fast teardown is not supported it should return false instead of -EOPNOTSUPP. 
Fixes: 92501fa6e421 ("net/mlx5: Ack on sync_reset_request only if PF can do reset_now") Signed-off-by: Dan Carpenter Reviewed-by: Kalesh AP Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 7af2b14ab5d8..fb7874da3caa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -327,7 +327,7 @@ static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev) if (!MLX5_CAP_GEN(dev, fast_teardown)) { mlx5_core_warn(dev, "fast teardown is not supported by firmware\n"); - return -EOPNOTSUPP; + return false; } err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id); -- cgit v1.2.3 From 8ec91f5d077c09e72e4e11d701a83eb1f1504ea3 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 23 May 2023 12:02:06 +0300 Subject: net/mlx5: Lag, Remove duplicate code checking lag is supported Remove duplicate function for checking if device has lag support. 
Signed-off-by: Roi Dayan Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 15 +++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h | 10 +--------- 3 files changed, 14 insertions(+), 15 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index fdf482f6fb34..9056b0b014f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2868,7 +2868,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) return; - if (!mlx5_is_lag_supported(esw->dev)) + if (!mlx5_lag_is_supported(esw->dev)) return; mlx5_devcom_register_component(devcom, @@ -2890,7 +2890,7 @@ void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) if (!MLX5_CAP_ESW(esw->dev, merged_eswitch)) return; - if (!mlx5_is_lag_supported(esw->dev)) + if (!mlx5_lag_is_supported(esw->dev)) return; mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index ffd7e17b8ebe..f0a074b2fcdf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1268,14 +1268,21 @@ recheck: mlx5_ldev_put(ldev); } +bool mlx5_lag_is_supported(struct mlx5_core_dev *dev) +{ + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + MLX5_CAP_GEN(dev, num_lag_ports) < 2 || + MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS) + return false; + return true; +} + void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) { int err; - if (!MLX5_CAP_GEN(dev, vport_group_manager) || - !MLX5_CAP_GEN(dev, lag_master) || - (MLX5_CAP_GEN(dev, num_lag_ports) 
> MLX5_MAX_PORTS || - MLX5_CAP_GEN(dev, num_lag_ports) <= 1)) + if (!mlx5_lag_is_supported(dev)) return; recheck: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index d7e7fa2348a5..a061b1873e27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -74,15 +74,7 @@ struct mlx5_lag { struct lag_mpesw lag_mpesw; }; -static inline bool mlx5_is_lag_supported(struct mlx5_core_dev *dev) -{ - if (!MLX5_CAP_GEN(dev, vport_group_manager) || - !MLX5_CAP_GEN(dev, lag_master) || - MLX5_CAP_GEN(dev, num_lag_ports) < 2 || - MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS) - return false; - return true; -} +bool mlx5_lag_is_supported(struct mlx5_core_dev *dev); static inline struct mlx5_lag * mlx5_lag_dev(struct mlx5_core_dev *dev) -- cgit v1.2.3 From 1da9f36252d4852205a1990f003549d753320e8c Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 28 May 2023 10:58:03 +0300 Subject: net/mlx5e: Use vhca_id for device index in vport rx rules Device index is like PF index and limited to max physical ports. For example, SFs created under PF the device index is the PF device index. Use vhca_id which gets the FW index per vport, for vport rx rules and vport pair events. 
Signed-off-by: Roi Dayan Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 965a8261c99b..152b62138450 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -408,7 +408,7 @@ static int mlx5e_sqs2vport_add_peers_rules(struct mlx5_eswitch *esw, struct mlx5 mlx5_devcom_for_each_peer_entry(devcom, MLX5_DEVCOM_ESW_OFFLOADS, peer_esw, tmp) { - int peer_rule_idx = mlx5_get_dev_index(peer_esw->dev); + u16 peer_rule_idx = MLX5_CAP_GEN(peer_esw->dev, vhca_id); struct mlx5e_rep_sq_peer *sq_peer; int err; @@ -1581,7 +1581,7 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep, struct mlx5_eswitch *peer_esw) { - int i = mlx5_get_dev_index(peer_esw->dev); + u16 i = MLX5_CAP_GEN(peer_esw->dev, vhca_id); struct mlx5e_rep_priv *rpriv; struct mlx5e_rep_sq *rep_sq; @@ -1603,7 +1603,7 @@ static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, struct mlx5_eswitch *peer_esw) { - int i = mlx5_get_dev_index(peer_esw->dev); + u16 i = MLX5_CAP_GEN(peer_esw->dev, vhca_id); struct mlx5_flow_handle *flow_rule; struct mlx5e_rep_sq_peer *sq_peer; struct mlx5e_rep_priv *rpriv; -- cgit v1.2.3 From 1552e9b51810761881f7438d26c9b2dad171e423 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 28 May 2023 12:10:26 +0300 Subject: net/mlx5e: E-Switch, Add peer fdb miss rules for vport manager or ecpf Add peer fdb rules for E-Switch that are vport managers or ecpf device. It is not needed for other devices. 
Signed-off-by: Roi Dayan Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9056b0b014f6..ed986d1c9e90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1069,6 +1069,9 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, void *misc; int err; + if (!MLX5_VPORT_MANAGER(esw->dev) && !mlx5_core_is_ecpf_esw_manager(esw->dev)) + return 0; + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return -ENOMEM; @@ -1177,11 +1180,14 @@ alloc_flows_err: static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_core_dev *peer_dev) { + u16 peer_index = mlx5_get_dev_index(peer_dev); struct mlx5_flow_handle **flows; struct mlx5_vport *vport; unsigned long i; - flows = esw->fdb_table.offloads.peer_miss_rules[mlx5_get_dev_index(peer_dev)]; + flows = esw->fdb_table.offloads.peer_miss_rules[peer_index]; + if (!flows) + return; if (mlx5_core_ec_sriov_enabled(esw->dev)) { mlx5_esw_for_each_ec_vf_vport(esw, i, vport, mlx5_core_max_ec_vfs(esw->dev)) { @@ -1206,7 +1212,9 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw, vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); mlx5_del_flow_rules(flows[vport->index]); } + kvfree(flows); + esw->fdb_table.offloads.peer_miss_rules[peer_index] = NULL; } static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) -- cgit v1.2.3 From 70c36438393546be0bbfd001d043a08e8ff611e9 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 28 May 2023 12:11:47 +0300 Subject: net/mlx5e: E-Switch, Use xarray for devcom paired device index To allow devcom events on E-Switch that is not a vport group manager, use vhca id as an 
index instead of device index which might be shared between several E-Switches. for example SF and its PF. Signed-off-by: Roi Dayan Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 2 +- .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 30 +++++++++++++++++----- 2 files changed, 25 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 7064609f4998..ae0dc8a3060d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -353,7 +353,7 @@ struct mlx5_eswitch { u32 large_group_num; } params; struct blocking_notifier_head n_head; - bool paired[MLX5_MAX_PORTS]; + struct xarray paired; }; void esw_offloads_disable(struct mlx5_eswitch *esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index ed986d1c9e90..6f3b7d5eb6a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2807,15 +2807,21 @@ static int mlx5_esw_offloads_devcom_event(int event, struct mlx5_eswitch *esw = my_data; struct mlx5_devcom *devcom = esw->dev->priv.devcom; struct mlx5_eswitch *peer_esw = event_data; + u16 esw_i, peer_esw_i; + bool esw_paired; int err; + peer_esw_i = MLX5_CAP_GEN(peer_esw->dev, vhca_id); + esw_i = MLX5_CAP_GEN(esw->dev, vhca_id); + esw_paired = !!xa_load(&esw->paired, peer_esw_i); + switch (event) { case ESW_OFFLOADS_DEVCOM_PAIR: if (mlx5_eswitch_vport_match_metadata_enabled(esw) != mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) break; - if (esw->paired[mlx5_get_dev_index(peer_esw->dev)]) + if (esw_paired) break; err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true); @@ -2829,23 +2835,29 @@ static int mlx5_esw_offloads_devcom_event(int event, if 
(err) goto err_pair; - esw->paired[mlx5_get_dev_index(peer_esw->dev)] = true; - peer_esw->paired[mlx5_get_dev_index(esw->dev)] = true; + err = xa_insert(&esw->paired, peer_esw_i, peer_esw, GFP_KERNEL); + if (err) + goto err_xa; + + err = xa_insert(&peer_esw->paired, esw_i, esw, GFP_KERNEL); + if (err) + goto err_peer_xa; + esw->num_peers++; peer_esw->num_peers++; mlx5_devcom_comp_set_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true); break; case ESW_OFFLOADS_DEVCOM_UNPAIR: - if (!esw->paired[mlx5_get_dev_index(peer_esw->dev)]) + if (!esw_paired) break; peer_esw->num_peers--; esw->num_peers--; if (!esw->num_peers && !peer_esw->num_peers) mlx5_devcom_comp_set_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false); - esw->paired[mlx5_get_dev_index(peer_esw->dev)] = false; - peer_esw->paired[mlx5_get_dev_index(esw->dev)] = false; + xa_erase(&peer_esw->paired, esw_i); + xa_erase(&esw->paired, peer_esw_i); mlx5_esw_offloads_unpair(peer_esw, esw); mlx5_esw_offloads_unpair(esw, peer_esw); mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false); @@ -2854,6 +2866,10 @@ static int mlx5_esw_offloads_devcom_event(int event, return 0; +err_peer_xa: + xa_erase(&esw->paired, peer_esw_i); +err_xa: + mlx5_esw_offloads_unpair(peer_esw, esw); err_pair: mlx5_esw_offloads_unpair(esw, peer_esw); err_peer: @@ -2879,6 +2895,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw) if (!mlx5_lag_is_supported(esw->dev)) return; + xa_init(&esw->paired); mlx5_devcom_register_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS, mlx5_esw_offloads_devcom_event, @@ -2906,6 +2923,7 @@ void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) ESW_OFFLOADS_DEVCOM_UNPAIR, esw); mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + xa_destroy(&esw->paired); } bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw) -- cgit v1.2.3 From 4575ab3b7de04813400acf989ca7f26dd7e29c59 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 28 May 2023 17:10:43 +0300 Subject: net/mlx5e: 
E-Switch, Pass other_vport flag if vport is not 0 When creating flow table for shared fdb resources, there is only need to pass other_vport flag if vport is not 0 or if the port is ECPF in BlueField. Signed-off-by: Roi Dayan Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 6f3b7d5eb6a4..ee507b12e908 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2551,13 +2551,13 @@ static int __esw_set_master_egress_rule(struct mlx5_core_dev *master, return err; } -static int esw_master_egress_create_resources(struct mlx5_flow_namespace *egress_ns, +static int esw_master_egress_create_resources(struct mlx5_eswitch *esw, + struct mlx5_flow_namespace *egress_ns, struct mlx5_vport *vport, size_t count) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table_attr ft_attr = { .max_fte = count, .prio = 0, .level = 0, - .flags = MLX5_FLOW_TABLE_OTHER_VPORT, }; struct mlx5_flow_table *acl; struct mlx5_flow_group *g; @@ -2572,6 +2572,9 @@ static int esw_master_egress_create_resources(struct mlx5_flow_namespace *egress if (!flow_group_in) return -ENOMEM; + if (vport->vport || mlx5_core_is_ecpf(esw->dev)) + ft_attr.flags = MLX5_FLOW_TABLE_OTHER_VPORT; + acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport); if (IS_ERR(acl)) { err = PTR_ERR(acl); @@ -2642,7 +2645,7 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master, if (vport->egress.acl && vport->egress.type != VPORT_EGRESS_ACL_TYPE_SHARED_FDB) return 0; - err = esw_master_egress_create_resources(egress_ns, vport, count); + err = esw_master_egress_create_resources(esw, egress_ns, vport, count); 
if (err) return err; -- cgit v1.2.3 From ae4de894931d37ff12405db29ca3a2395d3a0449 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 29 May 2023 09:24:54 +0300 Subject: net/mlx5e: Remove redundant comment The function comment says what it is and the comment is redundant. Signed-off-by: Roi Dayan Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index ee507b12e908..612be82a8ad5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1904,7 +1904,6 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) if (!flow_group_in) return -ENOMEM; - /* create vport rx group */ mlx5_esw_set_flow_group_source_port(esw, flow_group_in, 0); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); -- cgit v1.2.3 From 15ddd72ee323cf4b7012dc8e002ebb812f92e11f Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 29 May 2023 09:32:57 +0300 Subject: net/mlx5e: E-Switch, Fix shared fdb error flow On error flow resources being freed in esw_master_egress_destroy_resources() but pointers not being set to null if error flow is from creating a bounce rule. Then in esw_acl_egress_ofld_cleanup() we try to access already freed pointers. Fix it by resetting the pointers to null. Also if error is from creating a second or later bounce rule then the flow group and table being used and cannot and should not be freed. Add a check to destroy the flow group and table if there are no bounce rules. 
mlx5_core.sf mlx5_core.sf.2: mlx5_destroy_flow_group:2306:(pid 2235): Flow group 4 wasn't destroyed, refcount > 1 mlx5_core.sf mlx5_core.sf.2: mlx5_destroy_flow_table:2295:(pid 2235): Flow table 3 wasn't destroyed, refcount > 1 Fixes: 5e0202eb49ed ("net/mlx5: E-switch, Handle multiple master egress rules") Signed-off-by: Roi Dayan Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 612be82a8ad5..cf58295ad7e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2618,8 +2618,12 @@ out: static void esw_master_egress_destroy_resources(struct mlx5_vport *vport) { + if (!xa_empty(&vport->egress.offloads.bounce_rules)) + return; mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp); + vport->egress.offloads.bounce_grp = NULL; mlx5_destroy_flow_table(vport->egress.acl); + vport->egress.acl = NULL; } static int esw_set_master_egress_rule(struct mlx5_core_dev *master, -- cgit v1.2.3 From 61955da523d93b4d89f45f84dc6ce9d44ced7bae Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 1 Jun 2023 13:12:37 +0200 Subject: net/mlx5: Remove redundant MLX5_ESWITCH_MANAGER() check from is_ib_rep_supported() MLX5_ESWITCH_MANAGER() check is done in is_eth_rep_supported(). Function is_ib_rep_supported() calls is_eth_rep_supported(). Remove the redundant check from it. 
Signed-off-by: Jiri Pirko Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 617ac7e5d75c..3b1e925f16d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -151,9 +151,6 @@ static bool is_ib_rep_supported(struct mlx5_core_dev *dev) if (!is_eth_rep_supported(dev)) return false; - if (!MLX5_ESWITCH_MANAGER(dev)) - return false; - if (!is_mdev_switchdev_mode(dev)) return false; -- cgit v1.2.3 From 0d0946d6488e785c30a0c4fce4cf21dc7da6dc1f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 1 Jun 2023 13:15:12 +0200 Subject: net/mlx5: Remove redundant is_mdev_switchdev_mode() check from is_ib_rep_supported() is_mdev_switchdev_mode() check is done in is_eth_rep_supported(). Function is_ib_rep_supported() calls is_eth_rep_supported(). Remove the redundant check from it. 
Signed-off-by: Jiri Pirko Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 3b1e925f16d2..edb06fb9bbc5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -151,9 +151,6 @@ static bool is_ib_rep_supported(struct mlx5_core_dev *dev) if (!is_eth_rep_supported(dev)) return false; - if (!is_mdev_switchdev_mode(dev)) - return false; - if (mlx5_core_mp_enabled(dev)) return false; -- cgit v1.2.3 From 899862b653d74ed5fc3a61cd5e14a88b824a71d7 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 31 May 2023 15:11:07 +0200 Subject: net/mlx5: Remove redundant check from mlx5_esw_query_vport_vhca_id() Since mlx5_esw_query_vport_vhca_id() could be called either from mlx5_esw_vport_enable() or mlx5_esw_vport_disable() where the check is done, this is always false here. Remove the redundant check. 
Signed-off-by: Jiri Pirko Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index cf58295ad7e2..bdfe609cc9ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3961,9 +3961,6 @@ static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, int err; *vhca_id = 0; - if (mlx5_esw_is_manager_vport(esw, vport_num) || - !MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) - return -EPERM; query_ctx = kzalloc(query_out_sz, GFP_KERNEL); if (!query_ctx) -- cgit v1.2.3 From 29e4c95faee52a9b7a4f1293cb92cd17a0b5fd91 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 1 Jun 2023 09:17:17 +0200 Subject: net/mlx5: Remove pointless vport lookup from mlx5_esw_check_port_type() As xa_get_mark() returns false in case the entry is not present, no need to redundantly check if vport is present. Remove the lookup. 
Signed-off-by: Jiri Pirko Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index b4e465856127..faec7d7a4400 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1908,12 +1908,6 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, static bool mlx5_esw_check_port_type(struct mlx5_eswitch *esw, u16 vport_num, xa_mark_t mark) { - struct mlx5_vport *vport; - - vport = mlx5_eswitch_get_vport(esw, vport_num); - if (IS_ERR(vport)) - return false; - return xa_get_mark(&esw->vports, vport_num, mark); } -- cgit v1.2.3 From 1b5ea7ffb7a3bdfffb4b7f40ce0d20a3372ee405 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 22 Jun 2023 03:31:07 -0700 Subject: net: bcmgenet: Ensure MDIO unregistration has clocks enabled With support for Ethernet PHY LEDs having been added, while unregistering an MDIO bus and its child devices like PHYs there may be "late" accesses to the MDIO bus. One typical use case is setting the PHY LEDs brightness to OFF for instance. We need to ensure that the MDIO bus controller remains entirely functional since it runs off the main GENET adapter clock. 
Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20230617155500.4005881-1-andrew@lunn.ch/ Fixes: 9a4e79697009 ("net: bcmgenet: utilize generic Broadcom UniMAC MDIO controller driver") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230622103107.1760280-1-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index c15ed0acdb77..0092e46c46f8 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -673,5 +673,7 @@ void bcmgenet_mii_exit(struct net_device *dev) if (of_phy_is_fixed_link(dn)) of_phy_deregister_fixed_link(dn); of_node_put(priv->phy_dn); + clk_prepare_enable(priv->clk); platform_device_unregister(priv->mii_pdev); + clk_disable_unprepare(priv->clk); } -- cgit v1.2.3 From ebbd17ce297a3f367ca20058272063eaeeced63a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 22 Jun 2023 15:33:02 +0200 Subject: mlxsw: spectrum_router: Add extack argument to mlxsw_sp_lb_rif_init() The extack will be handy in later patches. 
Signed-off-by: Petr Machata Reviewed-by: Danielle Ratson Link: https://lore.kernel.org/r/e87ba300121010d580b80a281877573a7b1377ca.1687438411.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 43e8f19c7a0a..0b1c17819388 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -10561,7 +10561,8 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } -static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp, + struct netlink_ext_ack *extack) { u16 lb_rif_index; int err; @@ -10674,7 +10675,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_vrs_init; - err = mlxsw_sp_lb_rif_init(mlxsw_sp); + err = mlxsw_sp_lb_rif_init(mlxsw_sp, extack); if (err) goto err_lb_rif_init; -- cgit v1.2.3 From f3c85eed1ac364ae2cb2729959c6150813cc9c20 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 22 Jun 2023 15:33:03 +0200 Subject: mlxsw: spectrum_router: Use mlxsw_sp_ul_rif_get() to get main VRF LB RIF The current function, mlxsw_sp_router_ul_rif_get(), is a wrapper around the function mentioned in the subject. As such it forms an external interface of the router code. In future patches we will want to maintain connection between RIFs and the CRIFs (introduced in the next patch) that back them. That will not hold for the VRF-based loopback netdevices, so the whole CRIF business can be kept hidden from the rest of mlxsw. 
But for the main VRF loopback RIF we do want to keep the RIF-CRIF connection, because that RIF is used for blackhole next hops, and the next hop code can be kept simpler for assuming rif->crif is valid. Hence, instead, call mlxsw_sp_ul_rif_get() to create the main VRF loopback RIF. This being an internal function will take the CRIF argument anyway. Furthermore, the function does not lock, which is not necessary at this point in code yet. Signed-off-by: Petr Machata Reviewed-by: Danielle Ratson Link: https://lore.kernel.org/r/7a39a011a02a84164cd7f5da7985ec5b2ae01ba5.1687438411.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 0b1c17819388..15ce0d557f39 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -10564,19 +10564,20 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack) { - u16 lb_rif_index; + struct mlxsw_sp_rif *lb_rif; int err; /* Create a generic loopback RIF associated with the main table * (default VRF). Any table can be used, but the main table exists * anyway, so we do not waste resources. 
*/ - err = mlxsw_sp_router_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, - &lb_rif_index); - if (err) + lb_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, extack); + if (IS_ERR(lb_rif)) { + err = PTR_ERR(lb_rif); return err; + } - mlxsw_sp->router->lb_rif_index = lb_rif_index; + mlxsw_sp->router->lb_rif_index = lb_rif->rif_index; return 0; } -- cgit v1.2.3 From 4796c287b70a0f60fbc6a5df2ab33d92e8971732 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 22 Jun 2023 15:33:04 +0200 Subject: mlxsw: spectrum_router: Maintain a hash table of CRIFs CRIFs are objects that mlxsw maintains for netdevices that may not have an associated RIF (i.e. they may not have been instantiated in the ASIC), but if indeed they do not, it is quite possible they will in the future. These netdevices are candidate RIFs, hence CRIFs. Netdevices for which CRIFs are created include e.g. bridges, LAGs, or front panel ports. The idea is that next hops would be kept at CRIFs, not RIFs, and thus it would be easier to offload and unoffload the entities that have been added before the RIF was created. In this patch, add the code for low-level CRIF maintenance: create and destroy, and keep in a table keyed by the netdevice pointer for easy recall. 
Signed-off-by: Petr Machata Reviewed-by: Danielle Ratson Link: https://lore.kernel.org/r/186d44e399c475159da20689f2c540719f2d1ed0.1687438411.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 175 +++++++++++++++++++++ .../net/ethernet/mellanox/mlxsw/spectrum_router.h | 1 + 2 files changed, 176 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 15ce0d557f39..d251a926d140 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -51,6 +51,21 @@ struct mlxsw_sp_vr; struct mlxsw_sp_lpm_tree; struct mlxsw_sp_rif_ops; +struct mlxsw_sp_crif_key { + struct net_device *dev; +}; + +struct mlxsw_sp_crif { + struct mlxsw_sp_crif_key key; + struct rhash_head ht_node; +}; + +static const struct rhashtable_params mlxsw_sp_crif_ht_params = { + .key_offset = offsetof(struct mlxsw_sp_crif, key), + .key_len = sizeof_field(struct mlxsw_sp_crif, key), + .head_offset = offsetof(struct mlxsw_sp_crif, ht_node), +}; + struct mlxsw_sp_rif { struct list_head nexthop_list; struct list_head neigh_list; @@ -1060,6 +1075,56 @@ u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) return tb_id; } +static void +mlxsw_sp_crif_init(struct mlxsw_sp_crif *crif, struct net_device *dev) +{ + crif->key.dev = dev; +} + +static struct mlxsw_sp_crif * +mlxsw_sp_crif_alloc(struct net_device *dev) +{ + struct mlxsw_sp_crif *crif; + + crif = kzalloc(sizeof(*crif), GFP_KERNEL); + if (!crif) + return NULL; + + mlxsw_sp_crif_init(crif, dev); + return crif; +} + +static void mlxsw_sp_crif_free(struct mlxsw_sp_crif *crif) +{ + kfree(crif); +} + +static int mlxsw_sp_crif_insert(struct mlxsw_sp_router *router, + struct mlxsw_sp_crif *crif) +{ + return rhashtable_insert_fast(&router->crif_ht, &crif->ht_node, + mlxsw_sp_crif_ht_params); +} + +static void 
mlxsw_sp_crif_remove(struct mlxsw_sp_router *router, + struct mlxsw_sp_crif *crif) +{ + rhashtable_remove_fast(&router->crif_ht, &crif->ht_node, + mlxsw_sp_crif_ht_params); +} + +static struct mlxsw_sp_crif * +mlxsw_sp_crif_lookup(struct mlxsw_sp_router *router, + const struct net_device *dev) +{ + struct mlxsw_sp_crif_key key = { + .dev = (struct net_device *)dev, + }; + + return rhashtable_lookup_fast(&router->crif_ht, &key, + mlxsw_sp_crif_ht_params); +} + static struct mlxsw_sp_rif * mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_params *params, @@ -9148,6 +9213,95 @@ static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif, return -ENOBUFS; } +static bool mlxsw_sp_router_netdevice_interesting(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev) +{ + struct vlan_dev_priv *vlan; + + if (netif_is_lag_master(dev) || + netif_is_bridge_master(dev) || + mlxsw_sp_port_dev_check(dev) || + mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev) || + netif_is_l3_master(dev)) + return true; + + if (!is_vlan_dev(dev)) + return false; + + vlan = vlan_dev_priv(dev); + return netif_is_lag_master(vlan->real_dev) || + netif_is_bridge_master(vlan->real_dev) || + mlxsw_sp_port_dev_check(vlan->real_dev); +} + +static struct mlxsw_sp_crif * +mlxsw_sp_crif_register(struct mlxsw_sp_router *router, struct net_device *dev) +{ + struct mlxsw_sp_crif *crif; + int err; + + if (WARN_ON(mlxsw_sp_crif_lookup(router, dev))) + return NULL; + + crif = mlxsw_sp_crif_alloc(dev); + if (!crif) + return ERR_PTR(-ENOMEM); + + err = mlxsw_sp_crif_insert(router, crif); + if (err) + goto err_netdev_insert; + + return crif; + +err_netdev_insert: + mlxsw_sp_crif_free(crif); + return ERR_PTR(err); +} + +static void mlxsw_sp_crif_unregister(struct mlxsw_sp_router *router, + struct mlxsw_sp_crif *crif) +{ + mlxsw_sp_crif_remove(router, crif); + mlxsw_sp_crif_free(crif); +} + +static int mlxsw_sp_netdevice_register(struct mlxsw_sp_router *router, + struct net_device *dev) +{ + 
struct mlxsw_sp_crif *crif; + + if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev)) + return 0; + + crif = mlxsw_sp_crif_register(router, dev); + return PTR_ERR_OR_ZERO(crif); +} + +static void mlxsw_sp_netdevice_unregister(struct mlxsw_sp_router *router, + struct net_device *dev) +{ + struct mlxsw_sp_crif *crif; + + if (!mlxsw_sp_router_netdevice_interesting(router->mlxsw_sp, dev)) + return; + + /* netdev_run_todo(), by way of netdev_wait_allrefs_any(), rebroadcasts + * the NETDEV_UNREGISTER message, so we can get here twice. If that's + * what happened, the netdevice state is NETREG_UNREGISTERED. In that + * case, we expect to have collected the CRIF already, and warn if it + * still exists. Otherwise we expect the CRIF to exist. + */ + crif = mlxsw_sp_crif_lookup(router, dev); + if (dev->reg_state == NETREG_UNREGISTERED) { + if (!WARN_ON(crif)) + return; + } + if (WARN_ON(!crif)) + return; + + mlxsw_sp_crif_unregister(router, crif); +} + static bool mlxsw_sp_is_offload_xstats_event(unsigned long event) { switch (event) { @@ -9367,6 +9521,15 @@ static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, mutex_lock(&mlxsw_sp->router->lock); + if (event == NETDEV_REGISTER) { + err = mlxsw_sp_netdevice_register(router, dev); + if (err) + /* No need to roll this back, UNREGISTER will collect it + * anyhow. 
+ */ + goto out; + } + if (mlxsw_sp_is_offload_xstats_event(event)) err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev, event, ptr); @@ -9381,6 +9544,10 @@ static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, else if (mlxsw_sp_is_vrf_event(event, ptr)) err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); + if (event == NETDEV_UNREGISTER) + mlxsw_sp_netdevice_unregister(router, dev); + +out: mutex_unlock(&mlxsw_sp->router->lock); return notifier_from_errno(err); @@ -10649,6 +10816,11 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_ipips_init; + err = rhashtable_init(&mlxsw_sp->router->crif_ht, + &mlxsw_sp_crif_ht_params); + if (err) + goto err_crif_ht_init; + err = mlxsw_sp_rifs_init(mlxsw_sp); if (err) goto err_rifs_init; @@ -10780,6 +10952,8 @@ err_nexthop_group_ht_init: err_nexthop_ht_init: mlxsw_sp_rifs_fini(mlxsw_sp); err_rifs_init: + rhashtable_destroy(&mlxsw_sp->router->crif_ht); +err_crif_ht_init: mlxsw_sp_ipips_fini(mlxsw_sp); err_ipips_init: __mlxsw_sp_router_fini(mlxsw_sp); @@ -10815,6 +10989,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) rhashtable_destroy(&router->nexthop_group_ht); rhashtable_destroy(&router->nexthop_ht); mlxsw_sp_rifs_fini(mlxsw_sp); + rhashtable_destroy(&mlxsw_sp->router->crif_ht); mlxsw_sp_ipips_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); cancel_delayed_work_sync(&router->nh_grp_activity_dw); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 5a0babc614b4..b223e80303f5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -20,6 +20,7 @@ struct mlxsw_sp_router_nve_decap { struct mlxsw_sp_router { struct mlxsw_sp *mlxsw_sp; + struct rhashtable crif_ht; struct gen_pool *rifs_table; struct mlxsw_sp_rif **rifs; struct idr rif_mac_profiles_idr; -- cgit v1.2.3 From 78126cfd5dc97da7baf66415374452e3f8805faf Mon Sep 17 
00:00:00 2001 From: Petr Machata Date: Thu, 22 Jun 2023 15:33:05 +0200 Subject: mlxsw: spectrum_router: Maintain CRIF for fallback loopback RIF CRIFs are generally not maintained for loopback RIFs. However, the RIF for the default VRF is used for offloading of blackhole nexthops. Nexthops expect to have a valid CRIF. Therefore in this patch, add code to maintain CRIF for the loopback RIF as well. Signed-off-by: Petr Machata Reviewed-by: Danielle Ratson Link: https://lore.kernel.org/r/7f2b2fcc98770167ed1254a904c3f7f585ba43f0.1687438411.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 12 +++++++++++- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h | 1 + 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index d251a926d140..c4d538e0169e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -10731,9 +10731,14 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack) { + struct mlxsw_sp_router *router = mlxsw_sp->router; struct mlxsw_sp_rif *lb_rif; int err; + router->lb_crif = mlxsw_sp_crif_alloc(NULL); + if (IS_ERR(router->lb_crif)) + return PTR_ERR(router->lb_crif); + /* Create a generic loopback RIF associated with the main table * (default VRF). Any table can be used, but the main table exists * anyway, so we do not waste resources. 
@@ -10741,17 +10746,22 @@ static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp, lb_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, extack); if (IS_ERR(lb_rif)) { err = PTR_ERR(lb_rif); - return err; + goto err_ul_rif_get; } mlxsw_sp->router->lb_rif_index = lb_rif->rif_index; return 0; + +err_ul_rif_get: + mlxsw_sp_crif_free(router->lb_crif); + return err; } static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp) { mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index); + mlxsw_sp_crif_free(mlxsw_sp->router->lb_crif); } static int mlxsw_sp1_router_init(struct mlxsw_sp *mlxsw_sp) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index b223e80303f5..0909cf229c86 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -61,6 +61,7 @@ struct mlxsw_sp_router { struct mutex lock; /* Protects shared router resources */ struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx; u16 lb_rif_index; + struct mlxsw_sp_crif *lb_crif; const struct mlxsw_sp_adj_grp_size_range *adj_grp_size_ranges; size_t adj_grp_size_ranges_count; struct delayed_work nh_grp_activity_dw; -- cgit v1.2.3 From aa21242b07a8cde689bb6aedcbc224eda9646d9f Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 22 Jun 2023 15:33:06 +0200 Subject: mlxsw: spectrum_router: Link CRIFs to RIFs When a RIF is about to be created, the registration of the netdevice that it should be associated with must have been seen in the past, and a CRIF created. Therefore make this a hard requirement by looking up the CRIF during RIF creation, and complaining loudly when there isn't one. This then allows to keep a link between a RIF and its corresponding CRIF (and back, as the relationship is one-to-at-most-one), which do. The CRIF will later be useful as the objects tracked there will be offloaded lazily as a result of RIF creation. 
CRIFs are created when an "interesting" netdevice is registered, and destroyed after such device is unregistered. CRIFs are supposed to already exist when a RIF creation request arises, and exist at least as long as that RIF exists. This makes for a simple invariant: it is always safe to dereference CRIF pointer from "its" RIF. To guarantee this, CRIFs cannot be removed immediately when the UNREGISTER event is delivered. The reason is that if a RIF's netdevice has an IPv6 address, removal of this address is notified in an atomic block. To remove the RIF, the IPv6 removal handler schedules a work item. It must be safe for this work item to access the associated CRIF as well. Thus when a netdevice that backs the CRIF is removed, if it still has a RIF, do not actually free the CRIF, only toggle its can_destroy flag, which this patch adds. Later on, mlxsw_sp_rif_destroy() collects the CRIF. Signed-off-by: Petr Machata Reviewed-by: Danielle Ratson Link: https://lore.kernel.org/r/68c8e33afa6b8c03c431b435e1685ffdff752e63.1687438411.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 59 +++++++++++++++++----- 1 file changed, 47 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c4d538e0169e..daa59fc59d3b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -58,6 +58,8 @@ struct mlxsw_sp_crif_key { struct mlxsw_sp_crif { struct mlxsw_sp_crif_key key; struct rhash_head ht_node; + bool can_destroy; + struct mlxsw_sp_rif *rif; }; static const struct rhashtable_params mlxsw_sp_crif_ht_params = { @@ -67,9 +69,9 @@ static const struct rhashtable_params mlxsw_sp_crif_ht_params = { }; struct mlxsw_sp_rif { + struct mlxsw_sp_crif *crif; /* NULL for underlay RIF */ struct list_head nexthop_list; struct 
list_head neigh_list; - struct net_device *dev; /* NULL for underlay RIF */ struct mlxsw_sp_fid *fid; unsigned char addr[ETH_ALEN]; int mtu; @@ -88,7 +90,9 @@ struct mlxsw_sp_rif { static struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif) { - return rif->dev; + if (!rif->crif) + return NULL; + return rif->crif->key.dev; } struct mlxsw_sp_rif_params { @@ -1096,6 +1100,9 @@ mlxsw_sp_crif_alloc(struct net_device *dev) static void mlxsw_sp_crif_free(struct mlxsw_sp_crif *crif) { + if (WARN_ON(crif->rif)) + return; + kfree(crif); } @@ -7970,8 +7977,9 @@ static void mlxsw_sp_rif_index_free(struct mlxsw_sp *mlxsw_sp, u16 rif_index, static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, u16 vr_id, - struct net_device *l3_dev) + struct mlxsw_sp_crif *crif) { + struct net_device *l3_dev = crif ? crif->key.dev : NULL; struct mlxsw_sp_rif *rif; rif = kzalloc(rif_size, GFP_KERNEL); @@ -7983,10 +7991,13 @@ static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, if (l3_dev) { ether_addr_copy(rif->addr, l3_dev->dev_addr); rif->mtu = l3_dev->mtu; - rif->dev = l3_dev; } rif->vr_id = vr_id; rif->rif_index = rif_index; + if (crif) { + rif->crif = crif; + crif->rif = rif; + } return rif; } @@ -7995,6 +8006,9 @@ static void mlxsw_sp_rif_free(struct mlxsw_sp_rif *rif) { WARN_ON(!list_empty(&rif->neigh_list)); WARN_ON(!list_empty(&rif->nexthop_list)); + + if (rif->crif) + rif->crif->rif = NULL; kfree(rif); } @@ -8228,6 +8242,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_ops *ops; struct mlxsw_sp_fid *fid = NULL; enum mlxsw_sp_rif_type type; + struct mlxsw_sp_crif *crif; struct mlxsw_sp_rif *rif; struct mlxsw_sp_vr *vr; u16 rif_index; @@ -8247,7 +8262,13 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, goto err_rif_index_alloc; } - rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev); + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, params->dev); + if (WARN_ON(!crif)) { + err 
= -ENOENT; + goto err_crif_lookup; + } + + rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, crif); if (!rif) { err = -ENOMEM; goto err_rif_alloc; @@ -8306,6 +8327,7 @@ err_fid_get: dev_put(params->dev); mlxsw_sp_rif_free(rif); err_rif_alloc: +err_crif_lookup: mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); err_rif_index_alloc: vr->rif_count--; @@ -8318,6 +8340,7 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) struct net_device *dev = mlxsw_sp_rif_dev(rif); const struct mlxsw_sp_rif_ops *ops = rif->ops; struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_crif *crif = rif->crif; struct mlxsw_sp_fid *fid = rif->fid; u8 rif_entries = rif->rif_entries; u16 rif_index = rif->rif_index; @@ -8348,6 +8371,9 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); vr->rif_count--; mlxsw_sp_vr_put(mlxsw_sp, vr); + + if (crif->can_destroy) + mlxsw_sp_crif_free(crif); } void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, @@ -9262,7 +9288,10 @@ static void mlxsw_sp_crif_unregister(struct mlxsw_sp_router *router, struct mlxsw_sp_crif *crif) { mlxsw_sp_crif_remove(router, crif); - mlxsw_sp_crif_free(crif); + if (crif->rif) + crif->can_destroy = true; + else + mlxsw_sp_crif_free(crif); } static int mlxsw_sp_netdevice_register(struct mlxsw_sp_router *router, @@ -10068,6 +10097,7 @@ mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable) static struct mlxsw_sp_rif * mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, + struct mlxsw_sp_crif *ul_crif, struct netlink_ext_ack *extack) { struct mlxsw_sp_rif *ul_rif; @@ -10081,7 +10111,8 @@ mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, return ERR_PTR(err); } - ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL); + ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, + ul_crif); if (!ul_rif) { err = -ENOMEM; goto err_rif_alloc; @@ 
-10120,6 +10151,7 @@ static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif) static struct mlxsw_sp_rif * mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + struct mlxsw_sp_crif *ul_crif, struct netlink_ext_ack *extack) { struct mlxsw_sp_vr *vr; @@ -10132,7 +10164,7 @@ mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, if (refcount_inc_not_zero(&vr->ul_rif_refcnt)) return vr->ul_rif; - vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack); + vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, ul_crif, extack); if (IS_ERR(vr->ul_rif)) { err = PTR_ERR(vr->ul_rif); goto err_ul_rif_create; @@ -10170,7 +10202,7 @@ int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, int err = 0; mutex_lock(&mlxsw_sp->router->lock); - ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL); + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, NULL); if (IS_ERR(ul_rif)) { err = PTR_ERR(ul_rif); goto out; @@ -10206,7 +10238,7 @@ mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif, struct mlxsw_sp_rif *ul_rif; int err; - ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, extack); + ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL, extack); if (IS_ERR(ul_rif)) return PTR_ERR(ul_rif); @@ -10741,9 +10773,12 @@ static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp, /* Create a generic loopback RIF associated with the main table * (default VRF). Any table can be used, but the main table exists - * anyway, so we do not waste resources. + * anyway, so we do not waste resources. Loopback RIFs are usually + * created with a NULL CRIF, but this RIF is used as a fallback RIF + * for blackhole nexthops, and nexthops expect to have a valid CRIF. 
*/ - lb_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, extack); + lb_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, RT_TABLE_MAIN, router->lb_crif, + extack); if (IS_ERR(lb_rif)) { err = PTR_ERR(lb_rif); goto err_ul_rif_get; -- cgit v1.2.3 From bdc0b78e79a641fbbb928a9e5da56dbdd42ff674 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 22 Jun 2023 15:33:07 +0200 Subject: mlxsw: spectrum_router: Use router.lb_crif instead of .lb_rif_index A previous patch added a pointer to loopback CRIF to the router data structure. That makes the loopback RIF index redundant, as everything necessary can be derived from the CRIF. Drop the field and adjust the code accordingly. Signed-off-by: Petr Machata Reviewed-by: Danielle Ratson Link: https://lore.kernel.org/r/8637bf959bc5b6c9d5184b9bd8a0cd53c5132835.1687438411.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 12 ++++-------- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h | 1 - 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index daa59fc59d3b..acd6f1b5eef9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3563,7 +3563,7 @@ static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u16 rif_index; rif_index = nh->rif ? nh->rif->rif_index : - mlxsw_sp->router->lb_rif_index; + mlxsw_sp->router->lb_crif->rif->rif_index; op = force ? 
MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET, @@ -4530,7 +4530,7 @@ static int mlxsw_sp_adj_trap_entry_init(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true, MLXSW_REG_RATR_TYPE_ETHERNET, mlxsw_sp->router->adj_trap_index, - mlxsw_sp->router->lb_rif_index); + mlxsw_sp->router->lb_crif->rif->rif_index); mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action); mlxsw_reg_ratr_trap_id_set(ratr_pl, MLXSW_TRAP_ID_RTR_EGRESS0); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); @@ -4846,15 +4846,13 @@ static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { - u16 lb_rif_index = mlxsw_sp->router->lb_rif_index; - nh->action = MLXSW_SP_NEXTHOP_ACTION_DISCARD; nh->should_offload = 1; /* While nexthops that discard packets do not forward packets * via an egress RIF, they still need to be programmed using a * valid RIF, so use the loopback RIF created during init. 
*/ - nh->rif = mlxsw_sp->router->rifs[lb_rif_index]; + nh->rif = mlxsw_sp->router->lb_crif->rif; } static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp, @@ -10784,8 +10782,6 @@ static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp, goto err_ul_rif_get; } - mlxsw_sp->router->lb_rif_index = lb_rif->rif_index; - return 0; err_ul_rif_get: @@ -10795,7 +10791,7 @@ err_ul_rif_get: static void mlxsw_sp_lb_rif_fini(struct mlxsw_sp *mlxsw_sp) { - mlxsw_sp_router_ul_rif_put(mlxsw_sp, mlxsw_sp->router->lb_rif_index); + mlxsw_sp_ul_rif_put(mlxsw_sp->router->lb_crif->rif); mlxsw_sp_crif_free(mlxsw_sp->router->lb_crif); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 0909cf229c86..9a2669a08480 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -60,7 +60,6 @@ struct mlxsw_sp_router { struct mlxsw_sp_router_nve_decap nve_decap_config; struct mutex lock; /* Protects shared router resources */ struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx; - u16 lb_rif_index; struct mlxsw_sp_crif *lb_crif; const struct mlxsw_sp_adj_grp_size_range *adj_grp_size_ranges; size_t adj_grp_size_ranges_count; -- cgit v1.2.3 From a285d664236eb0655eea0ceb97095ad4b07fcbae Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 22 Jun 2023 15:33:08 +0200 Subject: mlxsw: spectrum_router: Split nexthop finalization to two stages Nexthop finalization consists of two steps: the part where the offload is removed, because the backing RIF is now gone; and the part where the association to the RIF is severed. Extract from mlxsw_sp_nexthop_type_fini() a helper that covers the unoffloading part, mlxsw_sp_nexthop_type_rif_gone(), so that it can later be called independently. Note that this swaps around the ordering of mlxsw_sp_nexthop_ipip_fini() vs. mlxsw_sp_nexthop_rif_fini(). 
The current ordering is more of a historical happenstance than a conscious decision. The two cleanups do not depend on each other, and this change should have no observable effects. Signed-off-by: Petr Machata Reviewed-by: Danielle Ratson Link: https://lore.kernel.org/r/7134559534c5f5c4807c3a1569fae56f8887e763.1687438411.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index acd6f1b5eef9..6c9244c35192 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -4369,21 +4369,26 @@ err_neigh_init: return err; } -static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +static void mlxsw_sp_nexthop_type_rif_gone(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { switch (nh->type) { case MLXSW_SP_NEXTHOP_TYPE_ETH: mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); break; case MLXSW_SP_NEXTHOP_TYPE_IPIP: - mlxsw_sp_nexthop_rif_fini(nh); mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh); break; } } +static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh); + mlxsw_sp_nexthop_rif_fini(nh); +} + static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh, -- cgit v1.2.3 From 9464a3d68ea99ccac7e3518e1dfd366ac80bbc90 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 22 Jun 2023 15:33:09 +0200 Subject: mlxsw: spectrum_router: Track next hops at CRIFs Move the list of next hops from struct mlxsw_sp_rif to mlxsw_sp_crif. 
The reason is that eventually, next hops for mlxsw uppers should be offloaded and unoffloaded on demand as a netdevice becomes an upper, or stops being one. Currently, next hops are tracked at RIFs, but RIFs do not exist when a netdevice is not an mlxsw upper. CRIFs are kept track of throughout the netdevice lifetime. Correspondingly, track at each next hop not its RIF, but its CRIF (from which a RIF can always be deduced). Note that now that next hops are tracked at a CRIF, it is not necessary to move each over to a new RIF when it is necessary to edit a RIF. Therefore drop mlxsw_sp_nexthop_rif_migrate() and have mlxsw_sp_rif_migrate_destroy() call mlxsw_sp_nexthop_rif_update() directly. Signed-off-by: Petr Machata Reviewed-by: Danielle Ratson Link: https://lore.kernel.org/r/e7c1c0a7dd13883b0f09aeda12c4fcf4d63a70e3.1687438411.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 127 ++++++++++++--------- 1 file changed, 75 insertions(+), 52 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 6c9244c35192..445ba7fe3c40 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -59,6 +59,7 @@ struct mlxsw_sp_crif { struct mlxsw_sp_crif_key key; struct rhash_head ht_node; bool can_destroy; + struct list_head nexthop_list; struct mlxsw_sp_rif *rif; }; @@ -70,7 +71,6 @@ static const struct rhashtable_params mlxsw_sp_crif_ht_params = { struct mlxsw_sp_rif { struct mlxsw_sp_crif *crif; /* NULL for underlay RIF */ - struct list_head nexthop_list; struct list_head neigh_list; struct mlxsw_sp_fid *fid; unsigned char addr[ETH_ALEN]; @@ -1083,6 +1083,7 @@ static void mlxsw_sp_crif_init(struct mlxsw_sp_crif *crif, struct net_device *dev) { crif->key.dev = dev; + INIT_LIST_HEAD(&crif->nexthop_list); } static struct mlxsw_sp_crif * 
@@ -1103,6 +1104,7 @@ static void mlxsw_sp_crif_free(struct mlxsw_sp_crif *crif) if (WARN_ON(crif->rif)) return; + WARN_ON(!list_empty(&crif->nexthop_list)); kfree(crif); } @@ -1720,17 +1722,26 @@ static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry); } -static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *old_rif, - struct mlxsw_sp_rif *new_rif); +static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif); + static void mlxsw_sp_rif_migrate_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *old_rif, struct mlxsw_sp_rif *new_rif, bool migrate_nhs) { + struct mlxsw_sp_crif *crif = old_rif->crif; + struct mlxsw_sp_crif mock_crif = {}; + if (migrate_nhs) - mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, old_rif, new_rif); + mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); + /* Plant a mock CRIF so that destroying the old RIF doesn't unoffload + * our nexthops and IPIP tunnels, and doesn't sever the crif->rif link. + */ + mlxsw_sp_crif_init(&mock_crif, crif->key.dev); + old_rif->crif = &mock_crif; + mock_crif.rif = old_rif; mlxsw_sp_rif_destroy(old_rif); } @@ -1756,9 +1767,6 @@ mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *rif); - /** * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry. * @mlxsw_sp: mlxsw_sp. 
@@ -2987,7 +2995,7 @@ struct mlxsw_sp_nexthop_key { struct mlxsw_sp_nexthop { struct list_head neigh_list_node; /* member of neigh entry list */ - struct list_head rif_list_node; + struct list_head crif_list_node; struct list_head router_list_node; struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group * this nexthop belongs to @@ -3000,7 +3008,7 @@ struct mlxsw_sp_nexthop { int nh_weight; int norm_nh_weight; int num_adj_entries; - struct mlxsw_sp_rif *rif; + struct mlxsw_sp_crif *crif; u8 should_offload:1, /* set indicates this nexthop should be written * to the adjacency table. */ @@ -3023,9 +3031,9 @@ struct mlxsw_sp_nexthop { static struct net_device * mlxsw_sp_nexthop_dev(const struct mlxsw_sp_nexthop *nh) { - if (nh->rif) - return mlxsw_sp_rif_dev(nh->rif); - return NULL; + if (!nh->crif) + return NULL; + return nh->crif->key.dev; } enum mlxsw_sp_nexthop_group_type { @@ -3050,7 +3058,11 @@ struct mlxsw_sp_nexthop_group_info { static struct mlxsw_sp_rif * mlxsw_sp_nhgi_rif(const struct mlxsw_sp_nexthop_group_info *nhgi) { - return nhgi->nexthops[0].rif; + struct mlxsw_sp_crif *crif = nhgi->nexthops[0].crif; + + if (!crif) + return NULL; + return crif->rif; } struct mlxsw_sp_nexthop_group_vr_key { @@ -3174,7 +3186,9 @@ int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh) { - return nh->rif; + if (WARN_ON(!nh->crif)) + return NULL; + return nh->crif->rif; } bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh) @@ -3559,11 +3573,12 @@ static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, bool force, char *ratr_pl) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; + struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh); enum mlxsw_reg_ratr_op op; u16 rif_index; - rif_index = nh->rif ? nh->rif->rif_index : - mlxsw_sp->router->lb_crif->rif->rif_index; + rif_index = rif ? 
rif->rif_index : + mlxsw_sp->router->lb_crif->rif->rif_index; op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET, @@ -4181,23 +4196,23 @@ mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, } } -static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh, - struct mlxsw_sp_rif *rif) +static void mlxsw_sp_nexthop_crif_init(struct mlxsw_sp_nexthop *nh, + struct mlxsw_sp_crif *crif) { - if (nh->rif) + if (nh->crif) return; - nh->rif = rif; - list_add(&nh->rif_list_node, &rif->nexthop_list); + nh->crif = crif; + list_add(&nh->crif_list_node, &crif->nexthop_list); } -static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh) +static void mlxsw_sp_nexthop_crif_fini(struct mlxsw_sp_nexthop *nh) { - if (!nh->rif) + if (!nh->crif) return; - list_del(&nh->rif_list_node); - nh->rif = NULL; + list_del(&nh->crif_list_node); + nh->crif = NULL; } static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, @@ -4209,6 +4224,9 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, u8 nud_state, dead; int err; + if (WARN_ON(!nh->crif->rif)) + return 0; + if (!nh->nhgi->gateway || nh->neigh_entry) return 0; dev = mlxsw_sp_nexthop_dev(nh); @@ -4299,15 +4317,20 @@ static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, struct mlxsw_sp_ipip_entry *ipip_entry) { + struct mlxsw_sp_crif *crif; bool removing; if (!nh->nhgi->gateway || nh->ipip_entry) return; + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, ipip_entry->ol_dev); + if (WARN_ON(!crif)) + return; + nh->ipip_entry = ipip_entry; removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev); __mlxsw_sp_nexthop_neigh_update(nh, removing); - mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common); + mlxsw_sp_nexthop_crif_init(nh, crif); } static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, @@ -4339,7 +4362,7 @@ static int 
mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_ipip_ops *ipip_ops; struct mlxsw_sp_ipip_entry *ipip_entry; - struct mlxsw_sp_rif *rif; + struct mlxsw_sp_crif *crif; int err; ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); @@ -4353,11 +4376,15 @@ static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp, } nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) + crif = mlxsw_sp_crif_lookup(mlxsw_sp->router, dev); + if (!crif) + return 0; + + mlxsw_sp_nexthop_crif_init(nh, crif); + + if (!crif->rif) return 0; - mlxsw_sp_nexthop_rif_init(nh, rif); err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); if (err) goto err_neigh_init; @@ -4365,7 +4392,7 @@ static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp, return 0; err_neigh_init: - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop_crif_fini(nh); return err; } @@ -4386,7 +4413,7 @@ static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop_crif_fini(nh); } static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, @@ -4479,7 +4506,7 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh; bool removing; - list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) { + list_for_each_entry(nh, &rif->crif->nexthop_list, crif_list_node) { switch (nh->type) { case MLXSW_SP_NEXTHOP_TYPE_ETH: removing = false; @@ -4497,25 +4524,14 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, } } -static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *old_rif, - struct mlxsw_sp_rif *new_rif) -{ - struct mlxsw_sp_nexthop *nh; - - list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list); - list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node) - nh->rif = new_rif; - 
mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif); -} - static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { struct mlxsw_sp_nexthop *nh, *tmp; - list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) { - mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); + list_for_each_entry_safe(nh, tmp, &rif->crif->nexthop_list, + crif_list_node) { + mlxsw_sp_nexthop_type_rif_gone(mlxsw_sp, nh); mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); } } @@ -4857,13 +4873,13 @@ static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp, * via an egress RIF, they still need to be programmed using a * valid RIF, so use the loopback RIF created during init. */ - nh->rif = mlxsw_sp->router->lb_crif->rif; + nh->crif = mlxsw_sp->router->lb_crif; } static void mlxsw_sp_nexthop_obj_blackhole_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { - nh->rif = NULL; + nh->crif = NULL; nh->should_offload = 0; } @@ -7871,6 +7887,9 @@ static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { + /* Signal to nexthop cleanup that the RIF is going away. 
*/ + rif->crif->rif = NULL; + mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index); mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif); mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); @@ -7989,7 +8008,6 @@ static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, if (!rif) return NULL; - INIT_LIST_HEAD(&rif->nexthop_list); INIT_LIST_HEAD(&rif->neigh_list); if (l3_dev) { ether_addr_copy(rif->addr, l3_dev->dev_addr); @@ -8008,7 +8026,6 @@ static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, static void mlxsw_sp_rif_free(struct mlxsw_sp_rif *rif) { WARN_ON(!list_empty(&rif->neigh_list)); - WARN_ON(!list_empty(&rif->nexthop_list)); if (rif->crif) rif->crif->rif = NULL; @@ -9290,7 +9307,13 @@ err_netdev_insert: static void mlxsw_sp_crif_unregister(struct mlxsw_sp_router *router, struct mlxsw_sp_crif *crif) { + struct mlxsw_sp_nexthop *nh, *tmp; + mlxsw_sp_crif_remove(router, crif); + + list_for_each_entry_safe(nh, tmp, &crif->nexthop_list, crif_list_node) + mlxsw_sp_nexthop_type_fini(router->mlxsw_sp, nh); + if (crif->rif) crif->can_destroy = true; else -- cgit v1.2.3 From 97117eb51ec8e9c397a0baa0b9d62acb51250a83 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 23 Jun 2023 12:04:07 +0200 Subject: net: stmmac: platform: provide stmmac_pltfr_init() Provide a helper wrapper around calling the platform's init() callback. This allows users to skip checking if the callback exists. 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230623100417.93592-2-brgl@bgdev.pl Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 25 ++++++++++++++++++++-- .../net/ethernet/stmicro/stmmac/stmmac_platform.h | 3 +++ 2 files changed, 26 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 3c6b55b60461..41ca4fc9f863 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -701,6 +701,25 @@ int stmmac_get_platform_resources(struct platform_device *pdev, } EXPORT_SYMBOL_GPL(stmmac_get_platform_resources); +/** + * stmmac_pltfr_init + * @pdev: pointer to the platform device + * @plat: driver data platform structure + * Description: Call the platform's init callback (if any) and propagate + * the return value. + */ +int stmmac_pltfr_init(struct platform_device *pdev, + struct plat_stmmacenet_data *plat) +{ + int ret = 0; + + if (plat->init) + ret = plat->init(pdev, plat->bsp_priv); + + return ret; +} +EXPORT_SYMBOL_GPL(stmmac_pltfr_init); + /** * stmmac_pltfr_remove * @pdev: platform device pointer @@ -755,9 +774,11 @@ static int __maybe_unused stmmac_pltfr_resume(struct device *dev) struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); struct platform_device *pdev = to_platform_device(dev); + int ret; - if (priv->plat->init) - priv->plat->init(pdev, priv->plat->bsp_priv); + ret = stmmac_pltfr_init(pdev, priv->plat->bsp_priv); + if (ret) + return ret; return stmmac_resume(dev); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index f7e457946681..6a2cd47fedcd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h @@ 
-19,6 +19,9 @@ void stmmac_remove_config_dt(struct platform_device *pdev, int stmmac_get_platform_resources(struct platform_device *pdev, struct stmmac_resources *stmmac_res); +int stmmac_pltfr_init(struct platform_device *pdev, + struct plat_stmmacenet_data *plat); + void stmmac_pltfr_remove(struct platform_device *pdev); extern const struct dev_pm_ops stmmac_pltfr_pm_ops; -- cgit v1.2.3 From 4450e7d4231af63027009967b01c8e258966801c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 23 Jun 2023 12:04:08 +0200 Subject: net: stmmac: dwmac-generic: use stmmac_pltfr_init() Shrink the code in dwmac-generic by using the new stmmac_pltfr_init() helper. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230623100417.93592-3-brgl@bgdev.pl Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c index ef1023930fd0..b7fc79864e8c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c @@ -47,11 +47,9 @@ static int dwmac_generic_probe(struct platform_device *pdev) } /* Custom initialisation (if needed) */ - if (plat_dat->init) { - ret = plat_dat->init(pdev, plat_dat->bsp_priv); - if (ret) - goto err_remove_config_dt; - } + ret = stmmac_pltfr_init(pdev, plat_dat); + if (ret) + goto err_remove_config_dt; ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) -- cgit v1.2.3 From 5b0acf8dd2c1bf3349257daad36dc34a8b62571e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 23 Jun 2023 12:04:09 +0200 Subject: net: stmmac: platform: provide stmmac_pltfr_exit() Provide a helper wrapper around calling the platform's exit() callback. This allows users to skip checking if the callback exists. 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230623100417.93592-4-brgl@bgdev.pl Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 22 ++++++++++++++++------ .../net/ethernet/stmicro/stmmac/stmmac_platform.h | 2 ++ 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 41ca4fc9f863..5b2bc129cd85 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -720,6 +720,20 @@ int stmmac_pltfr_init(struct platform_device *pdev, } EXPORT_SYMBOL_GPL(stmmac_pltfr_init); +/** + * stmmac_pltfr_exit + * @pdev: pointer to the platform device + * @plat: driver data platform structure + * Description: Call the platform's exit callback (if any). + */ +void stmmac_pltfr_exit(struct platform_device *pdev, + struct plat_stmmacenet_data *plat) +{ + if (plat->exit) + plat->exit(pdev, plat->bsp_priv); +} +EXPORT_SYMBOL_GPL(stmmac_pltfr_exit); + /** * stmmac_pltfr_remove * @pdev: platform device pointer @@ -733,10 +747,7 @@ void stmmac_pltfr_remove(struct platform_device *pdev) struct plat_stmmacenet_data *plat = priv->plat; stmmac_dvr_remove(&pdev->dev); - - if (plat->exit) - plat->exit(pdev, plat->bsp_priv); - + stmmac_pltfr_exit(pdev, plat); stmmac_remove_config_dt(pdev, plat); } EXPORT_SYMBOL_GPL(stmmac_pltfr_remove); @@ -756,8 +767,7 @@ static int __maybe_unused stmmac_pltfr_suspend(struct device *dev) struct platform_device *pdev = to_platform_device(dev); ret = stmmac_suspend(dev); - if (priv->plat->exit) - priv->plat->exit(pdev, priv->plat->bsp_priv); + stmmac_pltfr_exit(pdev, priv->plat); return ret; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index 6a2cd47fedcd..e79134cc1d3d 100644 --- 
a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h @@ -21,6 +21,8 @@ int stmmac_get_platform_resources(struct platform_device *pdev, int stmmac_pltfr_init(struct platform_device *pdev, struct plat_stmmacenet_data *plat); +void stmmac_pltfr_exit(struct platform_device *pdev, + struct plat_stmmacenet_data *plat); void stmmac_pltfr_remove(struct platform_device *pdev); extern const struct dev_pm_ops stmmac_pltfr_pm_ops; -- cgit v1.2.3 From 40db9f1ddfcc97425433a609e1f829dde74aa157 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 23 Jun 2023 12:04:10 +0200 Subject: net: stmmac: dwmac-generic: use stmmac_pltfr_exit() Shrink the code in dwmac-generic by using the new stmmac_pltfr_exit() helper. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230623100417.93592-5-brgl@bgdev.pl Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c index b7fc79864e8c..dabf05601221 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c @@ -58,8 +58,7 @@ static int dwmac_generic_probe(struct platform_device *pdev) return 0; err_exit: - if (plat_dat->exit) - plat_dat->exit(pdev, plat_dat->bsp_priv); + stmmac_pltfr_exit(pdev, plat_dat); err_remove_config_dt: if (pdev->dev.of_node) stmmac_remove_config_dt(pdev, plat_dat); -- cgit v1.2.3 From 3d5bf75d76ea8c6bfcffd1b6aa76686d86f9ea34 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 23 Jun 2023 12:04:11 +0200 Subject: net: stmmac: platform: provide stmmac_pltfr_probe() Implement stmmac_pltfr_probe() which is the logical API counterpart for stmmac_pltfr_remove(). It calls the platform's init() callback and then probes the stmmac device. 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230623100417.93592-6-brgl@bgdev.pl Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 28 ++++++++++++++++++++++ .../net/ethernet/stmicro/stmmac/stmmac_platform.h | 3 +++ 2 files changed, 31 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 5b2bc129cd85..df417cdab8c1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -734,6 +734,34 @@ void stmmac_pltfr_exit(struct platform_device *pdev, } EXPORT_SYMBOL_GPL(stmmac_pltfr_exit); +/** + * stmmac_pltfr_probe + * @pdev: platform device pointer + * @plat: driver data platform structure + * @res: stmmac resources structure + * Description: This calls the platform's init() callback and probes the + * stmmac driver. + */ +int stmmac_pltfr_probe(struct platform_device *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res) +{ + int ret; + + ret = stmmac_pltfr_init(pdev, plat); + if (ret) + return ret; + + ret = stmmac_dvr_probe(&pdev->dev, plat, res); + if (ret) { + stmmac_pltfr_exit(pdev, plat); + return ret; + } + + return ret; +} +EXPORT_SYMBOL_GPL(stmmac_pltfr_probe); + /** * stmmac_pltfr_remove * @pdev: platform device pointer diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index e79134cc1d3d..f968e658c9d2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h @@ -24,6 +24,9 @@ int stmmac_pltfr_init(struct platform_device *pdev, void stmmac_pltfr_exit(struct platform_device *pdev, struct plat_stmmacenet_data *plat); +int stmmac_pltfr_probe(struct platform_device *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res); void 
stmmac_pltfr_remove(struct platform_device *pdev); extern const struct dev_pm_ops stmmac_pltfr_pm_ops; -- cgit v1.2.3 From 0a68a59493e043170be1a064558bae6a30fea39d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 23 Jun 2023 12:04:12 +0200 Subject: net: stmmac: dwmac-generic: use stmmac_pltfr_probe() Shrink the code and remove labels by using the new stmmac_pltfr_probe() function. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230623100417.93592-7-brgl@bgdev.pl Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c index dabf05601221..20fc455b3337 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c @@ -46,19 +46,12 @@ static int dwmac_generic_probe(struct platform_device *pdev) plat_dat->unicast_filter_entries = 1; } - /* Custom initialisation (if needed) */ - ret = stmmac_pltfr_init(pdev, plat_dat); + ret = stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); if (ret) goto err_remove_config_dt; - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); - if (ret) - goto err_exit; - return 0; -err_exit: - stmmac_pltfr_exit(pdev, plat_dat); err_remove_config_dt: if (pdev->dev.of_node) stmmac_remove_config_dt(pdev, plat_dat); -- cgit v1.2.3 From 1be0c9d65e17684865d9ed039ac20eeb21019652 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 23 Jun 2023 12:04:13 +0200 Subject: net: stmmac: platform: provide stmmac_pltfr_remove_no_dt() Add a variant of stmmac_pltfr_remove() that only frees resources allocated by stmmac_pltfr_probe() and - unlike stmmac_pltfr_remove() - does not call stmmac_remove_config_dt(). 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230623100417.93592-8-brgl@bgdev.pl Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 20 ++++++++++++++++++-- .../net/ethernet/stmicro/stmmac/stmmac_platform.h | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index df417cdab8c1..58d5c5cc2269 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -762,6 +762,23 @@ int stmmac_pltfr_probe(struct platform_device *pdev, } EXPORT_SYMBOL_GPL(stmmac_pltfr_probe); +/** + * stmmac_pltfr_remove_no_dt + * @pdev: pointer to the platform device + * Description: This undoes the effects of stmmac_pltfr_probe() by removing the + * driver and calling the platform's exit() callback. + */ +void stmmac_pltfr_remove_no_dt(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct stmmac_priv *priv = netdev_priv(ndev); + struct plat_stmmacenet_data *plat = priv->plat; + + stmmac_dvr_remove(&pdev->dev); + stmmac_pltfr_exit(pdev, plat); +} +EXPORT_SYMBOL_GPL(stmmac_pltfr_remove_no_dt); + /** * stmmac_pltfr_remove * @pdev: platform device pointer @@ -774,8 +791,7 @@ void stmmac_pltfr_remove(struct platform_device *pdev) struct stmmac_priv *priv = netdev_priv(ndev); struct plat_stmmacenet_data *plat = priv->plat; - stmmac_dvr_remove(&pdev->dev); - stmmac_pltfr_exit(pdev, plat); + stmmac_pltfr_remove_no_dt(pdev); stmmac_remove_config_dt(pdev, plat); } EXPORT_SYMBOL_GPL(stmmac_pltfr_remove); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index f968e658c9d2..af52d5aa2b9a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h +++ 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h @@ -27,6 +27,7 @@ void stmmac_pltfr_exit(struct platform_device *pdev, int stmmac_pltfr_probe(struct platform_device *pdev, struct plat_stmmacenet_data *plat, struct stmmac_resources *res); +void stmmac_pltfr_remove_no_dt(struct platform_device *pdev); void stmmac_pltfr_remove(struct platform_device *pdev); extern const struct dev_pm_ops stmmac_pltfr_pm_ops; -- cgit v1.2.3 From d74065427374da6659a2d7fad4ec55c8926d43c4 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 23 Jun 2023 12:04:14 +0200 Subject: net: stmmac: platform: provide devm_stmmac_probe_config_dt() Provide a devres variant of stmmac_probe_config_dt() that allows users to skip calling stmmac_remove_config_dt() at driver detach. Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230623100417.93592-9-brgl@bgdev.pl Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 41 ++++++++++++++++++++++ .../net/ethernet/stmicro/stmmac/stmmac_platform.h | 2 ++ 2 files changed, 43 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 58d5c5cc2269..82d8a1c76476 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -8,6 +8,7 @@ Author: Giuseppe Cavallaro *******************************************************************************/ +#include #include #include #include @@ -629,6 +630,39 @@ error_pclk_get: return ret; } +static void devm_stmmac_remove_config_dt(void *data) +{ + struct plat_stmmacenet_data *plat = data; + + /* Platform data argument is unused */ + stmmac_remove_config_dt(NULL, plat); +} + +/** + * devm_stmmac_probe_config_dt + * @pdev: platform_device structure + * @mac: MAC address to use + * Description: Devres variant of stmmac_probe_config_dt(). 
Does not require + * the user to call stmmac_remove_config_dt() at driver detach. + */ +struct plat_stmmacenet_data * +devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) +{ + struct plat_stmmacenet_data *plat; + int ret; + + plat = stmmac_probe_config_dt(pdev, mac); + if (IS_ERR(plat)) + return plat; + + ret = devm_add_action_or_reset(&pdev->dev, + devm_stmmac_remove_config_dt, plat); + if (ret) + return ERR_PTR(ret); + + return plat; +} + /** * stmmac_remove_config_dt - undo the effects of stmmac_probe_config_dt() * @pdev: platform_device structure @@ -651,12 +685,19 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) return ERR_PTR(-EINVAL); } +struct plat_stmmacenet_data * +devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) +{ + return ERR_PTR(-EINVAL); +} + void stmmac_remove_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat) { } #endif /* CONFIG_OF */ EXPORT_SYMBOL_GPL(stmmac_probe_config_dt); +EXPORT_SYMBOL_GPL(devm_stmmac_probe_config_dt); EXPORT_SYMBOL_GPL(stmmac_remove_config_dt); int stmmac_get_platform_resources(struct platform_device *pdev, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index af52d5aa2b9a..8c1e5b2e9dae 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h @@ -13,6 +13,8 @@ struct plat_stmmacenet_data * stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac); +struct plat_stmmacenet_data * +devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac); void stmmac_remove_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat); -- cgit v1.2.3 From 061425d933ef9259dbe3789a3a3c63063f53202d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 23 Jun 2023 12:04:15 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: use devm_stmmac_probe_config_dt() Significantly simplify the driver's probe() 
function by using the devres variant of stmmac_probe_config_dt(). This allows dropping the goto jumps entirely. The remove_new() callback now needs to be switched to stmmac_pltfr_remove_no_dt(). Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230623100417.93592-10-brgl@bgdev.pl Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 49 +++++++--------------- 1 file changed, 15 insertions(+), 34 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index fa0fc53c56a3..7b9fbcb8d84d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -708,7 +708,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) if (ret) return ret; - plat_dat = stmmac_probe_config_dt(pdev, stmmac_res.mac); + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); if (IS_ERR(plat_dat)) { dev_err(dev, "dt configuration failed\n"); return PTR_ERR(plat_dat); @@ -717,10 +717,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->clks_config = ethqos_clks_config; ethqos = devm_kzalloc(dev, sizeof(*ethqos), GFP_KERNEL); - if (!ethqos) { - ret = -ENOMEM; - goto out_config_dt; - } + if (!ethqos) + return -ENOMEM; ethqos->phy_mode = device_get_phy_mode(dev); switch (ethqos->phy_mode) { @@ -734,19 +732,15 @@ static int qcom_ethqos_probe(struct platform_device *pdev) ethqos->configure_func = ethqos_configure_sgmii; break; case -ENODEV: - ret = -ENODEV; - goto out_config_dt; + return -ENODEV; default: - ret = -EINVAL; - goto out_config_dt; + return -EINVAL; } ethqos->pdev = pdev; ethqos->rgmii_base = devm_platform_ioremap_resource_byname(pdev, "rgmii"); - if (IS_ERR(ethqos->rgmii_base)) { - ret = PTR_ERR(ethqos->rgmii_base); - goto out_config_dt; - } + if (IS_ERR(ethqos->rgmii_base)) + return PTR_ERR(ethqos->rgmii_base);
ethqos->mac_base = stmmac_res.addr; @@ -757,24 +751,20 @@ static int qcom_ethqos_probe(struct platform_device *pdev) ethqos->has_emac_ge_3 = data->has_emac_ge_3; ethqos->link_clk = devm_clk_get(dev, data->link_clk_name ?: "rgmii"); - if (IS_ERR(ethqos->link_clk)) { - ret = PTR_ERR(ethqos->link_clk); - goto out_config_dt; - } + if (IS_ERR(ethqos->link_clk)) + return PTR_ERR(ethqos->link_clk); ret = ethqos_clks_config(ethqos, true); if (ret) - goto out_config_dt; + return ret; ret = devm_add_action_or_reset(dev, ethqos_clks_disable, ethqos); if (ret) - goto out_config_dt; + return ret; ethqos->serdes_phy = devm_phy_optional_get(dev, "serdes"); - if (IS_ERR(ethqos->serdes_phy)) { - ret = PTR_ERR(ethqos->serdes_phy); - goto out_config_dt; - } + if (IS_ERR(ethqos->serdes_phy)) + return PTR_ERR(ethqos->serdes_phy); ethqos->speed = SPEED_1000; ethqos_update_link_clk(ethqos, SPEED_1000); @@ -797,16 +787,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->serdes_powerdown = qcom_ethqos_serdes_powerdown; } - ret = stmmac_dvr_probe(dev, plat_dat, &stmmac_res); - if (ret) - goto out_config_dt; - - return ret; - -out_config_dt: - stmmac_remove_config_dt(pdev, plat_dat); - - return ret; + return stmmac_dvr_probe(dev, plat_dat, &stmmac_res); } static const struct of_device_id qcom_ethqos_match[] = { @@ -820,7 +801,7 @@ MODULE_DEVICE_TABLE(of, qcom_ethqos_match); static struct platform_driver qcom_ethqos_driver = { .probe = qcom_ethqos_probe, - .remove_new = stmmac_pltfr_remove, + .remove_new = stmmac_pltfr_remove_no_dt, .driver = { .name = "qcom-ethqos", .pm = &stmmac_pltfr_pm_ops, -- cgit v1.2.3 From fc9ee2ac4f9c366d92e6bb4c89f316c47d3a8de6 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 23 Jun 2023 12:04:16 +0200 Subject: net: stmmac: platform: provide devm_stmmac_pltfr_probe() Provide a devres variant of stmmac_pltfr_probe() which allows users to skip calling stmmac_pltfr_remove() at driver detach. 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230623100417.93592-11-brgl@bgdev.pl Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 30 ++++++++++++++++++++++ .../net/ethernet/stmicro/stmmac/stmmac_platform.h | 3 +++ 2 files changed, 33 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 82d8a1c76476..231152ee5a32 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -803,6 +803,36 @@ int stmmac_pltfr_probe(struct platform_device *pdev, } EXPORT_SYMBOL_GPL(stmmac_pltfr_probe); +static void devm_stmmac_pltfr_remove(void *data) +{ + struct platform_device *pdev = data; + + stmmac_pltfr_remove_no_dt(pdev); +} + +/** + * devm_stmmac_pltfr_probe + * @pdev: pointer to the platform device + * @plat: driver data platform structure + * @res: stmmac resources + * Description: Devres variant of stmmac_pltfr_probe(). Allows users to skip + * calling stmmac_pltfr_remove() on driver detach. 
+ */ +int devm_stmmac_pltfr_probe(struct platform_device *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res) +{ + int ret; + + ret = stmmac_pltfr_probe(pdev, plat, res); + if (ret) + return ret; + + return devm_add_action_or_reset(&pdev->dev, devm_stmmac_pltfr_remove, + pdev); +} +EXPORT_SYMBOL_GPL(devm_stmmac_pltfr_probe); + /** * stmmac_pltfr_remove_no_dt * @pdev: pointer to the platform device diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index 8c1e5b2e9dae..c5565b2a70ac 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h @@ -29,6 +29,9 @@ void stmmac_pltfr_exit(struct platform_device *pdev, int stmmac_pltfr_probe(struct platform_device *pdev, struct plat_stmmacenet_data *plat, struct stmmac_resources *res); +int devm_stmmac_pltfr_probe(struct platform_device *pdev, + struct plat_stmmacenet_data *plat, + struct stmmac_resources *res); void stmmac_pltfr_remove_no_dt(struct platform_device *pdev); void stmmac_pltfr_remove(struct platform_device *pdev); extern const struct dev_pm_ops stmmac_pltfr_pm_ops; -- cgit v1.2.3 From 4194f32a4b2b1e41c00fac7a1f5f63375a94ba11 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 23 Jun 2023 12:04:17 +0200 Subject: net: stmmac: dwmac-qcom-ethqos: use devm_stmmac_pltfr_probe() Use the devres variant of stmmac_pltfr_probe() and finally drop the remove() callback entirely. 
Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20230623100417.93592-12-brgl@bgdev.pl Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 7b9fbcb8d84d..e62940414e54 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -787,7 +787,7 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->serdes_powerdown = qcom_ethqos_serdes_powerdown; } - return stmmac_dvr_probe(dev, plat_dat, &stmmac_res); + return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); } static const struct of_device_id qcom_ethqos_match[] = { @@ -801,7 +801,6 @@ MODULE_DEVICE_TABLE(of, qcom_ethqos_match); static struct platform_driver qcom_ethqos_driver = { .probe = qcom_ethqos_probe, - .remove_new = stmmac_pltfr_remove_no_dt, .driver = { .name = "qcom-ethqos", .pm = &stmmac_pltfr_pm_ops, -- cgit v1.2.3 From f1bc9fc4a06de0108e0dca2a9a7e99ba1fc632f9 Mon Sep 17 00:00:00 2001 From: Maxim Kochetkov Date: Thu, 22 Jun 2023 22:22:45 +0300 Subject: net: axienet: Move reset before 64-bit DMA detection 64-bit DMA detection will fail if axienet was started before (by boot loader, boot ROM, etc). In this state axienet will not start properly. The XAXIDMA_TX_CDESC_OFFSET + 4 register (MM2S_CURDESC_MSB) is used to detect 64-bit DMA capability here. But the datasheet says: When DMACR.RS is 1 (axienet is in the enabled state), CURDESC_PTR becomes Read Only (RO) and is used to fetch the first descriptor. So the iowrite32()/ioread32() trick on this register to detect 64-bit DMA will not work. So move the axienet reset before 64-bit DMA detection.
Fixes: f735c40ed93c ("net: axienet: Autodetect 64-bit DMA capability") Signed-off-by: Maxim Kochetkov Reviewed-by: Robert Hancock Reviewed-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/20230622192245.116864-1-fido_max@inbox.ru Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 3e310b55bce2..734822321e0a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2042,6 +2042,11 @@ static int axienet_probe(struct platform_device *pdev) goto cleanup_clk; } + /* Reset core now that clocks are enabled, prior to accessing MDIO */ + ret = __axienet_device_reset(lp); + if (ret) + goto cleanup_clk; + /* Autodetect the need for 64-bit DMA pointers. * When the IP is configured for a bus width bigger than 32 bits, * writing the MSB registers is mandatory, even if they are all 0. @@ -2096,11 +2101,6 @@ static int axienet_probe(struct platform_device *pdev) lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; lp->coalesce_usec_tx = XAXIDMA_DFT_TX_USEC; - /* Reset core now that clocks are enabled, prior to accessing MDIO */ - ret = __axienet_device_reset(lp); - if (ret) - goto cleanup_clk; - ret = axienet_mdio_setup(lp); if (ret) dev_warn(&pdev->dev, -- cgit v1.2.3 From dc97391e661009eab46783030d2404c9b6e6f2e7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Jun 2023 23:55:12 +0100 Subject: sock: Remove ->sendpage*() in favour of sendmsg(MSG_SPLICE_PAGES) Remove ->sendpage() and ->sendpage_locked(). sendmsg() with MSG_SPLICE_PAGES should be used instead. This allows multiple pages and multipage folios to be passed through. 
Signed-off-by: David Howells Acked-by: Marc Kleine-Budde # for net/can cc: Jens Axboe cc: Matthew Wilcox cc: linux-afs@lists.infradead.org cc: mptcp@lists.linux.dev cc: rds-devel@oss.oracle.com cc: tipc-discussion@lists.sourceforge.net cc: virtualization@lists.linux-foundation.org Link: https://lore.kernel.org/r/20230623225513.2732256-16-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- Documentation/bpf/map_sockmap.rst | 10 ++--- Documentation/filesystems/locking.rst | 2 - Documentation/filesystems/vfs.rst | 1 - Documentation/networking/scaling.rst | 4 +- crypto/af_alg.c | 28 ------------- crypto/algif_aead.c | 22 ++-------- crypto/algif_rng.c | 2 - crypto/algif_skcipher.c | 14 ------- .../ethernet/chelsio/inline_crypto/chtls/chtls.h | 2 - .../chelsio/inline_crypto/chtls/chtls_io.c | 14 ------- .../chelsio/inline_crypto/chtls/chtls_main.c | 1 - fs/nfsd/vfs.c | 2 +- include/crypto/if_alg.h | 2 - include/linux/net.h | 8 ---- include/net/inet_common.h | 2 - include/net/sock.h | 6 --- include/net/tcp.h | 4 -- net/appletalk/ddp.c | 1 - net/atm/pvc.c | 1 - net/atm/svc.c | 1 - net/ax25/af_ax25.c | 1 - net/caif/caif_socket.c | 2 - net/can/bcm.c | 1 - net/can/isotp.c | 1 - net/can/j1939/socket.c | 1 - net/can/raw.c | 1 - net/core/sock.c | 35 +--------------- net/dccp/ipv4.c | 1 - net/dccp/ipv6.c | 1 - net/ieee802154/socket.c | 2 - net/ipv4/af_inet.c | 21 ---------- net/ipv4/tcp.c | 43 ++----------------- net/ipv4/tcp_bpf.c | 23 +---------- net/ipv4/tcp_ipv4.c | 1 - net/ipv4/udp.c | 15 ------- net/ipv4/udp_impl.h | 2 - net/ipv4/udplite.c | 1 - net/ipv6/af_inet6.c | 3 -- net/ipv6/raw.c | 1 - net/ipv6/tcp_ipv6.c | 1 - net/kcm/kcmsock.c | 20 --------- net/key/af_key.c | 1 - net/l2tp/l2tp_ip.c | 1 - net/l2tp/l2tp_ip6.c | 1 - net/llc/af_llc.c | 1 - net/mctp/af_mctp.c | 1 - net/mptcp/protocol.c | 2 - net/netlink/af_netlink.c | 1 - net/netrom/af_netrom.c | 1 - net/packet/af_packet.c | 2 - net/phonet/socket.c | 2 - net/qrtr/af_qrtr.c | 1 - net/rds/af_rds.c | 1 - 
net/rose/af_rose.c | 1 - net/rxrpc/af_rxrpc.c | 1 - net/sctp/protocol.c | 1 - net/socket.c | 48 ---------------------- net/tipc/socket.c | 3 -- net/tls/tls.h | 6 --- net/tls/tls_device.c | 17 -------- net/tls/tls_main.c | 7 ---- net/tls/tls_sw.c | 35 ---------------- net/unix/af_unix.c | 19 --------- net/vmw_vsock/af_vsock.c | 3 -- net/x25/af_x25.c | 1 - net/xdp/xsk.c | 1 - 66 files changed, 20 insertions(+), 442 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/Documentation/bpf/map_sockmap.rst b/Documentation/bpf/map_sockmap.rst index cc92047c6630..2d630686a00b 100644 --- a/Documentation/bpf/map_sockmap.rst +++ b/Documentation/bpf/map_sockmap.rst @@ -240,11 +240,11 @@ offsets into ``msg``, respectively. If a program of type ``BPF_PROG_TYPE_SK_MSG`` is run on a ``msg`` it can only parse data that the (``data``, ``data_end``) pointers have already consumed. For ``sendmsg()`` hooks this is likely the first scatterlist element. But for -calls relying on the ``sendpage`` handler (e.g., ``sendfile()``) this will be -the range (**0**, **0**) because the data is shared with user space and by -default the objective is to avoid allowing user space to modify data while (or -after) BPF verdict is being decided. This helper can be used to pull in data -and to set the start and end pointers to given values. Data will be copied if +calls relying on MSG_SPLICE_PAGES (e.g., ``sendfile()``) this will be the +range (**0**, **0**) because the data is shared with user space and by default +the objective is to avoid allowing user space to modify data while (or after) +BPF verdict is being decided. This helper can be used to pull in data and to +set the start and end pointers to given values. Data will be copied if necessary (i.e., if data was not linear and if start and end pointers do not point to the same chunk). 
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index aa1a233b0fa8..ed148919e11a 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -521,8 +521,6 @@ prototypes:: int (*fsync) (struct file *, loff_t start, loff_t end, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); - ssize_t (*sendpage) (struct file *, struct page *, int, size_t, - loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index 769be5230210..cb2a97e49872 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -1086,7 +1086,6 @@ This describes how the VFS can manipulate an open file. As of kernel int (*fsync) (struct file *, loff_t, loff_t, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); - ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); int (*flock) (struct file *, int, struct file_lock *); diff --git a/Documentation/networking/scaling.rst b/Documentation/networking/scaling.rst index 3d435caa3ef2..92c9fb46d6a2 100644 --- a/Documentation/networking/scaling.rst +++ b/Documentation/networking/scaling.rst @@ -269,8 +269,8 @@ a single application thread handles flows with many different flow hashes. rps_sock_flow_table is a global flow table that contains the *desired* CPU for flows: the CPU that is currently processing the flow in userspace. Each table value is a CPU index that is updated during calls to recvmsg -and sendmsg (specifically, inet_recvmsg(), inet_sendmsg(), inet_sendpage() -and tcp_splice_read()). 
+and sendmsg (specifically, inet_recvmsg(), inet_sendmsg() and +tcp_splice_read()). When the scheduler moves a thread to a new CPU while it has outstanding receive packets on the old CPU, packets may arrive out of order. To diff --git a/crypto/af_alg.c b/crypto/af_alg.c index cdb1dcc5dd1a..6218c773d71c 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -482,7 +482,6 @@ static const struct proto_ops alg_proto_ops = { .listen = sock_no_listen, .shutdown = sock_no_shutdown, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, .sendmsg = sock_no_sendmsg, .recvmsg = sock_no_recvmsg, @@ -1106,33 +1105,6 @@ unlock: } EXPORT_SYMBOL_GPL(af_alg_sendmsg); -/** - * af_alg_sendpage - sendpage system call handler - * @sock: socket of connection to user space to write to - * @page: data to send - * @offset: offset into page to begin sending - * @size: length of data - * @flags: message send/receive flags - * - * This is a generic implementation of sendpage to fill ctx->tsgl_list. - */ -ssize_t af_alg_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, int flags) -{ - struct bio_vec bvec; - struct msghdr msg = { - .msg_flags = flags | MSG_SPLICE_PAGES, - }; - - if (flags & MSG_SENDPAGE_NOTLAST) - msg.msg_flags |= MSG_MORE; - - bvec_set_page(&bvec, page, size, offset); - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); - return sock_sendmsg(sock, &msg); -} -EXPORT_SYMBOL_GPL(af_alg_sendpage); - /** * af_alg_free_resources - release resources required for crypto request * @areq: Request holding the TX and RX SGL diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 35bfa283748d..7d58cbbce4af 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -9,10 +9,10 @@ * The following concept of the memory management is used: * * The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is - * filled by user space with the data submitted via sendpage. 
Filling up - * the TX SGL does not cause a crypto operation -- the data will only be - * tracked by the kernel. Upon receipt of one recvmsg call, the caller must - * provide a buffer which is tracked with the RX SGL. + * filled by user space with the data submitted via sendmsg (maybe with + * MSG_SPLICE_PAGES). Filling up the TX SGL does not cause a crypto operation + * -- the data will only be tracked by the kernel. Upon receipt of one recvmsg + * call, the caller must provide a buffer which is tracked with the RX SGL. * * During the processing of the recvmsg operation, the cipher request is * allocated and prepared. As part of the recvmsg operation, the processed @@ -370,7 +370,6 @@ static struct proto_ops algif_aead_ops = { .release = af_alg_release, .sendmsg = aead_sendmsg, - .sendpage = af_alg_sendpage, .recvmsg = aead_recvmsg, .poll = af_alg_poll, }; @@ -422,18 +421,6 @@ static int aead_sendmsg_nokey(struct socket *sock, struct msghdr *msg, return aead_sendmsg(sock, msg, size); } -static ssize_t aead_sendpage_nokey(struct socket *sock, struct page *page, - int offset, size_t size, int flags) -{ - int err; - - err = aead_check_key(sock); - if (err) - return err; - - return af_alg_sendpage(sock, page, offset, size, flags); -} - static int aead_recvmsg_nokey(struct socket *sock, struct msghdr *msg, size_t ignored, int flags) { @@ -461,7 +448,6 @@ static struct proto_ops algif_aead_ops_nokey = { .release = af_alg_release, .sendmsg = aead_sendmsg_nokey, - .sendpage = aead_sendpage_nokey, .recvmsg = aead_recvmsg_nokey, .poll = af_alg_poll, }; diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c index 407408c43730..10c41adac3b1 100644 --- a/crypto/algif_rng.c +++ b/crypto/algif_rng.c @@ -174,7 +174,6 @@ static struct proto_ops algif_rng_ops = { .bind = sock_no_bind, .accept = sock_no_accept, .sendmsg = sock_no_sendmsg, - .sendpage = sock_no_sendpage, .release = af_alg_release, .recvmsg = rng_recvmsg, @@ -192,7 +191,6 @@ static struct proto_ops __maybe_unused 
algif_rng_test_ops = { .mmap = sock_no_mmap, .bind = sock_no_bind, .accept = sock_no_accept, - .sendpage = sock_no_sendpage, .release = af_alg_release, .recvmsg = rng_test_recvmsg, diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index b1f321b9f846..9ada9b741af8 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -194,7 +194,6 @@ static struct proto_ops algif_skcipher_ops = { .release = af_alg_release, .sendmsg = skcipher_sendmsg, - .sendpage = af_alg_sendpage, .recvmsg = skcipher_recvmsg, .poll = af_alg_poll, }; @@ -246,18 +245,6 @@ static int skcipher_sendmsg_nokey(struct socket *sock, struct msghdr *msg, return skcipher_sendmsg(sock, msg, size); } -static ssize_t skcipher_sendpage_nokey(struct socket *sock, struct page *page, - int offset, size_t size, int flags) -{ - int err; - - err = skcipher_check_key(sock); - if (err) - return err; - - return af_alg_sendpage(sock, page, offset, size, flags); -} - static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg, size_t ignored, int flags) { @@ -285,7 +272,6 @@ static struct proto_ops algif_skcipher_ops_nokey = { .release = af_alg_release, .sendmsg = skcipher_sendmsg_nokey, - .sendpage = skcipher_sendpage_nokey, .recvmsg = skcipher_recvmsg_nokey, .poll = af_alg_poll, }; diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h index da4818d2c856..68562a82d036 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls.h @@ -569,8 +569,6 @@ int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); void chtls_splice_eof(struct socket *sock); -int chtls_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags); int send_tx_flowc_wr(struct sock *sk, int compl, u32 snd_nxt, u32 rcv_nxt); void 
chtls_tcp_push(struct sock *sk, int flags); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index e08ac960c967..5fc64e47568a 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -1246,20 +1246,6 @@ void chtls_splice_eof(struct socket *sock) release_sock(sk); } -int chtls_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags) -{ - struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, }; - struct bio_vec bvec; - - if (flags & MSG_SENDPAGE_NOTLAST) - msg.msg_flags |= MSG_MORE; - - bvec_set_page(&bvec, page, size, offset); - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); - return chtls_sendmsg(sk, &msg, size); -} - static void chtls_select_window(struct sock *sk) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c index 6b6787eafd2f..455a54708be4 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_main.c @@ -607,7 +607,6 @@ static void __init chtls_init_ulp_ops(void) chtls_cpl_prot.shutdown = chtls_shutdown; chtls_cpl_prot.sendmsg = chtls_sendmsg; chtls_cpl_prot.splice_eof = chtls_splice_eof; - chtls_cpl_prot.sendpage = chtls_sendpage; chtls_cpl_prot.recvmsg = chtls_recvmsg; chtls_cpl_prot.setsockopt = chtls_setsockopt; chtls_cpl_prot.getsockopt = chtls_getsockopt; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index db67f8e19344..8879e207ff5a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -936,7 +936,7 @@ nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags, /* * Grab and keep cached pages associated with a file in the svc_rqst - * so that they can be passed to the network sendmsg/sendpage routines 
+ * so that they can be passed to the network sendmsg routines * directly. They will be released after the sending has completed. * * Return values: Number of bytes consumed, or -EIO if there are no diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 34224e77f5a2..ef8ce86b1f78 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -229,8 +229,6 @@ void af_alg_wmem_wakeup(struct sock *sk); int af_alg_wait_for_data(struct sock *sk, unsigned flags, unsigned min); int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size, unsigned int ivsize); -ssize_t af_alg_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, int flags); void af_alg_free_resources(struct af_alg_async_req *areq); void af_alg_async_cb(void *data, int err); __poll_t af_alg_poll(struct file *file, struct socket *sock, diff --git a/include/linux/net.h b/include/linux/net.h index 23324e9a2b3d..41c608c1b02c 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -207,8 +207,6 @@ struct proto_ops { size_t total_len, int flags); int (*mmap) (struct file *file, struct socket *sock, struct vm_area_struct * vma); - ssize_t (*sendpage) (struct socket *sock, struct page *page, - int offset, size_t size, int flags); ssize_t (*splice_read)(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); void (*splice_eof)(struct socket *sock); @@ -222,8 +220,6 @@ struct proto_ops { sk_read_actor_t recv_actor); /* This is different from read_sock(), it reads an entire skb at a time. 
*/ int (*read_skb)(struct sock *sk, skb_read_actor_t recv_actor); - int (*sendpage_locked)(struct sock *sk, struct page *page, - int offset, size_t size, int flags); int (*sendmsg_locked)(struct sock *sk, struct msghdr *msg, size_t size); int (*set_rcvlowat)(struct sock *sk, int val); @@ -341,10 +337,6 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags); int kernel_getsockname(struct socket *sock, struct sockaddr *addr); int kernel_getpeername(struct socket *sock, struct sockaddr *addr); -int kernel_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags); -int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, - size_t size, int flags); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how); /* Routine returns the IP overhead imposed by a (caller-protected) socket. */ diff --git a/include/net/inet_common.h b/include/net/inet_common.h index a75333342c4e..b86b8e21de7f 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -36,8 +36,6 @@ void __inet_accept(struct socket *sock, struct socket *newsock, int inet_send_prepare(struct sock *sk); int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size); void inet_splice_eof(struct socket *sock); -ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags); int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); int inet_shutdown(struct socket *sock, int how); diff --git a/include/net/sock.h b/include/net/sock.h index 62a1b99da349..121284f455a8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1277,8 +1277,6 @@ struct proto { size_t len); int (*recvmsg)(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); - int (*sendpage)(struct sock *sk, struct page *page, - int offset, size_t size, int flags); void (*splice_eof)(struct socket *sock); int (*bind)(struct sock *sk, struct sockaddr *addr, 
int addr_len); @@ -1919,10 +1917,6 @@ int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t len); int sock_no_recvmsg(struct socket *, struct msghdr *, size_t, int); int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma); -ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags); -ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page, - int offset, size_t size, int flags); /* * Functions to fill in entries in struct proto_ops when a protocol diff --git a/include/net/tcp.h b/include/net/tcp.h index 31b534370787..226bce6d1e8c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -329,10 +329,6 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, size_t size, struct ubuf_info *uarg); void tcp_splice_eof(struct socket *sock); -int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, - int flags); -int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, - size_t size, int flags); int tcp_send_mss(struct sock *sk, int *size_goal, int flags); int tcp_wmem_schedule(struct sock *sk, int copy); void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle, diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index a06f4d4a6f47..8978fb6212ff 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1929,7 +1929,6 @@ static const struct proto_ops atalk_dgram_ops = { .sendmsg = atalk_sendmsg, .recvmsg = atalk_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct notifier_block ddp_notifier = { diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 53e7d3f39e26..66d9a9bd5896 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -126,7 +126,6 @@ static const struct proto_ops pvc_proto_ops = { .sendmsg = vcc_sendmsg, .recvmsg = vcc_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; diff --git 
a/net/atm/svc.c b/net/atm/svc.c index d83556d8beb9..36a814f1fbd1 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -654,7 +654,6 @@ static const struct proto_ops svc_proto_ops = { .sendmsg = vcc_sendmsg, .recvmsg = vcc_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index d8da400cb4de..5db805d5f74d 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -2022,7 +2022,6 @@ static const struct proto_ops ax25_proto_ops = { .sendmsg = ax25_sendmsg, .recvmsg = ax25_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; /* diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 4eebcc66c19a..9c82698da4f5 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -976,7 +976,6 @@ static const struct proto_ops caif_seqpacket_ops = { .sendmsg = caif_seqpkt_sendmsg, .recvmsg = caif_seqpkt_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static const struct proto_ops caif_stream_ops = { @@ -996,7 +995,6 @@ static const struct proto_ops caif_stream_ops = { .sendmsg = caif_stream_sendmsg, .recvmsg = caif_stream_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; /* This function is called when a socket is finally destroyed. 
*/ diff --git a/net/can/bcm.c b/net/can/bcm.c index a962ec2b8ba5..9ba35685b043 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1703,7 +1703,6 @@ static const struct proto_ops bcm_ops = { .sendmsg = bcm_sendmsg, .recvmsg = bcm_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct proto bcm_proto __read_mostly = { diff --git a/net/can/isotp.c b/net/can/isotp.c index 84f9aba02901..1f25b45868cf 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1699,7 +1699,6 @@ static const struct proto_ops isotp_ops = { .sendmsg = isotp_sendmsg, .recvmsg = isotp_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct proto isotp_proto __read_mostly = { diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 35970c25496a..feaec4ad6d16 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -1306,7 +1306,6 @@ static const struct proto_ops j1939_ops = { .sendmsg = j1939_sk_sendmsg, .recvmsg = j1939_sk_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct proto j1939_proto __read_mostly = { diff --git a/net/can/raw.c b/net/can/raw.c index f64469b98260..15c79b079184 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -962,7 +962,6 @@ static const struct proto_ops raw_ops = { .sendmsg = raw_sendmsg, .recvmsg = raw_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct proto raw_proto __read_mostly = { diff --git a/net/core/sock.c b/net/core/sock.c index 5f1747c12004..de719094b804 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3261,36 +3261,6 @@ void __receive_sock(struct file *file) } } -ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) -{ - ssize_t res; - struct msghdr msg = {.msg_flags = flags}; - struct kvec iov; - char *kaddr = kmap(page); - iov.iov_base = kaddr + offset; - iov.iov_len = size; - res = kernel_sendmsg(sock, &msg, &iov, 1, size); - kunmap(page); - return res; -} 
-EXPORT_SYMBOL(sock_no_sendpage); - -ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page, - int offset, size_t size, int flags) -{ - ssize_t res; - struct msghdr msg = {.msg_flags = flags}; - struct kvec iov; - char *kaddr = kmap(page); - - iov.iov_base = kaddr + offset; - iov.iov_len = size; - res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size); - kunmap(page); - return res; -} -EXPORT_SYMBOL(sock_no_sendpage_locked); - /* * Default Socket Callbacks */ @@ -4046,7 +4016,7 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto) { seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s " - "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", + "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n", proto->name, proto->obj_size, sock_prot_inuse_get(seq_file_net(seq), proto), @@ -4067,7 +4037,6 @@ static void proto_seq_printf(struct seq_file *seq, struct proto *proto) proto_method_implemented(proto->getsockopt), proto_method_implemented(proto->sendmsg), proto_method_implemented(proto->recvmsg), - proto_method_implemented(proto->sendpage), proto_method_implemented(proto->bind), proto_method_implemented(proto->backlog_rcv), proto_method_implemented(proto->hash), @@ -4088,7 +4057,7 @@ static int proto_seq_show(struct seq_file *seq, void *v) "maxhdr", "slab", "module", - "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n"); + "cl co di ac io in de sh ss gs se re bi br ha uh gp em\n"); else proto_seq_printf(seq, list_entry(v, struct proto, node)); return 0; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3ab68415d121..fa8079303cb0 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -1010,7 +1010,6 @@ static const struct proto_ops inet_dccp_ops = { .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct inet_protosw dccp_v4_protosw = { diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 
93c98990d726..7249ef218178 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1087,7 +1087,6 @@ static const struct proto_ops inet6_dccp_ops = { .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 9c124705120d..00302e8b9615 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -426,7 +426,6 @@ static const struct proto_ops ieee802154_raw_ops = { .sendmsg = ieee802154_sock_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; /* DGRAM Sockets (802.15.4 dataframes) */ @@ -989,7 +988,6 @@ static const struct proto_ops ieee802154_dgram_ops = { .sendmsg = ieee802154_sock_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static void ieee802154_sock_destruct(struct sock *sk) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 38e649fb4474..9b2ca2fcc5a1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -847,23 +847,6 @@ void inet_splice_eof(struct socket *sock) } EXPORT_SYMBOL_GPL(inet_splice_eof); -ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags) -{ - struct sock *sk = sock->sk; - const struct proto *prot; - - if (unlikely(inet_send_prepare(sk))) - return -EAGAIN; - - /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ - prot = READ_ONCE(sk->sk_prot); - if (prot->sendpage) - return prot->sendpage(sk, page, offset, size, flags); - return sock_no_sendpage(sock, page, offset, size, flags); -} -EXPORT_SYMBOL(inet_sendpage); - INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *, size_t, int, int *)); int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, @@ -1067,12 +1050,10 @@ const struct proto_ops inet_stream_ops = { .mmap = tcp_mmap, #endif .splice_eof = inet_splice_eof, - .sendpage = inet_sendpage, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, .read_skb = tcp_read_skb, .sendmsg_locked = tcp_sendmsg_locked, - .sendpage_locked = tcp_sendpage_locked, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_ioctl = inet_compat_ioctl, @@ -1102,7 +1083,6 @@ const struct proto_ops inet_dgram_ops = { .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .splice_eof = inet_splice_eof, - .sendpage = inet_sendpage, .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT .compat_ioctl = inet_compat_ioctl, @@ -1134,7 +1114,6 @@ static const struct proto_ops inet_sockraw_ops = { .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .splice_eof = inet_splice_eof, - .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = inet_compat_ioctl, #endif diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d56edc2c885f..e03e08745308 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -923,11 +923,10 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags) return mss_now; } -/* In some cases, both sendpage() and sendmsg() could have added - * an skb to the write queue, but failed adding payload on it. - * We need to remove it to consume less memory, but more - * importantly be able to generate EPOLLOUT for Edge Trigger epoll() - * users. +/* In some cases, both sendmsg() could have added an skb to the write queue, + * but failed adding payload on it. 
We need to remove it to consume less + * memory, but more importantly be able to generate EPOLLOUT for Edge Trigger + * epoll() users. */ void tcp_remove_empty_skb(struct sock *sk) { @@ -975,40 +974,6 @@ int tcp_wmem_schedule(struct sock *sk, int copy) return min(copy, sk->sk_forward_alloc); } -int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, - size_t size, int flags) -{ - struct bio_vec bvec; - struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, }; - - if (!(sk->sk_route_caps & NETIF_F_SG)) - return sock_no_sendpage_locked(sk, page, offset, size, flags); - - tcp_rate_check_app_limited(sk); /* is sending application-limited? */ - - bvec_set_page(&bvec, page, size, offset); - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); - - if (flags & MSG_SENDPAGE_NOTLAST) - msg.msg_flags |= MSG_MORE; - - return tcp_sendmsg_locked(sk, &msg, size); -} -EXPORT_SYMBOL_GPL(tcp_sendpage_locked); - -int tcp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) -{ - int ret; - - lock_sock(sk); - ret = tcp_sendpage_locked(sk, page, offset, size, flags); - release_sock(sk); - - return ret; -} -EXPORT_SYMBOL(tcp_sendpage); - void tcp_free_fastopen_req(struct tcp_sock *tp) { if (tp->fastopen_req) { diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 31d6005cea9b..81f0dff69e0b 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -486,7 +486,7 @@ static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) long timeo; int flags; - /* Don't let internal sendpage flags through */ + /* Don't let internal flags through */ flags = (msg->msg_flags & ~MSG_SENDPAGE_DECRYPTED); flags |= MSG_NO_SHARED_FRAGS; @@ -566,23 +566,6 @@ out_err: return copied ? 
copied : err; } -static int tcp_bpf_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) -{ - struct bio_vec bvec; - struct msghdr msg = { - .msg_flags = flags | MSG_SPLICE_PAGES, - }; - - bvec_set_page(&bvec, page, size, offset); - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); - - if (flags & MSG_SENDPAGE_NOTLAST) - msg.msg_flags |= MSG_MORE; - - return tcp_bpf_sendmsg(sk, &msg, size); -} - enum { TCP_BPF_IPV4, TCP_BPF_IPV6, @@ -612,7 +595,6 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], prot[TCP_BPF_TX] = prot[TCP_BPF_BASE]; prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg; - prot[TCP_BPF_TX].sendpage = tcp_bpf_sendpage; prot[TCP_BPF_RX] = prot[TCP_BPF_BASE]; prot[TCP_BPF_RX].recvmsg = tcp_bpf_recvmsg_parser; @@ -647,8 +629,7 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops) * indeed valid assumptions. */ return ops->recvmsg == tcp_recvmsg && - ops->sendmsg == tcp_sendmsg && - ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP; + ops->sendmsg == tcp_sendmsg ? 
0 : -ENOTSUPP; } int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9213804b034f..fd365de4d5ff 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3117,7 +3117,6 @@ struct proto tcp_prot = { .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, .splice_eof = tcp_splice_eof, - .sendpage = tcp_sendpage, .backlog_rcv = tcp_v4_do_rcv, .release_cb = tcp_release_cb, .hash = inet_hash, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 48fdcd3cad9c..42a96b3547c9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1340,20 +1340,6 @@ void udp_splice_eof(struct socket *sock) } EXPORT_SYMBOL_GPL(udp_splice_eof); -int udp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) -{ - struct bio_vec bvec; - struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES }; - - if (flags & MSG_SENDPAGE_NOTLAST) - msg.msg_flags |= MSG_MORE; - - bvec_set_page(&bvec, page, size, offset); - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); - return udp_sendmsg(sk, &msg, size); -} - #define UDP_SKB_IS_STATELESS 0x80000000 /* all head states (dst, sk, nf conntrack) except skb extensions are @@ -2933,7 +2919,6 @@ struct proto udp_prot = { .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .splice_eof = udp_splice_eof, - .sendpage = udp_sendpage, .release_cb = ip4_datagram_release_cb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 4ba7a88a1b1d..e1ff3a375996 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -19,8 +19,6 @@ int udp_getsockopt(struct sock *sk, int level, int optname, int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len); -int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, - int flags); void udp_destroy_sock(struct sock *sk); #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 
143f93a12f25..39ecdad1b50c 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -56,7 +56,6 @@ struct proto udplite_prot = { .getsockopt = udp_getsockopt, .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, - .sendpage = udp_sendpage, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v4_rehash, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b3451cf47d29..5d593ddc0347 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -696,9 +696,7 @@ const struct proto_ops inet6_stream_ops = { .mmap = tcp_mmap, #endif .splice_eof = inet_splice_eof, - .sendpage = inet_sendpage, .sendmsg_locked = tcp_sendmsg_locked, - .sendpage_locked = tcp_sendpage_locked, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, .read_skb = tcp_read_skb, @@ -729,7 +727,6 @@ const struct proto_ops inet6_dgram_ops = { .recvmsg = inet6_recvmsg, /* retpoline's sake */ .read_skb = udp_read_skb, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, .set_peek_off = sk_set_peek_off, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index c9caeb5a43ed..ac1cef094c5f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1296,7 +1296,6 @@ const struct proto_ops inet6_sockraw_ops = { .sendmsg = inet_sendmsg, /* ok */ .recvmsg = sock_common_recvmsg, /* ok */ .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c17c8ff94b79..40dd92a2f480 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2151,7 +2151,6 @@ struct proto tcpv6_prot = { .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, .splice_eof = tcp_splice_eof, - .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, .release_cb = tcp_release_cb, .hash = inet6_hash, diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index d0537c1c8cd7..393f01b2a7e6 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -963,24 +963,6 @@ static void 
kcm_splice_eof(struct socket *sock) release_sock(sk); } -static ssize_t kcm_sendpage(struct socket *sock, struct page *page, - int offset, size_t size, int flags) - -{ - struct bio_vec bvec; - struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, }; - - if (flags & MSG_SENDPAGE_NOTLAST) - msg.msg_flags |= MSG_MORE; - - if (flags & MSG_OOB) - return -EOPNOTSUPP; - - bvec_set_page(&bvec, page, size, offset); - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); - return kcm_sendmsg(sock, &msg, size); -} - static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { @@ -1769,7 +1751,6 @@ static const struct proto_ops kcm_dgram_ops = { .recvmsg = kcm_recvmsg, .mmap = sock_no_mmap, .splice_eof = kcm_splice_eof, - .sendpage = kcm_sendpage, }; static const struct proto_ops kcm_seqpacket_ops = { @@ -1791,7 +1772,6 @@ static const struct proto_ops kcm_seqpacket_ops = { .recvmsg = kcm_recvmsg, .mmap = sock_no_mmap, .splice_eof = kcm_splice_eof, - .sendpage = kcm_sendpage, .splice_read = kcm_splice_read, }; diff --git a/net/key/af_key.c b/net/key/af_key.c index 31ab12fd720a..ede3c6a60353 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3761,7 +3761,6 @@ static const struct proto_ops pfkey_ops = { .listen = sock_no_listen, .shutdown = sock_no_shutdown, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, /* Now the operations that really occur. 
*/ .release = pfkey_release, diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 2b795c1064f5..f9073bc7281f 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -624,7 +624,6 @@ static const struct proto_ops l2tp_ip_ops = { .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct inet_protosw l2tp_ip_protosw = { diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 5137ea1861ce..b1623f9c4f92 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -751,7 +751,6 @@ static const struct proto_ops l2tp_ip6_ops = { .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 9ffbc667be6c..57c35c960b2c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -1232,7 +1232,6 @@ static const struct proto_ops llc_ui_ops = { .sendmsg = llc_ui_sendmsg, .recvmsg = llc_ui_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static const char llc_proc_err_msg[] __initconst = diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index bb4bd0b6a4f7..f6be58b68c6f 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -485,7 +485,6 @@ static const struct proto_ops mctp_dgram_ops = { .sendmsg = mctp_sendmsg, .recvmsg = mctp_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = mctp_compat_ioctl, #endif diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index bd023debedc8..e892673deb73 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3866,7 +3866,6 @@ static const struct proto_ops mptcp_stream_ops = { .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, - .sendpage = inet_sendpage, }; static struct inet_protosw mptcp_protosw = { @@ -3961,7 +3960,6 @@ static const struct proto_ops mptcp_v6_stream_ops = { .sendmsg = 
inet6_sendmsg, .recvmsg = inet6_recvmsg, .mmap = sock_no_mmap, - .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index cbd9aa7ee24a..39cfb778ebc5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2815,7 +2815,6 @@ static const struct proto_ops netlink_ops = { .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static const struct net_proto_family netlink_family_ops = { diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 5a4cb796150f..eb8ccbd58df7 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1364,7 +1364,6 @@ static const struct proto_ops nr_proto_ops = { .sendmsg = nr_sendmsg, .recvmsg = nr_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct notifier_block nr_dev_notifier = { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index a2dbeb264f26..85ff90a03b0c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4621,7 +4621,6 @@ static const struct proto_ops packet_ops_spkt = { .sendmsg = packet_sendmsg_spkt, .recvmsg = packet_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static const struct proto_ops packet_ops = { @@ -4643,7 +4642,6 @@ static const struct proto_ops packet_ops = { .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, .mmap = packet_mmap, - .sendpage = sock_no_sendpage, }; static const struct net_proto_family packet_family_ops = { diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 967f9b4dc026..1018340d89a7 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -441,7 +441,6 @@ const struct proto_ops phonet_dgram_ops = { .sendmsg = pn_socket_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; const struct proto_ops phonet_stream_ops = { @@ -462,7 +461,6 @@ const struct proto_ops 
phonet_stream_ops = { .sendmsg = pn_socket_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; EXPORT_SYMBOL(phonet_stream_ops); diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index 76f0434d3d06..78beb74146e7 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -1244,7 +1244,6 @@ static const struct proto_ops qrtr_proto_ops = { .shutdown = sock_no_shutdown, .release = qrtr_release, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct proto qrtr_proto = { diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 3ff6995244e5..01c4cdfef45d 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -653,7 +653,6 @@ static const struct proto_ops rds_proto_ops = { .sendmsg = rds_sendmsg, .recvmsg = rds_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static void rds_sock_destruct(struct sock *sk) diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index ca2b17f32670..49dafe9ac72f 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1496,7 +1496,6 @@ static const struct proto_ops rose_proto_ops = { .sendmsg = rose_sendmsg, .recvmsg = rose_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct notifier_block rose_dev_notifier = { diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index da0b3b5157d5..f2cf4aa99db2 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -954,7 +954,6 @@ static const struct proto_ops rxrpc_rpc_ops = { .sendmsg = rxrpc_sendmsg, .recvmsg = rxrpc_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct proto rxrpc_proto = { diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 664d1f2e9121..274d07bd774f 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1133,7 +1133,6 @@ static const struct proto_ops inet_seqpacket_ops = { .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; /* Registration with AF_INET family. 
*/ diff --git a/net/socket.c b/net/socket.c index b778fc03c6e0..8c3c8b29995a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3552,54 +3552,6 @@ int kernel_getpeername(struct socket *sock, struct sockaddr *addr) } EXPORT_SYMBOL(kernel_getpeername); -/** - * kernel_sendpage - send a &page through a socket (kernel space) - * @sock: socket - * @page: page - * @offset: page offset - * @size: total size in bytes - * @flags: flags (MSG_DONTWAIT, ...) - * - * Returns the total amount sent in bytes or an error. - */ - -int kernel_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags) -{ - if (sock->ops->sendpage) { - /* Warn in case the improper page to zero-copy send */ - WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send"); - return sock->ops->sendpage(sock, page, offset, size, flags); - } - return sock_no_sendpage(sock, page, offset, size, flags); -} -EXPORT_SYMBOL(kernel_sendpage); - -/** - * kernel_sendpage_locked - send a &page through the locked sock (kernel space) - * @sk: sock - * @page: page - * @offset: page offset - * @size: total size in bytes - * @flags: flags (MSG_DONTWAIT, ...) - * - * Returns the total amount sent in bytes or an error. - * Caller must hold @sk. 
- */ - -int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, - size_t size, int flags) -{ - struct socket *sock = sk->sk_socket; - - if (sock->ops->sendpage_locked) - return sock->ops->sendpage_locked(sk, page, offset, size, - flags); - - return sock_no_sendpage_locked(sk, page, offset, size, flags); -} -EXPORT_SYMBOL(kernel_sendpage_locked); - /** * kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space) * @sock: socket diff --git a/net/tipc/socket.c b/net/tipc/socket.c index dd73d71c02a9..ef8e5139a873 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3375,7 +3375,6 @@ static const struct proto_ops msg_ops = { .sendmsg = tipc_sendmsg, .recvmsg = tipc_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage }; static const struct proto_ops packet_ops = { @@ -3396,7 +3395,6 @@ static const struct proto_ops packet_ops = { .sendmsg = tipc_send_packet, .recvmsg = tipc_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage }; static const struct proto_ops stream_ops = { @@ -3417,7 +3415,6 @@ static const struct proto_ops stream_ops = { .sendmsg = tipc_sendstream, .recvmsg = tipc_recvstream, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage }; static const struct net_proto_family tipc_family_ops = { diff --git a/net/tls/tls.h b/net/tls/tls.h index d002c3af1966..86cef1c68e03 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -98,10 +98,6 @@ void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); void tls_sw_splice_eof(struct socket *sock); -int tls_sw_sendpage_locked(struct sock *sk, struct page *page, - int offset, size_t size, int flags); -int tls_sw_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags); void tls_sw_cancel_work_tx(struct tls_context *tls_ctx); void tls_sw_release_resources_tx(struct sock *sk); void 
tls_sw_free_ctx_tx(struct tls_context *tls_ctx); @@ -117,8 +113,6 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); void tls_device_splice_eof(struct socket *sock); -int tls_device_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags); int tls_tx_records(struct sock *sk, int flags); void tls_sw_write_space(struct sock *sk, struct tls_context *ctx); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 975299d7213b..840ee06f1708 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -621,23 +621,6 @@ void tls_device_splice_eof(struct socket *sock) mutex_unlock(&tls_ctx->tx_lock); } -int tls_device_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags) -{ - struct bio_vec bvec; - struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, }; - - if (flags & MSG_SENDPAGE_NOTLAST) - msg.msg_flags |= MSG_MORE; - - if (flags & MSG_OOB) - return -EOPNOTSUPP; - - bvec_set_page(&bvec, page, size, offset); - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); - return tls_device_sendmsg(sk, &msg, size); -} - struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, u32 seq, u64 *p_record_sn) { diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 7b9c83dd7de2..d5ed4d47b16e 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -958,7 +958,6 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG] ops[TLS_SW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE]; ops[TLS_SW ][TLS_BASE].splice_eof = tls_sw_splice_eof; - ops[TLS_SW ][TLS_BASE].sendpage_locked = tls_sw_sendpage_locked; ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE]; ops[TLS_BASE][TLS_SW ].splice_read = tls_sw_splice_read; @@ -970,17 +969,14 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG] #ifdef CONFIG_TLS_DEVICE ops[TLS_HW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE]; - 
ops[TLS_HW ][TLS_BASE].sendpage_locked = NULL; ops[TLS_HW ][TLS_SW ] = ops[TLS_BASE][TLS_SW ]; - ops[TLS_HW ][TLS_SW ].sendpage_locked = NULL; ops[TLS_BASE][TLS_HW ] = ops[TLS_BASE][TLS_SW ]; ops[TLS_SW ][TLS_HW ] = ops[TLS_SW ][TLS_SW ]; ops[TLS_HW ][TLS_HW ] = ops[TLS_HW ][TLS_SW ]; - ops[TLS_HW ][TLS_HW ].sendpage_locked = NULL; #endif #ifdef CONFIG_TLS_TOE ops[TLS_HW_RECORD][TLS_HW_RECORD] = *base; @@ -1029,7 +1025,6 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg; prot[TLS_SW][TLS_BASE].splice_eof = tls_sw_splice_eof; - prot[TLS_SW][TLS_BASE].sendpage = tls_sw_sendpage; prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE]; prot[TLS_BASE][TLS_SW].recvmsg = tls_sw_recvmsg; @@ -1045,12 +1040,10 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE]; prot[TLS_HW][TLS_BASE].sendmsg = tls_device_sendmsg; prot[TLS_HW][TLS_BASE].splice_eof = tls_device_splice_eof; - prot[TLS_HW][TLS_BASE].sendpage = tls_device_sendpage; prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW]; prot[TLS_HW][TLS_SW].sendmsg = tls_device_sendmsg; prot[TLS_HW][TLS_SW].splice_eof = tls_device_splice_eof; - prot[TLS_HW][TLS_SW].sendpage = tls_device_sendpage; prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW]; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 319f61590d2c..9b3aa89a4292 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1281,41 +1281,6 @@ unlock: mutex_unlock(&tls_ctx->tx_lock); } -int tls_sw_sendpage_locked(struct sock *sk, struct page *page, - int offset, size_t size, int flags) -{ - struct bio_vec bvec; - struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, }; - - if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | - MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY | - MSG_NO_SHARED_FRAGS)) - return -EOPNOTSUPP; - if (flags & MSG_SENDPAGE_NOTLAST) - msg.msg_flags |= 
MSG_MORE; - - bvec_set_page(&bvec, page, size, offset); - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); - return tls_sw_sendmsg_locked(sk, &msg, size); -} - -int tls_sw_sendpage(struct sock *sk, struct page *page, - int offset, size_t size, int flags) -{ - struct bio_vec bvec; - struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, }; - - if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | - MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) - return -EOPNOTSUPP; - if (flags & MSG_SENDPAGE_NOTLAST) - msg.msg_flags |= MSG_MORE; - - bvec_set_page(&bvec, page, size, offset); - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); - return tls_sw_sendmsg(sk, &msg, size); -} - static int tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, bool released) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f9d196439b49..f2f234f0b92c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -758,8 +758,6 @@ static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); -static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset, - size_t size, int flags); static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, struct pipe_inode_info *, size_t size, unsigned int flags); @@ -852,7 +850,6 @@ static const struct proto_ops unix_stream_ops = { .recvmsg = unix_stream_recvmsg, .read_skb = unix_stream_read_skb, .mmap = sock_no_mmap, - .sendpage = unix_stream_sendpage, .splice_read = unix_stream_splice_read, .set_peek_off = unix_set_peek_off, .show_fdinfo = unix_show_fdinfo, @@ -878,7 +875,6 @@ static const struct proto_ops unix_dgram_ops = { .read_skb = unix_read_skb, .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, .set_peek_off = 
unix_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; @@ -902,7 +898,6 @@ static const struct proto_ops unix_seqpacket_ops = { .sendmsg = unix_seqpacket_sendmsg, .recvmsg = unix_seqpacket_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, .set_peek_off = unix_set_peek_off, .show_fdinfo = unix_show_fdinfo, }; @@ -2294,20 +2289,6 @@ out_err: return sent ? : err; } -static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, - int offset, size_t size, int flags) -{ - struct bio_vec bvec; - struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES }; - - if (flags & MSG_SENDPAGE_NOTLAST) - msg.msg_flags |= MSG_MORE; - - bvec_set_page(&bvec, page, size, offset); - iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); - return unix_stream_sendmsg(socket, &msg, size); -} - static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index efb8a0937a13..020cf17ab7e4 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1306,7 +1306,6 @@ static const struct proto_ops vsock_dgram_ops = { .sendmsg = vsock_dgram_sendmsg, .recvmsg = vsock_dgram_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, .read_skb = vsock_read_skb, }; @@ -2234,7 +2233,6 @@ static const struct proto_ops vsock_stream_ops = { .sendmsg = vsock_connectible_sendmsg, .recvmsg = vsock_connectible_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, .set_rcvlowat = vsock_set_rcvlowat, .read_skb = vsock_read_skb, }; @@ -2257,7 +2255,6 @@ static const struct proto_ops vsock_seqpacket_ops = { .sendmsg = vsock_connectible_sendmsg, .recvmsg = vsock_connectible_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, .read_skb = vsock_read_skb, }; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 5c7ad301d742..0fb5143bec7a 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1757,7 +1757,6 @@ static const struct proto_ops x25_proto_ops 
= { .sendmsg = x25_sendmsg, .recvmsg = x25_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, }; static struct packet_type x25_packet_type __read_mostly = { diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index cc1e7f15fa73..5a8c0dd250af 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -1389,7 +1389,6 @@ static const struct proto_ops xsk_proto_ops = { .sendmsg = xsk_sendmsg, .recvmsg = xsk_recvmsg, .mmap = xsk_mmap, - .sendpage = sock_no_sendpage, }; static void xsk_destruct(struct sock *sk) -- cgit v1.2.3 From d1b355438b8325a486f087e506d412c4e852f37b Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 23 Jun 2023 15:34:48 +0100 Subject: sfc: fix crash when reading stats while NIC is resetting efx_net_stats() (.ndo_get_stats64) can be called during an ethtool selftest, during which time nic_data->mc_stats is NULL as the NIC has been fini'd. In this case do not attempt to fetch the latest stats from the hardware, else we will crash on a NULL dereference: BUG: kernel NULL pointer dereference, address: 0000000000000038 RIP efx_nic_update_stats abridged calltrace: efx_ef10_update_stats_pf efx_net_stats dev_get_stats dev_seq_printf_stats Skipping the read is safe, we will simply give out stale stats. To ensure that the free in efx_ef10_fini_nic() does not race against efx_ef10_update_stats_pf(), which could cause a TOCTTOU bug, take the efx->stats_lock in fini_nic (it is already held across update_stats). Fixes: d3142c193dca ("sfc: refactor EF10 stats handling") Reviewed-by: Pieter Jansen van Vuuren Signed-off-by: Edward Cree Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/ef10.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index b63e47af6365..8c019f382a7f 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1297,8 +1297,10 @@ static void efx_ef10_fini_nic(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; + spin_lock_bh(&efx->stats_lock); kfree(nic_data->mc_stats); nic_data->mc_stats = NULL; + spin_unlock_bh(&efx->stats_lock); } static int efx_ef10_init_nic(struct efx_nic *efx) @@ -1852,9 +1854,14 @@ static size_t efx_ef10_update_stats_pf(struct efx_nic *efx, u64 *full_stats, efx_ef10_get_stat_mask(efx, mask); - efx_nic_copy_stats(efx, nic_data->mc_stats); - efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, - mask, stats, nic_data->mc_stats, false); + /* If NIC was fini'd (probably resetting), then we can't read + * updated stats right now. + */ + if (nic_data->mc_stats) { + efx_nic_copy_stats(efx, nic_data->mc_stats); + efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, + mask, stats, nic_data->mc_stats, false); + } /* Update derived statistics */ efx_nic_fix_nodesc_drop_stat(efx, -- cgit v1.2.3 From cf60ed469629927fe43c2f4b4ef28a563d991935 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 23 Jun 2023 19:38:04 +0100 Subject: sfc: use padding to fix alignment in loopback test Add two bytes of padding to the start of struct efx_loopback_payload, which are not sent on the wire. 
This ensures the 'ip' member is 4-byte aligned, preventing the following W=1 warning: net/ethernet/sfc/selftest.c:46:15: error: field ip within 'struct efx_loopback_payload' is less aligned than 'struct iphdr' and is usually due to 'struct efx_loopback_payload' being packed, which can lead to unaligned accesses [-Werror,-Wunaligned-access] struct iphdr ip; Reported-by: Arnd Bergmann Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/selftest.c | 47 ++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c index 3c5227afd497..96d856b9043c 100644 --- a/drivers/net/ethernet/sfc/selftest.c +++ b/drivers/net/ethernet/sfc/selftest.c @@ -42,12 +42,16 @@ * Falcon only performs RSS on TCP/UDP packets. */ struct efx_loopback_payload { + char pad[2]; /* Ensures ip is 4-byte aligned */ struct ethhdr header; struct iphdr ip; struct udphdr udp; __be16 iteration; char msg[64]; -} __packed; +} __packed __aligned(4); +#define EFX_LOOPBACK_PAYLOAD_LEN (sizeof(struct efx_loopback_payload) - \ + offsetof(struct efx_loopback_payload, \ + header)) /* Loopback test source MAC address */ static const u8 payload_source[ETH_ALEN] __aligned(2) = { @@ -282,7 +286,7 @@ void efx_loopback_rx_packet(struct efx_nic *efx, const char *buf_ptr, int pkt_len) { struct efx_loopback_state *state = efx->loopback_selftest; - struct efx_loopback_payload *received; + struct efx_loopback_payload received; struct efx_loopback_payload *payload; BUG_ON(!buf_ptr); @@ -293,13 +297,14 @@ void efx_loopback_rx_packet(struct efx_nic *efx, payload = &state->payload; - received = (struct efx_loopback_payload *) buf_ptr; - received->ip.saddr = payload->ip.saddr; + memcpy(&received.header, buf_ptr, + min_t(int, pkt_len, EFX_LOOPBACK_PAYLOAD_LEN)); + received.ip.saddr = payload->ip.saddr; if (state->offload_csum) - received->ip.check = 
payload->ip.check; + received.ip.check = payload->ip.check; /* Check that header exists */ - if (pkt_len < sizeof(received->header)) { + if (pkt_len < sizeof(received.header)) { netif_err(efx, drv, efx->net_dev, "saw runt RX packet (length %d) in %s loopback " "test\n", pkt_len, LOOPBACK_MODE(efx)); @@ -307,7 +312,7 @@ void efx_loopback_rx_packet(struct efx_nic *efx, } /* Check that the ethernet header exists */ - if (memcmp(&received->header, &payload->header, ETH_HLEN) != 0) { + if (memcmp(&received.header, &payload->header, ETH_HLEN) != 0) { netif_err(efx, drv, efx->net_dev, "saw non-loopback RX packet in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -315,16 +320,16 @@ void efx_loopback_rx_packet(struct efx_nic *efx, } /* Check packet length */ - if (pkt_len != sizeof(*payload)) { + if (pkt_len != EFX_LOOPBACK_PAYLOAD_LEN) { netif_err(efx, drv, efx->net_dev, "saw incorrect RX packet length %d (wanted %d) in " - "%s loopback test\n", pkt_len, (int)sizeof(*payload), - LOOPBACK_MODE(efx)); + "%s loopback test\n", pkt_len, + (int)EFX_LOOPBACK_PAYLOAD_LEN, LOOPBACK_MODE(efx)); goto err; } /* Check that IP header matches */ - if (memcmp(&received->ip, &payload->ip, sizeof(payload->ip)) != 0) { + if (memcmp(&received.ip, &payload->ip, sizeof(payload->ip)) != 0) { netif_err(efx, drv, efx->net_dev, "saw corrupted IP header in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -332,7 +337,7 @@ void efx_loopback_rx_packet(struct efx_nic *efx, } /* Check that msg and padding matches */ - if (memcmp(&received->msg, &payload->msg, sizeof(received->msg)) != 0) { + if (memcmp(&received.msg, &payload->msg, sizeof(received.msg)) != 0) { netif_err(efx, drv, efx->net_dev, "saw corrupted RX packet in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -340,10 +345,10 @@ void efx_loopback_rx_packet(struct efx_nic *efx, } /* Check that iteration matches */ - if (received->iteration != payload->iteration) { + if (received.iteration != payload->iteration) { netif_err(efx, drv, efx->net_dev, "saw RX 
packet from iteration %d (wanted %d) in " - "%s loopback test\n", ntohs(received->iteration), + "%s loopback test\n", ntohs(received.iteration), ntohs(payload->iteration), LOOPBACK_MODE(efx)); goto err; } @@ -363,7 +368,8 @@ void efx_loopback_rx_packet(struct efx_nic *efx, buf_ptr, pkt_len, 0); netif_err(efx, drv, efx->net_dev, "expected packet:\n"); print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1, - &state->payload, sizeof(state->payload), 0); + &state->payload.header, EFX_LOOPBACK_PAYLOAD_LEN, + 0); } #endif atomic_inc(&state->rx_bad); @@ -385,14 +391,15 @@ static void efx_iterate_state(struct efx_nic *efx) payload->ip.daddr = htonl(INADDR_LOOPBACK); payload->ip.ihl = 5; payload->ip.check = (__force __sum16) htons(0xdead); - payload->ip.tot_len = htons(sizeof(*payload) - sizeof(struct ethhdr)); + payload->ip.tot_len = htons(sizeof(*payload) - + offsetof(struct efx_loopback_payload, ip)); payload->ip.version = IPVERSION; payload->ip.protocol = IPPROTO_UDP; /* Initialise udp header */ payload->udp.source = 0; - payload->udp.len = htons(sizeof(*payload) - sizeof(struct ethhdr) - - sizeof(struct iphdr)); + payload->udp.len = htons(sizeof(*payload) - + offsetof(struct efx_loopback_payload, udp)); payload->udp.check = 0; /* checksum ignored */ /* Fill out payload */ @@ -418,7 +425,7 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue) for (i = 0; i < state->packet_count; i++) { /* Allocate an skb, holding an extra reference for * transmit completion counting */ - skb = alloc_skb(sizeof(state->payload), GFP_KERNEL); + skb = alloc_skb(EFX_LOOPBACK_PAYLOAD_LEN, GFP_KERNEL); if (!skb) return -ENOMEM; state->skbs[i] = skb; @@ -429,6 +436,8 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue) payload = skb_put(skb, sizeof(state->payload)); memcpy(payload, &state->payload, sizeof(state->payload)); payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2)); + /* Strip off the leading padding */ + skb_pull(skb, offsetof(struct efx_loopback_payload, 
header)); /* Ensure everything we've written is visible to the * interrupt handler. */ -- cgit v1.2.3 From 30c24dd87f3f4640ee3dc693230f343023227c1c Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 23 Jun 2023 19:38:05 +0100 Subject: sfc: siena: use padding to fix alignment in loopback test Add two bytes of padding to the start of struct efx_loopback_payload, which are not sent on the wire. This ensures the 'ip' member is 4-byte aligned, preventing the following W=1 warning: net/ethernet/sfc/siena/selftest.c:46:15: error: field ip within 'struct efx_loopback_payload' is less aligned than 'struct iphdr' and is usually due to 'struct efx_loopback_payload' being packed, which can lead to unaligned accesses [-Werror,-Wunaligned-access] struct iphdr ip; Reported-by: Arnd Bergmann Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/siena/selftest.c | 47 ++++++++++++++++++------------- 1 file changed, 28 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/siena/selftest.c b/drivers/net/ethernet/sfc/siena/selftest.c index 07715a3d6bea..111ac17194a5 100644 --- a/drivers/net/ethernet/sfc/siena/selftest.c +++ b/drivers/net/ethernet/sfc/siena/selftest.c @@ -42,12 +42,16 @@ * Falcon only performs RSS on TCP/UDP packets. 
*/ struct efx_loopback_payload { + char pad[2]; /* Ensures ip is 4-byte aligned */ struct ethhdr header; struct iphdr ip; struct udphdr udp; __be16 iteration; char msg[64]; -} __packed; +} __packed __aligned(4); +#define EFX_LOOPBACK_PAYLOAD_LEN (sizeof(struct efx_loopback_payload) - \ + offsetof(struct efx_loopback_payload, \ + header)) /* Loopback test source MAC address */ static const u8 payload_source[ETH_ALEN] __aligned(2) = { @@ -282,7 +286,7 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx, const char *buf_ptr, int pkt_len) { struct efx_loopback_state *state = efx->loopback_selftest; - struct efx_loopback_payload *received; + struct efx_loopback_payload received; struct efx_loopback_payload *payload; BUG_ON(!buf_ptr); @@ -293,13 +297,14 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx, payload = &state->payload; - received = (struct efx_loopback_payload *) buf_ptr; - received->ip.saddr = payload->ip.saddr; + memcpy(&received.header, buf_ptr, + min_t(int, pkt_len, EFX_LOOPBACK_PAYLOAD_LEN)); + received.ip.saddr = payload->ip.saddr; if (state->offload_csum) - received->ip.check = payload->ip.check; + received.ip.check = payload->ip.check; /* Check that header exists */ - if (pkt_len < sizeof(received->header)) { + if (pkt_len < sizeof(received.header)) { netif_err(efx, drv, efx->net_dev, "saw runt RX packet (length %d) in %s loopback " "test\n", pkt_len, LOOPBACK_MODE(efx)); @@ -307,7 +312,7 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx, } /* Check that the ethernet header exists */ - if (memcmp(&received->header, &payload->header, ETH_HLEN) != 0) { + if (memcmp(&received.header, &payload->header, ETH_HLEN) != 0) { netif_err(efx, drv, efx->net_dev, "saw non-loopback RX packet in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -315,16 +320,16 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx, } /* Check packet length */ - if (pkt_len != sizeof(*payload)) { + if (pkt_len != EFX_LOOPBACK_PAYLOAD_LEN) { netif_err(efx, drv, 
efx->net_dev, "saw incorrect RX packet length %d (wanted %d) in " - "%s loopback test\n", pkt_len, (int)sizeof(*payload), - LOOPBACK_MODE(efx)); + "%s loopback test\n", pkt_len, + (int)EFX_LOOPBACK_PAYLOAD_LEN, LOOPBACK_MODE(efx)); goto err; } /* Check that IP header matches */ - if (memcmp(&received->ip, &payload->ip, sizeof(payload->ip)) != 0) { + if (memcmp(&received.ip, &payload->ip, sizeof(payload->ip)) != 0) { netif_err(efx, drv, efx->net_dev, "saw corrupted IP header in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -332,7 +337,7 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx, } /* Check that msg and padding matches */ - if (memcmp(&received->msg, &payload->msg, sizeof(received->msg)) != 0) { + if (memcmp(&received.msg, &payload->msg, sizeof(received.msg)) != 0) { netif_err(efx, drv, efx->net_dev, "saw corrupted RX packet in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -340,10 +345,10 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx, } /* Check that iteration matches */ - if (received->iteration != payload->iteration) { + if (received.iteration != payload->iteration) { netif_err(efx, drv, efx->net_dev, "saw RX packet from iteration %d (wanted %d) in " - "%s loopback test\n", ntohs(received->iteration), + "%s loopback test\n", ntohs(received.iteration), ntohs(payload->iteration), LOOPBACK_MODE(efx)); goto err; } @@ -363,7 +368,8 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx, buf_ptr, pkt_len, 0); netif_err(efx, drv, efx->net_dev, "expected packet:\n"); print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1, - &state->payload, sizeof(state->payload), 0); + &state->payload.header, EFX_LOOPBACK_PAYLOAD_LEN, + 0); } #endif atomic_inc(&state->rx_bad); @@ -385,14 +391,15 @@ static void efx_iterate_state(struct efx_nic *efx) payload->ip.daddr = htonl(INADDR_LOOPBACK); payload->ip.ihl = 5; payload->ip.check = (__force __sum16) htons(0xdead); - payload->ip.tot_len = htons(sizeof(*payload) - sizeof(struct ethhdr)); + payload->ip.tot_len = 
htons(sizeof(*payload) - + offsetof(struct efx_loopback_payload, ip)); payload->ip.version = IPVERSION; payload->ip.protocol = IPPROTO_UDP; /* Initialise udp header */ payload->udp.source = 0; - payload->udp.len = htons(sizeof(*payload) - sizeof(struct ethhdr) - - sizeof(struct iphdr)); + payload->udp.len = htons(sizeof(*payload) - + offsetof(struct efx_loopback_payload, udp)); payload->udp.check = 0; /* checksum ignored */ /* Fill out payload */ @@ -418,7 +425,7 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue) for (i = 0; i < state->packet_count; i++) { /* Allocate an skb, holding an extra reference for * transmit completion counting */ - skb = alloc_skb(sizeof(state->payload), GFP_KERNEL); + skb = alloc_skb(EFX_LOOPBACK_PAYLOAD_LEN, GFP_KERNEL); if (!skb) return -ENOMEM; state->skbs[i] = skb; @@ -429,6 +436,8 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue) payload = skb_put(skb, sizeof(state->payload)); memcpy(payload, &state->payload, sizeof(state->payload)); payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2)); + /* Strip off the leading padding */ + skb_pull(skb, offsetof(struct efx_loopback_payload, header)); /* Ensure everything we've written is visible to the * interrupt handler. */ -- cgit v1.2.3 From 1186c6b31ee14fa1e83f5a94be0daa9bc99f9b30 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 23 Jun 2023 19:38:06 +0100 Subject: sfc: falcon: use padding to fix alignment in loopback test Add two bytes of padding to the start of struct ef4_loopback_payload, which are not sent on the wire. This ensures the 'ip' member is 4-byte aligned, preventing the following W=1 warning: net/ethernet/sfc/falcon/selftest.c:43:15: error: field ip within 'struct ef4_loopback_payload' is less aligned than 'struct iphdr' and is usually due to 'struct ef4_loopback_payload' being packed, which can lead to unaligned accesses [-Werror,-Wunaligned-access] struct iphdr ip; Reported-by: Arnd Bergmann Signed-off-by: Edward Cree Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/falcon/selftest.c | 47 ++++++++++++++++++------------ 1 file changed, 28 insertions(+), 19 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/falcon/selftest.c b/drivers/net/ethernet/sfc/falcon/selftest.c index 6a454ac6f876..9e5ce2a13787 100644 --- a/drivers/net/ethernet/sfc/falcon/selftest.c +++ b/drivers/net/ethernet/sfc/falcon/selftest.c @@ -39,12 +39,16 @@ * Falcon only performs RSS on TCP/UDP packets. */ struct ef4_loopback_payload { + char pad[2]; /* Ensures ip is 4-byte aligned */ struct ethhdr header; struct iphdr ip; struct udphdr udp; __be16 iteration; char msg[64]; -} __packed; +} __packed __aligned(4); +#define EF4_LOOPBACK_PAYLOAD_LEN (sizeof(struct ef4_loopback_payload) - \ + offsetof(struct ef4_loopback_payload, \ + header)) /* Loopback test source MAC address */ static const u8 payload_source[ETH_ALEN] __aligned(2) = { @@ -284,7 +288,7 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx, const char *buf_ptr, int pkt_len) { struct ef4_loopback_state *state = efx->loopback_selftest; - struct ef4_loopback_payload *received; + struct ef4_loopback_payload received; struct ef4_loopback_payload *payload; BUG_ON(!buf_ptr); @@ -295,13 +299,14 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx, payload = &state->payload; - received = (struct ef4_loopback_payload *) buf_ptr; - received->ip.saddr = payload->ip.saddr; + memcpy(&received.header, buf_ptr, + min_t(int, pkt_len, EF4_LOOPBACK_PAYLOAD_LEN)); + received.ip.saddr = payload->ip.saddr; if (state->offload_csum) - received->ip.check = payload->ip.check; + received.ip.check = payload->ip.check; /* Check that header exists */ - if (pkt_len < sizeof(received->header)) { + if (pkt_len < sizeof(received.header)) { netif_err(efx, drv, efx->net_dev, "saw runt RX packet (length %d) in %s loopback " "test\n", pkt_len, LOOPBACK_MODE(efx)); @@ -309,7 +314,7 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx, } /* Check that the ethernet header 
exists */ - if (memcmp(&received->header, &payload->header, ETH_HLEN) != 0) { + if (memcmp(&received.header, &payload->header, ETH_HLEN) != 0) { netif_err(efx, drv, efx->net_dev, "saw non-loopback RX packet in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -317,16 +322,16 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx, } /* Check packet length */ - if (pkt_len != sizeof(*payload)) { + if (pkt_len != EF4_LOOPBACK_PAYLOAD_LEN) { netif_err(efx, drv, efx->net_dev, "saw incorrect RX packet length %d (wanted %d) in " - "%s loopback test\n", pkt_len, (int)sizeof(*payload), - LOOPBACK_MODE(efx)); + "%s loopback test\n", pkt_len, + (int)EF4_LOOPBACK_PAYLOAD_LEN, LOOPBACK_MODE(efx)); goto err; } /* Check that IP header matches */ - if (memcmp(&received->ip, &payload->ip, sizeof(payload->ip)) != 0) { + if (memcmp(&received.ip, &payload->ip, sizeof(payload->ip)) != 0) { netif_err(efx, drv, efx->net_dev, "saw corrupted IP header in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -334,7 +339,7 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx, } /* Check that msg and padding matches */ - if (memcmp(&received->msg, &payload->msg, sizeof(received->msg)) != 0) { + if (memcmp(&received.msg, &payload->msg, sizeof(received.msg)) != 0) { netif_err(efx, drv, efx->net_dev, "saw corrupted RX packet in %s loopback test\n", LOOPBACK_MODE(efx)); @@ -342,10 +347,10 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx, } /* Check that iteration matches */ - if (received->iteration != payload->iteration) { + if (received.iteration != payload->iteration) { netif_err(efx, drv, efx->net_dev, "saw RX packet from iteration %d (wanted %d) in " - "%s loopback test\n", ntohs(received->iteration), + "%s loopback test\n", ntohs(received.iteration), ntohs(payload->iteration), LOOPBACK_MODE(efx)); goto err; } @@ -365,7 +370,8 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx, buf_ptr, pkt_len, 0); netif_err(efx, drv, efx->net_dev, "expected packet:\n"); print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 
1, - &state->payload, sizeof(state->payload), 0); + &state->payload.header, EF4_LOOPBACK_PAYLOAD_LEN, + 0); } #endif atomic_inc(&state->rx_bad); @@ -387,14 +393,15 @@ static void ef4_iterate_state(struct ef4_nic *efx) payload->ip.daddr = htonl(INADDR_LOOPBACK); payload->ip.ihl = 5; payload->ip.check = (__force __sum16) htons(0xdead); - payload->ip.tot_len = htons(sizeof(*payload) - sizeof(struct ethhdr)); + payload->ip.tot_len = htons(sizeof(*payload) - + offsetof(struct ef4_loopback_payload, ip)); payload->ip.version = IPVERSION; payload->ip.protocol = IPPROTO_UDP; /* Initialise udp header */ payload->udp.source = 0; - payload->udp.len = htons(sizeof(*payload) - sizeof(struct ethhdr) - - sizeof(struct iphdr)); + payload->udp.len = htons(sizeof(*payload) - + offsetof(struct ef4_loopback_payload, udp)); payload->udp.check = 0; /* checksum ignored */ /* Fill out payload */ @@ -420,7 +427,7 @@ static int ef4_begin_loopback(struct ef4_tx_queue *tx_queue) for (i = 0; i < state->packet_count; i++) { /* Allocate an skb, holding an extra reference for * transmit completion counting */ - skb = alloc_skb(sizeof(state->payload), GFP_KERNEL); + skb = alloc_skb(EF4_LOOPBACK_PAYLOAD_LEN, GFP_KERNEL); if (!skb) return -ENOMEM; state->skbs[i] = skb; @@ -431,6 +438,8 @@ static int ef4_begin_loopback(struct ef4_tx_queue *tx_queue) payload = skb_put(skb, sizeof(state->payload)); memcpy(payload, &state->payload, sizeof(state->payload)); payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2)); + /* Strip off the leading padding */ + skb_pull(skb, offsetof(struct ef4_loopback_payload, header)); /* Ensure everything we've written is visible to the * interrupt handler. */ -- cgit v1.2.3 From a5639fade0cfe5a45584f2770811034dab43baaa Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 25 May 2023 16:01:28 +0200 Subject: net/mlx5: Update the driver with the recent thermal changes The thermal framework is migrating to the generic trip points. 
The set of changes also implies a self-encapsulation of the thermal zone device structure where the internals are no longer directly accessible but with accessors. Use the new API instead, so the next changes can be pushed in the thermal framework without this driver failing to compile. No functional changes intended. Cc: Sandipan Patra Cc: Gal Pressman Cc: Saeed Mahameed Cc: Jakub Kicinski Signed-off-by: Daniel Lezcano Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230525140135.3589917-2-daniel.lezcano@linaro.org --- drivers/net/ethernet/mellanox/mlx5/core/thermal.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c index e47fa6fb836f..20bb5eb266c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c @@ -45,7 +45,7 @@ static int mlx5_thermal_get_mtmp_temp(struct mlx5_core_dev *mdev, u32 id, int *p static int mlx5_thermal_get_temp(struct thermal_zone_device *tzdev, int *p_temp) { - struct mlx5_thermal *thermal = tzdev->devdata; + struct mlx5_thermal *thermal = thermal_zone_device_priv(tzdev); struct mlx5_core_dev *mdev = thermal->mdev; int err; @@ -81,12 +81,13 @@ int mlx5_thermal_init(struct mlx5_core_dev *mdev) return -ENOMEM; thermal->mdev = mdev; - thermal->tzdev = thermal_zone_device_register(data, - MLX5_THERMAL_NUM_TRIPS, - MLX5_THERMAL_TRIP_MASK, - thermal, - &mlx5_thermal_ops, - NULL, 0, MLX5_THERMAL_POLL_INT_MSEC); + thermal->tzdev = thermal_zone_device_register_with_trips(data, + NULL, + MLX5_THERMAL_NUM_TRIPS, + MLX5_THERMAL_TRIP_MASK, + thermal, + &mlx5_thermal_ops, + NULL, 0, MLX5_THERMAL_POLL_INT_MSEC); if (IS_ERR(thermal->tzdev)) { dev_err(mdev->device, "Failed to register thermal zone device (%s) %ld\n", data, PTR_ERR(thermal->tzdev)); -- cgit v1.2.3 From 32d462a5c3e5b312e7dcd886e20e71b0c33abf10 Mon Sep 
17 00:00:00 2001 From: Julia Lawall Date: Tue, 27 Jun 2023 16:43:17 +0200 Subject: octeon_ep: use vmalloc_array and vcalloc Use vmalloc_array and vcalloc to protect against multiplication overflows. The changes were done using the following Coccinelle semantic patch: // @initialize:ocaml@ @@ let rename alloc = match alloc with "vmalloc" -> "vmalloc_array" | "vzalloc" -> "vcalloc" | _ -> failwith "unknown" @@ size_t e1,e2; constant C1, C2; expression E1, E2, COUNT, x1, x2, x3; typedef u8; typedef __u8; type t = {u8,__u8,char,unsigned char}; identifier alloc = {vmalloc,vzalloc}; fresh identifier realloc = script:ocaml(alloc) { rename alloc }; @@ ( alloc(x1*x2*x3) | alloc(C1 * C2) | alloc((sizeof(t)) * (COUNT), ...) | - alloc((e1) * (e2)) + realloc(e1, e2) | - alloc((e1) * (COUNT)) + realloc(COUNT, e1) | - alloc((E1) * (E2)) + realloc(E1, E2) ) // Signed-off-by: Julia Lawall Link: https://lore.kernel.org/r/20230627144339.144478-3-Julia.Lawall@inria.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep/octep_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c index 392d9b0da0d7..3c43f8078528 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_rx.c @@ -158,7 +158,7 @@ static int octep_setup_oq(struct octep_device *oct, int q_no) goto desc_dma_alloc_err; } - oq->buff_info = vzalloc(oq->max_count * OCTEP_OQ_RECVBUF_SIZE); + oq->buff_info = vcalloc(oq->max_count, OCTEP_OQ_RECVBUF_SIZE); if (unlikely(!oq->buff_info)) { dev_err(&oct->pdev->dev, "Failed to allocate buffer info for OQ-%d\n", q_no); -- cgit v1.2.3 From a13de901e8d590a7d26a6d4a1c4c7e9eebbb6ca6 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 27 Jun 2023 16:43:19 +0200 Subject: gve: use vmalloc_array and vcalloc Use vmalloc_array and vcalloc to protect against 
multiplication overflows. The changes were done using the following Coccinelle semantic patch: // @initialize:ocaml@ @@ let rename alloc = match alloc with "vmalloc" -> "vmalloc_array" | "vzalloc" -> "vcalloc" | _ -> failwith "unknown" @@ size_t e1,e2; constant C1, C2; expression E1, E2, COUNT, x1, x2, x3; typedef u8; typedef __u8; type t = {u8,__u8,char,unsigned char}; identifier alloc = {vmalloc,vzalloc}; fresh identifier realloc = script:ocaml(alloc) { rename alloc }; @@ ( alloc(x1*x2*x3) | alloc(C1 * C2) | alloc((sizeof(t)) * (COUNT), ...) | - alloc((e1) * (e2)) + realloc(e1, e2) | - alloc((e1) * (COUNT)) + realloc(COUNT, e1) | - alloc((E1) * (E2)) + realloc(E1, E2) ) // Signed-off-by: Julia Lawall Link: https://lore.kernel.org/r/20230627144339.144478-5-Julia.Lawall@inria.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c index 813da572abca..6957a865cff3 100644 --- a/drivers/net/ethernet/google/gve/gve_tx.c +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -248,7 +248,7 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) tx->mask = slots - 1; /* alloc metadata */ - tx->info = vzalloc(sizeof(*tx->info) * slots); + tx->info = vcalloc(slots, sizeof(*tx->info)); if (!tx->info) return -ENOMEM; -- cgit v1.2.3 From 906a76cc764541bdb7f602a01e3b4cdd93dea96a Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 27 Jun 2023 16:43:24 +0200 Subject: pds_core: use vmalloc_array and vcalloc Use vmalloc_array and vcalloc to protect against multiplication overflows. 
The changes were done using the following Coccinelle semantic patch: // @initialize:ocaml@ @@ let rename alloc = match alloc with "vmalloc" -> "vmalloc_array" | "vzalloc" -> "vcalloc" | _ -> failwith "unknown" @@ size_t e1,e2; constant C1, C2; expression E1, E2, COUNT, x1, x2, x3; typedef u8; typedef __u8; type t = {u8,__u8,char,unsigned char}; identifier alloc = {vmalloc,vzalloc}; fresh identifier realloc = script:ocaml(alloc) { rename alloc }; @@ ( alloc(x1*x2*x3) | alloc(C1 * C2) | alloc((sizeof(t)) * (COUNT), ...) | - alloc((e1) * (e2)) + realloc(e1, e2) | - alloc((e1) * (COUNT)) + realloc(COUNT, e1) | - alloc((E1) * (E2)) + realloc(E1, E2) ) // Signed-off-by: Julia Lawall Link: https://lore.kernel.org/r/20230627144339.144478-10-Julia.Lawall@inria.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 483a070d96fa..f2c79456d745 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -196,7 +196,7 @@ int pdsc_qcq_alloc(struct pdsc *pdsc, unsigned int type, unsigned int index, dma_addr_t q_base_pa; int err; - qcq->q.info = vzalloc(num_descs * sizeof(*qcq->q.info)); + qcq->q.info = vcalloc(num_descs, sizeof(*qcq->q.info)); if (!qcq->q.info) { err = -ENOMEM; goto err_out; @@ -219,7 +219,7 @@ int pdsc_qcq_alloc(struct pdsc *pdsc, unsigned int type, unsigned int index, if (err) goto err_out_free_q_info; - qcq->cq.info = vzalloc(num_descs * sizeof(*qcq->cq.info)); + qcq->cq.info = vcalloc(num_descs, sizeof(*qcq->cq.info)); if (!qcq->cq.info) { err = -ENOMEM; goto err_out_free_irq; -- cgit v1.2.3 From f712c8297e0a4dadc14ba2094c92e3e99a0ff871 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 27 Jun 2023 16:43:26 +0200 Subject: ionic: use vmalloc_array and vcalloc Use vmalloc_array and vcalloc to protect 
against multiplication overflows. The changes were done using the following Coccinelle semantic patch: // @initialize:ocaml@ @@ let rename alloc = match alloc with "vmalloc" -> "vmalloc_array" | "vzalloc" -> "vcalloc" | _ -> failwith "unknown" @@ size_t e1,e2; constant C1, C2; expression E1, E2, COUNT, x1, x2, x3; typedef u8; typedef __u8; type t = {u8,__u8,char,unsigned char}; identifier alloc = {vmalloc,vzalloc}; fresh identifier realloc = script:ocaml(alloc) { rename alloc }; @@ ( alloc(x1*x2*x3) | alloc(C1 * C2) | alloc((sizeof(t)) * (COUNT), ...) | - alloc((e1) * (e2)) + realloc(e1, e2) | - alloc((e1) * (COUNT)) + realloc(COUNT, e1) | - alloc((E1) * (E2)) + realloc(E1, E2) ) // Signed-off-by: Julia Lawall Link: https://lore.kernel.org/r/20230627144339.144478-12-Julia.Lawall@inria.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 6ccc1ea91992..7c20a44e549b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -561,7 +561,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, new->q.dev = dev; new->flags = flags; - new->q.info = vzalloc(num_descs * sizeof(*new->q.info)); + new->q.info = vcalloc(num_descs, sizeof(*new->q.info)); if (!new->q.info) { netdev_err(lif->netdev, "Cannot allocate queue info\n"); err = -ENOMEM; @@ -582,7 +582,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type, if (err) goto err_out; - new->cq.info = vzalloc(num_descs * sizeof(*new->cq.info)); + new->cq.info = vcalloc(num_descs, sizeof(*new->cq.info)); if (!new->cq.info) { netdev_err(lif->netdev, "Cannot allocate completion queue info\n"); err = -ENOMEM; -- cgit v1.2.3 From fa87c54693ae248db9ff867baa28b792db671b24 Mon Sep 17 00:00:00 2001 From: 
Julia Lawall Date: Tue, 27 Jun 2023 16:43:33 +0200 Subject: net: enetc: use vmalloc_array and vcalloc Use vmalloc_array and vcalloc to protect against multiplication overflows. The changes were done using the following Coccinelle semantic patch: // @initialize:ocaml@ @@ let rename alloc = match alloc with "vmalloc" -> "vmalloc_array" | "vzalloc" -> "vcalloc" | _ -> failwith "unknown" @@ size_t e1,e2; constant C1, C2; expression E1, E2, COUNT, x1, x2, x3; typedef u8; typedef __u8; type t = {u8,__u8,char,unsigned char}; identifier alloc = {vmalloc,vzalloc}; fresh identifier realloc = script:ocaml(alloc) { rename alloc }; @@ ( alloc(x1*x2*x3) | alloc(C1 * C2) | alloc((sizeof(t)) * (COUNT), ...) | - alloc((e1) * (e2)) + realloc(e1, e2) | - alloc((e1) * (COUNT)) + realloc(COUNT, e1) | - alloc((E1) * (E2)) + realloc(E1, E2) ) // Signed-off-by: Julia Lawall Link: https://lore.kernel.org/r/20230627144339.144478-19-Julia.Lawall@inria.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 164b73df9f6b..35461165de0d 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1789,7 +1789,7 @@ static int enetc_alloc_tx_resource(struct enetc_bdr_resource *res, res->bd_count = bd_count; res->bd_size = sizeof(union enetc_tx_bd); - res->tx_swbd = vzalloc(bd_count * sizeof(*res->tx_swbd)); + res->tx_swbd = vcalloc(bd_count, sizeof(*res->tx_swbd)); if (!res->tx_swbd) return -ENOMEM; @@ -1877,7 +1877,7 @@ static int enetc_alloc_rx_resource(struct enetc_bdr_resource *res, if (extended) res->bd_size *= 2; - res->rx_swbd = vzalloc(bd_count * sizeof(struct enetc_rx_swbd)); + res->rx_swbd = vcalloc(bd_count, sizeof(struct enetc_rx_swbd)); if (!res->rx_swbd) return -ENOMEM; -- cgit v1.2.3 From 
e9c74f8b8a31f77f8e9d7bbed5fc9f2eacbf32a5 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 27 Jun 2023 16:43:37 +0200 Subject: net: mana: use vmalloc_array and vcalloc Use vmalloc_array and vcalloc to protect against multiplication overflows. The changes were done using the following Coccinelle semantic patch: // @initialize:ocaml@ @@ let rename alloc = match alloc with "vmalloc" -> "vmalloc_array" | "vzalloc" -> "vcalloc" | _ -> failwith "unknown" @@ size_t e1,e2; constant C1, C2; expression E1, E2, COUNT, x1, x2, x3; typedef u8; typedef __u8; type t = {u8,__u8,char,unsigned char}; identifier alloc = {vmalloc,vzalloc}; fresh identifier realloc = script:ocaml(alloc) { rename alloc }; @@ ( alloc(x1*x2*x3) | alloc(C1 * C2) | alloc((sizeof(t)) * (COUNT), ...) | - alloc((e1) * (e2)) + realloc(e1, e2) | - alloc((e1) * (COUNT)) + realloc(COUNT, e1) | - alloc((E1) * (E2)) + realloc(E1, E2) ) // Signed-off-by: Julia Lawall Link: https://lore.kernel.org/r/20230627144339.144478-23-Julia.Lawall@inria.fr Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/mana/hw_channel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index 9d1507eba5b9..2bd1d74021f7 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -627,7 +627,7 @@ static int mana_hwc_establish_channel(struct gdma_context *gc, u16 *q_depth, if (WARN_ON(cq->id >= gc->max_num_cqs)) return -EPROTO; - gc->cq_table = vzalloc(gc->max_num_cqs * sizeof(struct gdma_queue *)); + gc->cq_table = vcalloc(gc->max_num_cqs, sizeof(struct gdma_queue *)); if (!gc->cq_table) return -ENOMEM; -- cgit v1.2.3 From 30ac666a2fccaa8c164199ea8844dc28aa714453 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Tue, 27 Jun 2023 03:54:32 +0000 Subject: net: lan743x: Simplify comparison Simplify comparison, no 
functional changes. Cc: Bryan Whitehead Cc: UNGLinuxDriver@microchip.com Suggested-by: Jakub Kicinski Signed-off-by: Moritz Fischer Link: https://lore.kernel.org/r/20230627035432.1296760-1-moritzf@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/lan743x_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index f1bded993edc..5b0e8b0e0c89 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -152,7 +152,7 @@ static int lan743x_csr_wait_for_bit(struct lan743x_adapter *adapter, u32 data; return readx_poll_timeout(LAN743X_CSR_READ_OP, offset, data, - target_value == ((data & bit_mask) ? 1 : 0), + target_value == !!(data & bit_mask), usleep_max, usleep_min * count); } -- cgit v1.2.3 From 7a8227b2e76be506b2ac64d2beac950ca04892a5 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Tue, 27 Jun 2023 03:50:00 +0000 Subject: net: lan743x: Don't sleep in atomic context dev_set_rx_mode() grabs a spin_lock, and the lan743x implementation proceeds subsequently to go to sleep using readx_poll_timeout(). Introduce a helper wrapping the readx_poll_timeout_atomic() function and use it to replace the calls to readx_poll_timeout().
Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Cc: stable@vger.kernel.org Cc: Bryan Whitehead Cc: UNGLinuxDriver@microchip.com Signed-off-by: Moritz Fischer Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230627035000.1295254-1-moritzf@google.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/microchip/lan743x_main.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 5b0e8b0e0c89..a36f6369f132 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -144,6 +144,18 @@ static int lan743x_csr_light_reset(struct lan743x_adapter *adapter) !(data & HW_CFG_LRST_), 100000, 10000000); } +static int lan743x_csr_wait_for_bit_atomic(struct lan743x_adapter *adapter, + int offset, u32 bit_mask, + int target_value, int udelay_min, + int udelay_max, int count) +{ + u32 data; + + return readx_poll_timeout_atomic(LAN743X_CSR_READ_OP, offset, data, + target_value == !!(data & bit_mask), + udelay_max, udelay_min * count); +} + static int lan743x_csr_wait_for_bit(struct lan743x_adapter *adapter, int offset, u32 bit_mask, int target_value, int usleep_min, @@ -736,8 +748,8 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter, u32 dp_sel; int i; - if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, - 1, 40, 100, 100)) + if (lan743x_csr_wait_for_bit_atomic(adapter, DP_SEL, DP_SEL_DPRDY_, + 1, 40, 100, 100)) return -EIO; dp_sel = lan743x_csr_read(adapter, DP_SEL); dp_sel &= ~DP_SEL_MASK_; @@ -748,8 +760,9 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter, lan743x_csr_write(adapter, DP_ADDR, addr + i); lan743x_csr_write(adapter, DP_DATA_0, buf[i]); lan743x_csr_write(adapter, DP_CMD, DP_CMD_WRITE_); - if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, - 1, 40, 100, 100)) + if 
(lan743x_csr_wait_for_bit_atomic(adapter, DP_SEL, + DP_SEL_DPRDY_, + 1, 40, 100, 100)) return -EIO; } -- cgit v1.2.3 From 4fd44b82b7aceaa35c2901c6546d2c4198e0799d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 27 Jun 2023 19:31:12 +0300 Subject: net: mscc: ocelot: don't report that RX timestamping is enabled by default PTP RX timestamping should be enabled when the user requests it, not by default. If it is enabled by default, it can be problematic when the ocelot driver is a DSA master, and it sidesteps what DSA tries to avoid through __dsa_master_hwtstamp_validate(). Additionally, after the change which made ocelot trap PTP packets only to the CPU at ocelot_hwtstamp_set() time, it is no longer even true that RX timestamping is enabled by default, because until ocelot_hwtstamp_set() is called, the PTP traps are actually not set up. So the rx_filter field of ocelot->hwtstamp_config reflects an incorrect reality. Fixes: 96ca08c05838 ("net: mscc: ocelot: set up traps for PTP packets") Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Signed-off-by: Vladimir Oltean Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mscc/ocelot_ptp.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c index 2180ae94c744..673bfd70867a 100644 --- a/drivers/net/ethernet/mscc/ocelot_ptp.c +++ b/drivers/net/ethernet/mscc/ocelot_ptp.c @@ -824,11 +824,6 @@ int ocelot_init_timestamp(struct ocelot *ocelot, ocelot_write(ocelot, PTP_CFG_MISC_PTP_EN, PTP_CFG_MISC); - /* There is no device reconfiguration, PTP Rx stamping is always - * enabled. 
- */ - ocelot->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - return 0; } EXPORT_SYMBOL(ocelot_init_timestamp); -- cgit v1.2.3 From 45d0fcb5bc9558d0bf3d2fa7fabc5d8a88d35439 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 27 Jun 2023 19:31:13 +0300 Subject: net: mscc: ocelot: don't keep PTP configuration of all ports in single structure In a future change, the driver will need to determine whether PTP RX timestamping is enabled on a port (including whether traps were set up on that port in particular) and that is currently not possible. The driver supports different RX filters (L2, L4) and kinds of TX timestamping (one-step, two-step) on its ports, but it saves all configuration in a single struct hwtstamp_config that is global to the switch. So, the latest timestamping configuration on one port (including a request to disable timestamping) affects what gets reported for all ports, even though the configuration itself is still individual to each port. The port timestamping configurations are only coupled because of the common structure, so replace the hwtstamp_config with a mask of trapped protocols saved per port. We also have the ptp_cmd to distinguish between one-step and two-step PTP timestamping, so with those 2 bits of information we can fully reconstruct a descriptive struct hwtstamp_config for each port, during the SIOCGHWTSTAMP ioctl. 
Fixes: 4e3b0468e6d7 ("net: mscc: PTP Hardware Clock (PHC) support") Fixes: 96ca08c05838 ("net: mscc: ocelot: set up traps for PTP packets") Signed-off-by: Vladimir Oltean Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mscc/ocelot.c | 1 - drivers/net/ethernet/mscc/ocelot_ptp.c | 61 +++++++++++++++++++++++----------- include/soc/mscc/ocelot.h | 10 ++++-- 3 files changed, 48 insertions(+), 24 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 1f5f00b30441..2fa833d041ba 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -2925,7 +2925,6 @@ int ocelot_init(struct ocelot *ocelot) } } - mutex_init(&ocelot->ptp_lock); mutex_init(&ocelot->mact_lock); mutex_init(&ocelot->fwd_domain_lock); mutex_init(&ocelot->tas_lock); diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c index 673bfd70867a..cb32234a5bf1 100644 --- a/drivers/net/ethernet/mscc/ocelot_ptp.c +++ b/drivers/net/ethernet/mscc/ocelot_ptp.c @@ -439,8 +439,12 @@ static int ocelot_ipv6_ptp_trap_del(struct ocelot *ocelot, int port) static int ocelot_setup_ptp_traps(struct ocelot *ocelot, int port, bool l2, bool l4) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; int err; + ocelot_port->trap_proto &= ~(OCELOT_PROTO_PTP_L2 | + OCELOT_PROTO_PTP_L4); + if (l2) err = ocelot_l2_ptp_trap_add(ocelot, port); else @@ -464,6 +468,11 @@ static int ocelot_setup_ptp_traps(struct ocelot *ocelot, int port, if (err) return err; + if (l2) + ocelot_port->trap_proto |= OCELOT_PROTO_PTP_L2; + if (l4) + ocelot_port->trap_proto |= OCELOT_PROTO_PTP_L4; + return 0; err_ipv6: @@ -474,10 +483,38 @@ err_ipv4: return err; } +static int ocelot_traps_to_ptp_rx_filter(unsigned int proto) +{ + if ((proto & OCELOT_PROTO_PTP_L2) && (proto & OCELOT_PROTO_PTP_L4)) + return HWTSTAMP_FILTER_PTP_V2_EVENT; + else if (proto & OCELOT_PROTO_PTP_L2) + return 
HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + else if (proto & OCELOT_PROTO_PTP_L4) + return HWTSTAMP_FILTER_PTP_V2_L4_EVENT; + + return HWTSTAMP_FILTER_NONE; +} + int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr) { - return copy_to_user(ifr->ifr_data, &ocelot->hwtstamp_config, - sizeof(ocelot->hwtstamp_config)) ? -EFAULT : 0; + struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct hwtstamp_config cfg = {}; + + switch (ocelot_port->ptp_cmd) { + case IFH_REW_OP_TWO_STEP_PTP: + cfg.tx_type = HWTSTAMP_TX_ON; + break; + case IFH_REW_OP_ORIGIN_PTP: + cfg.tx_type = HWTSTAMP_TX_ONESTEP_SYNC; + break; + default: + cfg.tx_type = HWTSTAMP_TX_OFF; + break; + } + + cfg.rx_filter = ocelot_traps_to_ptp_rx_filter(ocelot_port->trap_proto); + + return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; } EXPORT_SYMBOL(ocelot_hwstamp_get); @@ -509,8 +546,6 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) return -ERANGE; } - mutex_lock(&ocelot->ptp_lock); - switch (cfg.rx_filter) { case HWTSTAMP_FILTER_NONE: break; @@ -531,28 +566,14 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) l4 = true; break; default: - mutex_unlock(&ocelot->ptp_lock); return -ERANGE; } err = ocelot_setup_ptp_traps(ocelot, port, l2, l4); - if (err) { - mutex_unlock(&ocelot->ptp_lock); + if (err) return err; - } - - if (l2 && l4) - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; - else if (l2) - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; - else if (l4) - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; - else - cfg.rx_filter = HWTSTAMP_FILTER_NONE; - /* Commit back the result & save it */ - memcpy(&ocelot->hwtstamp_config, &cfg, sizeof(cfg)); - mutex_unlock(&ocelot->ptp_lock); + cfg.rx_filter = ocelot_traps_to_ptp_rx_filter(ocelot_port->trap_proto); return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? 
-EFAULT : 0; } diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index cb8fbb241879..22aae505c813 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -730,6 +730,11 @@ enum macaccess_entry_type { ENTRYTYPE_MACv6, }; +enum ocelot_proto { + OCELOT_PROTO_PTP_L2 = BIT(0), + OCELOT_PROTO_PTP_L4 = BIT(1), +}; + #define OCELOT_QUIRK_PCS_PERFORMS_RATE_ADAPTATION BIT(0) #define OCELOT_QUIRK_QSGMII_PORTS_MUST_BE_UP BIT(1) @@ -775,6 +780,8 @@ struct ocelot_port { unsigned int ptp_skbs_in_flight; struct sk_buff_head tx_skbs; + unsigned int trap_proto; + u16 mrp_ring_id; u8 ptp_cmd; @@ -868,12 +875,9 @@ struct ocelot { u8 mm_supported:1; struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_info; - struct hwtstamp_config hwtstamp_config; unsigned int ptp_skbs_in_flight; /* Protects the 2-step TX timestamp ID logic */ spinlock_t ts_id_lock; - /* Protects the PTP interface state */ - struct mutex ptp_lock; /* Protects the PTP clock */ spinlock_t ptp_clock_lock; struct ptp_pin_desc ptp_pins[OCELOT_PTP_PINS_NUM]; -- cgit v1.2.3 From 046f753da6143ee16452966915087ec8b0de3c70 Mon Sep 17 00:00:00 2001 From: Tobias Heider Date: Wed, 28 Jun 2023 02:13:32 +0200 Subject: Add MODULE_FIRMWARE() for FIRMWARE_TG357766. Fixes a bug where on the M1 mac mini initramfs-tools fails to include the necessary firmware into the initrd. Fixes: c4dab50697ff ("tg3: Download 57766 EEE service patch firmware") Signed-off-by: Tobias Heider Reviewed-by: Michael Chan Link: https://lore.kernel.org/r/ZJt7LKzjdz8+dClx@tobhe.de Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/tg3.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 5e68a6a4b2af..5ef073a79ce9 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -225,6 +225,7 @@ MODULE_AUTHOR("David S. 
Miller (davem@redhat.com) and Jeff Garzik (jgarzik@pobox MODULE_DESCRIPTION("Broadcom Tigon3 ethernet driver"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FIRMWARE_TG3); +MODULE_FIRMWARE(FIRMWARE_TG357766); MODULE_FIRMWARE(FIRMWARE_TG3TSO); MODULE_FIRMWARE(FIRMWARE_TG3TSO5); -- cgit v1.2.3 From 915057ae79692d47f9fb3504785855be49abaea4 Mon Sep 17 00:00:00 2001 From: Martin Habets Date: Wed, 28 Jun 2023 13:32:20 +0100 Subject: sfc: support for devlink port requires MAE access On systems without MAE permission efx->mae is not initialised, and trying to lookup an mport results in a NULL pointer dereference. Fixes: 25414b2a64ae ("sfc: add devlink port support for ef100") Signed-off-by: Martin Habets Reviewed-by: Jacob Keller Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/efx_devlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/sfc/efx_devlink.c b/drivers/net/ethernet/sfc/efx_devlink.c index b82dad50a5b1..3cd750820fdd 100644 --- a/drivers/net/ethernet/sfc/efx_devlink.c +++ b/drivers/net/ethernet/sfc/efx_devlink.c @@ -626,6 +626,9 @@ static struct devlink_port *ef100_set_devlink_port(struct efx_nic *efx, u32 idx) u32 id; int rc; + if (!efx->mae) + return NULL; + if (efx_mae_lookup_mport(efx, idx, &id)) { /* This should not happen. */ if (idx == MAE_MPORT_DESC_VF_IDX_NULL) -- cgit v1.2.3 From 48538ccb825b05544ec308a509e2cc9c013402db Mon Sep 17 00:00:00 2001 From: Nick Child Date: Wed, 28 Jun 2023 13:22:44 -0500 Subject: ibmvnic: Do not reset dql stats on NON_FATAL err All ibmvnic resets, make a call to netdev_tx_reset_queue() when re-opening the device. netdev_tx_reset_queue() resets the num_queued and num_completed byte counters. These stats are used in Byte Queue Limit (BQL) algorithms. The difference between these two stats tracks the number of bytes currently sitting on the physical NIC. 
ibmvnic increases the number of queued bytes through calls to netdev_tx_sent_queue() in the driver's xmit function. When VIOS reports that it is done transmitting bytes, the ibmvnic device increases the number of completed bytes through calls to netdev_tx_completed_queue(). It is important to note that the driver batches its transmit calls and num_queued is increased every time that an skb is added to the next batch, not necessarily when the batch is sent to VIOS for transmission. Unlike other reset types, a NON_FATAL reset will not flush the sub crq tx buffers. Therefore, it is possible for the batched skb array to be partially full. So if there is a call to netdev_tx_reset_queue() when re-opening the device, the value of num_queued (0) would not account for the skb's that are currently batched. Eventually, when the batch is sent to VIOS, the call to netdev_tx_completed_queue() would increase num_completed to a value greater than the num_queued. This causes a BUG_ON crash: ibmvnic 30000002: Firmware reports error, cause: adapter problem. Starting recovery... ibmvnic 30000002: tx error 600 ibmvnic 30000002: tx error 600 ibmvnic 30000002: tx error 600 ibmvnic 30000002: tx error 600 ------------[ cut here ]------------ kernel BUG at lib/dynamic_queue_limits.c:27! Oops: Exception in kernel mode, sig: 5 [....] NIP dql_completed+0x28/0x1c0 LR ibmvnic_complete_tx.isra.0+0x23c/0x420 [ibmvnic] Call Trace: ibmvnic_complete_tx.isra.0+0x3f8/0x420 [ibmvnic] (unreliable) ibmvnic_interrupt_tx+0x40/0x70 [ibmvnic] __handle_irq_event_percpu+0x98/0x270 ---[ end trace ]--- Therefore, do not reset the dql stats when performing a NON_FATAL reset.
Fixes: 0d973388185d ("ibmvnic: Introduce xmit_more support using batched subCRQ hcalls") Signed-off-by: Nick Child Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index c63d3ec9d328..763d613adbcc 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1816,7 +1816,14 @@ static int __ibmvnic_open(struct net_device *netdev) if (prev_state == VNIC_CLOSED) enable_irq(adapter->tx_scrq[i]->irq); enable_scrq_irq(adapter, adapter->tx_scrq[i]); - netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i)); + /* netdev_tx_reset_queue will reset dql stats. During NON_FATAL + * resets, don't reset the stats because there could be batched + * skb's waiting to be sent. If we reset dql stats, we risk + * num_completed being greater than num_queued. This will cause + * a BUG_ON in dql_completed(). + */ + if (adapter->reset_reason != VNIC_RESET_NON_FATAL) + netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i)); } rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP); -- cgit v1.2.3 From 08fc75735fda3be97194bfbf3c899c87abb3d0fe Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 30 Jun 2023 09:26:47 +0800 Subject: mlxsw: minimal: fix potential memory leak in mlxsw_m_linecards_init The line cards array is not freed in the error path of mlxsw_m_linecards_init(), which can lead to a memory leak. Fix by freeing the array in the error path, thereby making the error path identical to mlxsw_m_linecards_fini(). 
Fixes: 01328e23a476 ("mlxsw: minimal: Extend module to port mapping with slot index") Signed-off-by: Zhengchao Shao Reviewed-by: Petr Machata Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/20230630012647.1078002-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/minimal.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 6b56eadd736e..6b98c3287b49 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -417,6 +417,7 @@ static int mlxsw_m_linecards_init(struct mlxsw_m *mlxsw_m) err_kmalloc_array: for (i--; i >= 0; i--) kfree(mlxsw_m->line_cards[i]); + kfree(mlxsw_m->line_cards); err_kcalloc: kfree(mlxsw_m->ports); return err; -- cgit v1.2.3 From 4c5a331cacda995e995a7857f0e44e8937d98d2c Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 30 Jun 2023 11:58:42 +0530 Subject: octeontx2-af: cn10kb: fix interrupt csr addresses The current design is that, for asynchronous events like link_up and link_down firmware raises the interrupt to kernel. The previous patch which added RPM_USX driver has a bug where it uses old csr addresses for configuring interrupts. Which is resulting in losing interrupts from source firmware. This patch fixes the issue by correcting csr addresses. Fixes: b9d0fedc6234 ("octeontx2-af: cn10kb: Add RPM_USX MAC support") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Goutham Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 2 +- drivers/net/ethernet/marvell/octeontx2/af/rpm.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index de0d88dd10d6..a433f92c51ea 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -47,7 +47,7 @@ static struct mac_ops rpm2_mac_ops = { .int_set_reg = RPM2_CMRX_SW_INT_ENA_W1S, .irq_offset = 1, .int_ena_bit = BIT_ULL(0), - .lmac_fwi = RPM_LMAC_FWI, + .lmac_fwi = RPM2_LMAC_FWI, .non_contiguous_serdes_lane = true, .rx_stats_cnt = 43, .tx_stats_cnt = 34, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index 22147b4c2137..be294eebab26 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -94,7 +94,8 @@ /* CN10KB CSR Declaration */ #define RPM2_CMRX_SW_INT 0x1b0 -#define RPM2_CMRX_SW_INT_ENA_W1S 0x1b8 +#define RPM2_CMRX_SW_INT_ENA_W1S 0x1c8 +#define RPM2_LMAC_FWI 0x12 #define RPM2_CMR_CHAN_MSK_OR 0x3120 #define RPM2_CMR_RX_OVR_BP_EN BIT_ULL(2) #define RPM2_CMR_RX_OVR_BP_BP BIT_ULL(1) -- cgit v1.2.3 From 2e7bc57b976bb016c6569a54d95c1b8d88f9450a Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 30 Jun 2023 11:58:43 +0530 Subject: octeontx2-af: Fix mapping for NIX block from CGX connection Firmware configures NIX block mapping for all MAC blocks. The current implementation reads the configuration and creates the mapping between RVU PF and NIX blocks. But this configuration is only valid for silicons that support multiple blocks. For all other silicons, all MAC blocks map to NIX0. This patch corrects the mapping by adding a check for the same. 
Fixes: c5a73b632b90 ("octeontx2-af: Map NIX block from CGX connection") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Goutham Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 11 +++++++++++ drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index b5a7ee63508c..d4b8d4546de2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -23,6 +23,7 @@ #define PCI_DEVID_OCTEONTX2_LBK 0xA061 /* Subsystem Device ID */ +#define PCI_SUBSYS_DEVID_98XX 0xB100 #define PCI_SUBSYS_DEVID_96XX 0xB200 #define PCI_SUBSYS_DEVID_CN10K_A 0xB900 #define PCI_SUBSYS_DEVID_CNF10K_B 0xBC00 @@ -686,6 +687,16 @@ static inline u16 rvu_nix_chan_cpt(struct rvu *rvu, u8 chan) return rvu->hw->cpt_chan_base + chan; } +static inline bool is_rvu_supports_nix1(struct rvu *rvu) +{ + struct pci_dev *pdev = rvu->pdev; + + if (pdev->subsystem_device == PCI_SUBSYS_DEVID_98XX) + return true; + + return false; +} + /* Function Prototypes * RVU */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 83b342fa8d75..48611e603228 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -114,7 +114,7 @@ static void rvu_map_cgx_nix_block(struct rvu *rvu, int pf, p2x = cgx_lmac_get_p2x(cgx_id, lmac_id); /* Firmware sets P2X_SELECT as either NIX0 or NIX1 */ pfvf->nix_blkaddr = BLKADDR_NIX0; - if (p2x == CMR_P2X_SEL_NIX1) + if (is_rvu_supports_nix1(rvu) && p2x == CMR_P2X_SEL_NIX1) pfvf->nix_blkaddr = BLKADDR_NIX1; } -- cgit v1.2.3 From 79ebb53772c95d3a6ae51b3c65f9985fdd430df6 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 30 Jun 2023 11:58:44 
+0530 Subject: octeontx2-af: Add validation before accessing cgx and lmac With the addition of new MAC blocks like CN10K RPM and CN10KB RPM_USX, LMACs are noncontiguous and CGX blocks are also noncontiguous. But during RVU driver initialization, the driver assumes they are contiguous and tries to access cgx or lmac by their id, which results in a kernel panic. This patch fixes the issue by adding proper checks. [ 23.219150] pc : cgx_lmac_read+0x38/0x70 [ 23.219154] lr : rvu_program_channels+0x3f0/0x498 [ 23.223852] sp : ffff000100d6fc80 [ 23.227158] x29: ffff000100d6fc80 x28: ffff00010009f880 x27: 000000000000005a [ 23.234288] x26: ffff000102586768 x25: 0000000000002500 x24: fffffffffff0f000 Fixes: 91c6945ea1f9 ("octeontx2-af: cn10k: Add RPM MAC support") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Goutham Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index bd77152bb8d7..f4bdca662d61 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -169,6 +169,9 @@ void cgx_lmac_write(int cgx_id, int lmac_id, u64 offset, u64 val) { struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + /* Software must not access disabled LMAC registers */ + if (!is_lmac_valid(cgx_dev, lmac_id)) + return; cgx_write(cgx_dev, lmac_id, offset, val); } @@ -176,6 +179,10 @@ u64 cgx_lmac_read(int cgx_id, int lmac_id, u64 offset) { struct cgx *cgx_dev = cgx_get_pdata(cgx_id); + /* Software must not access disabled LMAC registers */ + if (!is_lmac_valid(cgx_dev, lmac_id)) + return 0; + return cgx_read(cgx_dev, lmac_id, offset); } -- cgit v1.2.3 From 2e3e94c2f5dc98a8a0e93850407064bc5389c306 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 30 Jun 2023 11:58:45 +0530
Subject: octeontx2-af: Reset MAC features in FLR AF driver configures MAC features like internal loopback and PFC upon receiving the request from PF and its VF netdev. But these features are not getting reset in FLR. This patch fixes the issue by resetting the same. Fixes: 23999b30ae67 ("octeontx2-af: Enable or disable CGX internal loopback") Fixes: 1121f6b02e7a ("octeontx2-af: Priority flow control configuration support") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 26 ++++++++++++++++--- drivers/net/ethernet/marvell/octeontx2/af/cgx.h | 2 ++ .../ethernet/marvell/octeontx2/af/lmac_common.h | 3 +++ drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 30 +++++++++++++++++++--- drivers/net/ethernet/marvell/octeontx2/af/rpm.h | 2 ++ drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 1 + drivers/net/ethernet/marvell/octeontx2/af/rvu.h | 1 + .../net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 18 +++++++++++++ 8 files changed, 77 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index f4bdca662d61..592037f4e55b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -537,14 +537,15 @@ static u32 cgx_get_lmac_fifo_len(void *cgxd, int lmac_id) int cgx_lmac_internal_loopback(void *cgxd, int lmac_id, bool enable) { struct cgx *cgx = cgxd; - u8 lmac_type; + struct lmac *lmac; u64 cfg; if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; - lmac_type = cgx->mac_ops->get_lmac_type(cgx, lmac_id); - if (lmac_type == LMAC_MODE_SGMII || lmac_type == LMAC_MODE_QSGMII) { + lmac = lmac_pdata(lmac_id, cgx); + if (lmac->lmac_type == LMAC_MODE_SGMII || + lmac->lmac_type == LMAC_MODE_QSGMII) { cfg = cgx_read(cgx, lmac_id, CGXX_GMP_PCS_MRX_CTL); if (enable) cfg |= CGXX_GMP_PCS_MRX_CTL_LBK; @@ -1563,6 +1564,23 @@ 
int cgx_lmac_linkup_start(void *cgxd) return 0; } +int cgx_lmac_reset(void *cgxd, int lmac_id, u8 pf_req_flr) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + + /* Resetting PFC related CSRs */ + cfg = 0xff; + cgx_write(cgxd, lmac_id, CGXX_CMRX_RX_LOGL_XON, cfg); + + if (pf_req_flr) + cgx_lmac_internal_loopback(cgxd, lmac_id, false); + return 0; +} + static int cgx_configure_interrupt(struct cgx *cgx, struct lmac *lmac, int cnt, bool req_free) { @@ -1682,6 +1700,7 @@ static int cgx_lmac_init(struct cgx *cgx) cgx->lmac_idmap[lmac->lmac_id] = lmac; set_bit(lmac->lmac_id, &cgx->lmac_bmap); cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true); + lmac->lmac_type = cgx->mac_ops->get_lmac_type(cgx, lmac->lmac_id); } return cgx_lmac_verify_fwi_version(cgx); @@ -1778,6 +1797,7 @@ static struct mac_ops cgx_mac_ops = { .mac_tx_enable = cgx_lmac_tx_enable, .pfc_config = cgx_lmac_pfc_config, .mac_get_pfc_frm_cfg = cgx_lmac_get_pfc_frm_cfg, + .mac_reset = cgx_lmac_reset, }; static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index 5a20d93004c7..574114179688 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -35,6 +35,7 @@ #define CGXX_CMRX_INT_ENA_W1S 0x058 #define CGXX_CMRX_RX_ID_MAP 0x060 #define CGXX_CMRX_RX_STAT0 0x070 +#define CGXX_CMRX_RX_LOGL_XON 0x100 #define CGXX_CMRX_RX_LMACS 0x128 #define CGXX_CMRX_RX_DMAC_CTL0 (0x1F8 + mac_ops->csr_offset) #define CGX_DMAC_CTL0_CAM_ENABLE BIT_ULL(3) @@ -181,4 +182,5 @@ int cgx_lmac_get_pfc_frm_cfg(void *cgxd, int lmac_id, u8 *tx_pause, u8 *rx_pause); int verify_lmac_fc_cfg(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause, int pfvf_idx); +int cgx_lmac_reset(void *cgxd, int lmac_id, u8 pf_req_flr); #endif /* CGX_H */ diff --git 
a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index 39aaf0e4467d..0b4cba03f2e8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -24,6 +24,7 @@ * @cgx: parent cgx port * @mcast_filters_count: Number of multicast filters installed * @lmac_id: lmac port id + * @lmac_type: lmac type like SGMII/XAUI * @cmd_pend: flag set before new command is started * flag cleared after command response is received * @name: lmac port name @@ -43,6 +44,7 @@ struct lmac { struct cgx *cgx; u8 mcast_filters_count; u8 lmac_id; + u8 lmac_type; bool cmd_pend; char *name; }; @@ -125,6 +127,7 @@ struct mac_ops { int (*mac_get_pfc_frm_cfg)(void *cgxd, int lmac_id, u8 *tx_pause, u8 *rx_pause); + int (*mac_reset)(void *cgxd, int lmac_id, u8 pf_req_flr); /* FEC stats */ int (*get_fec_stats)(void *cgxd, int lmac_id, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index a433f92c51ea..b4fcb20c3f4f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -37,6 +37,7 @@ static struct mac_ops rpm_mac_ops = { .mac_tx_enable = rpm_lmac_tx_enable, .pfc_config = rpm_lmac_pfc_config, .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, + .mac_reset = rpm_lmac_reset, }; static struct mac_ops rpm2_mac_ops = { @@ -68,6 +69,7 @@ static struct mac_ops rpm2_mac_ops = { .mac_tx_enable = rpm_lmac_tx_enable, .pfc_config = rpm_lmac_pfc_config, .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, + .mac_reset = rpm_lmac_reset, }; bool is_dev_rpm2(void *rpmd) @@ -537,14 +539,15 @@ u32 rpm2_get_lmac_fifo_len(void *rpmd, int lmac_id) int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable) { rpm_t *rpm = rpmd; - u8 lmac_type; + struct lmac *lmac; u64 cfg; if (!is_lmac_valid(rpm, lmac_id)) return -ENODEV; - lmac_type = 
rpm->mac_ops->get_lmac_type(rpm, lmac_id); - if (lmac_type == LMAC_MODE_QSGMII || lmac_type == LMAC_MODE_SGMII) { + lmac = lmac_pdata(lmac_id, rpm); + if (lmac->lmac_type == LMAC_MODE_QSGMII || + lmac->lmac_type == LMAC_MODE_SGMII) { dev_err(&rpm->pdev->dev, "loopback not supported for LPC mode\n"); return 0; } @@ -713,3 +716,24 @@ int rpm_get_fec_stats(void *rpmd, int lmac_id, struct cgx_fec_stats_rsp *rsp) return 0; } + +int rpm_lmac_reset(void *rpmd, int lmac_id, u8 pf_req_flr) +{ + u64 rx_logl_xon, cfg; + rpm_t *rpm = rpmd; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + /* Resetting PFC related CSRs */ + rx_logl_xon = is_dev_rpm2(rpm) ? RPM2_CMRX_RX_LOGL_XON : + RPMX_CMRX_RX_LOGL_XON; + cfg = 0xff; + + rpm_write(rpm, lmac_id, rx_logl_xon, cfg); + + if (pf_req_flr) + rpm_lmac_internal_loopback(rpm, lmac_id, false); + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index be294eebab26..b79cfbc6f877 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -74,6 +74,7 @@ #define RPMX_MTI_MAC100X_CL01_PAUSE_QUANTA 0x80A8 #define RPMX_MTI_MAC100X_CL89_PAUSE_QUANTA 0x8108 #define RPM_DEFAULT_PAUSE_TIME 0x7FF +#define RPMX_CMRX_RX_LOGL_XON 0x4100 #define RPMX_MTI_MAC100X_XIF_MODE 0x8100 #define RPMX_ONESTEP_ENABLE BIT_ULL(5) @@ -132,4 +133,5 @@ int rpm_lmac_get_pfc_frm_cfg(void *rpmd, int lmac_id, u8 *tx_pause, int rpm2_get_nr_lmacs(void *rpmd); bool is_dev_rpm2(void *rpmd); int rpm_get_fec_stats(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp *rsp); +int rpm_lmac_reset(void *rpmd, int lmac_id, u8 pf_req_flr); #endif /* RPM_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 0069e60afa3b..8dbc35c481f6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2629,6 +2629,7 @@ static void 
__rvu_flr_handler(struct rvu *rvu, u16 pcifunc) * Since LF is detached use LF number as -1. */ rvu_npc_free_mcam_entries(rvu, pcifunc, -1); + rvu_mac_reset(rvu, pcifunc); mutex_unlock(&rvu->flr_lock); } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index d4b8d4546de2..e8e65fd7888d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -895,6 +895,7 @@ int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable); int rvu_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause, u16 pfc_en); int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause); +void rvu_mac_reset(struct rvu *rvu, u16 pcifunc); u32 rvu_cgx_get_lmac_fifolen(struct rvu *rvu, int cgx, int lmac); int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, int type); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 48611e603228..4b8559ac0404 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -1250,3 +1250,21 @@ int rvu_mbox_handler_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, mac_ops->mac_get_pfc_frm_cfg(cgxd, lmac_id, &rsp->tx_pause, &rsp->rx_pause); return err; } + +void rvu_mac_reset(struct rvu *rvu, u16 pcifunc) +{ + int pf = rvu_get_pf(pcifunc); + struct mac_ops *mac_ops; + struct cgx *cgxd; + u8 cgx, lmac; + + if (!is_pf_cgxmapped(rvu, pf)) + return; + + rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx, &lmac); + cgxd = rvu_cgx_pdata(cgx, rvu); + mac_ops = get_mac_ops(cgxd); + + if (mac_ops->mac_reset(cgxd, lmac, !is_vf(pcifunc))) + dev_err(rvu->dev, "Failed to reset MAC\n"); +} -- cgit v1.2.3 From 90a8007bbeb616e3ea57e2696190e57aa0329531 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 3 Jul 2023 18:24:52 +0300 Subject: mlxsw: spectrum_router: Fix an 
IS_ERR() vs NULL check The mlxsw_sp_crif_alloc() function returns NULL on error. It doesn't return error pointers. Fix the check. Fixes: 78126cfd5dc9 ("mlxsw: spectrum_router: Maintain CRIF for fallback loopback RIF") Signed-off-by: Dan Carpenter Reviewed-by: Alexander Lobakin Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 445ba7fe3c40..b32adf277a22 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -10794,8 +10794,8 @@ static int mlxsw_sp_lb_rif_init(struct mlxsw_sp *mlxsw_sp, int err; router->lb_crif = mlxsw_sp_crif_alloc(NULL); - if (IS_ERR(router->lb_crif)) - return PTR_ERR(router->lb_crif); + if (!router->lb_crif) + return -ENOMEM; /* Create a generic loopback RIF associated with the main table * (default VRF). Any table can be used, but the main table exists -- cgit v1.2.3 From 14bb236b29922c4f57d8c05bfdbcb82677f917c9 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Tue, 4 Jul 2023 09:56:53 +0530 Subject: octeontx-af: fix hardware timestamp configuration MAC block on CN10K (RPM) supports hardware timestamp configuration. The previous patch which added timestamp configuration support has a bug. Though the netdev driver requests to disable timestamp configuration, the driver is always enabling it. This patch fixes the same. Fixes: d1489208681d ("octeontx2-af: cn10k: RPM hardware timestamp configuration") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Goutham Reviewed-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 4b8559ac0404..095b2cc4a699 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -763,7 +763,7 @@ static int rvu_cgx_ptp_rx_cfg(struct rvu *rvu, u16 pcifunc, bool enable) cgxd = rvu_cgx_pdata(cgx_id, rvu); mac_ops = get_mac_ops(cgxd); - mac_ops->mac_enadis_ptp_config(cgxd, lmac_id, true); + mac_ops->mac_enadis_ptp_config(cgxd, lmac_id, enable); /* If PTP is enabled then inform NPC that packets to be * parsed by this PF will have their data shifted by 8 bytes * and if PTP is disabled then no shift is required -- cgit v1.2.3 From 5f16da6ee6ac32e6c8098bc4cfcc4f170694f9da Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Fri, 9 Jun 2023 17:40:23 -0700 Subject: ice: Fix max_rate check while configuring TX rate limits Remove incorrect check in ice_validate_mqprio_opt() that limits filter configuration when sum of max_rates of all TCs exceeds the link speed. The max rate of each TC is unrelated to value used by other TCs and is valid as long as it is less than link speed. 
Fixes: fbc7b27af0f9 ("ice: enable ndo_setup_tc support for mqprio_qdisc") Signed-off-by: Sridhar Samudrala Signed-off-by: Sudheer Mogilappagari Tested-by: Bharathi Sreenivas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 93979ab18bc1..64efe4c83a3e 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7872,10 +7872,10 @@ static int ice_validate_mqprio_qopt(struct ice_vsi *vsi, struct tc_mqprio_qopt_offload *mqprio_qopt) { - u64 sum_max_rate = 0, sum_min_rate = 0; int non_power_of_2_qcount = 0; struct ice_pf *pf = vsi->back; int max_rss_q_cnt = 0; + u64 sum_min_rate = 0; struct device *dev; int i, speed; u8 num_tc; @@ -7891,6 +7891,7 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, dev = ice_pf_to_dev(pf); vsi->ch_rss_size = 0; num_tc = mqprio_qopt->qopt.num_tc; + speed = ice_get_link_speed_kbps(vsi); for (i = 0; num_tc; i++) { int qcount = mqprio_qopt->qopt.count[i]; @@ -7931,7 +7932,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, */ max_rate = mqprio_qopt->max_rate[i]; max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR); - sum_max_rate += max_rate; /* min_rate is minimum guaranteed rate and it can't be zero */ min_rate = mqprio_qopt->min_rate[i]; @@ -7944,6 +7944,12 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, return -EINVAL; } + if (max_rate && max_rate > speed) { + dev_err(dev, "TC%d: max_rate(%llu Kbps) > link speed of %u Kbps\n", + i, max_rate, speed); + return -EINVAL; + } + iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem); if (rem) { dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps", @@ -7981,12 +7987,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) return -EINVAL; - speed = 
ice_get_link_speed_kbps(vsi); - if (sum_max_rate && sum_max_rate > (u64)speed) { - dev_err(dev, "Invalid max Tx rate(%llu) Kbps > speed(%u) Kbps specified\n", - sum_max_rate, speed); - return -EINVAL; - } if (sum_min_rate && sum_min_rate > (u64)speed) { dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n", sum_min_rate, speed); -- cgit v1.2.3 From 479cdfe388a04a16fdd127f3e9e9e019e45e5573 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Fri, 9 Jun 2023 17:40:24 -0700 Subject: ice: Fix tx queue rate limit when TCs are configured Configuring tx_maxrate via sysfs interface /sys/class/net/eth0/queues/tx-1/tx_maxrate was not working when TCs are configured because always main VSI was being used. Fix by using correct VSI in ice_set_tx_maxrate when TCs are configured. Fixes: 1ddef455f4a8 ("ice: Add NDO callback to set the maximum per-queue bitrate") Signed-off-by: Sridhar Samudrala Signed-off-by: Sudheer Mogilappagari Tested-by: Bharathi Sreenivas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 7 +++++++ drivers/net/ethernet/intel/ice/ice_tc_lib.c | 22 +++++++++++----------- drivers/net/ethernet/intel/ice/ice_tc_lib.h | 1 + 3 files changed, 19 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 64efe4c83a3e..19a5e7f3a075 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5739,6 +5739,13 @@ ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate) q_handle = vsi->tx_rings[queue_index]->q_handle; tc = ice_dcb_get_tc(vsi, queue_index); + vsi = ice_locate_vsi_using_queue(vsi, queue_index); + if (!vsi) { + netdev_err(netdev, "Invalid VSI for given queue %d\n", + queue_index); + return -EINVAL; + } + /* Set BW back to default, when user set maxrate to 0 */ if (!maxrate) status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc, 
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index b54052ef6050..4a34ef5f58d3 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -750,17 +750,16 @@ exit: /** * ice_locate_vsi_using_queue - locate VSI using queue (forward to queue action) * @vsi: Pointer to VSI - * @tc_fltr: Pointer to tc_flower_filter + * @queue: Queue index * - * Locate the VSI using specified queue. When ADQ is not enabled, always - * return input VSI, otherwise locate corresponding VSI based on per channel - * offset and qcount + * Locate the VSI using specified "queue". When ADQ is not enabled, + * always return input VSI, otherwise locate corresponding + * VSI based on per channel "offset" and "qcount" */ -static struct ice_vsi * -ice_locate_vsi_using_queue(struct ice_vsi *vsi, - struct ice_tc_flower_fltr *tc_fltr) +struct ice_vsi * +ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue) { - int num_tc, tc, queue; + int num_tc, tc; /* if ADQ is not active, passed VSI is the candidate VSI */ if (!ice_is_adq_active(vsi->back)) @@ -770,7 +769,6 @@ ice_locate_vsi_using_queue(struct ice_vsi *vsi, * upon queue number) */ num_tc = vsi->mqprio_qopt.qopt.num_tc; - queue = tc_fltr->action.fwd.q.queue; for (tc = 0; tc < num_tc; tc++) { int qcount = vsi->mqprio_qopt.qopt.count[tc]; @@ -812,6 +810,7 @@ ice_tc_forward_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr) struct ice_pf *pf = vsi->back; struct device *dev; u32 tc_class; + int q; dev = ice_pf_to_dev(pf); @@ -840,7 +839,8 @@ ice_tc_forward_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr) /* Determine destination VSI even though the action is * FWD_TO_QUEUE, because QUEUE is associated with VSI */ - dest_vsi = tc_fltr->dest_vsi; + q = tc_fltr->action.fwd.q.queue; + dest_vsi = ice_locate_vsi_using_queue(vsi, q); break; default: dev_err(dev, @@ -1716,7 +1716,7 @@ ice_tc_forward_to_queue(struct ice_vsi 
*vsi, struct ice_tc_flower_fltr *fltr, /* If ADQ is configured, and the queue belongs to ADQ VSI, then prepare * ADQ switch filter */ - ch_vsi = ice_locate_vsi_using_queue(vsi, fltr); + ch_vsi = ice_locate_vsi_using_queue(vsi, fltr->action.fwd.q.queue); if (!ch_vsi) return -EINVAL; fltr->dest_vsi = ch_vsi; diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index 8bbc1a62bdb1..65d387163a46 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -204,6 +204,7 @@ static inline int ice_chnl_dmac_fltr_cnt(struct ice_pf *pf) return pf->num_dmac_chnl_fltrs; } +struct ice_vsi *ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue); int ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, struct flow_cls_offload *cls_flower); -- cgit v1.2.3 From ed89b74d2dc920cb61d3094e0e97ec8775b13086 Mon Sep 17 00:00:00 2001 From: Muhammad Husaini Zulkifli Date: Mon, 15 May 2023 14:03:36 +0800 Subject: igc: Add condition for qbv_config_change_errors counter Add condition to increase the qbv counter during taprio qbv configuration only. There might be a case when TC already been setup then user configure the ETF/CBS qdisc and this counter will increase if no condition above. 
Fixes: ae4fe4698300 ("igc: Add qbv_config_change_errors counter") Signed-off-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_main.c | 2 ++ drivers/net/ethernet/intel/igc/igc_tsn.c | 1 + 3 files changed, 4 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 00a5ee487812..aa5ceab0d371 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -184,6 +184,7 @@ struct igc_adapter { u32 max_frame_size; u32 min_frame_size; + int tc_setup_type; ktime_t base_time; ktime_t cycle_time; bool qbv_enable; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 019ce91c45aa..b90f94511005 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6327,6 +6327,8 @@ static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, { struct igc_adapter *adapter = netdev_priv(dev); + adapter->tc_setup_type = type; + switch (type) { case TC_QUERY_CAPS: return igc_tc_query_caps(adapter, type_data); diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 94a2b0dfb54d..6b299b83e7ef 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -249,6 +249,7 @@ skip_cbs: * Gate Control List (GCL) is running. */ if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && + (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) && tsn_mode_reconfig) adapter->qbv_config_change_errors++; } else { -- cgit v1.2.3 From cca28ceac7c7857bc2d313777017585aef00bcc4 Mon Sep 17 00:00:00 2001 From: Muhammad Husaini Zulkifli Date: Wed, 17 May 2023 08:18:12 +0800 Subject: igc: Remove delay during TX ring configuration Remove unnecessary delay during the TX ring configuration. 
This will cause delay, especially during link down and link up activity. Furthermore, old SKUs such as I225 will call the reset_adapter to reset the controller during TSN mode Gate Control List (GCL) setting. This will add more time to the configuration of the real-time use case. This delay is not mentioned in the Software User Manual. It might have been ported from legacy code I210 in the past. Fixes: 13b5b7fd6a4a ("igc: Add support for Tx/Rx rings") Signed-off-by: Muhammad Husaini Zulkifli Acked-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index b90f94511005..7e22204822ea 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -711,7 +711,6 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter, /* disable the queue */ wr32(IGC_TXDCTL(reg_idx), 0); wrfl(); - mdelay(10); wr32(IGC_TDLEN(reg_idx), ring->count * sizeof(union igc_adv_tx_desc)); -- cgit v1.2.3 From 175c241288c09f81eb7b44d65c1ef6045efa4d1a Mon Sep 17 00:00:00 2001 From: Muhammad Husaini Zulkifli Date: Sat, 3 Jun 2023 20:59:34 +0800 Subject: igc: Fix TX Hang issue when QBV Gate is closed If a user schedules a Gate Control List (GCL) to close one of the QBV gates while also transmitting a packet to that closed gate, a TX Hang will occur. HW does not drop any packet when the gate is closed and keeps queuing them up in the HW TX FIFO until the gate is re-opened. This patch implements the solution to drop the packet for the closed gate. This patch will also reset the adapter to perform SW initialization on the first Gate Control List (GCL) configuration to avoid a hang. This is due to the HW design, where changing to TSN transmit mode requires SW initialization. 
Intel Discrete I225/6 transmit mode cannot be changed when in dynamic mode according to Software User Manual Section 7.5.2.1. Subsequent Gate Control List (GCL) operations will proceed without a reset, as they already are in TSN Mode. Step to reproduce: DUT: 1) Configure GCL List with certain gate close. BASE=$(date +%s%N) tc qdisc replace dev $IFACE parent root handle 100 taprio \ num_tc 4 \ map 0 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 \ queues 1@0 1@1 1@2 1@3 \ base-time $BASE \ sched-entry S 0x8 500000 \ sched-entry S 0x4 500000 \ flags 0x2 2) Transmit the packet to closed gate. You may use udp_tai application to transmit UDP packet to any of the closed gate. ./udp_tai -i -P 100000 -p 90 -c 1 -t <0/1> -u 30004 Fixes: ec50a9d437f0 ("igc: Add support for taprio offloading") Co-developed-by: Tan Tee Min Signed-off-by: Tan Tee Min Tested-by: Chwee Lin Choong Signed-off-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 6 ++++ drivers/net/ethernet/intel/igc/igc_main.c | 58 ++++++++++++++++++++++++++++--- drivers/net/ethernet/intel/igc/igc_tsn.c | 41 ++++++++++++++-------- 3 files changed, 87 insertions(+), 18 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index aa5ceab0d371..639a50c02537 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -14,6 +14,7 @@ #include #include #include +#include #include "igc_hw.h" @@ -101,6 +102,8 @@ struct igc_ring { u32 start_time; u32 end_time; u32 max_sdu; + bool oper_gate_closed; /* Operating gate. True if the TX Queue is closed */ + bool admin_gate_closed; /* Future gate. 
True if the TX Queue will be closed */ /* CBS parameters */ bool cbs_enable; /* indicates if CBS is enabled */ @@ -160,6 +163,7 @@ struct igc_adapter { struct timer_list watchdog_timer; struct timer_list dma_err_timer; struct timer_list phy_info_timer; + struct hrtimer hrtimer; u32 wol; u32 en_mng_pt; @@ -189,6 +193,8 @@ struct igc_adapter { ktime_t cycle_time; bool qbv_enable; u32 qbv_config_change_errors; + bool qbv_transition; + unsigned int qbv_count; /* OS defined structs */ struct pci_dev *pdev; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 7e22204822ea..e5bfc4000658 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1572,6 +1572,9 @@ done: first->bytecount = skb->len; first->gso_segs = 1; + if (adapter->qbv_transition || tx_ring->oper_gate_closed) + goto out_drop; + if (tx_ring->max_sdu > 0) { u32 max_sdu = 0; @@ -3011,8 +3014,8 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) time_after(jiffies, tx_buffer->time_stamp + (adapter->tx_timeout_factor * HZ)) && !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) && - (rd32(IGC_TDH(tx_ring->reg_idx)) != - readl(tx_ring->tail))) { + (rd32(IGC_TDH(tx_ring->reg_idx)) != readl(tx_ring->tail)) && + !tx_ring->oper_gate_closed) { /* detected Tx unit hang */ netdev_err(tx_ring->netdev, "Detected Tx Unit Hang\n" @@ -6102,6 +6105,8 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter) adapter->base_time = 0; adapter->cycle_time = NSEC_PER_SEC; adapter->qbv_config_change_errors = 0; + adapter->qbv_transition = false; + adapter->qbv_count = 0; for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; @@ -6109,6 +6114,8 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter) ring->start_time = 0; ring->end_time = NSEC_PER_SEC; ring->max_sdu = 0; + ring->oper_gate_closed = false; + ring->admin_gate_closed = false; } return 0; @@ -6120,6 
+6127,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, bool queue_configured[IGC_MAX_TX_QUEUES] = { }; struct igc_hw *hw = &adapter->hw; u32 start_time = 0, end_time = 0; + struct timespec64 now; size_t n; int i; @@ -6149,6 +6157,8 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, adapter->cycle_time = qopt->cycle_time; adapter->base_time = qopt->base_time; + igc_ptp_read(adapter, &now); + for (n = 0; n < qopt->num_entries; n++) { struct tc_taprio_sched_entry *e = &qopt->entries[n]; @@ -6183,7 +6193,10 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, ring->start_time = start_time; ring->end_time = end_time; - queue_configured[i] = true; + if (ring->start_time >= adapter->cycle_time) + queue_configured[i] = false; + else + queue_configured[i] = true; } start_time += e->interval; @@ -6193,8 +6206,20 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, * If not, set the start and end time to be end time. */ for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; + + if (!is_base_time_past(qopt->base_time, &now)) { + ring->admin_gate_closed = false; + } else { + ring->oper_gate_closed = false; + ring->admin_gate_closed = false; + } + if (!queue_configured[i]) { - struct igc_ring *ring = adapter->tx_ring[i]; + if (!is_base_time_past(qopt->base_time, &now)) + ring->admin_gate_closed = true; + else + ring->oper_gate_closed = true; ring->start_time = end_time; ring->end_time = end_time; @@ -6575,6 +6600,27 @@ static const struct xdp_metadata_ops igc_xdp_metadata_ops = { .xmo_rx_timestamp = igc_xdp_rx_timestamp, }; +static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer) +{ + struct igc_adapter *adapter = container_of(timer, struct igc_adapter, + hrtimer); + unsigned int i; + + adapter->qbv_transition = true; + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + if (tx_ring->admin_gate_closed) { + 
tx_ring->admin_gate_closed = false; + tx_ring->oper_gate_closed = true; + } else { + tx_ring->oper_gate_closed = false; + } + } + adapter->qbv_transition = false; + return HRTIMER_NORESTART; +} + /** * igc_probe - Device Initialization Routine * @pdev: PCI device information struct @@ -6753,6 +6799,9 @@ static int igc_probe(struct pci_dev *pdev, INIT_WORK(&adapter->reset_task, igc_reset_task); INIT_WORK(&adapter->watchdog_task, igc_watchdog_task); + hrtimer_init(&adapter->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + adapter->hrtimer.function = &igc_qbv_scheduling_timer; + /* Initialize link properties that are user-changeable */ adapter->fc_autoneg = true; hw->mac.autoneg = true; @@ -6856,6 +6905,7 @@ static void igc_remove(struct pci_dev *pdev) cancel_work_sync(&adapter->reset_task); cancel_work_sync(&adapter->watchdog_task); + hrtimer_cancel(&adapter->hrtimer); /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 6b299b83e7ef..3cdb0c988728 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -114,7 +114,6 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) static int igc_tsn_enable_offload(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; - bool tsn_mode_reconfig = false; u32 tqavctrl, baset_l, baset_h; u32 sec, nsec, cycle; ktime_t base_time, systim; @@ -228,11 +227,10 @@ skip_cbs: tqavctrl = rd32(IGC_TQAVCTRL) & ~IGC_TQAVCTRL_FUTSCDDIS; - if (tqavctrl & IGC_TQAVCTRL_TRANSMIT_MODE_TSN) - tsn_mode_reconfig = true; - tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV; + adapter->qbv_count++; + cycle = adapter->cycle_time; base_time = adapter->base_time; @@ -250,17 +248,28 @@ skip_cbs: */ if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) && - 
tsn_mode_reconfig) + (adapter->qbv_count > 1)) adapter->qbv_config_change_errors++; } else { - /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit - * has to be configured before the cycle time and base time. - * Tx won't hang if there is a GCL is already running, - * so in this case we don't need to set FutScdDis. - */ - if (igc_is_device_id_i226(hw) && - !(rd32(IGC_BASET_H) || rd32(IGC_BASET_L))) - tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS; + if (igc_is_device_id_i226(hw)) { + ktime_t adjust_time, expires_time; + + /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit + * has to be configured before the cycle time and base time. + * Tx won't hang if a GCL is already running, + * so in this case we don't need to set FutScdDis. + */ + if (!(rd32(IGC_BASET_H) || rd32(IGC_BASET_L))) + tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS; + + nsec = rd32(IGC_SYSTIML); + sec = rd32(IGC_SYSTIMH); + systim = ktime_set(sec, nsec); + + adjust_time = adapter->base_time; + expires_time = ktime_sub_ns(adjust_time, systim); + hrtimer_start(&adapter->hrtimer, expires_time, HRTIMER_MODE_REL); + } } wr32(IGC_TQAVCTRL, tqavctrl); @@ -306,7 +315,11 @@ int igc_tsn_offload_apply(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; - if (netif_running(adapter->netdev) && igc_is_device_id_i225(hw)) { + /* Per I225/6 HW Design Section 7.5.2.1, transmit mode + * cannot be changed dynamically. Require reset the adapter. + */ + if (netif_running(adapter->netdev) && + (igc_is_device_id_i225(hw) || !adapter->qbv_count)) { schedule_work(&adapter->reset_task); return 0; } -- cgit v1.2.3 From 884abe45a9014d0de2e6edb0630dfd64f23f1d1b Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 28 Jun 2023 08:59:34 +0800 Subject: net/mlx5e: fix double free in mlx5e_destroy_flow_table In function accel_fs_tcp_create_groups(), when the ft->g memory is successfully allocated but the 'in' memory fails to be allocated, the memory pointed to by ft->g is released once. 
And in function accel_fs_tcp_create_table, mlx5e_destroy_flow_table is called to release the memory pointed to by ft->g again. This will cause double free problem. Fixes: c062d52ac24c ("net/mlx5e: Receive flow steering framework for accelerated TCP flows") Signed-off-by: Zhengchao Shao Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index 88a5aed9d678..c7d191f66ad1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -190,6 +190,7 @@ static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft, in = kvzalloc(inlen, GFP_KERNEL); if (!in || !ft->g) { kfree(ft->g); + ft->g = NULL; kvfree(in); return -ENOMEM; } -- cgit v1.2.3 From 3250affdc658557a41df9c5fb567723e421f8bf2 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 30 Jun 2023 09:49:02 +0800 Subject: net/mlx5e: fix memory leak in mlx5e_fs_tt_redirect_any_create The memory pointed to by the fs->any pointer is not freed in the error path of mlx5e_fs_tt_redirect_any_create, which can lead to a memory leak. Fix by freeing the memory in the error path, thereby making the error path identical to mlx5e_fs_tt_redirect_any_destroy(). 
Fixes: 0f575c20bf06 ("net/mlx5e: Introduce Flow Steering ANY API") Signed-off-by: Zhengchao Shao Reviewed-by: Simon Horman Reviewed-by: Rahul Rameshbabu Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c index 03cb79adf912..be83ad9db82a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -594,7 +594,7 @@ int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs) err = fs_any_create_table(fs); if (err) - return err; + goto err_free_any; err = fs_any_enable(fs); if (err) @@ -606,8 +606,8 @@ int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs) err_destroy_table: fs_any_destroy_table(fs_any); - - kfree(fs_any); +err_free_any: mlx5e_fs_set_any(fs, NULL); + kfree(fs_any); return err; } -- cgit v1.2.3 From d543b649ffe58a0cb4b6948b3305069c5980a1fa Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 30 Jun 2023 09:49:03 +0800 Subject: net/mlx5e: fix memory leak in mlx5e_ptp_open When kvzalloc_node or kvzalloc failed in mlx5e_ptp_open, the memory pointed by "c" or "cparams" is not freed, which can lead to a memory leak. Fix by freeing the array in the error path. 
Fixes: 145e5637d941 ("net/mlx5e: Add TX PTP port object support") Signed-off-by: Zhengchao Shao Reviewed-by: Rahul Rameshbabu Reviewed-by: Gal Pressman Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 3cbebfba582b..b0b429a0321e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -729,8 +729,10 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, c = kvzalloc_node(sizeof(*c), GFP_KERNEL, dev_to_node(mlx5_core_dma_dev(mdev))); cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL); - if (!c || !cparams) - return -ENOMEM; + if (!c || !cparams) { + err = -ENOMEM; + goto err_free; + } c->priv = priv; c->mdev = priv->mdev; -- cgit v1.2.3 From 2e2d1965794d22fbe86df45bf4f933216743577d Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 22 May 2023 21:18:53 +0300 Subject: net/mlx5e: RX, Fix flush and close release flow of regular rq for legacy rq Regular (non-XSK) RQs get flushed on XSK setup and re-activated on XSK close. If the same regular RQ is closed (a config change for example) soon after the XSK close, a double release occurs because the missing wqes get released a second time. 
Fixes: 3f93f82988bc ("net/mlx5e: RX, Defer page release in legacy rq for better recycling") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 704b022cd1f0..a9575219e455 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -390,10 +390,18 @@ static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix); - if (rq->xsk_pool) + if (rq->xsk_pool) { mlx5e_xsk_free_rx_wqe(wi); - else + } else { mlx5e_free_rx_wqe(rq, wi); + + /* Avoid a second release of the wqe pages: dealloc is called + * for the same missing wqes on regular RQ flush and on regular + * RQ close. This happens when XSK RQs come into play. + */ + for (int i = 0; i < rq->wqe.info.num_frags; i++, wi++) + wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + } } static void mlx5e_xsk_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) -- cgit v1.2.3 From 631079e08aa4a20b73e70de4cf457886194f029f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 26 Jun 2023 20:36:41 -0700 Subject: net/mlx5: Register a unique thermal zone per device Prior to this patch only one "mlx5" thermal zone could have been registered regardless of the number of individual mlx5 devices in the system. To fix this setup a unique name per device to register its own thermal zone. In order to not register a thermal zone for a virtual device (VF/SF) add a check for PF device type. The new name is a concatenation between "mlx5_" and "", which will also help associating a thermal zone with its PCI device. 
$ lspci | grep ConnectX 00:04.0 Ethernet controller: Mellanox Technologies MT2892 Family [ConnectX-6 Dx] 00:05.0 Ethernet controller: Mellanox Technologies MT2892 Family [ConnectX-6 Dx] $ cat /sys/devices/virtual/thermal/thermal_zone0/type mlx5_0000:00:04.0 $ cat /sys/devices/virtual/thermal/thermal_zone1/type mlx5_0000:00:05.0 Fixes: c1fef618d611 ("net/mlx5: Implement thermal zone") CC: Sandipan Patra Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/thermal.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c index 20bb5eb266c1..52199d39657e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/thermal.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/thermal.c @@ -68,14 +68,19 @@ static struct thermal_zone_device_ops mlx5_thermal_ops = { int mlx5_thermal_init(struct mlx5_core_dev *mdev) { + char data[THERMAL_NAME_LENGTH]; struct mlx5_thermal *thermal; - struct thermal_zone_device *tzd; - const char *data = "mlx5"; + int err; - tzd = thermal_zone_get_zone_by_name(data); - if (!IS_ERR(tzd)) + if (!mlx5_core_is_pf(mdev) && !mlx5_core_is_ecpf(mdev)) return 0; + err = snprintf(data, sizeof(data), "mlx5_%s", dev_name(mdev->device)); + if (err < 0 || err >= sizeof(data)) { + mlx5_core_err(mdev, "Failed to setup thermal zone name, %d\n", err); + return -EINVAL; + } + thermal = kzalloc(sizeof(*thermal), GFP_KERNEL); if (!thermal) return -ENOMEM; @@ -89,10 +94,10 @@ int mlx5_thermal_init(struct mlx5_core_dev *mdev) &mlx5_thermal_ops, NULL, 0, MLX5_THERMAL_POLL_INT_MSEC); if (IS_ERR(thermal->tzdev)) { - dev_err(mdev->device, "Failed to register thermal zone device (%s) %ld\n", - data, PTR_ERR(thermal->tzdev)); + err = PTR_ERR(thermal->tzdev); + mlx5_core_err(mdev, "Failed to register thermal zone device (%s) %d\n", data, err); kfree(thermal); - return -EINVAL; + return err; } 
mdev->thermal = thermal; -- cgit v1.2.3 From 65e64640e97c0f223e77f9ea69b5a46186b93470 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 8 Jun 2023 09:32:10 +0200 Subject: net/mlx5e: Check for NOT_READY flag state after locking Currently the check for NOT_READY flag is performed before obtaining the necessary lock. This opens a possibility for race condition when the flow is concurrently removed from unready_flows list by the workqueue task, which causes a double-removal from the list and a crash[0]. Fix the issue by moving the flag check inside the section protected by uplink_priv->unready_flows_lock mutex. [0]: [44376.389654] general protection fault, probably for non-canonical address 0xdead000000000108: 0000 [#1] SMP [44376.391665] CPU: 7 PID: 59123 Comm: tc Not tainted 6.4.0-rc4+ #1 [44376.392984] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [44376.395342] RIP: 0010:mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core] [44376.396857] Code: 00 48 8b b8 68 ce 02 00 e8 8a 4d 02 00 4c 8d a8 a8 01 00 00 4c 89 ef e8 8b 79 88 e1 48 8b 83 98 06 00 00 48 8b 93 90 06 00 00 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 90 06 [44376.399167] RSP: 0018:ffff88812cc97570 EFLAGS: 00010246 [44376.399680] RAX: dead000000000122 RBX: ffff8881088e3800 RCX: ffff8881881bac00 [44376.400337] RDX: dead000000000100 RSI: ffff88812cc97500 RDI: ffff8881242f71b0 [44376.401001] RBP: ffff88811cbb0940 R08: 0000000000000400 R09: 0000000000000001 [44376.401663] R10: 0000000000000001 R11: 0000000000000000 R12: ffff88812c944000 [44376.402342] R13: ffff8881242f71a8 R14: ffff8881222b4000 R15: 0000000000000000 [44376.402999] FS: 00007f0451104800(0000) GS:ffff88852cb80000(0000) knlGS:0000000000000000 [44376.403787] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [44376.404343] CR2: 0000000000489108 CR3: 0000000123a79003 CR4: 0000000000370ea0 [44376.405004] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 
[44376.405665] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [44376.406339] Call Trace: [44376.406651] [44376.406939] ? die_addr+0x33/0x90 [44376.407311] ? exc_general_protection+0x192/0x390 [44376.407795] ? asm_exc_general_protection+0x22/0x30 [44376.408292] ? mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core] [44376.408876] __mlx5e_tc_del_fdb_peer_flow+0xbc/0xe0 [mlx5_core] [44376.409482] mlx5e_tc_del_flow+0x42/0x210 [mlx5_core] [44376.410055] mlx5e_flow_put+0x25/0x50 [mlx5_core] [44376.410529] mlx5e_delete_flower+0x24b/0x350 [mlx5_core] [44376.411043] tc_setup_cb_reoffload+0x22/0x80 [44376.411462] fl_reoffload+0x261/0x2f0 [cls_flower] [44376.411907] ? mlx5e_rep_indr_setup_ft_cb+0x160/0x160 [mlx5_core] [44376.412481] ? mlx5e_rep_indr_setup_ft_cb+0x160/0x160 [mlx5_core] [44376.413044] tcf_block_playback_offloads+0x76/0x170 [44376.413497] tcf_block_unbind+0x7b/0xd0 [44376.413881] tcf_block_setup+0x17d/0x1c0 [44376.414269] tcf_block_offload_cmd.isra.0+0xf1/0x130 [44376.414725] tcf_block_offload_unbind+0x43/0x70 [44376.415153] __tcf_block_put+0x82/0x150 [44376.415532] ingress_destroy+0x22/0x30 [sch_ingress] [44376.415986] qdisc_destroy+0x3b/0xd0 [44376.416343] qdisc_graft+0x4d0/0x620 [44376.416706] tc_get_qdisc+0x1c9/0x3b0 [44376.417074] rtnetlink_rcv_msg+0x29c/0x390 [44376.419978] ? rep_movs_alternative+0x3a/0xa0 [44376.420399] ? rtnl_calcit.isra.0+0x120/0x120 [44376.420813] netlink_rcv_skb+0x54/0x100 [44376.421192] netlink_unicast+0x1f6/0x2c0 [44376.421573] netlink_sendmsg+0x232/0x4a0 [44376.421980] sock_sendmsg+0x38/0x60 [44376.422328] ____sys_sendmsg+0x1d0/0x1e0 [44376.422709] ? copy_msghdr_from_user+0x6d/0xa0 [44376.423127] ___sys_sendmsg+0x80/0xc0 [44376.423495] ? 
___sys_recvmsg+0x8b/0xc0 [44376.423869] __sys_sendmsg+0x51/0x90 [44376.424226] do_syscall_64+0x3d/0x90 [44376.424587] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [44376.425046] RIP: 0033:0x7f045134f887 [44376.425403] Code: 0a 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [44376.426914] RSP: 002b:00007ffd63a82b98 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [44376.427592] RAX: ffffffffffffffda RBX: 000000006481955f RCX: 00007f045134f887 [44376.428195] RDX: 0000000000000000 RSI: 00007ffd63a82c00 RDI: 0000000000000003 [44376.428796] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 [44376.429404] R10: 00007f0451208708 R11: 0000000000000246 R12: 0000000000000001 [44376.430039] R13: 0000000000409980 R14: 000000000047e538 R15: 0000000000485400 [44376.430644] [44376.430907] Modules linked in: mlx5_ib mlx5_core act_mirred act_tunnel_key cls_flower vxlan dummy sch_ingress openvswitch nsh rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_g ss_krb5 auth_rpcgss oid_registry overlay zram zsmalloc fuse [last unloaded: mlx5_core] [44376.433936] ---[ end trace 0000000000000000 ]--- [44376.434373] RIP: 0010:mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core] [44376.434951] Code: 00 48 8b b8 68 ce 02 00 e8 8a 4d 02 00 4c 8d a8 a8 01 00 00 4c 89 ef e8 8b 79 88 e1 48 8b 83 98 06 00 00 48 8b 93 90 06 00 00 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 90 06 [44376.436452] RSP: 0018:ffff88812cc97570 EFLAGS: 00010246 [44376.436924] RAX: dead000000000122 RBX: ffff8881088e3800 RCX: ffff8881881bac00 [44376.437530] RDX: dead000000000100 RSI: ffff88812cc97500 RDI: ffff8881242f71b0 [44376.438179] RBP: ffff88811cbb0940 R08: 0000000000000400 R09: 0000000000000001 
[44376.438786] R10: 0000000000000001 R11: 0000000000000000 R12: ffff88812c944000 [44376.439393] R13: ffff8881242f71a8 R14: ffff8881222b4000 R15: 0000000000000000 [44376.439998] FS: 00007f0451104800(0000) GS:ffff88852cb80000(0000) knlGS:0000000000000000 [44376.440714] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [44376.441225] CR2: 0000000000489108 CR3: 0000000123a79003 CR4: 0000000000370ea0 [44376.441843] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [44376.442471] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: ad86755b18d5 ("net/mlx5e: Protect unready flows with dedicated lock") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 41dc26800f48..8d0a3f69693e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1639,7 +1639,8 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow) uplink_priv = &rpriv->uplink_priv; mutex_lock(&uplink_priv->unready_flows_lock); - unready_flow_del(flow); + if (flow_flag_test(flow, NOT_READY)) + unready_flow_del(flow); mutex_unlock(&uplink_priv->unready_flows_lock); } @@ -1932,8 +1933,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, esw_attr = attr->esw_attr; mlx5e_put_flow_tunnel_id(flow); - if (flow_flag_test(flow, NOT_READY)) - remove_unready_flow(flow); + remove_unready_flow(flow); if (mlx5e_is_offloaded_flow(flow)) { if (flow_flag_test(flow, SLOW)) -- cgit v1.2.3 From f7a485115ad4cfc560833942014bf791abf1f827 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Sun, 4 Jun 2023 12:45:38 +0300 Subject: net/mlx5e: TC, CT: Offload ct clear only once Non-clear CT action causes a flow rule split, while CT clear 
action doesn't and is just a header-rewrite to the current flow rule. But ct offload is done in post_parse and is per ct action instance, so ct clear offload is parsed multiple times, while its deleted once. Fix this by post_parsing the ct action only once per flow attribute (which is per flow rule) by using a offloaded ct_attr flag. Fixes: 08fe94ec5f77 ("net/mlx5e: TC, Remove special handling of CT action") Signed-off-by: Paul Blakey Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 14 +++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index a254e728ac95..fadfa8b50beb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -1545,7 +1545,8 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */ attr->ct_attr.zone = act->ct.zone; - attr->ct_attr.nf_ft = act->ct.flow_table; + if (!(act->ct.action & TCA_CT_ACT_CLEAR)) + attr->ct_attr.nf_ft = act->ct.flow_table; attr->ct_attr.act_miss_cookie = act->miss_cookie; return 0; @@ -1990,6 +1991,9 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *att if (!priv) return -EOPNOTSUPP; + if (attr->ct_attr.offloaded) + return 0; + if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) { err = mlx5_tc_ct_entry_set_registers(priv, &attr->parse_attr->mod_hdr_acts, 0, 0, 0, 0); @@ -1999,11 +2003,15 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *att attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; } - if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */ + if (!attr->ct_attr.nf_ft) { /* means only ct clear action, and not 
ct_clear,ct() */ + attr->ct_attr.offloaded = true; return 0; + } mutex_lock(&priv->control_lock); err = __mlx5_tc_ct_flow_offload(priv, attr); + if (!err) + attr->ct_attr.offloaded = true; mutex_unlock(&priv->control_lock); return err; @@ -2021,7 +2029,7 @@ void mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr) { - if (!attr->ct_attr.ft) /* no ct action, return */ + if (!attr->ct_attr.offloaded) /* no ct action, return */ return; if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */ return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 8e9316fa46d4..b66c5f98067f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -29,6 +29,7 @@ struct mlx5_ct_attr { u32 ct_labels_id; u32 act_miss_mapping; u64 act_miss_cookie; + bool offloaded; struct mlx5_ct_ft *ft; }; -- cgit v1.2.3 From 6496357aa5f710eec96f91345b9da1b37c3231f6 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Tue, 20 Jun 2023 14:07:03 +0300 Subject: net/mlx5: Query hca_cap_2 only when supported On vport enable, where fw's hca caps are queried, the driver queries hca_caps_2 without checking if fw truly supports them, causing a false failure of vfs vport load and blocking SRIOV enablement on old devices such as CX4 where hca_caps_2 support is missing. Thus, add a check for the said caps support before accessing them. 
Fixes: e5b9642a33be ("net/mlx5: E-Switch, Implement devlink port function cmds to control migratable") Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index faec7d7a4400..243c455f1029 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -807,6 +807,9 @@ static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport * hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); vport->info.roce_enabled = MLX5_GET(cmd_hca_cap, hca_caps, roce); + if (!MLX5_CAP_GEN_MAX(esw->dev, hca_cap_2)) + goto out_free; + memset(query_ctx, 0, query_out_sz); err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx, MLX5_CAP_GENERAL_2); -- cgit v1.2.3 From 7abd955a58fb0fcd4e756fa2065c03ae488fcfa7 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 31 May 2023 21:18:49 +0300 Subject: net/mlx5e: RX, Fix page_pool page fragment tracking for XDP Currently mlx5e releases pages directly to the page_pool for XDP_TX and does page fragment counting for XDP_REDIRECT. RX pages from the page_pool are leaking on XDP_REDIRECT because the xdp core will release only one fragment out of MLX5E_PAGECNT_BIAS_MAX and subsequently the page is marked as "skip release" which avoids the driver release. A fix would be to take an extra fragment for XDP_REDIRECT and not set the "skip release" bit so that the release on the driver side can handle the remaining bias fragments. But this would be a shortsighted solution. Instead, this patch converges the two XDP paths (XDP_TX and XDP_REDIRECT) to always do fragment tracking. The "skip release" bit is no longer necessary for XDP. 
Fixes: 6f5742846053 ("net/mlx5e: RX, Enable skb page recycling through the page_pool") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 32 +++++++++--------------- 2 files changed, 13 insertions(+), 22 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index f0e6095809fa..40589cebb773 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -662,8 +662,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) * as we know this is a page_pool page. */ - page_pool_put_defragged_page(page->pp, - page, -1, true); + page_pool_recycle_direct(page->pp, page); } while (++n < num); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index a9575219e455..41d37159e027 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1751,11 +1751,11 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi prog = rcu_dereference(rq->xdp_prog); if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) { - if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { struct mlx5e_wqe_frag_info *pwi; for (pwi = head_wi; pwi < wi; pwi++) - pwi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + pwi->frag_page->frags++; } return NULL; /* page/packet was consumed by XDP */ } @@ -1825,12 +1825,8 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) rq, wi, cqe, cqe_bcnt); if (!skb) { /* probably for XDP */ - if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { - /* do not return page 
to cache, - * it will be returned on XDP_TX completion. - */ - wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); - } + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) + wi->frag_page->frags++; goto wq_cyc_pop; } @@ -1876,12 +1872,8 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) rq, wi, cqe, cqe_bcnt); if (!skb) { /* probably for XDP */ - if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { - /* do not return page to cache, - * it will be returned on XDP_TX completion. - */ - wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); - } + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) + wi->frag_page->frags++; goto wq_cyc_pop; } @@ -2060,12 +2052,12 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w if (prog) { if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { - int i; + struct mlx5e_frag_page *pfp; + + for (pfp = head_page; pfp < frag_page; pfp++) + pfp->frags++; - for (i = 0; i < sinfo->nr_frags; i++) - /* non-atomic */ - __set_bit(page_idx + i, wi->skip_release_bitmap); - return NULL; + wi->linear_page.frags++; } mlx5e_page_release_fragmented(rq, &wi->linear_page); return NULL; /* page/packet was consumed by XDP */ @@ -2163,7 +2155,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, cqe_bcnt, &mxbuf); if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) - __set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */ + frag_page->frags++; return NULL; /* page/packet was consumed by XDP */ } -- cgit v1.2.3 From cc7eab25b1cf3f9594fe61142d3523ce4d14a788 Mon Sep 17 00:00:00 2001 From: Yinjun Zhang Date: Wed, 5 Jul 2023 07:28:18 +0200 Subject: nfp: clean mc addresses in application firmware when closing port When moving devices from one namespace to another, mc addresses are cleaned in software while not removed from application firmware. 
Thus the mc addresses remain there and cause a resource leak.
Fixes: 8c5ad0dae93c ("igc: Add ethtool support") Signed-off-by: Prasad Koya Acked-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 0e2cb00622d1..93bce729be76 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1708,6 +1708,8 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, /* twisted pair */ cmd->base.port = PORT_TP; cmd->base.phy_address = hw->phy.addr; + ethtool_link_ksettings_add_link_mode(cmd, supported, TP); + ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); /* advertising link modes */ if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF) -- cgit v1.2.3 From 25102893e409bc02761ab82dbcfa092006404790 Mon Sep 17 00:00:00 2001 From: Tan Tee Min Date: Fri, 9 Jun 2023 11:28:42 +0800 Subject: igc: Include the length/type field and VLAN tag in queueMaxSDU IEEE 802.1Q does not have clear definitions of what constitutes an SDU (Service Data Unit), but IEEE Std 802.3 clause 3.1.2 does define the MAC service primitives and clause 3.2.7 does define the MAC Client Data for Q-tagged frames. It shows that the mac_service_data_unit (MSDU) does NOT contain the preamble, destination and source address, or FCS. The MSDU does contain the length/type field, MAC client data, VLAN tag and any padding data (prior to the FCS). Thus, the maximum 802.3 frame size that is allowed to be transmitted should be QueueMaxSDU (MSDU) + 16 (6 byte SA + 6 byte DA + 4 byte FCS). 
Fixes: 92a0dcb8427d ("igc: offload queue max SDU from tc-taprio") Signed-off-by: Tan Tee Min Reviewed-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index e5bfc4000658..281a0e35b9d1 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1575,16 +1575,9 @@ done: if (adapter->qbv_transition || tx_ring->oper_gate_closed) goto out_drop; - if (tx_ring->max_sdu > 0) { - u32 max_sdu = 0; - - max_sdu = tx_ring->max_sdu + - (skb_vlan_tagged(first->skb) ? VLAN_HLEN : 0); - - if (first->bytecount > max_sdu) { - adapter->stats.txdrop++; - goto out_drop; - } + if (tx_ring->max_sdu > 0 && first->bytecount > tx_ring->max_sdu) { + adapter->stats.txdrop++; + goto out_drop; } if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && @@ -6231,7 +6224,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, struct net_device *dev = adapter->netdev; if (qopt->max_sdu[i]) - ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len; + ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len - ETH_TLEN; else ring->max_sdu = 0; } -- cgit v1.2.3 From 84a192e46106355de1a314d709e657231d4b1026 Mon Sep 17 00:00:00 2001 From: Aravindhan Gunasekaran Date: Thu, 15 Jun 2023 12:00:43 +0530 Subject: igc: Handle PPS start time programming for past time values I225/6 hardware can be programmed to start PPS output once the time in Target Time registers is reached. The time programmed in these registers should always be into future. Only then PPS output is triggered when SYSTIM register reaches the programmed value. There are two modes in i225/6 hardware to program PPS, pulse and clock mode. 
There were issues reported where PPS is not generated when the start time is in the past.
Fixes: 5e91c72e560c ("igc: Fix PPS delta between two synchronized end-points") Signed-off-by: Aravindhan Gunasekaran Reviewed-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ptp.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 32ef112f8291..f0b979a70655 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -356,16 +356,35 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, tsim &= ~IGC_TSICR_TT0; } if (on) { + struct timespec64 safe_start; int i = rq->perout.index; igc_pin_perout(igc, i, pin, use_freq); - igc->perout[i].start.tv_sec = rq->perout.start.sec; + igc_ptp_read(igc, &safe_start); + + /* PPS output start time is triggered by Target time(TT) + * register. Programming any past time value into TT + * register will cause PPS to never start. Need to make + * sure we program the TT register a time ahead in + * future. There isn't a stringent need to fire PPS out + * right away. Adding +2 seconds should take care of + * corner cases. Let's say if the SYSTIML is close to + * wrap up and the timer keeps ticking as we program the + * register, adding +2seconds is safe bet. + */ + safe_start.tv_sec += 2; + + if (rq->perout.start.sec < safe_start.tv_sec) + igc->perout[i].start.tv_sec = safe_start.tv_sec; + else + igc->perout[i].start.tv_sec = rq->perout.start.sec; igc->perout[i].start.tv_nsec = rq->perout.start.nsec; igc->perout[i].period.tv_sec = ts.tv_sec; igc->perout[i].period.tv_nsec = ts.tv_nsec; - wr32(trgttimh, rq->perout.start.sec); + wr32(trgttimh, (u32)igc->perout[i].start.tv_sec); /* For now, always select timer 0 as source. 
*/ - wr32(trgttiml, rq->perout.start.nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); + wr32(trgttiml, (u32)(igc->perout[i].start.tv_nsec | + IGC_TT_IO_TIMER_SEL_SYSTIM0)); if (use_freq) wr32(freqout, ns); tsauxc |= tsauxc_mask; -- cgit v1.2.3 From 21327f81db6337c8843ce755b01523c7d3df715b Mon Sep 17 00:00:00 2001 From: Klaus Kudielka Date: Wed, 5 Jul 2023 07:37:12 +0200 Subject: net: mvneta: fix txq_map in case of txq_number==1 If we boot with mvneta.txq_number=1, the txq_map is set incorrectly: MVNETA_CPU_TXQ_ACCESS(1) refers to TX queue 1, but only TX queue 0 is initialized. Fix this. Fixes: 50bf8cb6fc9c ("net: mvneta: Configure XPS support") Signed-off-by: Klaus Kudielka Reviewed-by: Michal Kubiak Link: https://lore.kernel.org/r/20230705053712.3914-1-klaus.kudielka@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/mvneta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ff5647bcdfca..acf4f6ba73a6 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1511,7 +1511,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp) */ if (txq_number == 1) txq_map = (cpu == pp->rxq_def) ? - MVNETA_CPU_TXQ_ACCESS(1) : 0; + MVNETA_CPU_TXQ_ACCESS(0) : 0; } else { txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK; @@ -4356,7 +4356,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp) */ if (txq_number == 1) txq_map = (cpu == elected_cpu) ? 
- MVNETA_CPU_TXQ_ACCESS(1) : 0; + MVNETA_CPU_TXQ_ACCESS(0) : 0; else txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) & MVNETA_CPU_TXQ_ACCESS_ALL_MASK; -- cgit v1.2.3 From 009d30f1a77795014f151ba317fcbfc2f17153c6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 5 Jul 2023 13:44:20 +0300 Subject: net: mscc: ocelot: extend ocelot->fwd_domain_lock to cover ocelot->tas_lock In a future commit we will have to call vsc9959_tas_guard_bands_update() from ocelot_port_update_active_preemptible_tcs(), and that will be impossible due to the AB/BA locking dependencies between ocelot->tas_lock and ocelot->fwd_domain_lock. Just like we did in commit 3ff468ef987e ("net: mscc: ocelot: remove struct ocelot_mm_state :: lock"), the only solution is to expand the scope of ocelot->fwd_domain_lock for it to also serialize changes made to the Time-Aware Shaper, because those will have to result in a recalculation of cut-through TCs, which is something that depends on the forwarding domain. Signed-off-by: Vladimir Oltean Message-ID: <20230705104422.49025-2-vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix.c | 4 ++-- drivers/net/dsa/ocelot/felix_vsc9959.c | 36 +++++++++++++++++++++------------- drivers/net/ethernet/mscc/ocelot.c | 1 - drivers/net/ethernet/mscc/ocelot_mm.c | 7 ++----- include/soc/mscc/ocelot.h | 8 ++++---- 5 files changed, 30 insertions(+), 26 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 70c0e2b1936b..0c1207613aa4 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1790,12 +1790,12 @@ static int felix_change_mtu(struct dsa_switch *ds, int port, int new_mtu) ocelot_port_set_maxlen(ocelot, port, new_mtu); - mutex_lock(&ocelot->tas_lock); + mutex_lock(&ocelot->fwd_domain_lock); if (ocelot_port->taprio && felix->info->tas_guard_bands_update) felix->info->tas_guard_bands_update(ocelot, port); - 
mutex_unlock(&ocelot->tas_lock); + mutex_unlock(&ocelot->fwd_domain_lock); return 0; } diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index bb39fedd46c7..56b8bcac9690 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1217,7 +1217,7 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) u8 tas_speed; int tc; - lockdep_assert_held(&ocelot->tas_lock); + lockdep_assert_held(&ocelot->fwd_domain_lock); taprio = ocelot_port->taprio; @@ -1259,8 +1259,6 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) vsc9959_tas_min_gate_lengths(taprio, min_gate_len); - mutex_lock(&ocelot->fwd_domain_lock); - for (tc = 0; tc < OCELOT_NUM_TC; tc++) { u32 requested_max_sdu = vsc9959_tas_tc_max_sdu(taprio, tc); u64 remaining_gate_len_ps; @@ -1323,8 +1321,6 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) ocelot_write_rix(ocelot, maxlen, QSYS_PORT_MAX_SDU, port); ocelot->ops->cut_through_fwd(ocelot); - - mutex_unlock(&ocelot->fwd_domain_lock); } static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port, @@ -1351,7 +1347,7 @@ static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port, break; } - mutex_lock(&ocelot->tas_lock); + mutex_lock(&ocelot->fwd_domain_lock); ocelot_rmw_rix(ocelot, QSYS_TAG_CONFIG_LINK_SPEED(tas_speed), @@ -1361,7 +1357,7 @@ static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port, if (ocelot_port->taprio) vsc9959_tas_guard_bands_update(ocelot, port); - mutex_unlock(&ocelot->tas_lock); + mutex_unlock(&ocelot->fwd_domain_lock); } static void vsc9959_new_base_time(struct ocelot *ocelot, ktime_t base_time, @@ -1409,7 +1405,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, int ret, i; u32 val; - mutex_lock(&ocelot->tas_lock); + mutex_lock(&ocelot->fwd_domain_lock); if (taprio->cmd == TAPRIO_CMD_DESTROY) { ocelot_port_mqprio(ocelot, port, 
&taprio->mqprio); @@ -1421,7 +1417,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, vsc9959_tas_guard_bands_update(ocelot, port); - mutex_unlock(&ocelot->tas_lock); + mutex_unlock(&ocelot->fwd_domain_lock); return 0; } else if (taprio->cmd != TAPRIO_CMD_REPLACE) { ret = -EOPNOTSUPP; @@ -1504,7 +1500,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, ocelot_port->taprio = taprio_offload_get(taprio); vsc9959_tas_guard_bands_update(ocelot, port); - mutex_unlock(&ocelot->tas_lock); + mutex_unlock(&ocelot->fwd_domain_lock); return 0; @@ -1512,7 +1508,7 @@ err_reset_tc: taprio->mqprio.qopt.num_tc = 0; ocelot_port_mqprio(ocelot, port, &taprio->mqprio); err_unlock: - mutex_unlock(&ocelot->tas_lock); + mutex_unlock(&ocelot->fwd_domain_lock); return ret; } @@ -1525,7 +1521,7 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot) int port; u32 val; - mutex_lock(&ocelot->tas_lock); + mutex_lock(&ocelot->fwd_domain_lock); for (port = 0; port < ocelot->num_phys_ports; port++) { ocelot_port = ocelot->ports[port]; @@ -1563,7 +1559,7 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot) QSYS_TAG_CONFIG_ENABLE, QSYS_TAG_CONFIG, port); } - mutex_unlock(&ocelot->tas_lock); + mutex_unlock(&ocelot->fwd_domain_lock); } static int vsc9959_qos_port_cbs_set(struct dsa_switch *ds, int port, @@ -1634,6 +1630,18 @@ static int vsc9959_qos_query_caps(struct tc_query_caps_base *base) } } +static int vsc9959_qos_port_mqprio(struct ocelot *ocelot, int port, + struct tc_mqprio_qopt_offload *mqprio) +{ + int ret; + + mutex_lock(&ocelot->fwd_domain_lock); + ret = ocelot_port_mqprio(ocelot, port, mqprio); + mutex_unlock(&ocelot->fwd_domain_lock); + + return ret; +} + static int vsc9959_port_setup_tc(struct dsa_switch *ds, int port, enum tc_setup_type type, void *type_data) @@ -1646,7 +1654,7 @@ static int vsc9959_port_setup_tc(struct dsa_switch *ds, int port, case TC_SETUP_QDISC_TAPRIO: return vsc9959_qos_port_tas_set(ocelot, port, 
type_data); case TC_SETUP_QDISC_MQPRIO: - return ocelot_port_mqprio(ocelot, port, type_data); + return vsc9959_qos_port_mqprio(ocelot, port, type_data); case TC_SETUP_QDISC_CBS: return vsc9959_qos_port_cbs_set(ds, port, type_data); default: diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 2fa833d041ba..56ccbd4c37fe 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -2927,7 +2927,6 @@ int ocelot_init(struct ocelot *ocelot) mutex_init(&ocelot->mact_lock); mutex_init(&ocelot->fwd_domain_lock); - mutex_init(&ocelot->tas_lock); spin_lock_init(&ocelot->ptp_clock_lock); spin_lock_init(&ocelot->ts_id_lock); diff --git a/drivers/net/ethernet/mscc/ocelot_mm.c b/drivers/net/ethernet/mscc/ocelot_mm.c index fb3145118d68..f3c0e6c32934 100644 --- a/drivers/net/ethernet/mscc/ocelot_mm.c +++ b/drivers/net/ethernet/mscc/ocelot_mm.c @@ -89,17 +89,14 @@ void ocelot_port_change_fp(struct ocelot *ocelot, int port, { struct ocelot_mm_state *mm = &ocelot->mm[port]; - mutex_lock(&ocelot->fwd_domain_lock); + lockdep_assert_held(&ocelot->fwd_domain_lock); if (mm->preemptible_tcs == preemptible_tcs) - goto out_unlock; + return; mm->preemptible_tcs = preemptible_tcs; ocelot_port_update_active_preemptible_tcs(ocelot, port); - -out_unlock: - mutex_unlock(&ocelot->fwd_domain_lock); } static void ocelot_mm_update_port_status(struct ocelot *ocelot, int port) diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 22aae505c813..eb5f8914a66c 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -863,12 +863,12 @@ struct ocelot { struct mutex stat_view_lock; /* Lock for serializing access to the MAC table */ struct mutex mact_lock; - /* Lock for serializing forwarding domain changes */ + /* Lock for serializing forwarding domain changes, including the + * configuration of the Time-Aware Shaper, MAC Merge layer and + * cut-through forwarding, on which it depends + */ struct mutex 
fwd_domain_lock; - /* Lock for serializing Time-Aware Shaper changes */ - struct mutex tas_lock; - struct workqueue_struct *owq; u8 ptp:1; -- cgit v1.2.3 From c6efb4ae387c79bf0d4da286108c810b7b40de3c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 5 Jul 2023 13:44:22 +0300 Subject: net: mscc: ocelot: fix oversize frame dropping for preemptible TCs This switch implements Hold/Release in a strange way, with no control from the user as required by IEEE 802.1Q-2018 through Set-And-Hold-MAC and Set-And-Release-MAC, but rather, it emits HOLD requests implicitly based on the schedule. Namely, when the gate of a preemptible TC is about to close (actually QSYS::PREEMPTION_CFG.HOLD_ADVANCE octet times in advance of this event), the QSYS seems to emit a HOLD request pulse towards the MAC which preempts the currently transmitted packet, and further packets are held back in the queue system. This allows large frames to be squeezed through small time slots, because HOLD requests initiated by the gate events result in the frame being segmented in multiple fragments, the bit time of which is equal to the size of the time slot. It has been reported that the vsc9959_tas_guard_bands_update() logic breaks this, because it doesn't take preemptible TCs into account, and enables oversized frame dropping when the time slot doesn't allow a full MTU to be sent, but it does allow 2*minFragSize to be sent (128B). Packets larger than 128B are dropped instead of being sent in multiple fragments. Confusingly, the manual says: | For guard band, SDU calculation of a traffic class of a port, if | preemption is enabled (through 'QSYS::PREEMPTION_CFG.P_QUEUES') then | QSYS::PREEMPTION_CFG.HOLD_ADVANCE is used, otherwise | QSYS::QMAXSDU_CFG_*.QMAXSDU_* is used. but this only refers to the static guard band durations, and the QMAXSDU_CFG_* registers have dual purpose - the other being oversized frame dropping, which takes place irrespective of whether frames are preemptible or express. 
So, to fix the problem, we need to call vsc9959_tas_guard_bands_update() from ocelot_port_update_active_preemptible_tcs(), and modify the guard band logic to consider a different (lower) oversize limit for preemptible traffic classes. Fixes: 403ffc2c34de ("net: mscc: ocelot: add support for preemptible traffic classes") Signed-off-by: Vladimir Oltean Message-ID: <20230705104422.49025-4-vladimir.oltean@nxp.com> Signed-off-by: Jakub Kicinski --- drivers/net/dsa/ocelot/felix_vsc9959.c | 21 +++++++++++++++++---- drivers/net/ethernet/mscc/ocelot_mm.c | 7 +++++-- 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index d7caadd13f83..1c113957fcf4 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1209,11 +1209,13 @@ static u32 vsc9959_tas_tc_max_sdu(struct tc_taprio_qopt_offload *taprio, int tc) static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) { struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct ocelot_mm_state *mm = &ocelot->mm[port]; struct tc_taprio_qopt_offload *taprio; u64 min_gate_len[OCELOT_NUM_TC]; + u32 val, maxlen, add_frag_size; + u64 needed_min_frag_time_ps; int speed, picos_per_byte; u64 needed_bit_time_ps; - u32 val, maxlen; u8 tas_speed; int tc; @@ -1253,9 +1255,18 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) */ needed_bit_time_ps = (u64)(maxlen + 24) * picos_per_byte; + /* Preemptible TCs don't need to pass a full MTU, the port will + * automatically emit a HOLD request when a preemptible TC gate closes + */ + val = ocelot_read_rix(ocelot, QSYS_PREEMPTION_CFG, port); + add_frag_size = QSYS_PREEMPTION_CFG_MM_ADD_FRAG_SIZE_X(val); + needed_min_frag_time_ps = picos_per_byte * + (u64)(24 + 2 * ethtool_mm_frag_size_add_to_min(add_frag_size)); + dev_dbg(ocelot->dev, - "port %d: max frame size %d needs %llu ps at 
speed %d\n", - port, maxlen, needed_bit_time_ps, speed); + "port %d: max frame size %d needs %llu ps, %llu ps for mPackets at speed %d\n", + port, maxlen, needed_bit_time_ps, needed_min_frag_time_ps, + speed); vsc9959_tas_min_gate_lengths(taprio, min_gate_len); @@ -1267,7 +1278,9 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) remaining_gate_len_ps = vsc9959_tas_remaining_gate_len_ps(min_gate_len[tc]); - if (remaining_gate_len_ps > needed_bit_time_ps) { + if ((mm->active_preemptible_tcs & BIT(tc)) ? + remaining_gate_len_ps > needed_min_frag_time_ps : + remaining_gate_len_ps > needed_bit_time_ps) { /* Setting QMAXSDU_CFG to 0 disables oversized frame * dropping. */ diff --git a/drivers/net/ethernet/mscc/ocelot_mm.c b/drivers/net/ethernet/mscc/ocelot_mm.c index f3c0e6c32934..c815ae64e39d 100644 --- a/drivers/net/ethernet/mscc/ocelot_mm.c +++ b/drivers/net/ethernet/mscc/ocelot_mm.c @@ -67,10 +67,13 @@ void ocelot_port_update_active_preemptible_tcs(struct ocelot *ocelot, int port) val = mm->preemptible_tcs; /* Cut through switching doesn't work for preemptible priorities, - * so first make sure it is disabled. + * so first make sure it is disabled. Also, changing the preemptible + * TCs affects the oversized frame dropping logic, so that needs to be + * re-triggered. And since tas_guard_bands_update() also implicitly + * calls cut_through_fwd(), we don't need to explicitly call it. */ mm->active_preemptible_tcs = val; - ocelot->ops->cut_through_fwd(ocelot); + ocelot->ops->tas_guard_bands_update(ocelot, port); dev_dbg(ocelot->dev, "port %d %s/%s, MM TX %s, preemptible TCs 0x%x, active 0x%x\n", -- cgit v1.2.3 From 0503efeadbf6bb8bf24397613a73b67e665eac5f Mon Sep 17 00:00:00 2001 From: Junfeng Guo Date: Thu, 6 Jul 2023 12:41:28 +0800 Subject: gve: Set default duplex configuration to full Current duplex mode was unset in the driver, resulting in the default parameter being set to 0, which corresponds to half duplex. 
It might mislead users to have incorrect expectation about the driver's transmission capabilities. Set the default duplex configuration to full, as the driver runs in full duplex mode at this point. Fixes: 7e074d5a76ca ("gve: Enable Link Speed Reporting in the driver.") Signed-off-by: Junfeng Guo Reviewed-by: Leon Romanovsky Message-ID: <20230706044128.2726747-1-junfeng.guo@intel.com> Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index cfd4b8d284d1..50162ec9424d 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -590,6 +590,9 @@ static int gve_get_link_ksettings(struct net_device *netdev, err = gve_adminq_report_link_speed(priv); cmd->base.speed = priv->link_speed; + + cmd->base.duplex = DUPLEX_FULL; + return err; } -- cgit v1.2.3 From af42088bdaf292060b8d8a00d8644ca7b2b3f2d1 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Thu, 6 Jul 2023 09:57:05 +0530 Subject: octeontx2-af: Promisc enable/disable through mbox In legacy silicon, promiscuous mode is only modified through CGX mbox messages. In CN10KB silicon, it is modified from CGX mbox and NIX. This breaks legacy application behaviour. Fix this by removing call from NIX. Fixes: d6c9784baf59 ("octeontx2-af: Invoke exact match functions if supported") Signed-off-by: Ratheesh Kannoth Reviewed-by: Leon Romanovsky Reviewed-by: Michal Kubiak Signed-off-by: David S. 
Miller --- .../net/ethernet/marvell/octeontx2/af/rvu_nix.c | 11 ++--------- .../ethernet/marvell/octeontx2/af/rvu_npc_hash.c | 23 ++++++++++++++++++++-- 2 files changed, 23 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 0d745ae1cc9a..04b0e885f9d2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -4069,21 +4069,14 @@ int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode *req, } /* install/uninstall promisc entry */ - if (promisc) { + if (promisc) rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, pfvf->rx_chan_base, pfvf->rx_chan_cnt); - - if (rvu_npc_exact_has_match_table(rvu)) - rvu_npc_exact_promisc_enable(rvu, pcifunc); - } else { + else if (!nix_rx_multicast) rvu_npc_enable_promisc_entry(rvu, pcifunc, nixlf, false); - if (rvu_npc_exact_has_match_table(rvu)) - rvu_npc_exact_promisc_disable(rvu, pcifunc); - } - return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c index 9f11c1e40737..6fe67f3a7f6f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c @@ -1164,8 +1164,10 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i { struct npc_exact_table *table; u16 *cnt, old_cnt; + bool promisc; table = rvu->hw->table; + promisc = table->promisc_mode[drop_mcam_idx]; cnt = &table->cnt_cmd_rules[drop_mcam_idx]; old_cnt = *cnt; @@ -1177,13 +1179,18 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i *enable_or_disable_cam = false; - /* If all rules are deleted, disable cam */ + if (promisc) + goto done; + + /* If all rules are deleted and not already in promisc mode; + * disable cam + */ if (!*cnt && val < 0) { 
*enable_or_disable_cam = true; goto done; } - /* If rule got added, enable cam */ + /* If rule got added and not already in promisc mode; enable cam */ if (!old_cnt && val > 0) { *enable_or_disable_cam = true; goto done; @@ -1462,6 +1469,12 @@ int rvu_npc_exact_promisc_disable(struct rvu *rvu, u16 pcifunc) *promisc = false; mutex_unlock(&table->lock); + /* Enable drop rule */ + rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, + true); + + dev_dbg(rvu->dev, "%s: disabled promisc mode (cgx=%d lmac=%d)\n", + __func__, cgx_id, lmac_id); return 0; } @@ -1503,6 +1516,12 @@ int rvu_npc_exact_promisc_enable(struct rvu *rvu, u16 pcifunc) *promisc = true; mutex_unlock(&table->lock); + /* disable drop rule */ + rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, + false); + + dev_dbg(rvu->dev, "%s: Enabled promisc mode (cgx=%d lmac=%d)\n", + __func__, cgx_id, lmac_id); return 0; } -- cgit v1.2.3 From 7709fbd4922c197efabda03660d93e48a3e80323 Mon Sep 17 00:00:00 2001 From: Sai Krishna Date: Thu, 6 Jul 2023 13:59:36 +0530 Subject: octeontx2-af: Move validation of ptp pointer before its usage Moved PTP pointer validation before its use to avoid smatch warning. Also used kzalloc/kfree instead of devm_kzalloc/devm_kfree. Fixes: 2ef4e45d99b1 ("octeontx2-af: Add PTP PPS Errata workaround on CN10K silicon") Signed-off-by: Naveen Mamindlapalli Signed-off-by: Sunil Goutham Signed-off-by: Sai Krishna Signed-off-by: David S. 
Miller --- drivers/net/ethernet/marvell/octeontx2/af/ptp.c | 19 +++++++++---------- drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 2 +- 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c index 3411e2e47d46..0ee420a489fc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c @@ -208,7 +208,7 @@ struct ptp *ptp_get(void) /* Check driver is bound to PTP block */ if (!ptp) ptp = ERR_PTR(-EPROBE_DEFER); - else + else if (!IS_ERR(ptp)) pci_dev_get(ptp->pdev); return ptp; @@ -388,11 +388,10 @@ static int ptp_extts_on(struct ptp *ptp, int on) static int ptp_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { - struct device *dev = &pdev->dev; struct ptp *ptp; int err; - ptp = devm_kzalloc(dev, sizeof(*ptp), GFP_KERNEL); + ptp = kzalloc(sizeof(*ptp), GFP_KERNEL); if (!ptp) { err = -ENOMEM; goto error; @@ -428,20 +427,19 @@ static int ptp_probe(struct pci_dev *pdev, return 0; error_free: - devm_kfree(dev, ptp); + kfree(ptp); error: /* For `ptp_get()` we need to differentiate between the case * when the core has not tried to probe this device and the case when - * the probe failed. In the later case we pretend that the - * initialization was successful and keep the error in + * the probe failed. In the later case we keep the error in * `dev->driver_data`. 
*/ pci_set_drvdata(pdev, ERR_PTR(err)); if (!first_ptp_block) first_ptp_block = ERR_PTR(err); - return 0; + return err; } static void ptp_remove(struct pci_dev *pdev) @@ -449,16 +447,17 @@ static void ptp_remove(struct pci_dev *pdev) struct ptp *ptp = pci_get_drvdata(pdev); u64 clock_cfg; - if (cn10k_ptp_errata(ptp) && hrtimer_active(&ptp->hrtimer)) - hrtimer_cancel(&ptp->hrtimer); - if (IS_ERR_OR_NULL(ptp)) return; + if (cn10k_ptp_errata(ptp) && hrtimer_active(&ptp->hrtimer)) + hrtimer_cancel(&ptp->hrtimer); + /* Disable PTP clock */ clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG); clock_cfg &= ~PTP_CLOCK_CFG_PTP_EN; writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG); + kfree(ptp); } static const struct pci_device_id ptp_id_table[] = { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 8dbc35c481f6..73df2d564545 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -3252,7 +3252,7 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id) rvu->ptp = ptp_get(); if (IS_ERR(rvu->ptp)) { err = PTR_ERR(rvu->ptp); - if (err == -EPROBE_DEFER) + if (err) goto err_release_regions; rvu->ptp = NULL; } -- cgit v1.2.3 From abfb2a58a5377ebab717d4362d6180f901b6e5c1 Mon Sep 17 00:00:00 2001 From: Nitya Sunkad Date: Thu, 6 Jul 2023 11:20:06 -0700 Subject: ionic: remove WARN_ON to prevent panic_on_warn Remove unnecessary early code development check and the WARN_ON that it uses. The irq alloc and free paths have long been cleaned up and this check shouldn't have stuck around so long. Fixes: 77ceb68e29cc ("ionic: Add notifyq support") Signed-off-by: Nitya Sunkad Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Signed-off-by: David S. 
Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 7c20a44e549b..612b0015dc43 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -475,11 +475,6 @@ static void ionic_qcqs_free(struct ionic_lif *lif) static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq, struct ionic_qcq *n_qcq) { - if (WARN_ON(n_qcq->flags & IONIC_QCQ_F_INTR)) { - ionic_intr_free(n_qcq->cq.lif->ionic, n_qcq->intr.index); - n_qcq->flags &= ~IONIC_QCQ_F_INTR; - } - n_qcq->intr.vector = src_qcq->intr.vector; n_qcq->intr.index = src_qcq->intr.index; n_qcq->napi_qcq = src_qcq->napi_qcq; -- cgit v1.2.3 From 3a7af34fb6ecd9fbeb4454fc03c654b26fab5f5e Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Thu, 6 Jul 2023 13:59:24 -0700 Subject: ionic: remove dead device fail path Remove the probe error path code that leaves the driver bound to the device, but with essentially a dead device. This was useful maybe twice early in the driver's life and no longer makes sense to keep. Fixes: 30a1e6d0f8e2 ("ionic: keep ionic dev on lif init fail") Signed-off-by: Shannon Nelson Signed-off-by: David S. 
Miller --- drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index b8678da1cce5..ab7d217b98b3 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -353,12 +353,6 @@ err_out_reset: ionic_reset(ionic); err_out_teardown: ionic_dev_teardown(ionic); - pci_clear_master(pdev); - /* Don't fail the probe for these errors, keep - * the hw interface around for inspection - */ - return 0; - err_out_unmap_bars: ionic_unmap_bars(ionic); err_out_pci_release_regions: -- cgit v1.2.3 From e7731194fdf085f46d58b1adccfddbd0dfee4873 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Fri, 7 Jul 2023 08:53:25 +0200 Subject: net: bgmac: postpone turning IRQs off to avoid SoC hangs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turning IRQs off is done by accessing Ethernet controller registers. That can't be done until device's clock is enabled. It results in a SoC hang otherwise. This bug remained unnoticed for years as most bootloaders keep all Ethernet interfaces turned on. It seems to only affect a niche SoC family BCM47189. It has two Ethernet controllers but CFE bootloader uses only the first one. Fixes: 34322615cbaa ("net: bgmac: Mask interrupts during probe") Signed-off-by: Rafał Miłecki Reviewed-by: Michal Kubiak Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bgmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 1761df8fb7f9..10c7c232cc4e 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1492,8 +1492,6 @@ int bgmac_enet_probe(struct bgmac *bgmac) bgmac->in_init = true; - bgmac_chip_intrs_off(bgmac); - net_dev->irq = bgmac->irq; SET_NETDEV_DEV(net_dev, bgmac->dev); dev_set_drvdata(bgmac->dev, bgmac); @@ -1511,6 +1509,8 @@ int bgmac_enet_probe(struct bgmac *bgmac) */ bgmac_clk_enable(bgmac, 0); + bgmac_chip_intrs_off(bgmac); + /* This seems to be fixing IRQ by assigning OOB #6 to the core */ if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK)) { if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6) -- cgit v1.2.3 From 73c4d1b307aeb713e80ab03f90c7df9d417dc0f0 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sat, 8 Jul 2023 15:06:25 +0100 Subject: net: lan743x: select FIXED_PHY The blamed commit introduces usage of fixed_phy_register() but not a corresponding dependency on FIXED_PHY. This can result in a build failure. s390-linux-ld: drivers/net/ethernet/microchip/lan743x_main.o: in function `lan743x_phy_open': drivers/net/ethernet/microchip/lan743x_main.c:1514: undefined reference to `fixed_phy_register' Fixes: 624864fbff92 ("net: lan743x: add fixed phy support for LAN7431 device") Cc: stable@vger.kernel.org Reported-by: Randy Dunlap Closes: https://lore.kernel.org/netdev/725bf1c5-b252-7d19-7582-a6809716c7d6@infradead.org/ Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Signed-off-by: Simon Horman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/microchip/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/microchip/Kconfig b/drivers/net/ethernet/microchip/Kconfig index 24c994baad13..329e374b9539 100644 --- a/drivers/net/ethernet/microchip/Kconfig +++ b/drivers/net/ethernet/microchip/Kconfig @@ -46,7 +46,7 @@ config LAN743X tristate "LAN743x support" depends on PCI depends on PTP_1588_CLOCK_OPTIONAL - select PHYLIB + select FIXED_PHY select CRC16 select CRC32 help -- cgit v1.2.3 From 9d0aba98316d00f9c0a4506fc15f5ed9241bc1fd Mon Sep 17 00:00:00 2001 From: Junfeng Guo Date: Sat, 8 Jul 2023 11:14:51 +0800 Subject: gve: unify driver name usage Current codebase contained the usage of two different names for this driver (i.e., `gvnic` and `gve`), which is quite unfriendly for users to use, especially when trying to bind or unbind the driver manually. The corresponding kernel module is registered with the name of `gve`. It's more reasonable to align the name of the driver with the module. Fixes: 893ce44df565 ("gve: Add basic driver framework for Compute Engine Virtual NIC") Cc: csully@google.com Signed-off-by: Junfeng Guo Signed-off-by: David S. 
Miller --- drivers/net/ethernet/google/gve/gve.h | 1 + drivers/net/ethernet/google/gve/gve_ethtool.c | 2 +- drivers/net/ethernet/google/gve/gve_main.c | 11 ++++++----- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 98eb78d98e9f..4b425bf71ede 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -964,5 +964,6 @@ void gve_handle_report_stats(struct gve_priv *priv); /* exported by ethtool.c */ extern const struct ethtool_ops gve_ethtool_ops; /* needed by ethtool */ +extern char gve_driver_name[]; extern const char gve_version_str[]; #endif /* _GVE_H_ */ diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 50162ec9424d..233e5946905e 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -15,7 +15,7 @@ static void gve_get_drvinfo(struct net_device *netdev, { struct gve_priv *priv = netdev_priv(netdev); - strscpy(info->driver, "gve", sizeof(info->driver)); + strscpy(info->driver, gve_driver_name, sizeof(info->driver)); strscpy(info->version, gve_version_str, sizeof(info->version)); strscpy(info->bus_info, pci_name(priv->pdev), sizeof(info->bus_info)); } diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 8fb70db63b8b..e6f1711d9be0 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -33,6 +33,7 @@ #define MIN_TX_TIMEOUT_GAP (1000 * 10) #define DQO_TX_MAX 0x3FFFF +char gve_driver_name[] = "gve"; const char gve_version_str[] = GVE_VERSION; static const char gve_version_prefix[] = GVE_VERSION_PREFIX; @@ -2200,7 +2201,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) return err; - err = pci_request_regions(pdev, "gvnic-cfg"); + err = 
pci_request_regions(pdev, gve_driver_name); if (err) goto abort_with_enabled; @@ -2393,8 +2394,8 @@ static const struct pci_device_id gve_id_table[] = { { } }; -static struct pci_driver gvnic_driver = { - .name = "gvnic", +static struct pci_driver gve_driver = { + .name = gve_driver_name, .id_table = gve_id_table, .probe = gve_probe, .remove = gve_remove, @@ -2405,10 +2406,10 @@ static struct pci_driver gvnic_driver = { #endif }; -module_pci_driver(gvnic_driver); +module_pci_driver(gve_driver); MODULE_DEVICE_TABLE(pci, gve_id_table); MODULE_AUTHOR("Google, Inc."); -MODULE_DESCRIPTION("gVNIC Driver"); +MODULE_DESCRIPTION("Google Virtual NIC Driver"); MODULE_LICENSE("Dual MIT/GPL"); MODULE_VERSION(GVE_VERSION); -- cgit v1.2.3 From 8046063df887bee35c002224267ba46f41be7cf6 Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 14 Jun 2023 16:07:09 +0200 Subject: igc: Rename qbv_enable to taprio_offload_enable In the current implementation the flags adapter->qbv_enable and IGC_FLAG_TSN_QBV_ENABLED have a similar name, but do not have the same meaning. The first one is used only to indicate taprio offload (i.e. when igc_save_qbv_schedule was called), while the second one corresponds to the Qbv mode of the hardware. However, the second one is also used to support the TX launchtime feature, i.e. ETF qdisc offload. This leads to situations where adapter->qbv_enable is false, but the flag IGC_FLAG_TSN_QBV_ENABLED is set. This is prone to confusion. The rename should reduce this confusion. Since it is a pure rename, it has no impact on functionality. 
Fixes: e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv") Signed-off-by: Florian Kauer Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 2 +- drivers/net/ethernet/intel/igc/igc_main.c | 6 +++--- drivers/net/ethernet/intel/igc/igc_tsn.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 639a50c02537..9db384f66a8e 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -191,7 +191,7 @@ struct igc_adapter { int tc_setup_type; ktime_t base_time; ktime_t cycle_time; - bool qbv_enable; + bool taprio_offload_enable; u32 qbv_config_change_errors; bool qbv_transition; unsigned int qbv_count; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 281a0e35b9d1..fae534ef1c4f 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6126,16 +6126,16 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, switch (qopt->cmd) { case TAPRIO_CMD_REPLACE: - adapter->qbv_enable = true; + adapter->taprio_offload_enable = true; break; case TAPRIO_CMD_DESTROY: - adapter->qbv_enable = false; + adapter->taprio_offload_enable = false; break; default: return -EOPNOTSUPP; } - if (!adapter->qbv_enable) + if (!adapter->taprio_offload_enable) return igc_tsn_clear_schedule(adapter); if (qopt->base_time < 0) diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 3cdb0c988728..b76ebfc10b1d 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -37,7 +37,7 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter) { unsigned int new_flags = adapter->flags & ~IGC_FLAG_TSN_ANY_ENABLED; - if (adapter->qbv_enable) + if 
(adapter->taprio_offload_enable) new_flags |= IGC_FLAG_TSN_QBV_ENABLED; if (is_any_launchtime(adapter)) -- cgit v1.2.3 From 82ff5f29b7377d614f0c01fd74b5d0cb225f0adc Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 14 Jun 2023 16:07:10 +0200 Subject: igc: Do not enable taprio offload for invalid arguments Only set adapter->taprio_offload_enable after validating the arguments. Otherwise, it stays set even if the offload was not enabled. Since the subsequent code does not get executed in case of invalid arguments, it will not be read at first. However, by activating and then deactivating another offload (e.g. ETF/TX launchtime offload), taprio_offload_enable is read and erroneously keeps the offload feature of the NIC enabled. This can be reproduced as follows: # TAPRIO offload (flags == 0x2) and negative base-time leading to expected -ERANGE sudo tc qdisc replace dev enp1s0 parent root handle 100 stab overhead 24 taprio \ num_tc 1 \ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ queues 1@0 \ base-time -1000 \ sched-entry S 01 300000 \ flags 0x2 # IGC_TQAVCTRL is 0x0 as expected (iomem=relaxed for reading register) sudo pcimem /sys/bus/pci/devices/0000:01:00.0/resource0 0x3570 w*1 # Activate ETF offload sudo tc qdisc replace dev enp1s0 parent root handle 6666 mqprio \ num_tc 3 \ map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \ queues 1@0 1@1 2@2 \ hw 0 sudo tc qdisc add dev enp1s0 parent 6666:1 etf \ clockid CLOCK_TAI \ delta 500000 \ offload # IGC_TQAVCTRL is 0x9 as expected sudo pcimem /sys/bus/pci/devices/0000:01:00.0/resource0 0x3570 w*1 # Deactivate ETF offload again sudo tc qdisc delete dev enp1s0 parent 6666:1 # IGC_TQAVCTRL should now be 0x0 again, but is observed as 0x9 sudo pcimem /sys/bus/pci/devices/0000:01:00.0/resource0 0x3570 w*1 Fixes: e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv") Signed-off-by: Florian Kauer Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 18 
++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index fae534ef1c4f..fb8e55c7c402 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6097,6 +6097,7 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter) adapter->base_time = 0; adapter->cycle_time = NSEC_PER_SEC; + adapter->taprio_offload_enable = false; adapter->qbv_config_change_errors = 0; adapter->qbv_transition = false; adapter->qbv_count = 0; @@ -6124,20 +6125,12 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, size_t n; int i; - switch (qopt->cmd) { - case TAPRIO_CMD_REPLACE: - adapter->taprio_offload_enable = true; - break; - case TAPRIO_CMD_DESTROY: - adapter->taprio_offload_enable = false; - break; - default: - return -EOPNOTSUPP; - } - - if (!adapter->taprio_offload_enable) + if (qopt->cmd == TAPRIO_CMD_DESTROY) return igc_tsn_clear_schedule(adapter); + if (qopt->cmd != TAPRIO_CMD_REPLACE) + return -EOPNOTSUPP; + if (qopt->base_time < 0) return -ERANGE; @@ -6149,6 +6142,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, adapter->cycle_time = qopt->cycle_time; adapter->base_time = qopt->base_time; + adapter->taprio_offload_enable = true; igc_ptp_read(adapter, &now); -- cgit v1.2.3 From e5d88c53d03f8df864776431175d08c053645f50 Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 14 Jun 2023 16:07:11 +0200 Subject: igc: Handle already enabled taprio offload for basetime 0 Since commit e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv") it is possible to enable taprio offload with a basetime of 0. However, the check if taprio offload is already enabled (and thus -EALREADY should be returned for igc_save_qbv_schedule) still relied on adapter->base_time > 0. 
This can be reproduced as follows: # TAPRIO offload (flags == 0x2) and base-time = 0 sudo tc qdisc replace dev enp1s0 parent root handle 100 stab overhead 24 taprio \ num_tc 1 \ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ queues 1@0 \ base-time 0 \ sched-entry S 01 300000 \ flags 0x2 # The second call should fail with "Error: Device failed to setup taprio offload." # But that only happens if base-time was != 0 sudo tc qdisc replace dev enp1s0 parent root handle 100 stab overhead 24 taprio \ num_tc 1 \ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ queues 1@0 \ base-time 0 \ sched-entry S 01 300000 \ flags 0x2 Fixes: e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv") Signed-off-by: Florian Kauer Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index fb8e55c7c402..5d24930fed8f 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6134,7 +6134,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, if (qopt->base_time < 0) return -ERANGE; - if (igc_is_device_id_i225(hw) && adapter->base_time) + if (igc_is_device_id_i225(hw) && adapter->taprio_offload_enable) return -EALREADY; if (!validate_schedule(adapter, qopt)) -- cgit v1.2.3 From 8b86f10ab64eca0287ea8f7c94e9ad8b2e101c01 Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 14 Jun 2023 16:07:12 +0200 Subject: igc: No strict mode in pure launchtime/CBS offload The flags IGC_TXQCTL_STRICT_CYCLE and IGC_TXQCTL_STRICT_END prevent the packet transmission over slot and cycle boundaries. This is important for taprio offload where the slots and cycles correspond to the slots and cycles configured for the network. 
However, the Qbv offload feature of the i225 is also used for enabling TX launchtime / ETF offload. In that case, however, the cycle has no meaning for the network and is only used internally to adapt the base time register after a second has passed. Enabling strict mode in this case would unnecessarily prevent the transmission of certain packets (i.e. at the boundary of a second) and thus interferes with the ETF qdisc that promises transmission at a certain point in time. Similar to ETF, this also applies to CBS offload that also should not be influenced by strict mode unless taprio offload would be enabled at the same time. This fully reverts commit d8f45be01dd9 ("igc: Use strict cycles for Qbv scheduling") but its commit message only describes what was already implemented before that commit. The difference to a plain revert of that commit is that it now copes with the base_time = 0 case that was fixed with commit e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv") In particular, enabling strict mode leads to TX hang situations under high traffic if taprio is applied WITHOUT taprio offload but WITH ETF offload, e.g. 
as in sudo tc qdisc replace dev enp1s0 parent root handle 100 taprio \ num_tc 1 \ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ queues 1@0 \ base-time 0 \ sched-entry S 01 300000 \ flags 0x1 \ txtime-delay 500000 \ clockid CLOCK_TAI sudo tc qdisc replace dev enp1s0 parent 100:1 etf \ clockid CLOCK_TAI \ delta 500000 \ offload \ skip_sock_check and traffic generator sudo trafgen -i traffic.cfg -o enp1s0 --cpp -n0 -q -t1400ns with traffic.cfg #define ETH_P_IP 0x0800 { /* Ethernet Header */ 0x30, 0x1f, 0x9a, 0xd0, 0xf0, 0x0e, # MAC Dest - adapt as needed 0x24, 0x5e, 0xbe, 0x57, 0x2e, 0x36, # MAC Src - adapt as needed const16(ETH_P_IP), /* IPv4 Header */ 0b01000101, 0, # IPv4 version, IHL, TOS const16(1028), # IPv4 total length (UDP length + 20 bytes (IP header)) const16(2), # IPv4 ident 0b01000000, 0, # IPv4 flags, fragmentation off 64, # IPv4 TTL 17, # Protocol UDP csumip(14, 33), # IPv4 checksum /* UDP Header */ 10, 0, 48, 1, # IP Src - adapt as needed 10, 0, 48, 10, # IP Dest - adapt as needed const16(5555), # UDP Src Port const16(6666), # UDP Dest Port const16(1008), # UDP length (UDP header 8 bytes + payload length) csumudp(14, 34), # UDP checksum /* Payload */ fill('W', 1000), } and the observed message with that is for example igc 0000:01:00.0 enp1s0: Detected Tx Unit Hang Tx Queue <0> TDH TDT next_to_use next_to_clean buffer_info[next_to_clean] time_stamp next_to_watch <00000000245a4efb> jiffies desc.status <1048000> Fixes: d8f45be01dd9 ("igc: Use strict cycles for Qbv scheduling") Signed-off-by: Florian Kauer Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_tsn.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index b76ebfc10b1d..a9c08321aca9 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ 
b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -132,8 +132,28 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) wr32(IGC_STQT(i), ring->start_time); wr32(IGC_ENDQT(i), ring->end_time); - txqctl |= IGC_TXQCTL_STRICT_CYCLE | - IGC_TXQCTL_STRICT_END; + if (adapter->taprio_offload_enable) { + /* If taprio_offload_enable is set we are in "taprio" + * mode and we need to be strict about the + * cycles: only transmit a packet if it can be + * completed during that cycle. + * + * If taprio_offload_enable is NOT true when + * enabling TSN offload, the cycle should have + * no external effects, but is only used internally + * to adapt the base time register after a second + * has passed. + * + * Enabling strict mode in this case would + * unnecessarily prevent the transmission of + * certain packets (i.e. at the boundary of a + * second) and thus interfere with the launchtime + * feature that promises transmission at a + * certain point in time. + */ + txqctl |= IGC_TXQCTL_STRICT_CYCLE | + IGC_TXQCTL_STRICT_END; + } if (ring->launchtime_enable) txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT; -- cgit v1.2.3 From c1bca9ac0bcb355be11354c2e68bc7bf31f5ac5a Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 14 Jun 2023 16:07:13 +0200 Subject: igc: Fix launchtime before start of cycle It is possible (verified on a running system) that frames are processed by igc_tx_launchtime with a txtime before the start of the cycle (baset_est). However, the result of txtime - baset_est is written into a u32, leading to a wrap around to a positive number. The following launchtime > 0 check will only branch to executing launchtime = 0 if launchtime is already 0. Fix it by using a s32 before checking launchtime > 0. 
Fixes: db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") Signed-off-by: Florian Kauer Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 5d24930fed8f..4855caa3bae4 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1016,7 +1016,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, ktime_t base_time = adapter->base_time; ktime_t now = ktime_get_clocktai(); ktime_t baset_est, end_of_cycle; - u32 launchtime; + s32 launchtime; s64 n; n = div64_s64(ktime_sub_ns(now, base_time), cycle_time); -- cgit v1.2.3 From 0bcc62858d6ba62cbade957d69745e6adeed5f3d Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 14 Jun 2023 16:07:14 +0200 Subject: igc: Fix inserting of empty frame for launchtime The insertion of an empty frame was introduced with commit db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") in order to ensure that the current cycle has at least one packet if there is some packet to be scheduled for the next cycle. However, the current implementation does not properly check if a packet is already scheduled for the current cycle. Currently, an empty packet is always inserted if and only if txtime >= end_of_cycle && txtime > last_tx_cycle but since last_tx_cycle is always either the end of the current cycle (end_of_cycle) or the end of a previous cycle, the second part (txtime > last_tx_cycle) is always true unless txtime == last_tx_cycle. What actually needs to be checked here is if the last_tx_cycle was already written within the current cycle, so an empty frame should only be inserted if and only if txtime >= end_of_cycle && end_of_cycle > last_tx_cycle. 
This patch does not only avoid an unnecessary insertion, but it can actually be harmful to insert an empty packet if packets are already scheduled in the current cycle, because it can lead to a situation where the empty packet is actually processed as the first packet in the upcoming cycle shifting the packet with the first_flag even one cycle into the future, finally leading to a TX hang. The TX hang can be reproduced on a i225 with: sudo tc qdisc replace dev enp1s0 parent root handle 100 taprio \ num_tc 1 \ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ queues 1@0 \ base-time 0 \ sched-entry S 01 300000 \ flags 0x1 \ txtime-delay 500000 \ clockid CLOCK_TAI sudo tc qdisc replace dev enp1s0 parent 100:1 etf \ clockid CLOCK_TAI \ delta 500000 \ offload \ skip_sock_check and traffic generator sudo trafgen -i traffic.cfg -o enp1s0 --cpp -n0 -q -t1400ns with traffic.cfg #define ETH_P_IP 0x0800 { /* Ethernet Header */ 0x30, 0x1f, 0x9a, 0xd0, 0xf0, 0x0e, # MAC Dest - adapt as needed 0x24, 0x5e, 0xbe, 0x57, 0x2e, 0x36, # MAC Src - adapt as needed const16(ETH_P_IP), /* IPv4 Header */ 0b01000101, 0, # IPv4 version, IHL, TOS const16(1028), # IPv4 total length (UDP length + 20 bytes (IP header)) const16(2), # IPv4 ident 0b01000000, 0, # IPv4 flags, fragmentation off 64, # IPv4 TTL 17, # Protocol UDP csumip(14, 33), # IPv4 checksum /* UDP Header */ 10, 0, 48, 1, # IP Src - adapt as needed 10, 0, 48, 10, # IP Dest - adapt as needed const16(5555), # UDP Src Port const16(6666), # UDP Dest Port const16(1008), # UDP length (UDP header 8 bytes + payload length) csumudp(14, 34), # UDP checksum /* Payload */ fill('W', 1000), } and the observed message with that is for example igc 0000:01:00.0 enp1s0: Detected Tx Unit Hang Tx Queue <0> TDH <32> TDT <3c> next_to_use <3c> next_to_clean <32> buffer_info[next_to_clean] time_stamp next_to_watch <00000000632a1828> jiffies desc.status <1048000> Fixes: db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") Signed-off-by: Florian Kauer 
Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 4855caa3bae4..9f93f0f4f752 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1029,7 +1029,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, *first_flag = true; ring->last_ff_cycle = baset_est; - if (ktime_compare(txtime, ring->last_tx_cycle) > 0) + if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0) *insert_empty = true; } } -- cgit v1.2.3 From be7ecbe7ec7df7320db4b810fef438bf67144011 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 6 Jul 2023 16:10:09 +0800 Subject: net: fec: dynamically set the NETDEV_XDP_ACT_NDO_XMIT feature of XDP When an XDP program is installed or uninstalled, fec_restart() will be invoked to reset MAC and buffer descriptor rings. It's reasonable not to transmit any packet during the process of reset. However, the NETDEV_XDP_ACT_NDO_XMIT bit of xdp_features is enabled by default, that is to say, it's possible that the fec_enet_xdp_xmit() will be invoked even if the process of reset is not finished. In this case, the redirected XDP frames might be dropped and available transmit BDs may be incorrectly deemed insufficient. So this patch disables the NETDEV_XDP_ACT_NDO_XMIT feature by default and dynamically configures this feature when the bpf program is installed or uninstalled.
Fixes: e4ac7cc6e5a4 ("net: fec: turn on XDP features") Signed-off-by: Wei Fang Signed-off-by: Paolo Abeni --- drivers/net/ethernet/freescale/fec_main.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 8fbe47703d47..9ce0319b33c3 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3732,12 +3732,18 @@ static int fec_enet_bpf(struct net_device *dev, struct netdev_bpf *bpf) if (fep->quirks & FEC_QUIRK_SWAP_FRAME) return -EOPNOTSUPP; + if (!bpf->prog) + xdp_features_clear_redirect_target(dev); + if (is_run) { napi_disable(&fep->napi); netif_tx_disable(dev); } old_prog = xchg(&fep->xdp_prog, bpf->prog); + if (old_prog) + bpf_prog_put(old_prog); + fec_restart(dev); if (is_run) { @@ -3745,8 +3751,8 @@ static int fec_enet_bpf(struct net_device *dev, struct netdev_bpf *bpf) netif_tx_start_all_queues(dev); } - if (old_prog) - bpf_prog_put(old_prog); + if (bpf->prog) + xdp_features_set_redirect_target(dev, false); return 0; @@ -4016,8 +4022,7 @@ static int fec_enet_init(struct net_device *ndev) if (!(fep->quirks & FEC_QUIRK_SWAP_FRAME)) ndev->xdp_features = NETDEV_XDP_ACT_BASIC | - NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_NDO_XMIT; + NETDEV_XDP_ACT_REDIRECT; fec_restart(ndev); -- cgit v1.2.3 From 20f797399035a8052dbd7297fdbe094079a9482e Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 6 Jul 2023 16:10:10 +0800 Subject: net: fec: recycle pages for transmitted XDP frames Once the XDP frames have been successfully transmitted through the ndo_xdp_xmit() interface, it's the driver responsibility to free the frames so that the page_pool can recycle the pages and reuse them. However, this action is not implemented in the fec driver. This leads to a user-visible problem that the console will print the following warning log. 
[ 157.568851] page_pool_release_retry() stalled pool shutdown 1389 inflight 60 sec [ 217.983446] page_pool_release_retry() stalled pool shutdown 1389 inflight 120 sec [ 278.399006] page_pool_release_retry() stalled pool shutdown 1389 inflight 181 sec [ 338.812885] page_pool_release_retry() stalled pool shutdown 1389 inflight 241 sec [ 399.226946] page_pool_release_retry() stalled pool shutdown 1389 inflight 302 sec Therefore, to solve this issue, we free XDP frames via xdp_return_frame() while cleaning the tx BD ring. Fixes: 6d6b39f180b8 ("net: fec: add initial XDP support") Signed-off-by: Wei Fang Signed-off-by: Paolo Abeni --- drivers/net/ethernet/freescale/fec.h | 15 ++- drivers/net/ethernet/freescale/fec_main.c | 148 ++++++++++++++++++++---------- 2 files changed, 115 insertions(+), 48 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 9939ccafb556..8c0226d061fe 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -544,10 +544,23 @@ enum { XDP_STATS_TOTAL, }; +enum fec_txbuf_type { + FEC_TXBUF_T_SKB, + FEC_TXBUF_T_XDP_NDO, +}; + +struct fec_tx_buffer { + union { + struct sk_buff *skb; + struct xdp_frame *xdp; + }; + enum fec_txbuf_type type; +}; + struct fec_enet_priv_tx_q { struct bufdesc_prop bd; unsigned char *tx_bounce[TX_RING_SIZE]; - struct sk_buff *tx_skbuff[TX_RING_SIZE]; + struct fec_tx_buffer tx_buf[TX_RING_SIZE]; unsigned short tx_stop_threshold; unsigned short tx_wake_threshold; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 9ce0319b33c3..940d3afe1d24 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -397,7 +397,7 @@ static void fec_dump(struct net_device *ndev) fec16_to_cpu(bdp->cbd_sc), fec32_to_cpu(bdp->cbd_bufaddr), fec16_to_cpu(bdp->cbd_datlen), - txq->tx_skbuff[index]); + txq->tx_buf[index].skb); 
bdp = fec_enet_get_nextdesc(bdp, &txq->bd); index++; } while (bdp != txq->bd.base); @@ -654,7 +654,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, index = fec_enet_get_bd_index(last_bdp, &txq->bd); /* Save skb pointer */ - txq->tx_skbuff[index] = skb; + txq->tx_buf[index].skb = skb; /* Make sure the updates to rest of the descriptor are performed before * transferring ownership. @@ -672,9 +672,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq, skb_tx_timestamp(skb); - /* Make sure the update to bdp and tx_skbuff are performed before - * txq->bd.cur. - */ + /* Make sure the update to bdp is performed before txq->bd.cur. */ wmb(); txq->bd.cur = bdp; @@ -862,7 +860,7 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq, } /* Save skb pointer */ - txq->tx_skbuff[index] = skb; + txq->tx_buf[index].skb = skb; skb_tx_timestamp(skb); txq->bd.cur = bdp; @@ -952,16 +950,33 @@ static void fec_enet_bd_init(struct net_device *dev) for (i = 0; i < txq->bd.ring_size; i++) { /* Initialize the BD for every fragment in the page. 
*/ bdp->cbd_sc = cpu_to_fec16(0); - if (bdp->cbd_bufaddr && - !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) - dma_unmap_single(&fep->pdev->dev, - fec32_to_cpu(bdp->cbd_bufaddr), - fec16_to_cpu(bdp->cbd_datlen), - DMA_TO_DEVICE); - if (txq->tx_skbuff[i]) { - dev_kfree_skb_any(txq->tx_skbuff[i]); - txq->tx_skbuff[i] = NULL; + if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) { + if (bdp->cbd_bufaddr && + !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), + DMA_TO_DEVICE); + if (txq->tx_buf[i].skb) { + dev_kfree_skb_any(txq->tx_buf[i].skb); + txq->tx_buf[i].skb = NULL; + } + } else { + if (bdp->cbd_bufaddr) + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), + DMA_TO_DEVICE); + + if (txq->tx_buf[i].xdp) { + xdp_return_frame(txq->tx_buf[i].xdp); + txq->tx_buf[i].xdp = NULL; + } + + /* restore default tx buffer type: FEC_TXBUF_T_SKB */ + txq->tx_buf[i].type = FEC_TXBUF_T_SKB; } + bdp->cbd_bufaddr = cpu_to_fec32(0); bdp = fec_enet_get_nextdesc(bdp, &txq->bd); } @@ -1360,6 +1375,7 @@ static void fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) { struct fec_enet_private *fep; + struct xdp_frame *xdpf; struct bufdesc *bdp; unsigned short status; struct sk_buff *skb; @@ -1387,16 +1403,31 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) index = fec_enet_get_bd_index(bdp, &txq->bd); - skb = txq->tx_skbuff[index]; - txq->tx_skbuff[index] = NULL; - if (!IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) - dma_unmap_single(&fep->pdev->dev, - fec32_to_cpu(bdp->cbd_bufaddr), - fec16_to_cpu(bdp->cbd_datlen), - DMA_TO_DEVICE); - bdp->cbd_bufaddr = cpu_to_fec32(0); - if (!skb) - goto skb_done; + if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) { + skb = txq->tx_buf[index].skb; + txq->tx_buf[index].skb = NULL; + if (bdp->cbd_bufaddr && + !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr))) + 
dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), + DMA_TO_DEVICE); + bdp->cbd_bufaddr = cpu_to_fec32(0); + if (!skb) + goto tx_buf_done; + } else { + xdpf = txq->tx_buf[index].xdp; + if (bdp->cbd_bufaddr) + dma_unmap_single(&fep->pdev->dev, + fec32_to_cpu(bdp->cbd_bufaddr), + fec16_to_cpu(bdp->cbd_datlen), + DMA_TO_DEVICE); + bdp->cbd_bufaddr = cpu_to_fec32(0); + if (!xdpf) { + txq->tx_buf[index].type = FEC_TXBUF_T_SKB; + goto tx_buf_done; + } + } /* Check for errors. */ if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC | @@ -1415,21 +1446,11 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) ndev->stats.tx_carrier_errors++; } else { ndev->stats.tx_packets++; - ndev->stats.tx_bytes += skb->len; - } - /* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who - * are to time stamp the packet, so we still need to check time - * stamping enabled flag. - */ - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS && - fep->hwts_tx_en) && - fep->bufdesc_ex) { - struct skb_shared_hwtstamps shhwtstamps; - struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; - - fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps); - skb_tstamp_tx(skb, &shhwtstamps); + if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) + ndev->stats.tx_bytes += skb->len; + else + ndev->stats.tx_bytes += xdpf->len; } /* Deferred means some collisions occurred during transmit, @@ -1438,10 +1459,32 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id) if (status & BD_ENET_TX_DEF) ndev->stats.collisions++; - /* Free the sk buffer associated with this last transmit */ - dev_kfree_skb_any(skb); -skb_done: - /* Make sure the update to bdp and tx_skbuff are performed + if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) { + /* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who + * are to time stamp the packet, so we still need to check time + * stamping enabled flag. 
+ */ + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS && + fep->hwts_tx_en) && fep->bufdesc_ex) { + struct skb_shared_hwtstamps shhwtstamps; + struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp; + + fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps); + skb_tstamp_tx(skb, &shhwtstamps); + } + + /* Free the sk buffer associated with this last transmit */ + dev_kfree_skb_any(skb); + } else { + xdp_return_frame(xdpf); + + txq->tx_buf[index].xdp = NULL; + /* restore default tx buffer type: FEC_TXBUF_T_SKB */ + txq->tx_buf[index].type = FEC_TXBUF_T_SKB; + } + +tx_buf_done: + /* Make sure the update to bdp and tx_buf are performed * before dirty_tx */ wmb(); @@ -3249,9 +3292,19 @@ static void fec_enet_free_buffers(struct net_device *ndev) for (i = 0; i < txq->bd.ring_size; i++) { kfree(txq->tx_bounce[i]); txq->tx_bounce[i] = NULL; - skb = txq->tx_skbuff[i]; - txq->tx_skbuff[i] = NULL; - dev_kfree_skb(skb); + + if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) { + skb = txq->tx_buf[i].skb; + txq->tx_buf[i].skb = NULL; + dev_kfree_skb(skb); + } else { + if (txq->tx_buf[i].xdp) { + xdp_return_frame(txq->tx_buf[i].xdp); + txq->tx_buf[i].xdp = NULL; + } + + txq->tx_buf[i].type = FEC_TXBUF_T_SKB; + } } } } @@ -3817,7 +3870,8 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, ebdp->cbd_esc = cpu_to_fec32(estatus); } - txq->tx_skbuff[index] = NULL; + txq->tx_buf[index].type = FEC_TXBUF_T_XDP_NDO; + txq->tx_buf[index].xdp = frame; /* Make sure the updates to rest of the descriptor are performed before * transferring ownership. -- cgit v1.2.3 From 56b3c6ba53d0e9649ea5e4089b39cadde13aaef8 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 6 Jul 2023 16:10:11 +0800 Subject: net: fec: increase the size of tx ring and update tx_wake_threshold When the XDP feature is enabled and with heavy XDP frames to be transmitted, there is a considerable probability that available tx BDs are insufficient. 
This will lead to some XDP frames being discarded and the "NOT enough BD for SG!" error log will appear in the console (as shown below). [ 160.013112] fec 30be0000.ethernet eth0: NOT enough BD for SG! [ 160.023116] fec 30be0000.ethernet eth0: NOT enough BD for SG! [ 160.028926] fec 30be0000.ethernet eth0: NOT enough BD for SG! [ 160.038946] fec 30be0000.ethernet eth0: NOT enough BD for SG! [ 160.044758] fec 30be0000.ethernet eth0: NOT enough BD for SG! In the case of heavy XDP traffic, sometimes the speed of recycling tx BDs may be slower than the speed of sending XDP frames. There may be several specific reasons, such as the interrupt not being responded to in time, the efficiency of the NAPI callback function being too low because all the queues (tx queues and rx queues) share the same NAPI, and so on. After trying various methods, I think that increasing the size of the tx BD ring is simple and effective. Maybe the best resolution is to allocate a NAPI for each queue to improve the efficiency of the NAPI callback, but this change is a bit big and I didn't try this method. Perhaps this method will be implemented in a future patch. This patch also updates the tx_wake_threshold of the tx ring, which is related to the size of the tx ring in the previous logic. Otherwise, the tx_wake_threshold will be too high (403 BDs), which is more likely to impact the slow path in the case of heavy XDP traffic, because XDP path and slow path share the tx BD rings. According to Jakub's suggestion, the tx_wake_threshold is at least equal to tx_stop_threshold + 2 * MAX_SKB_FRAGS: if a queue of hundreds of entries is overflowing, we should be able to apply a hysteresis of a few tens of entries.
Fixes: 6d6b39f180b8 ("net: fec: add initial XDP support") Signed-off-by: Wei Fang Signed-off-by: Paolo Abeni --- drivers/net/ethernet/freescale/fec.h | 2 +- drivers/net/ethernet/freescale/fec_main.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 8c0226d061fe..63a053dea819 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -355,7 +355,7 @@ struct bufdesc_ex { #define RX_RING_SIZE (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES) #define FEC_ENET_TX_FRSIZE 2048 #define FEC_ENET_TX_FRPPG (PAGE_SIZE / FEC_ENET_TX_FRSIZE) -#define TX_RING_SIZE 512 /* Must be power of two */ +#define TX_RING_SIZE 1024 /* Must be power of two */ #define TX_RING_MOD_MASK 511 /* for this to work */ #define BD_ENET_RX_INT 0x00800000 diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 940d3afe1d24..c59576ab8c7a 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3349,8 +3349,7 @@ static int fec_enet_alloc_queue(struct net_device *ndev) fep->total_tx_ring_size += fep->tx_queue[i]->bd.ring_size; txq->tx_stop_threshold = FEC_MAX_SKB_DESCS; - txq->tx_wake_threshold = - (txq->bd.ring_size - txq->tx_stop_threshold) / 2; + txq->tx_wake_threshold = FEC_MAX_SKB_DESCS + 2 * MAX_SKB_FRAGS; txq->tso_hdrs = dma_alloc_coherent(&fep->pdev->dev, txq->bd.ring_size * TSO_HEADER_SIZE, -- cgit v1.2.3 From 84a10947198792d038527af9c3994782ecb37c82 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 6 Jul 2023 16:10:12 +0800 Subject: net: fec: use netdev_err_once() instead of netdev_err() In the case of heavy XDP traffic to be transmitted, the console will print the error log continuously if there are lack of enough BDs to accommodate the frames. The log looks like below. [ 160.013112] fec 30be0000.ethernet eth0: NOT enough BD for SG! 
[ 160.023116] fec 30be0000.ethernet eth0: NOT enough BD for SG! [ 160.028926] fec 30be0000.ethernet eth0: NOT enough BD for SG! [ 160.038946] fec 30be0000.ethernet eth0: NOT enough BD for SG! [ 160.044758] fec 30be0000.ethernet eth0: NOT enough BD for SG! Not only will this log be replicated and redundant, it will also degrade XDP performance. So we use netdev_err_once() instead of netdev_err() now. Fixes: 6d6b39f180b8 ("net: fec: add initial XDP support") Signed-off-by: Wei Fang Signed-off-by: Paolo Abeni --- drivers/net/ethernet/freescale/fec_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index c59576ab8c7a..ec9e4bdb0c06 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3836,7 +3836,7 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep, entries_free = fec_enet_get_free_txdesc_num(txq); if (entries_free < MAX_SKB_FRAGS + 1) { - netdev_err(fep->netdev, "NOT enough BD for SG!\n"); + netdev_err_once(fep->netdev, "NOT enough BD for SG!\n"); return -EBUSY; } -- cgit v1.2.3 From 8278ee2a2646b9acf747317895e47a640ba933c9 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Mon, 10 Jul 2023 16:00:27 +0530 Subject: octeontx2-pf: Add additional check for MCAM rules Due to hardware limitation, MCAM drop rule with ether_type == 802.1Q and vlan_id == 0 is not supported. Hence rejecting such rules. 
Fixes: dce677da57c0 ("octeontx2-pf: Add vlan-etype to ntuple filters") Signed-off-by: Suman Ghosh Link: https://lore.kernel.org/r/20230710103027.2244139-1-sumang@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c | 8 ++++++++ drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 15 +++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index 10e11262d48a..2d7713a1a153 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -872,6 +872,14 @@ static int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, return -EINVAL; vlan_etype = be16_to_cpu(fsp->h_ext.vlan_etype); + + /* Drop rule with vlan_etype == 802.1Q + * and vlan_id == 0 is not supported + */ + if (vlan_etype == ETH_P_8021Q && !fsp->m_ext.vlan_tci && + fsp->ring_cookie == RX_CLS_FLOW_DISC) + return -EINVAL; + /* Only ETH_P_8021Q and ETH_P_802AD types supported */ if (vlan_etype != ETH_P_8021Q && vlan_etype != ETH_P_8021AD) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 8a13df592af6..5e56b6c3e60a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -597,6 +597,21 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node, return -EOPNOTSUPP; } + if (!match.mask->vlan_id) { + struct flow_action_entry *act; + int i; + + flow_action_for_each(i, act, &rule->action) { + if (act->id == FLOW_ACTION_DROP) { + netdev_err(nic->netdev, + "vlan tpid 0x%x with vlan_id %d is not supported for DROP rule.\n", + ntohs(match.key->vlan_tpid), + match.key->vlan_id); + return -EOPNOTSUPP; + } + } + } + if (match.mask->vlan_id || match.mask->vlan_dei || 
match.mask->vlan_priority) { -- cgit v1.2.3 From 1e9cb763e9bacf0c932aa948f50dcfca6f519a26 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Mon, 10 Jul 2023 18:36:21 -0700 Subject: net: ena: fix shift-out-of-bounds in exponential backoff The ENA adapters on our instances occasionally reset. One recently logged a UBSAN failure to console in the process: UBSAN: shift-out-of-bounds in build/linux/drivers/net/ethernet/amazon/ena/ena_com.c:540:13 shift exponent 32 is too large for 32-bit type 'unsigned int' CPU: 28 PID: 70012 Comm: kworker/u72:2 Kdump: loaded not tainted 5.15.117 Hardware name: Amazon EC2 c5d.9xlarge/, BIOS 1.0 10/16/2017 Workqueue: ena ena_fw_reset_device [ena] Call Trace: dump_stack_lvl+0x4a/0x63 dump_stack+0x10/0x16 ubsan_epilogue+0x9/0x36 __ubsan_handle_shift_out_of_bounds.cold+0x61/0x10e ? __const_udelay+0x43/0x50 ena_delay_exponential_backoff_us.cold+0x16/0x1e [ena] wait_for_reset_state+0x54/0xa0 [ena] ena_com_dev_reset+0xc8/0x110 [ena] ena_down+0x3fe/0x480 [ena] ena_destroy_device+0xeb/0xf0 [ena] ena_fw_reset_device+0x30/0x50 [ena] process_one_work+0x22b/0x3d0 worker_thread+0x4d/0x3f0 ? process_one_work+0x3d0/0x3d0 kthread+0x12a/0x150 ? set_kthread_struct+0x50/0x50 ret_from_fork+0x22/0x30 Apparently, the reset delays are getting so large they can trigger a UBSAN panic. Looking at the code, the current timeout is capped at 5000us. Using a base value of 100us, the current code will overflow after (1<<29). Even at values before 32, this function wraps around, perhaps unintentionally. Cap the value of the exponent used for this backoff at (1<<16) which is larger than currently necessary, but large enough to support bigger values in the future. 
Cc: stable@vger.kernel.org Fixes: 4bb7f4cf60e3 ("net: ena: reduce driver load time") Signed-off-by: Krister Johansen Reviewed-by: Leon Romanovsky Reviewed-by: Shay Agroskin Link: https://lore.kernel.org/r/20230711013621.GE1926@templeofstupid.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_com.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 451c3a1b6255..633b321d7fdd 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -35,6 +35,8 @@ #define ENA_REGS_ADMIN_INTR_MASK 1 +#define ENA_MAX_BACKOFF_DELAY_EXP 16U + #define ENA_MIN_ADMIN_POLL_US 100 #define ENA_MAX_ADMIN_POLL_US 5000 @@ -536,6 +538,7 @@ static int ena_com_comp_status_to_errno(struct ena_com_admin_queue *admin_queue, static void ena_delay_exponential_backoff_us(u32 exp, u32 delay_us) { + exp = min_t(u32, exp, ENA_MAX_BACKOFF_DELAY_EXP); delay_us = max_t(u32, ENA_MIN_ADMIN_POLL_US, delay_us); delay_us = min_t(u32, delay_us * (1U << exp), ENA_MAX_ADMIN_POLL_US); usleep_range(delay_us, 2 * delay_us); -- cgit v1.2.3 From aa846677a9fb19a0f2c58154c140398aa92a87ba Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 11 Jul 2023 14:34:14 +0800 Subject: net: txgbe: fix eeprom calculation error For some device types like TXGBE_ID_XAUI, *checksum computed in txgbe_calc_eeprom_checksum() is larger than TXGBE_EEPROM_SUM. Remove the limit on the size of *checksum. 
Fixes: 049fe5365324 ("net: txgbe: Add operations to interact with firmware") Fixes: 5e2ea7801fac ("net: txgbe: Fix unsigned comparison to zero in txgbe_calc_eeprom_checksum()") Signed-off-by: Jiawen Wu Link: https://lore.kernel.org/r/20230711063414.3311-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/net/ethernet') diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c index 12405d71c5ee..0772eb14eabf 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c @@ -186,9 +186,6 @@ static int txgbe_calc_eeprom_checksum(struct wx *wx, u16 *checksum) if (eeprom_ptrs) kvfree(eeprom_ptrs); - if (*checksum > TXGBE_EEPROM_SUM) - return -EINVAL; - *checksum = TXGBE_EEPROM_SUM - *checksum; return 0; -- cgit v1.2.3