From 84214ab4689f962b4bfc47fc9a5838d25ac4274d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Apr 2023 15:30:42 +0200 Subject: igc: Enable and fix RX hash usage by netstack When function igc_rx_hash() was introduced in v4.20 via commit 0507ef8a0372 ("igc: Add transmit and receive fastpath and interrupt handlers"), the hardware wasn't configured to provide RSS hash, thus it made sense to not enable net_device NETIF_F_RXHASH feature bit. The NIC hardware was configured to enable RSS hash info in v5.2 via commit 2121c2712f82 ("igc: Add multiple receive queues control supporting"), but forgot to set the NETIF_F_RXHASH feature bit. The original implementation of igc_rx_hash() didn't extract the associated pkt_hash_type, but statically set PKT_HASH_TYPE_L3. The largest portions of this patch are about extracting the RSS Type from the hardware and mapping this to enum pkt_hash_types. This was based on Foxville i225 software user manual rev-1.3.1 and tested on Intel Ethernet Controller I225-LM (rev 03). For UDP it's worth noting that RSS (type) hashing have been disabled both for IPv4 and IPv6 (see IGC_MRQC_RSS_FIELD_IPV4_UDP + IGC_MRQC_RSS_FIELD_IPV6_UDP) because hardware RSS doesn't handle fragmented pkts well when enabled (can cause out-of-order). This results in PKT_HASH_TYPE_L3 for UDP packets, and hash value doesn't include UDP port numbers. Not being PKT_HASH_TYPE_L4, have the effect that netstack will do a software based hash calc calling into flow_dissect, but only when code calls skb_get_hash(), which doesn't necessary happen for local delivery. For QA verification testing I wrote a small bpftrace prog: [0] https://github.com/xdp-project/xdp-project/blob/master/areas/hints/monitor_skb_hash_on_dev.bt Fixes: 2121c2712f82 ("igc: Add multiple receive queues control supporting") Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: Song Yoong Siang Link: https://lore.kernel.org/bpf/168182464270.616355.11391652654430626584.stgit@firesoul --- drivers/net/ethernet/intel/igc/igc_main.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index ba49728be919..f6b03e0372e0 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1687,14 +1687,36 @@ static void igc_rx_checksum(struct igc_ring *ring, le32_to_cpu(rx_desc->wb.upper.status_error)); } +/* Mapping HW RSS Type to enum pkt_hash_types */ +static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = { + [IGC_RSS_TYPE_NO_HASH] = PKT_HASH_TYPE_L2, + [IGC_RSS_TYPE_HASH_TCP_IPV4] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_IPV4] = PKT_HASH_TYPE_L3, + [IGC_RSS_TYPE_HASH_TCP_IPV6] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_IPV6_EX] = PKT_HASH_TYPE_L3, + [IGC_RSS_TYPE_HASH_IPV6] = PKT_HASH_TYPE_L3, + [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_UDP_IPV4] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_UDP_IPV6] = PKT_HASH_TYPE_L4, + [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4, + [10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ + [11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask */ + [12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisons */ + [13] = PKT_HASH_TYPE_NONE, + [14] = PKT_HASH_TYPE_NONE, + [15] = PKT_HASH_TYPE_NONE, +}; + static inline void igc_rx_hash(struct igc_ring *ring, union igc_adv_rx_desc *rx_desc, struct sk_buff *skb) { - if (ring->netdev->features & NETIF_F_RXHASH) - skb_set_hash(skb, - le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), - PKT_HASH_TYPE_L3); + if (ring->netdev->features & NETIF_F_RXHASH) { + u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss); + u32 rss_type = igc_rss_type(rx_desc); + + skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]); + } } static void igc_rx_vlan(struct igc_ring *rx_ring, @@ -6551,6 +6573,7 @@ static int igc_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_TSO; netdev->features |= NETIF_F_TSO6; netdev->features |= NETIF_F_TSO_ECN; + netdev->features |= NETIF_F_RXHASH; netdev->features |= NETIF_F_RXCSUM; netdev->features |= NETIF_F_HW_CSUM; netdev->features |= NETIF_F_SCTP_CRC; -- cgit v1.2.3 From 73b7123de0cfa4f6609677e927ab02cb05b593c2 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Apr 2023 15:30:47 +0200 Subject: igc: Add igc_xdp_buff wrapper for xdp_buff in driver Driver specific metadata data for XDP-hints kfuncs are propagated via tail extending the struct xdp_buff with a locally scoped driver struct. Zero-Copy AF_XDP/XSK does similar tricks via struct xdp_buff_xsk. This xdp_buff_xsk struct contains a CB area (24 bytes) that can be used for extending the locally scoped driver into. The XSK_CHECK_PRIV_TYPE define catch size violations build time. The changes needed for AF_XDP zero-copy in igc_clean_rx_irq_zc() is done in next patch, because the member rx_desc isn't available at this point. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: Song Yoong Siang Link: https://lore.kernel.org/bpf/168182464779.616355.3761989884165609387.stgit@firesoul --- drivers/net/ethernet/intel/igc/igc.h | 5 +++++ drivers/net/ethernet/intel/igc/igc_main.c | 16 +++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index f7f9e217e7b4..76a5115aefc8 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -499,6 +499,11 @@ struct igc_rx_buffer { }; }; +/* context wrapper around xdp_buff to provide access to descriptor metadata */ +struct igc_xdp_buff { + struct xdp_buff xdp; +}; + struct igc_q_vector { struct igc_adapter *adapter; /* backlink */ void __iomem *itr_register; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index f6b03e0372e0..06bf8a7eec93 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2233,6 +2233,8 @@ static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count) if (!count) return ok; + XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff); + desc = IGC_RX_DESC(ring, i); bi = &ring->rx_buffer_info[i]; i -= ring->count; @@ -2517,8 +2519,8 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) union igc_adv_rx_desc *rx_desc; struct igc_rx_buffer *rx_buffer; unsigned int size, truesize; + struct igc_xdp_buff ctx; ktime_t timestamp = 0; - struct xdp_buff xdp; int pkt_offset = 0; void *pktbuf; @@ -2552,13 +2554,13 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) } if (!skb) { - xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq); - xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring), + xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq); + xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring), igc_rx_offset(rx_ring) + pkt_offset, size, true); - xdp_buff_clear_frags_flag(&xdp); + xdp_buff_clear_frags_flag(&ctx.xdp); - skb = igc_xdp_run_prog(adapter, &xdp); + skb = igc_xdp_run_prog(adapter, &ctx.xdp); } if (IS_ERR(skb)) { @@ -2580,9 +2582,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) } else if (skb) igc_add_rx_frag(rx_ring, rx_buffer, skb, size); else if (ring_uses_build_skb(rx_ring)) - skb = igc_build_skb(rx_ring, rx_buffer, &xdp); + skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp); else - skb = igc_construct_skb(rx_ring, rx_buffer, &xdp, + skb = igc_construct_skb(rx_ring, rx_buffer, &ctx.xdp, timestamp); /* exit if we failed to retrieve a buffer */ -- cgit v1.2.3 From 8416814fffa9cfa74c18da149f522dd9e1850987 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Apr 2023 15:30:52 +0200 Subject: igc: Add XDP hints kfuncs for RX hash This implements XDP hints kfunc for RX-hash (xmo_rx_hash). The HW rss hash type is handled via mapping table. This igc driver (default config) does L3 hashing for UDP packets (excludes UDP src/dest ports in hash calc). Meaning RSS hash type is L3 based. Tested that the igc_rss_type_num for UDP is either IGC_RSS_TYPE_HASH_IPV4 or IGC_RSS_TYPE_HASH_IPV6. This patch also updates AF_XDP zero-copy function igc_clean_rx_irq_zc() to use the xdp_buff wrapper struct igc_xdp_buff. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: Song Yoong Siang Link: https://lore.kernel.org/bpf/168182465285.616355.2701740913376314790.stgit@firesoul --- drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_main.c | 53 +++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 76a5115aefc8..c609a2e648f8 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -502,6 +502,7 @@ struct igc_rx_buffer { /* context wrapper around xdp_buff to provide access to descriptor metadata */ struct igc_xdp_buff { struct xdp_buff xdp; + union igc_adv_rx_desc *rx_desc; }; struct igc_q_vector { diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 06bf8a7eec93..c18486f46085 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2559,6 +2559,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) igc_rx_offset(rx_ring) + pkt_offset, size, true); xdp_buff_clear_frags_flag(&ctx.xdp); + ctx.rx_desc = rx_desc; skb = igc_xdp_run_prog(adapter, &ctx.xdp); } @@ -2685,6 +2686,15 @@ static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, napi_gro_receive(&q_vector->napi, skb); } +static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp) +{ + /* xdp_buff pointer used by ZC code path is alloc as xdp_buff_xsk. The + * igc_xdp_buff shares its layout with xdp_buff_xsk and private + * igc_xdp_buff fields fall into xdp_buff_xsk->cb + */ + return (struct igc_xdp_buff *)xdp; +} + static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) { struct igc_adapter *adapter = q_vector->adapter; @@ -2703,6 +2713,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) while (likely(total_packets < budget)) { union igc_adv_rx_desc *desc; struct igc_rx_buffer *bi; + struct igc_xdp_buff *ctx; ktime_t timestamp = 0; unsigned int size; int res; @@ -2720,6 +2731,9 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) bi = &ring->rx_buffer_info[ntc]; + ctx = xsk_buff_to_igc_ctx(bi->xdp); + ctx->rx_desc = desc; + if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, bi->xdp->data); @@ -6475,6 +6489,44 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) return value; } +/* Mapping HW RSS Type to enum xdp_rss_hash_type */ +static enum xdp_rss_hash_type igc_xdp_rss_type[IGC_RSS_TYPE_MAX_TABLE] = { + [IGC_RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_L2, + [IGC_RSS_TYPE_HASH_TCP_IPV4] = XDP_RSS_TYPE_L4_IPV4_TCP, + [IGC_RSS_TYPE_HASH_IPV4] = XDP_RSS_TYPE_L3_IPV4, + [IGC_RSS_TYPE_HASH_TCP_IPV6] = XDP_RSS_TYPE_L4_IPV6_TCP, + [IGC_RSS_TYPE_HASH_IPV6_EX] = XDP_RSS_TYPE_L3_IPV6_EX, + [IGC_RSS_TYPE_HASH_IPV6] = XDP_RSS_TYPE_L3_IPV6, + [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX, + [IGC_RSS_TYPE_HASH_UDP_IPV4] = XDP_RSS_TYPE_L4_IPV4_UDP, + [IGC_RSS_TYPE_HASH_UDP_IPV6] = XDP_RSS_TYPE_L4_IPV6_UDP, + [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX, + [10] = XDP_RSS_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW */ + [11] = XDP_RSS_TYPE_NONE, /* keep array sized for SW bit-mask */ + [12] = XDP_RSS_TYPE_NONE, /* to handle future HW revisons */ + [13] = XDP_RSS_TYPE_NONE, + [14] = XDP_RSS_TYPE_NONE, + [15] = XDP_RSS_TYPE_NONE, +}; + +static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type) +{ + const struct igc_xdp_buff *ctx = (void *)_ctx; + + if (!(ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)) + return -ENODATA; + + *hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss); + *rss_type = igc_xdp_rss_type[igc_rss_type(ctx->rx_desc)]; + + return 0; +} + +static const struct xdp_metadata_ops igc_xdp_metadata_ops = { + .xmo_rx_hash = igc_xdp_rx_hash, +}; + /** * igc_probe - Device Initialization Routine * @pdev: PCI device information struct @@ -6548,6 +6600,7 @@ static int igc_probe(struct pci_dev *pdev, hw->hw_addr = adapter->io_addr; netdev->netdev_ops = &igc_netdev_ops; + netdev->xdp_metadata_ops = &igc_xdp_metadata_ops; igc_ethtool_set_ops(netdev); netdev->watchdog_timeo = 5 * HZ; -- cgit v1.2.3 From d677266755c6e55c43b6755673a1eeae3d452e87 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Apr 2023 15:30:57 +0200 Subject: igc: Add XDP hints kfuncs for RX timestamp The NIC hardware RX timestamping mechanism adds an optional tailored header before the MAC header containing packet reception time. Optional depending on RX descriptor TSIP status bit (IGC_RXDADV_STAT_TSIP). In case this bit is set driver does offset adjustments to packet data start and extracts the timestamp. The timestamp need to be extracted before invoking the XDP bpf_prog, because this area just before the packet is also accessible by XDP via data_meta context pointer (and helper bpf_xdp_adjust_meta). Thus, an XDP bpf_prog can potentially overwrite this and corrupt data that we want to extract with the new kfunc for reading the timestamp. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann Acked-by: Song Yoong Siang Link: https://lore.kernel.org/bpf/168182465791.616355.2583922957423587914.stgit@firesoul --- drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_main.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index c609a2e648f8..18d4af934d8c 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -503,6 +503,7 @@ struct igc_rx_buffer { struct igc_xdp_buff { struct xdp_buff xdp; union igc_adv_rx_desc *rx_desc; + ktime_t rx_ts; /* data indication bit IGC_RXDADV_STAT_TSIP */ }; struct igc_q_vector { diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index c18486f46085..b78e0e6562c8 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2549,6 +2549,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) { timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, pktbuf); + ctx.rx_ts = timestamp; pkt_offset = IGC_TS_HDR_LEN; size -= IGC_TS_HDR_LEN; } @@ -2737,6 +2738,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) { timestamp = igc_ptp_rx_pktstamp(q_vector->adapter, bi->xdp->data); + ctx->rx_ts = timestamp; bi->xdp->data += IGC_TS_HDR_LEN; @@ -6523,8 +6525,22 @@ static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, return 0; } +static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp) +{ + const struct igc_xdp_buff *ctx = (void *)_ctx; + + if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) { + *timestamp = ctx->rx_ts; + + return 0; + } + + return -ENODATA; +} + static const struct xdp_metadata_ops igc_xdp_metadata_ops = { .xmo_rx_hash = igc_xdp_rx_hash, + .xmo_rx_timestamp = igc_xdp_rx_timestamp, }; /** -- cgit v1.2.3 From 95b681485563c64585de78662ee52d06b7fa47d9 Mon Sep 17 00:00:00 2001 From: Kurt Kanzenbach Date: Wed, 12 Apr 2023 09:36:11 +0200 Subject: igc: Avoid transmit queue timeout for XDP High XDP load triggers the netdev watchdog: |NETDEV WATCHDOG: enp3s0 (igc): transmit queue 2 timed out The reason is the Tx queue transmission start (txq->trans_start) is not updated in XDP code path. Therefore, add it for all XDP transmission functions. Signed-off-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 38d113b48111..c5ef1edcf548 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2411,6 +2411,8 @@ static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) nq = txring_txq(ring); __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); res = igc_xdp_init_tx_descriptor(ring, xdpf); __netif_tx_unlock(nq); return res; @@ -2829,6 +2831,9 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring) __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); + budget = igc_desc_unused(ring); while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) { @@ -6354,6 +6359,9 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + txq_trans_cond_update(nq); + drops = 0; for (i = 0; i < num_frames; i++) { int err; -- cgit v1.2.3 From 2d800bc500fb3fb07a0fb42e2d0a1356fb9e1e8f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 30 May 2023 12:19:45 +0300 Subject: net/sched: taprio: replace tc_taprio_qopt_offload :: enable with a "cmd" enum Inspired from struct flow_cls_offload :: cmd, in order for taprio to be able to report statistics (which is future work), it seems that we need to drill one step further with the ndo_setup_tc(TC_SETUP_QDISC_TAPRIO) multiplexing, and pass the command as part of the common portion of the muxed structure. Since we already have an "enable" variable in tc_taprio_qopt_offload, refactor all drivers to check for "cmd" instead of "enable", and reject every other command except "replace" and "destroy" - to be future proof. Signed-off-by: Vladimir Oltean Reviewed-by: Horatiu Vultur # for lan966x Acked-by: Kurt Kanzenbach # hellcreek Reviewed-by: Muhammad Husaini Zulkifli Reviewed-by: Gerhard Engleder Signed-off-by: David S. Miller --- drivers/net/dsa/hirschmann/hellcreek.c | 14 +++++++++----- drivers/net/dsa/ocelot/felix_vsc9959.c | 4 +++- drivers/net/dsa/sja1105/sja1105_tas.c | 7 +++++-- drivers/net/ethernet/engleder/tsnep_selftests.c | 12 ++++++------ drivers/net/ethernet/engleder/tsnep_tc.c | 4 +++- drivers/net/ethernet/freescale/enetc/enetc_qos.c | 6 +++++- drivers/net/ethernet/intel/igc/igc_main.c | 13 +++++++++++-- drivers/net/ethernet/microchip/lan966x/lan966x_tc.c | 10 ++++++++-- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 7 +++++-- drivers/net/ethernet/ti/am65-cpsw-qos.c | 11 ++++++++--- include/net/pkt_sched.h | 7 ++++++- net/sched/sch_taprio.c | 4 ++-- 12 files changed, 71 insertions(+), 28 deletions(-) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 595a548bb0a8..af50001ccdd4 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -1885,13 +1885,17 @@ static int hellcreek_port_setup_tc(struct dsa_switch *ds, int port, case TC_SETUP_QDISC_TAPRIO: { struct tc_taprio_qopt_offload *taprio = type_data; - if (!hellcreek_validate_schedule(hellcreek, taprio)) - return -EOPNOTSUPP; + switch (taprio->cmd) { + case TAPRIO_CMD_REPLACE: + if (!hellcreek_validate_schedule(hellcreek, taprio)) + return -EOPNOTSUPP; - if (taprio->enable) return hellcreek_port_set_schedule(ds, port, taprio); - - return hellcreek_port_del_schedule(ds, port); + case TAPRIO_CMD_DESTROY: + return hellcreek_port_del_schedule(ds, port); + default: + return -EOPNOTSUPP; + } } default: return -EOPNOTSUPP; diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 030738fef60e..5de6a27052fc 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1411,7 +1411,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, mutex_lock(&ocelot->tas_lock); - if (!taprio->enable) { + if (taprio->cmd == TAPRIO_CMD_DESTROY) { ocelot_port_mqprio(ocelot, port, &taprio->mqprio); ocelot_rmw_rix(ocelot, 0, QSYS_TAG_CONFIG_ENABLE, QSYS_TAG_CONFIG, port); @@ -1423,6 +1423,8 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, mutex_unlock(&ocelot->tas_lock); return 0; + } else if (taprio->cmd != TAPRIO_CMD_REPLACE) { + return -EOPNOTSUPP; } ret = ocelot_port_mqprio(ocelot, port, &taprio->mqprio); diff --git a/drivers/net/dsa/sja1105/sja1105_tas.c b/drivers/net/dsa/sja1105/sja1105_tas.c index e6153848a950..d7818710bc02 100644 --- a/drivers/net/dsa/sja1105/sja1105_tas.c +++ b/drivers/net/dsa/sja1105/sja1105_tas.c @@ -516,10 +516,11 @@ int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port, /* Can't change an already configured port (must delete qdisc first). * Can't delete the qdisc from an unconfigured port. */ - if (!!tas_data->offload[port] == admin->enable) + if ((!!tas_data->offload[port] && admin->cmd == TAPRIO_CMD_REPLACE) || + (!tas_data->offload[port] && admin->cmd == TAPRIO_CMD_DESTROY)) return -EINVAL; - if (!admin->enable) { + if (admin->cmd == TAPRIO_CMD_DESTROY) { taprio_offload_free(tas_data->offload[port]); tas_data->offload[port] = NULL; @@ -528,6 +529,8 @@ int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port, return rc; return sja1105_static_config_reload(priv, SJA1105_SCHEDULING); + } else if (admin->cmd != TAPRIO_CMD_REPLACE) { + return -EOPNOTSUPP; } /* The cycle time extension is the amount of time the last cycle from diff --git a/drivers/net/ethernet/engleder/tsnep_selftests.c b/drivers/net/ethernet/engleder/tsnep_selftests.c index 1581d6b22232..8a9145f93147 100644 --- a/drivers/net/ethernet/engleder/tsnep_selftests.c +++ b/drivers/net/ethernet/engleder/tsnep_selftests.c @@ -329,7 +329,7 @@ static bool disable_taprio(struct tsnep_adapter *adapter) int retval; memset(&qopt, 0, sizeof(qopt)); - qopt.enable = 0; + qopt.cmd = TAPRIO_CMD_DESTROY; retval = tsnep_tc_setup(adapter->netdev, TC_SETUP_QDISC_TAPRIO, &qopt); if (retval) return false; @@ -360,7 +360,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter) for (i = 0; i < 255; i++) qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES; - qopt->enable = 1; + qopt->cmd = TAPRIO_CMD_REPLACE; qopt->base_time = ktime_set(0, 0); qopt->cycle_time = 1500000; qopt->cycle_time_extension = 0; @@ -382,7 +382,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter) if (!run_taprio(adapter, qopt, 100)) goto failed; - qopt->enable = 1; + qopt->cmd = TAPRIO_CMD_REPLACE; qopt->base_time = ktime_set(0, 0); qopt->cycle_time = 411854; qopt->cycle_time_extension = 0; @@ -406,7 +406,7 @@ static bool tsnep_test_taprio(struct tsnep_adapter *adapter) if (!run_taprio(adapter, qopt, 100)) goto failed; - qopt->enable = 1; + qopt->cmd = TAPRIO_CMD_REPLACE; qopt->base_time = ktime_set(0, 0); delay_base_time(adapter, qopt, 12); qopt->cycle_time = 125000; @@ -457,7 +457,7 @@ static bool tsnep_test_taprio_change(struct tsnep_adapter *adapter) for (i = 0; i < 255; i++) qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES; - qopt->enable = 1; + qopt->cmd = TAPRIO_CMD_REPLACE; qopt->base_time = ktime_set(0, 0); qopt->cycle_time = 100000; qopt->cycle_time_extension = 0; @@ -610,7 +610,7 @@ static bool tsnep_test_taprio_extension(struct tsnep_adapter *adapter) for (i = 0; i < 255; i++) qopt->entries[i].command = TC_TAPRIO_CMD_SET_GATES; - qopt->enable = 1; + qopt->cmd = TAPRIO_CMD_REPLACE; qopt->base_time = ktime_set(0, 0); qopt->cycle_time = 100000; qopt->cycle_time_extension = 50000; diff --git a/drivers/net/ethernet/engleder/tsnep_tc.c b/drivers/net/ethernet/engleder/tsnep_tc.c index d083e6684f12..745b191a5540 100644 --- a/drivers/net/ethernet/engleder/tsnep_tc.c +++ b/drivers/net/ethernet/engleder/tsnep_tc.c @@ -325,7 +325,7 @@ static int tsnep_taprio(struct tsnep_adapter *adapter, if (!adapter->gate_control) return -EOPNOTSUPP; - if (!qopt->enable) { + if (qopt->cmd == TAPRIO_CMD_DESTROY) { /* disable gate control if active */ mutex_lock(&adapter->gate_control_lock); @@ -337,6 +337,8 @@ static int tsnep_taprio(struct tsnep_adapter *adapter, mutex_unlock(&adapter->gate_control_lock); return 0; + } else if (qopt->cmd != TAPRIO_CMD_REPLACE) { + return -EOPNOTSUPP; } retval = tsnep_validate_gcl(qopt); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 83c27bbbc6ed..7aad824f4da7 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -65,7 +65,7 @@ static int enetc_setup_taprio(struct net_device *ndev, gcl_len = admin_conf->num_entries; tge = enetc_rd(hw, ENETC_PTGCR); - if (!admin_conf->enable) { + if (admin_conf->cmd == TAPRIO_CMD_DESTROY) { enetc_wr(hw, ENETC_PTGCR, tge & ~ENETC_PTGCR_TGE); enetc_reset_ptcmsdur(hw); @@ -138,6 +138,10 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data) struct enetc_ndev_priv *priv = netdev_priv(ndev); int err, i; + if (taprio->cmd != TAPRIO_CMD_REPLACE && + taprio->cmd != TAPRIO_CMD_DESTROY) + return -EOPNOTSUPP; + /* TSD and Qbv are mutually exclusive in hardware */ for (i = 0; i < priv->num_tx_rings; i++) if (priv->tx_ring[i]->tsd_enable) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index c5ef1edcf548..88145c30c919 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6113,9 +6113,18 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, size_t n; int i; - adapter->qbv_enable = qopt->enable; + switch (qopt->cmd) { + case TAPRIO_CMD_REPLACE: + adapter->qbv_enable = true; + break; + case TAPRIO_CMD_DESTROY: + adapter->qbv_enable = false; + break; + default: + return -EOPNOTSUPP; + } - if (!qopt->enable) + if (!adapter->qbv_enable) return igc_tsn_clear_schedule(adapter); if (qopt->base_time < 0) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c index cf0cc7562d04..ee652f2d2359 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc.c @@ -21,8 +21,14 @@ static int lan966x_tc_setup_qdisc_mqprio(struct lan966x_port *port, static int lan966x_tc_setup_qdisc_taprio(struct lan966x_port *port, struct tc_taprio_qopt_offload *taprio) { - return taprio->enable ? lan966x_taprio_add(port, taprio) : - lan966x_taprio_del(port); + switch (taprio->cmd) { + case TAPRIO_CMD_REPLACE: + return lan966x_taprio_add(port, taprio); + case TAPRIO_CMD_DESTROY: + return lan966x_taprio_del(port); + default: + return -EOPNOTSUPP; + } } static int lan966x_tc_setup_qdisc_tbf(struct lan966x_port *port, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 9d55226479b4..ac41ef4cbd2f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -966,8 +966,11 @@ static int tc_setup_taprio(struct stmmac_priv *priv, return -EOPNOTSUPP; } - if (!qopt->enable) + if (qopt->cmd == TAPRIO_CMD_DESTROY) goto disable; + else if (qopt->cmd != TAPRIO_CMD_REPLACE) + return -EOPNOTSUPP; + if (qopt->num_entries >= dep) return -EINVAL; if (!qopt->cycle_time) @@ -988,7 +991,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, mutex_lock(&priv->plat->est->lock); priv->plat->est->gcl_size = size; - priv->plat->est->enable = qopt->enable; + priv->plat->est->enable = qopt->cmd == TAPRIO_CMD_REPLACE; mutex_unlock(&priv->plat->est->lock); for (i = 0; i < size; i++) { diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c index 3a908db6e5b2..eced87fa261c 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-qos.c +++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c @@ -450,7 +450,7 @@ static int am65_cpsw_configure_taprio(struct net_device *ndev, am65_cpsw_est_update_state(ndev); - if (!est_new->taprio.enable) { + if (est_new->taprio.cmd == TAPRIO_CMD_DESTROY) { am65_cpsw_stop_est(ndev); return ret; } @@ -476,7 +476,7 @@ static int am65_cpsw_configure_taprio(struct net_device *ndev, am65_cpsw_est_set_sched_list(ndev, est_new); am65_cpsw_port_est_assign_buf_num(ndev, est_new->buf); - am65_cpsw_est_set(ndev, est_new->taprio.enable); + am65_cpsw_est_set(ndev, est_new->taprio.cmd == TAPRIO_CMD_REPLACE); if (tact == TACT_PROG) { ret = am65_cpsw_timer_set(ndev, est_new); @@ -520,7 +520,7 @@ static int am65_cpsw_set_taprio(struct net_device *ndev, void *type_data) am65_cpsw_cp_taprio(taprio, &est_new->taprio); ret = am65_cpsw_configure_taprio(ndev, est_new); if (!ret) { - if (taprio->enable) { + if (taprio->cmd == TAPRIO_CMD_REPLACE) { devm_kfree(&ndev->dev, port->qos.est_admin); port->qos.est_admin = est_new; @@ -564,8 +564,13 @@ purge_est: static int am65_cpsw_setup_taprio(struct net_device *ndev, void *type_data) { struct am65_cpsw_port *port = am65_ndev_to_port(ndev); + struct tc_taprio_qopt_offload *taprio = type_data; struct am65_cpsw_common *common = port->common; + if (taprio->cmd != TAPRIO_CMD_REPLACE && + taprio->cmd != TAPRIO_CMD_DESTROY) + return -EOPNOTSUPP; + if (!IS_ENABLED(CONFIG_TI_AM65_CPSW_TAS)) return -ENODEV; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index f436688b6efc..f5fb11da357b 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -185,6 +185,11 @@ struct tc_taprio_caps { bool broken_mqprio:1; }; +enum tc_taprio_qopt_cmd { + TAPRIO_CMD_REPLACE, + TAPRIO_CMD_DESTROY, +}; + struct tc_taprio_sched_entry { u8 command; /* TC_TAPRIO_CMD_* */ @@ -196,7 +201,7 @@ struct tc_taprio_sched_entry { struct tc_taprio_qopt_offload { struct tc_mqprio_qopt_offload mqprio; struct netlink_ext_ack *extack; - u8 enable; + enum tc_taprio_qopt_cmd cmd; ktime_t base_time; u64 cycle_time; u64 cycle_time_extension; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index d29e6785854d..06bf4c6355a5 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1524,7 +1524,7 @@ static int taprio_enable_offload(struct net_device *dev, "Not enough memory for enabling offload mode"); return -ENOMEM; } - offload->enable = 1; + offload->cmd = TAPRIO_CMD_REPLACE; offload->extack = extack; mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt); offload->mqprio.extack = extack; @@ -1572,7 +1572,7 @@ static int taprio_disable_offload(struct net_device *dev, "Not enough memory to disable offload mode"); return -ENOMEM; } - offload->enable = 0; + offload->cmd = TAPRIO_CMD_DESTROY; err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); if (err < 0) { -- cgit v1.2.3 From e43516f5978d11d36511ce63d31d1da4db916510 Mon Sep 17 00:00:00 2001 From: Muhammad Husaini Zulkifli Date: Mon, 15 May 2023 23:49:36 +0800 Subject: igc: Clean the TX buffer and TX descriptor ring There could be a race condition during link down where interrupt being generated and igc_clean_tx_irq() been called to perform the TX completion. Properly clear the TX buffer/descriptor ring and disable the TX Queue ring in igc_free_tx_resources() to avoid that. Kernel trace: [ 108.237177] Hardware name: Intel Corporation Tiger Lake Client Platform/TigerLake U DDR4 SODIMM RVP, BIOS TGLIFUI1.R00.4204.A00.2105270302 05/27/2021 [ 108.237178] RIP: 0010:refcount_warn_saturate+0x55/0x110 [ 108.242143] RSP: 0018:ffff9e7980003db0 EFLAGS: 00010286 [ 108.245555] Code: 84 bc 00 00 00 c3 cc cc cc cc 85 f6 74 46 80 3d 20 8c 4d 01 00 75 ee 48 c7 c7 88 f4 03 ab c6 05 10 8c 4d 01 01 e8 0b 10 96 ff <0f> 0b c3 cc cc cc cc 80 3d fc 8b 4d 01 00 75 cb 48 c7 c7 b0 f4 03 [ 108.250434] [ 108.250434] RSP: 0018:ffff9e798125f910 EFLAGS: 00010286 [ 108.254358] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 108.259325] [ 108.259325] RAX: 0000000000000000 RBX: ffff8ddb935b8000 RCX: 0000000000000027 [ 108.261868] RDX: ffff8de250a28800 RSI: ffff8de250a1c580 RDI: ffff8de250a1c580 [ 108.265538] RDX: 0000000000000027 RSI: 0000000000000002 RDI: ffff8de250a9c588 [ 108.265539] RBP: ffff8ddb935b8000 R08: ffffffffab2655a0 R09: ffff9e798125f898 [ 108.267914] RBP: ffff8ddb8a5b8d80 R08: 0000005648eba354 R09: 0000000000000000 [ 108.270196] R10: 0000000000000001 R11: 000000002d2d2d2d R12: ffff9e798125f948 [ 108.270197] R13: ffff9e798125fa1c R14: ffff8ddb8a5b8d80 R15: 7fffffffffffffff [ 108.273001] R10: 000000002d2d2d2d R11: 000000002d2d2d2d R12: ffff8ddb8a5b8ed4 [ 108.276410] FS: 00007f605851b740(0000) GS:ffff8de250a80000(0000) knlGS:0000000000000000 [ 108.280597] R13: 00000000000002ac R14: 00000000ffffff99 R15: ffff8ddb92561b80 [ 108.282966] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 108.282967] CR2: 00007f053c039248 CR3: 0000000185850003 CR4: 0000000000f70ee0 [ 108.286206] FS: 0000000000000000(0000) GS:ffff8de250a00000(0000) knlGS:0000000000000000 [ 108.289701] PKRU: 55555554 [ 108.289702] Call Trace: [ 108.289704] [ 108.293977] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 108.297562] sock_alloc_send_pskb+0x20c/0x240 [ 108.301494] CR2: 00007f053c03a168 CR3: 0000000184394002 CR4: 0000000000f70ef0 [ 108.301495] PKRU: 55555554 [ 108.306464] __ip_append_data.isra.0+0x96f/0x1040 [ 108.309441] Call Trace: [ 108.309443] ? __pfx_ip_generic_getfrag+0x10/0x10 [ 108.314927] [ 108.314928] sock_wfree+0x1c7/0x1d0 [ 108.318078] ? __pfx_ip_generic_getfrag+0x10/0x10 [ 108.320276] skb_release_head_state+0x32/0x90 [ 108.324812] ip_make_skb+0xf6/0x130 [ 108.327188] skb_release_all+0x16/0x40 [ 108.330775] ? udp_sendmsg+0x9f3/0xcb0 [ 108.332626] napi_consume_skb+0x48/0xf0 [ 108.334134] ? xfrm_lookup_route+0x23/0xb0 [ 108.344285] igc_poll+0x787/0x1620 [igc] [ 108.346659] udp_sendmsg+0x9f3/0xcb0 [ 108.360010] ? ttwu_do_activate+0x40/0x220 [ 108.365237] ? __pfx_ip_generic_getfrag+0x10/0x10 [ 108.366744] ? try_to_wake_up+0x289/0x5e0 [ 108.376987] ? sock_sendmsg+0x81/0x90 [ 108.395698] ? __pfx_process_timeout+0x10/0x10 [ 108.395701] sock_sendmsg+0x81/0x90 [ 108.409052] __napi_poll+0x29/0x1c0 [ 108.414279] ____sys_sendmsg+0x284/0x310 [ 108.419507] net_rx_action+0x257/0x2d0 [ 108.438216] ___sys_sendmsg+0x7c/0xc0 [ 108.439723] __do_softirq+0xc1/0x2a8 [ 108.444950] ? finish_task_switch+0xb4/0x2f0 [ 108.452077] irq_exit_rcu+0xa9/0xd0 [ 108.453584] ? __schedule+0x372/0xd00 [ 108.460713] common_interrupt+0x84/0xa0 [ 108.467840] ? clockevents_program_event+0x95/0x100 [ 108.474968] [ 108.482096] ? do_nanosleep+0x88/0x130 [ 108.489224] [ 108.489225] asm_common_interrupt+0x26/0x40 [ 108.496353] ? __rseq_handle_notify_resume+0xa9/0x4f0 [ 108.503478] RIP: 0010:cpu_idle_poll+0x2c/0x100 [ 108.510607] __sys_sendmsg+0x5d/0xb0 [ 108.518687] Code: 05 e1 d9 c8 00 65 8b 15 de 64 85 55 85 c0 7f 57 e8 b9 ef ff ff fb 65 48 8b 1c 25 00 cc 02 00 48 8b 03 a8 08 74 0b eb 1c f3 90 <48> 8b 03 a8 08 75 13 8b 05 77 63 cd 00 85 c0 75 ed e8 ce ec ff ff [ 108.525817] do_syscall_64+0x44/0xa0 [ 108.531563] RSP: 0018:ffffffffab203e70 EFLAGS: 00000202 [ 108.538693] entry_SYSCALL_64_after_hwframe+0x72/0xdc [ 108.546775] [ 108.546777] RIP: 0033:0x7f605862b7f7 [ 108.549495] RAX: 0000000000000001 RBX: ffffffffab20c940 RCX: 000000000000003b [ 108.551955] Code: 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [ 108.554068] RDX: 4000000000000000 RSI: 000000002da97f6a RDI: 00000000002b8ff4 [ 108.559816] RSP: 002b:00007ffc99264058 EFLAGS: 00000246 [ 108.564178] RBP: 0000000000000000 R08: 00000000002b8ff4 R09: ffff8ddb01554c80 [ 108.571302] ORIG_RAX: 000000000000002e [ 108.571303] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f605862b7f7 [ 108.574023] R10: 000000000000015b R11: 000000000000000f R12: ffffffffab20c940 [ 108.574024] R13: 0000000000000000 R14: ffff8de26fbeef40 R15: ffffffffab20c940 [ 108.578727] RDX: 0000000000000000 RSI: 00007ffc992640a0 RDI: 0000000000000003 [ 108.578728] RBP: 00007ffc99264110 R08: 0000000000000000 R09: 175f48ad1c3a9c00 [ 108.581187] do_idle+0x62/0x230 [ 108.585890] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffc992642d8 [ 108.585891] R13: 00005577814ab2ba R14: 00005577814addf0 R15: 00007f605876d000 [ 108.587920] cpu_startup_entry+0x1d/0x20 [ 108.591422] [ 108.596127] rest_init+0xc5/0xd0 [ 108.600490] ---[ end trace 0000000000000000 ]--- Test Setup: DUT: - Change mac address on DUT Side. Ensure NIC not having same MAC Address - Running udp_tai on DUT side. Let udp_tai running throughout the test Example: ./udp_tai -i enp170s0 -P 100000 -p 90 -c 1 -t 0 -u 30004 Host: - Perform link up/down every 5 second. Result: Kernel panic will happen on DUT Side. Fixes: 13b5b7fd6a4a ("igc: Add support for Tx/Rx rings") Signed-off-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Reviewed-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 1c4676882082..f986e88be5c1 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -254,6 +254,13 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) /* reset BQL for queue */ netdev_tx_reset_queue(txring_txq(tx_ring)); + /* Zero out the buffer ring */ + memset(tx_ring->tx_buffer_info, 0, + sizeof(*tx_ring->tx_buffer_info) * tx_ring->count); + + /* Zero out the descriptor ring */ + memset(tx_ring->desc, 0, tx_ring->size); + /* reset next_to_use and next_to_clean */ tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; @@ -267,7 +274,7 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring) */ void igc_free_tx_resources(struct igc_ring *tx_ring) { - igc_clean_tx_ring(tx_ring); + igc_disable_tx_ring(tx_ring); vfree(tx_ring->tx_buffer_info); tx_ring->tx_buffer_info = NULL; -- cgit v1.2.3 From c080fe262f9e73a00934b70c16b1479cf40cd2bd Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Mon, 17 Apr 2023 15:18:39 -0700 Subject: igc: Fix possible system crash when loading module Guarantee that when probe() is run again, PTM and PCI busmaster will be in the same state as it was if the driver was never loaded. Avoid an i225/i226 hardware issue that PTM requests can be made even though PCI bus mastering is not enabled. These unexpected PTM requests can crash some systems. So, "force" disable PTM and busmastering before removing the driver, so they can be re-enabled in the right order during probe(). This is more like a workaround and should be applicable for i225 and i226, in any platform. Fixes: 1b5d73fb8624 ("igc: Enable PCIe PTM") Signed-off-by: Vinicius Costa Gomes Reviewed-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index f986e88be5c1..fa764190f270 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6730,6 +6730,9 @@ static void igc_remove(struct pci_dev *pdev) igc_ptp_stop(adapter); + pci_disable_ptm(pdev); + pci_clear_master(pdev); + set_bit(__IGC_DOWN, &adapter->state); del_timer_sync(&adapter->watchdog_timer); -- cgit v1.2.3 From 9c50e2b150c8ee0eee5f8154e2ad168cdd748877 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 7 Jun 2023 14:32:29 -0700 Subject: igc: Fix race condition in PTP tx code Currently, the igc driver supports timestamping only one tx packet at a time. During the transmission flow, the skb that requires hardware timestamping is saved in adapter->ptp_tx_skb. Once hardware has the timestamp, an interrupt is delivered, and adapter->ptp_tx_work is scheduled. In igc_ptp_tx_work(), we read the timestamp register, update adapter->ptp_tx_skb, and notify the network stack. While the thread executing the transmission flow (the user process running in kernel mode) and the thread executing ptp_tx_work don't access adapter->ptp_tx_skb concurrently, there are two other places where adapter->ptp_tx_skb is accessed: igc_ptp_tx_hang() and igc_ptp_suspend(). igc_ptp_tx_hang() is executed by the adapter->watchdog_task worker thread which runs periodically so it is possible we have two threads accessing ptp_tx_skb at the same time. Consider the following scenario: right after __IGC_PTP_TX_IN_PROGRESS is set in igc_xmit_frame_ring(), igc_ptp_tx_hang() is executed. Since adapter->ptp_tx_start hasn't been written yet, this is considered a timeout and adapter->ptp_tx_skb is cleaned up. This patch fixes the issue described above by adding the ptp_tx_lock to protect access to ptp_tx_skb and ptp_tx_start fields from igc_adapter. Since igc_xmit_frame_ring() called in atomic context by the networking stack, ptp_tx_lock is defined as a spinlock, and the irq safe variants of lock/unlock are used. With the introduction of the ptp_tx_lock, the __IGC_PTP_TX_IN_PROGRESS flag doesn't provide much of a use anymore so this patch gets rid of it. Fixes: 2c344ae24501 ("igc: Add support for TX timestamping") Signed-off-by: Andre Guedes Signed-off-by: Vinicius Costa Gomes Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 5 ++- drivers/net/ethernet/intel/igc/igc_main.c | 9 +++-- drivers/net/ethernet/intel/igc/igc_ptp.c | 57 +++++++++++++++++-------------- 3 files changed, 41 insertions(+), 30 deletions(-) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 34aebf00a512..7da0657ea48f 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -229,6 +229,10 @@ struct igc_adapter { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; struct work_struct ptp_tx_work; + /* Access to ptp_tx_skb and ptp_tx_start are protected by the + * ptp_tx_lock. + */ + spinlock_t ptp_tx_lock; struct sk_buff *ptp_tx_skb; struct hwtstamp_config tstamp_config; unsigned long ptp_tx_start; @@ -401,7 +405,6 @@ enum igc_state_t { __IGC_TESTING, __IGC_RESETTING, __IGC_DOWN, - __IGC_PTP_TX_IN_PROGRESS, }; enum igc_tx_flags { diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index fa764190f270..9fcb263bd3a7 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1590,9 +1590,10 @@ done: * the other timer registers before skipping the * timestamping request. */ - if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && - !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS, - &adapter->state)) { + unsigned long flags; + + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && !adapter->ptp_tx_skb) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGC_TX_FLAGS_TSTAMP; @@ -1601,6 +1602,8 @@ done: } else { adapter->tx_hwtstamp_skipped++; } + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } if (skb_vlan_tag_present(skb)) { diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 4e10ced736db..56128e55f5c0 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -603,6 +603,7 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter, return 0; } +/* Requires adapter->ptp_tx_lock held by caller. */ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; @@ -610,7 +611,6 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) dev_kfree_skb_any(adapter->ptp_tx_skb); adapter->ptp_tx_skb = NULL; adapter->tx_hwtstamp_timeouts++; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); /* Clear the tx valid bit in TSYNCTXCTL register to enable interrupt. */ rd32(IGC_TXSTMPH); netdev_warn(adapter->netdev, "Tx timestamp timeout\n"); @@ -618,20 +618,20 @@ static void igc_ptp_tx_timeout(struct igc_adapter *adapter) void igc_ptp_tx_hang(struct igc_adapter *adapter) { - bool timeout = time_is_before_jiffies(adapter->ptp_tx_start + - IGC_PTP_TX_TIMEOUT); + unsigned long flags; - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) - return; + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); - /* If we haven't received a timestamp within the timeout, it is - * reasonable to assume that it will never occur, so we can unlock the - * timestamp bit when this occurs. - */ - if (timeout) { - cancel_work_sync(&adapter->ptp_tx_work); - igc_ptp_tx_timeout(adapter); - } + if (!adapter->ptp_tx_skb) + goto unlock; + + if (time_is_after_jiffies(adapter->ptp_tx_start + IGC_PTP_TX_TIMEOUT)) + goto unlock; + + igc_ptp_tx_timeout(adapter); + +unlock: + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } /** @@ -641,6 +641,8 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter) * If we were asked to do hardware stamping and such a time stamp is * available, then it must have been for this skb here because we only * allow only one such packet into the queue. + * + * Context: Expects adapter->ptp_tx_lock to be held by caller. */ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) { @@ -676,13 +678,7 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) shhwtstamps.hwtstamp = ktime_add_ns(shhwtstamps.hwtstamp, adjust); - /* Clear the lock early before calling skb_tstamp_tx so that - * applications are not woken up before the lock bit is clear. We use - * a copy of the skb pointer to ensure other threads can't change it - * while we're notifying the stack. - */ adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); /* Notify the stack and free the skb after we've unlocked */ skb_tstamp_tx(skb, &shhwtstamps); @@ -693,24 +689,33 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) * igc_ptp_tx_work * @work: pointer to work struct * - * This work function polls the TSYNCTXCTL valid bit to determine when a - * timestamp has been taken for the current stored skb. + * This work function checks the TSYNCTXCTL valid bit to determine when + * a timestamp has been taken for the current stored skb. */ static void igc_ptp_tx_work(struct work_struct *work) { struct igc_adapter *adapter = container_of(work, struct igc_adapter, ptp_tx_work); struct igc_hw *hw = &adapter->hw; + unsigned long flags; u32 tsynctxctl; - if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state)) - return; + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + + if (!adapter->ptp_tx_skb) + goto unlock; tsynctxctl = rd32(IGC_TSYNCTXCTL); - if (WARN_ON_ONCE(!(tsynctxctl & IGC_TSYNCTXCTL_TXTT_0))) - return; + tsynctxctl &= IGC_TSYNCTXCTL_TXTT_0; + if (!tsynctxctl) { + WARN_ONCE(1, "Received a TSTAMP interrupt but no TSTAMP is ready.\n"); + goto unlock; + } igc_ptp_tx_hwtstamp(adapter); + +unlock: + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); } /** @@ -959,6 +964,7 @@ void igc_ptp_init(struct igc_adapter *adapter) return; } + spin_lock_init(&adapter->ptp_tx_lock); spin_lock_init(&adapter->tmreg_lock); INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work); @@ -1023,7 +1029,6 @@ void igc_ptp_suspend(struct igc_adapter *adapter) cancel_work_sync(&adapter->ptp_tx_work); dev_kfree_skb_any(adapter->ptp_tx_skb); adapter->ptp_tx_skb = NULL; - clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); if (pci_device_is_present(adapter->pdev)) { igc_ptp_time_save(adapter); -- cgit v1.2.3 From ce58c7cc8b9910f2bc1d038d7ba60c3f011b2cb2 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 7 Jun 2023 14:32:30 -0700 Subject: igc: Check if hardware TX timestamping is enabled earlier Before requesting a packet transmission to be hardware timestamped, check if the user has TX timestamping enabled. Fixes an issue that if a packet was internally forwarded to the NIC, and it had the SKBTX_HW_TSTAMP flag set, the driver would mark that timestamp as skipped. In reality, that timestamp was "not for us", as TX timestamp could never be enabled in the NIC. Checking if the TX timestamping is enabled earlier has a secondary effect that when TX timestamping is disabled, there's no need to check for timestamp timeouts. We should only take care to free any pending timestamp when TX timestamping is disabled, as that skb would never be released otherwise. Fixes: 2c344ae24501 ("igc: Add support for TX timestamping") Suggested-by: Vladimir Oltean Signed-off-by: Vinicius Costa Gomes Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_main.c | 5 ++-- drivers/net/ethernet/intel/igc/igc_ptp.c | 42 ++++++++++++++++++++++++++++--- 3 files changed, 43 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 7da0657ea48f..66fb67c17e4f 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -581,6 +581,7 @@ enum igc_ring_flags_t { IGC_RING_FLAG_TX_CTX_IDX, IGC_RING_FLAG_TX_DETECT_HANG, IGC_RING_FLAG_AF_XDP_ZC, + IGC_RING_FLAG_TX_HWTSTAMP, }; #define ring_uses_large_buffer(ring) \ diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 9fcb263bd3a7..eb50bfd5b867 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1585,7 +1585,8 @@ done: } } - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && + skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { /* FIXME: add support for retrieving timestamps from * the other timer registers before skipping the * timestamping request. @@ -1593,7 +1594,7 @@ done: unsigned long flags; spin_lock_irqsave(&adapter->ptp_tx_lock, flags); - if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && !adapter->ptp_tx_skb) { + if (!adapter->ptp_tx_skb) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGC_TX_FLAGS_TSTAMP; diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 56128e55f5c0..42f622ceb64b 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -536,9 +536,36 @@ static void igc_ptp_enable_rx_timestamp(struct igc_adapter *adapter) wr32(IGC_TSYNCRXCTL, val); } +static void igc_ptp_clear_tx_tstamp(struct igc_adapter *adapter) +{ + unsigned long flags; + + cancel_work_sync(&adapter->ptp_tx_work); + + spin_lock_irqsave(&adapter->ptp_tx_lock, flags); + + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + + spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags); +} + static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; + int i; + + /* Clear the flags first to avoid new packets to be enqueued + * for TX timestamping. + */ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + clear_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags); + } + + /* Now we can clean the pending TX timestamp requests. */ + igc_ptp_clear_tx_tstamp(adapter); wr32(IGC_TSYNCTXCTL, 0); } @@ -546,12 +573,23 @@ static void igc_ptp_disable_tx_timestamp(struct igc_adapter *adapter) static void igc_ptp_enable_tx_timestamp(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; + int i; wr32(IGC_TSYNCTXCTL, IGC_TSYNCTXCTL_ENABLED | IGC_TSYNCTXCTL_TXSYNSIG); /* Read TXSTMP registers to discard any timestamp previously stored. */ rd32(IGC_TXSTMPL); rd32(IGC_TXSTMPH); + + /* The hardware is ready to accept TX timestamp requests, + * notify the transmit path. + */ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + set_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags); + } + } /** @@ -1026,9 +1064,7 @@ void igc_ptp_suspend(struct igc_adapter *adapter) if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) return; - cancel_work_sync(&adapter->ptp_tx_work); - dev_kfree_skb_any(adapter->ptp_tx_skb); - adapter->ptp_tx_skb = NULL; + igc_ptp_clear_tx_tstamp(adapter); if (pci_device_is_present(adapter->pdev)) { igc_ptp_time_save(adapter); -- cgit v1.2.3 From afa141583d82725f682b2fa762cb36a07f58b3f3 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Wed, 7 Jun 2023 14:32:31 -0700 Subject: igc: Retrieve TX timestamp during interrupt handling When the interrupt is handled, the TXTT_0 bit in the TSYNCTXCTL register should already be set and the timestamp value already loaded in the appropriate register. This simplifies the handling, and reduces the latency for retrieving the TX timestamp, which increase the amount of TX timestamps that can be handled in a given time period. As the "work" function doesn't run in a workqueue anymore, rename it to something more sensible, a event handler. Using ntpperf[1] we can see the following performance improvements: Before: $ sudo ./ntpperf -i enp3s0 -m 10:22:22:22:22:21 -d 192.168.1.3 -s 172.18.0.0/16 -I -H -o -37 | responses | TX timestamp offset (ns) rate clients | lost invalid basic xleave | min mean max stddev 1000 100 0.00% 0.00% 0.00% 100.00% -56 +9 +52 19 1500 150 0.00% 0.00% 0.00% 100.00% -40 +30 +75 22 2250 225 0.00% 0.00% 0.00% 100.00% -11 +29 +72 15 3375 337 0.00% 0.00% 0.00% 100.00% -18 +40 +88 22 5062 506 0.00% 0.00% 0.00% 100.00% -19 +23 +77 15 7593 759 0.00% 0.00% 0.00% 100.00% +7 +47 +5168 43 11389 1138 0.00% 0.00% 0.00% 100.00% -11 +41 +5240 39 17083 1708 0.00% 0.00% 0.00% 100.00% +19 +60 +5288 50 25624 2562 0.00% 0.00% 0.00% 100.00% +1 +56 +5368 58 38436 3843 0.00% 0.00% 0.00% 100.00% -84 +12 +8847 66 57654 5765 0.00% 0.00% 100.00% 0.00% 86481 8648 0.00% 0.00% 100.00% 0.00% 129721 12972 0.00% 0.00% 100.00% 0.00% 194581 16384 0.00% 0.00% 100.00% 0.00% 291871 16384 27.35% 0.00% 72.65% 0.00% 437806 16384 50.05% 0.00% 49.95% 0.00% After: $ sudo ./ntpperf -i enp3s0 -m 10:22:22:22:22:21 -d 192.168.1.3 -s 172.18.0.0/16 -I -H -o -37 | responses | TX timestamp offset (ns) rate clients | lost invalid basic xleave | min mean max stddev 1000 100 0.00% 0.00% 0.00% 100.00% -44 +0 +61 19 1500 150 0.00% 0.00% 0.00% 100.00% -6 +39 +81 16 2250 225 0.00% 0.00% 0.00% 100.00% -22 +25 +69 15 3375 337 0.00% 0.00% 0.00% 100.00% -28 +15 +56 14 5062 506 0.00% 0.00% 0.00% 100.00% +7 +78 +143 27 7593 759 0.00% 0.00% 0.00% 100.00% -54 +24 +144 47 11389 1138 0.00% 0.00% 0.00% 100.00% -90 -33 +28 21 17083 1708 0.00% 0.00% 0.00% 100.00% -50 -2 +35 14 25624 2562 0.00% 0.00% 0.00% 100.00% -62 +7 +66 23 38436 3843 0.00% 0.00% 0.00% 100.00% -33 +30 +5395 36 57654 5765 0.00% 0.00% 100.00% 0.00% 86481 8648 0.00% 0.00% 100.00% 0.00% 129721 12972 0.00% 0.00% 100.00% 0.00% 194581 16384 19.50% 0.00% 80.50% 0.00% 291871 16384 35.81% 0.00% 64.19% 0.00% 437806 16384 55.40% 0.00% 44.60% 0.00% [1] https://github.com/mlichvar/ntpperf Signed-off-by: Vinicius Costa Gomes Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 2 +- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- drivers/net/ethernet/intel/igc/igc_ptp.c | 15 +++++---------- 3 files changed, 7 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 66fb67c17e4f..a00738bf6b19 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -228,7 +228,6 @@ struct igc_adapter { struct ptp_clock *ptp_clock; struct ptp_clock_info ptp_caps; - struct work_struct ptp_tx_work; /* Access to ptp_tx_skb and ptp_tx_start are protected by the * ptp_tx_lock. */ @@ -638,6 +637,7 @@ int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); void igc_ptp_tx_hang(struct igc_adapter *adapter); void igc_ptp_read(struct igc_adapter *adapter, struct timespec64 *ts); +void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter); #define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring)) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index eb50bfd5b867..eb4f0e562f60 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -5223,7 +5223,7 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter) if (tsicr & IGC_TSICR_TXTS) { /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); + igc_ptp_tx_tstamp_event(adapter); ack |= IGC_TSICR_TXTS; } diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 42f622ceb64b..cf963a12a92f 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -540,8 +540,6 @@ static void igc_ptp_clear_tx_tstamp(struct igc_adapter *adapter) { unsigned long flags; - cancel_work_sync(&adapter->ptp_tx_work); - spin_lock_irqsave(&adapter->ptp_tx_lock, flags); dev_kfree_skb_any(adapter->ptp_tx_skb); @@ -724,16 +722,14 @@ static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter) } /** - * igc_ptp_tx_work - * @work: pointer to work struct + * igc_ptp_tx_tstamp_event + * @adapter: board private structure * - * This work function checks the TSYNCTXCTL valid bit to determine when - * a timestamp has been taken for the current stored skb. + * Called when a TX timestamp interrupt happens to retrieve the + * timestamp and send it up to the socket. */ -static void igc_ptp_tx_work(struct work_struct *work) +void igc_ptp_tx_tstamp_event(struct igc_adapter *adapter) { - struct igc_adapter *adapter = container_of(work, struct igc_adapter, - ptp_tx_work); struct igc_hw *hw = &adapter->hw; unsigned long flags; u32 tsynctxctl; @@ -1004,7 +1000,6 @@ void igc_ptp_init(struct igc_adapter *adapter) spin_lock_init(&adapter->ptp_tx_lock); spin_lock_init(&adapter->tmreg_lock); - INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work); adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; -- cgit v1.2.3 From ed89b74d2dc920cb61d3094e0e97ec8775b13086 Mon Sep 17 00:00:00 2001 From: Muhammad Husaini Zulkifli Date: Mon, 15 May 2023 14:03:36 +0800 Subject: igc: Add condition for qbv_config_change_errors counter Add condition to increase the qbv counter during taprio qbv configuration only. There might be a case when TC already been setup then user configure the ETF/CBS qdisc and this counter will increase if no condition above. Fixes: ae4fe4698300 ("igc: Add qbv_config_change_errors counter") Signed-off-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_main.c | 2 ++ drivers/net/ethernet/intel/igc/igc_tsn.c | 1 + 3 files changed, 4 insertions(+) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 00a5ee487812..aa5ceab0d371 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -184,6 +184,7 @@ struct igc_adapter { u32 max_frame_size; u32 min_frame_size; + int tc_setup_type; ktime_t base_time; ktime_t cycle_time; bool qbv_enable; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 019ce91c45aa..b90f94511005 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6327,6 +6327,8 @@ static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, { struct igc_adapter *adapter = netdev_priv(dev); + adapter->tc_setup_type = type; + switch (type) { case TC_QUERY_CAPS: return igc_tc_query_caps(adapter, type_data); diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 94a2b0dfb54d..6b299b83e7ef 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -249,6 +249,7 @@ skip_cbs: * Gate Control List (GCL) is running. */ if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && + (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) && tsn_mode_reconfig) adapter->qbv_config_change_errors++; } else { -- cgit v1.2.3 From cca28ceac7c7857bc2d313777017585aef00bcc4 Mon Sep 17 00:00:00 2001 From: Muhammad Husaini Zulkifli Date: Wed, 17 May 2023 08:18:12 +0800 Subject: igc: Remove delay during TX ring configuration Remove unnecessary delay during the TX ring configuration. This will cause delay, especially during link down and link up activity. Furthermore, old SKUs like as I225 will call the reset_adapter to reset the controller during TSN mode Gate Control List (GCL) setting. This will add more time to the configuration of the real-time use case. It doesn't mentioned about this delay in the Software User Manual. It might have been ported from legacy code I210 in the past. Fixes: 13b5b7fd6a4a ("igc: Add support for Tx/Rx rings") Signed-off-by: Muhammad Husaini Zulkifli Acked-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index b90f94511005..7e22204822ea 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -711,7 +711,6 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter, /* disable the queue */ wr32(IGC_TXDCTL(reg_idx), 0); wrfl(); - mdelay(10); wr32(IGC_TDLEN(reg_idx), ring->count * sizeof(union igc_adv_tx_desc)); -- cgit v1.2.3 From 175c241288c09f81eb7b44d65c1ef6045efa4d1a Mon Sep 17 00:00:00 2001 From: Muhammad Husaini Zulkifli Date: Sat, 3 Jun 2023 20:59:34 +0800 Subject: igc: Fix TX Hang issue when QBV Gate is closed If a user schedules a Gate Control List (GCL) to close one of the QBV gates while also transmitting a packet to that closed gate, TX Hang will be happen. HW would not drop any packet when the gate is closed and keep queuing up in HW TX FIFO until the gate is re-opened. This patch implements the solution to drop the packet for the closed gate. This patch will also reset the adapter to perform SW initialization for each 1st Gate Control List (GCL) to avoid hang. This is due to the HW design, where changing to TSN transmit mode requires SW initialization. Intel Discrete I225/6 transmit mode cannot be changed when in dynamic mode according to Software User Manual Section 7.5.2.1. Subsequent Gate Control List (GCL) operations will proceed without a reset, as they already are in TSN Mode. Step to reproduce: DUT: 1) Configure GCL List with certain gate close. BASE=$(date +%s%N) tc qdisc replace dev $IFACE parent root handle 100 taprio \ num_tc 4 \ map 0 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 \ queues 1@0 1@1 1@2 1@3 \ base-time $BASE \ sched-entry S 0x8 500000 \ sched-entry S 0x4 500000 \ flags 0x2 2) Transmit the packet to closed gate. You may use udp_tai application to transmit UDP packet to any of the closed gate. ./udp_tai -i -P 100000 -p 90 -c 1 -t <0/1> -u 30004 Fixes: ec50a9d437f0 ("igc: Add support for taprio offloading") Co-developed-by: Tan Tee Min Signed-off-by: Tan Tee Min Tested-by: Chwee Lin Choong Signed-off-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 6 ++++ drivers/net/ethernet/intel/igc/igc_main.c | 58 ++++++++++++++++++++++++++++--- drivers/net/ethernet/intel/igc/igc_tsn.c | 41 ++++++++++++++-------- 3 files changed, 87 insertions(+), 18 deletions(-) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index aa5ceab0d371..639a50c02537 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -14,6 +14,7 @@ #include #include #include +#include #include "igc_hw.h" @@ -101,6 +102,8 @@ struct igc_ring { u32 start_time; u32 end_time; u32 max_sdu; + bool oper_gate_closed; /* Operating gate. True if the TX Queue is closed */ + bool admin_gate_closed; /* Future gate. True if the TX Queue will be closed */ /* CBS parameters */ bool cbs_enable; /* indicates if CBS is enabled */ @@ -160,6 +163,7 @@ struct igc_adapter { struct timer_list watchdog_timer; struct timer_list dma_err_timer; struct timer_list phy_info_timer; + struct hrtimer hrtimer; u32 wol; u32 en_mng_pt; @@ -189,6 +193,8 @@ struct igc_adapter { ktime_t cycle_time; bool qbv_enable; u32 qbv_config_change_errors; + bool qbv_transition; + unsigned int qbv_count; /* OS defined structs */ struct pci_dev *pdev; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 7e22204822ea..e5bfc4000658 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1572,6 +1572,9 @@ done: first->bytecount = skb->len; first->gso_segs = 1; + if (adapter->qbv_transition || tx_ring->oper_gate_closed) + goto out_drop; + if (tx_ring->max_sdu > 0) { u32 max_sdu = 0; @@ -3011,8 +3014,8 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) time_after(jiffies, tx_buffer->time_stamp + (adapter->tx_timeout_factor * HZ)) && !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) && - (rd32(IGC_TDH(tx_ring->reg_idx)) != - readl(tx_ring->tail))) { + (rd32(IGC_TDH(tx_ring->reg_idx)) != readl(tx_ring->tail)) && + !tx_ring->oper_gate_closed) { /* detected Tx unit hang */ netdev_err(tx_ring->netdev, "Detected Tx Unit Hang\n" @@ -6102,6 +6105,8 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter) adapter->base_time = 0; adapter->cycle_time = NSEC_PER_SEC; adapter->qbv_config_change_errors = 0; + adapter->qbv_transition = false; + adapter->qbv_count = 0; for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; @@ -6109,6 +6114,8 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter) ring->start_time = 0; ring->end_time = NSEC_PER_SEC; ring->max_sdu = 0; + ring->oper_gate_closed = false; + ring->admin_gate_closed = false; } return 0; @@ -6120,6 +6127,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, bool queue_configured[IGC_MAX_TX_QUEUES] = { }; struct igc_hw *hw = &adapter->hw; u32 start_time = 0, end_time = 0; + struct timespec64 now; size_t n; int i; @@ -6149,6 +6157,8 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, adapter->cycle_time = qopt->cycle_time; adapter->base_time = qopt->base_time; + igc_ptp_read(adapter, &now); + for (n = 0; n < qopt->num_entries; n++) { struct tc_taprio_sched_entry *e = &qopt->entries[n]; @@ -6183,7 +6193,10 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, ring->start_time = start_time; ring->end_time = end_time; - queue_configured[i] = true; + if (ring->start_time >= adapter->cycle_time) + queue_configured[i] = false; + else + queue_configured[i] = true; } start_time += e->interval; @@ -6193,8 +6206,20 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, * If not, set the start and end time to be end time. */ for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; + + if (!is_base_time_past(qopt->base_time, &now)) { + ring->admin_gate_closed = false; + } else { + ring->oper_gate_closed = false; + ring->admin_gate_closed = false; + } + if (!queue_configured[i]) { - struct igc_ring *ring = adapter->tx_ring[i]; + if (!is_base_time_past(qopt->base_time, &now)) + ring->admin_gate_closed = true; + else + ring->oper_gate_closed = true; ring->start_time = end_time; ring->end_time = end_time; @@ -6575,6 +6600,27 @@ static const struct xdp_metadata_ops igc_xdp_metadata_ops = { .xmo_rx_timestamp = igc_xdp_rx_timestamp, }; +static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer) +{ + struct igc_adapter *adapter = container_of(timer, struct igc_adapter, + hrtimer); + unsigned int i; + + adapter->qbv_transition = true; + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + if (tx_ring->admin_gate_closed) { + tx_ring->admin_gate_closed = false; + tx_ring->oper_gate_closed = true; + } else { + tx_ring->oper_gate_closed = false; + } + } + adapter->qbv_transition = false; + return HRTIMER_NORESTART; +} + /** * igc_probe - Device Initialization Routine * @pdev: PCI device information struct @@ -6753,6 +6799,9 @@ static int igc_probe(struct pci_dev *pdev, INIT_WORK(&adapter->reset_task, igc_reset_task); INIT_WORK(&adapter->watchdog_task, igc_watchdog_task); + hrtimer_init(&adapter->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + adapter->hrtimer.function = &igc_qbv_scheduling_timer; + /* Initialize link properties that are user-changeable */ adapter->fc_autoneg = true; hw->mac.autoneg = true; @@ -6856,6 +6905,7 @@ static void igc_remove(struct pci_dev *pdev) cancel_work_sync(&adapter->reset_task); cancel_work_sync(&adapter->watchdog_task); + hrtimer_cancel(&adapter->hrtimer); /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 6b299b83e7ef..3cdb0c988728 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -114,7 +114,6 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) static int igc_tsn_enable_offload(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; - bool tsn_mode_reconfig = false; u32 tqavctrl, baset_l, baset_h; u32 sec, nsec, cycle; ktime_t base_time, systim; @@ -228,11 +227,10 @@ skip_cbs: tqavctrl = rd32(IGC_TQAVCTRL) & ~IGC_TQAVCTRL_FUTSCDDIS; - if (tqavctrl & IGC_TQAVCTRL_TRANSMIT_MODE_TSN) - tsn_mode_reconfig = true; - tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV; + adapter->qbv_count++; + cycle = adapter->cycle_time; base_time = adapter->base_time; @@ -250,17 +248,28 @@ skip_cbs: */ if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) && - tsn_mode_reconfig) + (adapter->qbv_count > 1)) adapter->qbv_config_change_errors++; } else { - /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit - * has to be configured before the cycle time and base time. - * Tx won't hang if there is a GCL is already running, - * so in this case we don't need to set FutScdDis. - */ - if (igc_is_device_id_i226(hw) && - !(rd32(IGC_BASET_H) || rd32(IGC_BASET_L))) - tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS; + if (igc_is_device_id_i226(hw)) { + ktime_t adjust_time, expires_time; + + /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit + * has to be configured before the cycle time and base time. + * Tx won't hang if a GCL is already running, + * so in this case we don't need to set FutScdDis. + */ + if (!(rd32(IGC_BASET_H) || rd32(IGC_BASET_L))) + tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS; + + nsec = rd32(IGC_SYSTIML); + sec = rd32(IGC_SYSTIMH); + systim = ktime_set(sec, nsec); + + adjust_time = adapter->base_time; + expires_time = ktime_sub_ns(adjust_time, systim); + hrtimer_start(&adapter->hrtimer, expires_time, HRTIMER_MODE_REL); + } } wr32(IGC_TQAVCTRL, tqavctrl); @@ -306,7 +315,11 @@ int igc_tsn_offload_apply(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; - if (netif_running(adapter->netdev) && igc_is_device_id_i225(hw)) { + /* Per I225/6 HW Design Section 7.5.2.1, transmit mode + * cannot be changed dynamically. Require reset the adapter. + */ + if (netif_running(adapter->netdev) && + (igc_is_device_id_i225(hw) || !adapter->qbv_count)) { schedule_work(&adapter->reset_task); return 0; } -- cgit v1.2.3 From 25102893e409bc02761ab82dbcfa092006404790 Mon Sep 17 00:00:00 2001 From: Tan Tee Min Date: Fri, 9 Jun 2023 11:28:42 +0800 Subject: igc: Include the length/type field and VLAN tag in queueMaxSDU IEEE 802.1Q does not have clear definitions of what constitutes an SDU (Service Data Unit), but IEEE Std 802.3 clause 3.1.2 does define the MAC service primitives and clause 3.2.7 does define the MAC Client Data for Q-tagged frames. It shows that the mac_service_data_unit (MSDU) does NOT contain the preamble, destination and source address, or FCS. The MSDU does contain the length/type field, MAC client data, VLAN tag and any padding data (prior to the FCS). Thus, the maximum 802.3 frame size that is allowed to be transmitted should be QueueMaxSDU (MSDU) + 16 (6 byte SA + 6 byte DA + 4 byte FCS). Fixes: 92a0dcb8427d ("igc: offload queue max SDU from tc-taprio") Signed-off-by: Tan Tee Min Reviewed-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index e5bfc4000658..281a0e35b9d1 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1575,16 +1575,9 @@ done: if (adapter->qbv_transition || tx_ring->oper_gate_closed) goto out_drop; - if (tx_ring->max_sdu > 0) { - u32 max_sdu = 0; - - max_sdu = tx_ring->max_sdu + - (skb_vlan_tagged(first->skb) ? VLAN_HLEN : 0); - - if (first->bytecount > max_sdu) { - adapter->stats.txdrop++; - goto out_drop; - } + if (tx_ring->max_sdu > 0 && first->bytecount > tx_ring->max_sdu) { + adapter->stats.txdrop++; + goto out_drop; } if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && @@ -6231,7 +6224,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, struct net_device *dev = adapter->netdev; if (qopt->max_sdu[i]) - ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len; + ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len - ETH_TLEN; else ring->max_sdu = 0; } -- cgit v1.2.3 From 8046063df887bee35c002224267ba46f41be7cf6 Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 14 Jun 2023 16:07:09 +0200 Subject: igc: Rename qbv_enable to taprio_offload_enable In the current implementation the flags adapter->qbv_enable and IGC_FLAG_TSN_QBV_ENABLED have a similar name, but do not have the same meaning. The first one is used only to indicate taprio offload (i.e. when igc_save_qbv_schedule was called), while the second one corresponds to the Qbv mode of the hardware. However, the second one is also used to support the TX launchtime feature, i.e. ETF qdisc offload. This leads to situations where adapter->qbv_enable is false, but the flag IGC_FLAG_TSN_QBV_ENABLED is set. This is prone to confusion. The rename should reduce this confusion. Since it is a pure rename, it has no impact on functionality. Fixes: e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv") Signed-off-by: Florian Kauer Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 2 +- drivers/net/ethernet/intel/igc/igc_main.c | 6 +++--- drivers/net/ethernet/intel/igc/igc_tsn.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 639a50c02537..9db384f66a8e 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -191,7 +191,7 @@ struct igc_adapter { int tc_setup_type; ktime_t base_time; ktime_t cycle_time; - bool qbv_enable; + bool taprio_offload_enable; u32 qbv_config_change_errors; bool qbv_transition; unsigned int qbv_count; diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 281a0e35b9d1..fae534ef1c4f 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6126,16 +6126,16 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, switch (qopt->cmd) { case TAPRIO_CMD_REPLACE: - adapter->qbv_enable = true; + adapter->taprio_offload_enable = true; break; case TAPRIO_CMD_DESTROY: - adapter->qbv_enable = false; + adapter->taprio_offload_enable = false; break; default: return -EOPNOTSUPP; } - if (!adapter->qbv_enable) + if (!adapter->taprio_offload_enable) return igc_tsn_clear_schedule(adapter); if (qopt->base_time < 0) diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 3cdb0c988728..b76ebfc10b1d 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -37,7 +37,7 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter) { unsigned int new_flags = adapter->flags & ~IGC_FLAG_TSN_ANY_ENABLED; - if (adapter->qbv_enable) + if (adapter->taprio_offload_enable) new_flags |= IGC_FLAG_TSN_QBV_ENABLED; if (is_any_launchtime(adapter)) -- cgit v1.2.3 From 82ff5f29b7377d614f0c01fd74b5d0cb225f0adc Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 14 Jun 2023 16:07:10 +0200 Subject: igc: Do not enable taprio offload for invalid arguments Only set adapter->taprio_offload_enable after validating the arguments. Otherwise, it stays set even if the offload was not enabled. Since the subsequent code does not get executed in case of invalid arguments, it will not be read at first. However, by activating and then deactivating another offload (e.g. ETF/TX launchtime offload), taprio_offload_enable is read and erroneously keeps the offload feature of the NIC enabled. This can be reproduced as follows: # TAPRIO offload (flags == 0x2) and negative base-time leading to expected -ERANGE sudo tc qdisc replace dev enp1s0 parent root handle 100 stab overhead 24 taprio \ num_tc 1 \ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ queues 1@0 \ base-time -1000 \ sched-entry S 01 300000 \ flags 0x2 # IGC_TQAVCTRL is 0x0 as expected (iomem=relaxed for reading register) sudo pcimem /sys/bus/pci/devices/0000:01:00.0/resource0 0x3570 w*1 # Activate ETF offload sudo tc qdisc replace dev enp1s0 parent root handle 6666 mqprio \ num_tc 3 \ map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \ queues 1@0 1@1 2@2 \ hw 0 sudo tc qdisc add dev enp1s0 parent 6666:1 etf \ clockid CLOCK_TAI \ delta 500000 \ offload # IGC_TQAVCTRL is 0x9 as expected sudo pcimem /sys/bus/pci/devices/0000:01:00.0/resource0 0x3570 w*1 # Deactivate ETF offload again sudo tc qdisc delete dev enp1s0 parent 6666:1 # IGC_TQAVCTRL should now be 0x0 again, but is observed as 0x9 sudo pcimem /sys/bus/pci/devices/0000:01:00.0/resource0 0x3570 w*1 Fixes: e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv") Signed-off-by: Florian Kauer Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index fae534ef1c4f..fb8e55c7c402 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6097,6 +6097,7 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter) adapter->base_time = 0; adapter->cycle_time = NSEC_PER_SEC; + adapter->taprio_offload_enable = false; adapter->qbv_config_change_errors = 0; adapter->qbv_transition = false; adapter->qbv_count = 0; @@ -6124,20 +6125,12 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, size_t n; int i; - switch (qopt->cmd) { - case TAPRIO_CMD_REPLACE: - adapter->taprio_offload_enable = true; - break; - case TAPRIO_CMD_DESTROY: - adapter->taprio_offload_enable = false; - break; - default: - return -EOPNOTSUPP; - } - - if (!adapter->taprio_offload_enable) + if (qopt->cmd == TAPRIO_CMD_DESTROY) return igc_tsn_clear_schedule(adapter); + if (qopt->cmd != TAPRIO_CMD_REPLACE) + return -EOPNOTSUPP; + if (qopt->base_time < 0) return -ERANGE; @@ -6149,6 +6142,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, adapter->cycle_time = qopt->cycle_time; adapter->base_time = qopt->base_time; + adapter->taprio_offload_enable = true; igc_ptp_read(adapter, &now); -- cgit v1.2.3 From e5d88c53d03f8df864776431175d08c053645f50 Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 14 Jun 2023 16:07:11 +0200 Subject: igc: Handle already enabled taprio offload for basetime 0 Since commit e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv") it is possible to enable taprio offload with a basetime of 0. However, the check if taprio offload is already enabled (and thus -EALREADY should be returned for igc_save_qbv_schedule) still relied on adapter->base_time > 0. This can be reproduced as follows: # TAPRIO offload (flags == 0x2) and base-time = 0 sudo tc qdisc replace dev enp1s0 parent root handle 100 stab overhead 24 taprio \ num_tc 1 \ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ queues 1@0 \ base-time 0 \ sched-entry S 01 300000 \ flags 0x2 # The second call should fail with "Error: Device failed to setup taprio offload." # But that only happens if base-time was != 0 sudo tc qdisc replace dev enp1s0 parent root handle 100 stab overhead 24 taprio \ num_tc 1 \ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ queues 1@0 \ base-time 0 \ sched-entry S 01 300000 \ flags 0x2 Fixes: e17090eb2494 ("igc: allow BaseTime 0 enrollment for Qbv") Signed-off-by: Florian Kauer Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index fb8e55c7c402..5d24930fed8f 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6134,7 +6134,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, if (qopt->base_time < 0) return -ERANGE; - if (igc_is_device_id_i225(hw) && adapter->base_time) + if (igc_is_device_id_i225(hw) && adapter->taprio_offload_enable) return -EALREADY; if (!validate_schedule(adapter, qopt)) -- cgit v1.2.3 From c1bca9ac0bcb355be11354c2e68bc7bf31f5ac5a Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 14 Jun 2023 16:07:13 +0200 Subject: igc: Fix launchtime before start of cycle It is possible (verified on a running system) that frames are processed by igc_tx_launchtime with a txtime before the start of the cycle (baset_est). However, the result of txtime - baset_est is written into a u32, leading to a wrap around to a positive number. The following launchtime > 0 check will only branch to executing launchtime = 0 if launchtime is already 0. Fix it by using a s32 before checking launchtime > 0. Fixes: db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") Signed-off-by: Florian Kauer Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 5d24930fed8f..4855caa3bae4 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1016,7 +1016,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, ktime_t base_time = adapter->base_time; ktime_t now = ktime_get_clocktai(); ktime_t baset_est, end_of_cycle; - u32 launchtime; + s32 launchtime; s64 n; n = div64_s64(ktime_sub_ns(now, base_time), cycle_time); -- cgit v1.2.3 From 0bcc62858d6ba62cbade957d69745e6adeed5f3d Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 14 Jun 2023 16:07:14 +0200 Subject: igc: Fix inserting of empty frame for launchtime The insertion of an empty frame was introduced with commit db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") in order to ensure that the current cycle has at least one packet if there is some packet to be scheduled for the next cycle. However, the current implementation does not properly check if a packet is already scheduled for the current cycle. Currently, an empty packet is always inserted if and only if txtime >= end_of_cycle && txtime > last_tx_cycle but since last_tx_cycle is always either the end of the current cycle (end_of_cycle) or the end of a previous cycle, the second part (txtime > last_tx_cycle) is always true unless txtime == last_tx_cycle. What actually needs to be checked here is if the last_tx_cycle was already written within the current cycle, so an empty frame should only be inserted if and only if txtime >= end_of_cycle && end_of_cycle > last_tx_cycle. This patch does not only avoid an unnecessary insertion, but it can actually be harmful to insert an empty packet if packets are already scheduled in the current cycle, because it can lead to a situation where the empty packet is actually processed as the first packet in the upcoming cycle shifting the packet with the first_flag even one cycle into the future, finally leading to a TX hang. The TX hang can be reproduced on a i225 with: sudo tc qdisc replace dev enp1s0 parent root handle 100 taprio \ num_tc 1 \ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ queues 1@0 \ base-time 0 \ sched-entry S 01 300000 \ flags 0x1 \ txtime-delay 500000 \ clockid CLOCK_TAI sudo tc qdisc replace dev enp1s0 parent 100:1 etf \ clockid CLOCK_TAI \ delta 500000 \ offload \ skip_sock_check and traffic generator sudo trafgen -i traffic.cfg -o enp1s0 --cpp -n0 -q -t1400ns with traffic.cfg #define ETH_P_IP 0x0800 { /* Ethernet Header */ 0x30, 0x1f, 0x9a, 0xd0, 0xf0, 0x0e, # MAC Dest - adapt as needed 0x24, 0x5e, 0xbe, 0x57, 0x2e, 0x36, # MAC Src - adapt as needed const16(ETH_P_IP), /* IPv4 Header */ 0b01000101, 0, # IPv4 version, IHL, TOS const16(1028), # IPv4 total length (UDP length + 20 bytes (IP header)) const16(2), # IPv4 ident 0b01000000, 0, # IPv4 flags, fragmentation off 64, # IPv4 TTL 17, # Protocol UDP csumip(14, 33), # IPv4 checksum /* UDP Header */ 10, 0, 48, 1, # IP Src - adapt as needed 10, 0, 48, 10, # IP Dest - adapt as needed const16(5555), # UDP Src Port const16(6666), # UDP Dest Port const16(1008), # UDP length (UDP header 8 bytes + payload length) csumudp(14, 34), # UDP checksum /* Payload */ fill('W', 1000), } and the observed message with that is for example igc 0000:01:00.0 enp1s0: Detected Tx Unit Hang Tx Queue <0> TDH <32> TDT <3c> next_to_use <3c> next_to_clean <32> buffer_info[next_to_clean] time_stamp next_to_watch <00000000632a1828> jiffies desc.status <1048000> Fixes: db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") Signed-off-by: Florian Kauer Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet/intel/igc/igc_main.c') diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 4855caa3bae4..9f93f0f4f752 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1029,7 +1029,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, *first_flag = true; ring->last_ff_cycle = baset_est; - if (ktime_compare(txtime, ring->last_tx_cycle) > 0) + if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0) *insert_empty = true; } } -- cgit v1.2.3