Merge branch 'net-optimize-tx-throughput-and-efficiency'

Eric Dumazet says:

====================
net: optimize TX throughput and efficiency

In this series, I replace the busylock spinlock we have in
__dev_queue_xmit() and use lockless list (llist) to reduce
spinlock contention to the minimum.

Idea is that only one cpu might spin on the qdisc spinlock,
while others simply add their skb in the llist.

After this series, we get a 300 % (4x) improvement on heavy TX workloads,
sending twice the number of packets per second, for half the cpu cycles.
====================

Link: https://patch.msgid.link/20251014171907.3554413-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
author: Jakub Kicinski <kuba@kernel.org> 2025-10-17 02:25:16 +0300
committer: Jakub Kicinski <kuba@kernel.org> 2025-10-17 02:25:16 +0300
commit: 2df75cc5bdc48f8a6f393eaa9d18480aeddac7f2 (patch)
tree: 474b900af346b7131228a19382b88a04694b0fd9 /include
parent: 01b6aca22bb9f8fbbebbf8bdbb80aadf11318e3d (diff)
parent: 100dfa74cad9d4665cdcf0cc8e673b123a3ea910 (diff)
download: linux-2df75cc5bdc48f8a6f393eaa9d18480aeddac7f2.tar.xz
2 files changed, 18 insertions, 14 deletions
diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h
index 813a19122ebb..cc232508e695 100644
--- a/include/linux/netdevice_xmit.h
+++ b/include/linux/netdevice_xmit.h
@@ -2,6 +2,12 @@
 #ifndef _LINUX_NETDEVICE_XMIT_H
 #define _LINUX_NETDEVICE_XMIT_H
 
+#if IS_ENABLED(CONFIG_NET_ACT_MIRRED)
+#define MIRRED_NEST_LIMIT	4
+#endif
+
+struct net_device;
+
 struct netdev_xmit {
 	u16 recursion;
 	u8  more;
@@ -9,7 +15,8 @@ struct netdev_xmit {
 	u8  skip_txqueue;
 #endif
 #if IS_ENABLED(CONFIG_NET_ACT_MIRRED)
-	u8 sched_mirred_nest;
+	u8			sched_mirred_nest;
+	struct net_device	*sched_mirred_dev[MIRRED_NEST_LIMIT];
 #endif
 #if IS_ENABLED(CONFIG_NF_DUP_NETDEV)
 	u8 nf_dup_skb_recursion;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 738cd5b13c62..94966692ccdf 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -41,13 +41,6 @@ enum qdisc_state_t {
 	__QDISC_STATE_DRAINING,
 };
 
-enum qdisc_state2_t {
-	/* Only for !TCQ_F_NOLOCK qdisc. Never access it directly.
-	 * Use qdisc_run_begin/end() or qdisc_is_running() instead.
-	 */
-	__QDISC_STATE2_RUNNING,
-};
-
 #define QDISC_STATE_MISSED	BIT(__QDISC_STATE_MISSED)
 #define QDISC_STATE_DRAINING	BIT(__QDISC_STATE_DRAINING)
 
@@ -117,13 +110,14 @@ struct Qdisc {
 	struct qdisc_skb_head	q;
 	struct gnet_stats_basic_sync bstats;
 	struct gnet_stats_queue	qstats;
-	int                     owner;
+	bool			running; /* must be written under qdisc spinlock */
 	unsigned long		state;
-	unsigned long		state2; /* must be written under qdisc spinlock */
 	struct Qdisc            *next_sched;
 	struct sk_buff_head	skb_bad_txq;
 
-	spinlock_t		busylock ____cacheline_aligned_in_smp;
+	atomic_long_t		defer_count ____cacheline_aligned_in_smp;
+	struct llist_head	defer_list;
+
 	spinlock_t		seqlock;
 
 	struct rcu_head		rcu;
@@ -168,7 +162,7 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc)
 {
 	if (qdisc->flags & TCQ_F_NOLOCK)
 		return spin_is_locked(&qdisc->seqlock);
-	return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
+	return READ_ONCE(qdisc->running);
 }
 
 static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
@@ -211,7 +205,10 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 		 */
 		return spin_trylock(&qdisc->seqlock);
 	}
-	return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
+	if (READ_ONCE(qdisc->running))
+		return false;
+	WRITE_ONCE(qdisc->running, true);
+	return true;
 }
 
 static inline void qdisc_run_end(struct Qdisc *qdisc)
@@ -229,7 +226,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
 				      &qdisc->state)))
 			__netif_schedule(qdisc);
 	} else {
-		__clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2);
+		WRITE_ONCE(qdisc->running, false);
 	}
 }
author	Jakub Kicinski <kuba@kernel.org>	2025-10-17 02:25:16 +0300
committer	Jakub Kicinski <kuba@kernel.org>	2025-10-17 02:25:16 +0300
commit	2df75cc5bdc48f8a6f393eaa9d18480aeddac7f2 (patch)
tree	474b900af346b7131228a19382b88a04694b0fd9 /include
parent	01b6aca22bb9f8fbbebbf8bdbb80aadf11318e3d (diff)
parent	100dfa74cad9d4665cdcf0cc8e673b123a3ea910 (diff)
download	linux-2df75cc5bdc48f8a6f393eaa9d18480aeddac7f2.tar.xz