diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2025-10-17 02:25:16 +0300 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2025-10-17 02:25:16 +0300 |
| commit | 2df75cc5bdc48f8a6f393eaa9d18480aeddac7f2 (patch) | |
| tree | 474b900af346b7131228a19382b88a04694b0fd9 /include | |
| parent | 01b6aca22bb9f8fbbebbf8bdbb80aadf11318e3d (diff) | |
| parent | 100dfa74cad9d4665cdcf0cc8e673b123a3ea910 (diff) | |
| download | linux-2df75cc5bdc48f8a6f393eaa9d18480aeddac7f2.tar.xz | |
Merge branch 'net-optimize-tx-throughput-and-efficiency'
Eric Dumazet says:
====================
net: optimize TX throughput and efficiency
In this series, I replace the busylock spinlock we have in
__dev_queue_xmit() and use a lockless list (llist) to reduce
spinlock contention to the minimum.
The idea is that only one CPU might spin on the qdisc spinlock,
while the others simply add their skb to the llist.
After this series, we get a 300% (4x) improvement on heavy TX workloads,
sending twice the number of packets per second, for half the CPU cycles.
====================
Link: https://patch.msgid.link/20251014171907.3554413-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/netdevice_xmit.h | 9 | ||||
| -rw-r--r-- | include/net/sch_generic.h | 23 |
2 files changed, 18 insertions, 14 deletions
diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h index 813a19122ebb..cc232508e695 100644 --- a/include/linux/netdevice_xmit.h +++ b/include/linux/netdevice_xmit.h @@ -2,6 +2,12 @@ #ifndef _LINUX_NETDEVICE_XMIT_H #define _LINUX_NETDEVICE_XMIT_H +#if IS_ENABLED(CONFIG_NET_ACT_MIRRED) +#define MIRRED_NEST_LIMIT 4 +#endif + +struct net_device; + struct netdev_xmit { u16 recursion; u8 more; @@ -9,7 +15,8 @@ struct netdev_xmit { u8 skip_txqueue; #endif #if IS_ENABLED(CONFIG_NET_ACT_MIRRED) - u8 sched_mirred_nest; + u8 sched_mirred_nest; + struct net_device *sched_mirred_dev[MIRRED_NEST_LIMIT]; #endif #if IS_ENABLED(CONFIG_NF_DUP_NETDEV) u8 nf_dup_skb_recursion; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 738cd5b13c62..94966692ccdf 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -41,13 +41,6 @@ enum qdisc_state_t { __QDISC_STATE_DRAINING, }; -enum qdisc_state2_t { - /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly. - * Use qdisc_run_begin/end() or qdisc_is_running() instead. 
- */ - __QDISC_STATE2_RUNNING, -}; - #define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED) #define QDISC_STATE_DRAINING BIT(__QDISC_STATE_DRAINING) @@ -117,13 +110,14 @@ struct Qdisc { struct qdisc_skb_head q; struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; - int owner; + bool running; /* must be written under qdisc spinlock */ unsigned long state; - unsigned long state2; /* must be written under qdisc spinlock */ struct Qdisc *next_sched; struct sk_buff_head skb_bad_txq; - spinlock_t busylock ____cacheline_aligned_in_smp; + atomic_long_t defer_count ____cacheline_aligned_in_smp; + struct llist_head defer_list; + spinlock_t seqlock; struct rcu_head rcu; @@ -168,7 +162,7 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) return spin_is_locked(&qdisc->seqlock); - return test_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); + return READ_ONCE(qdisc->running); } static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc) @@ -211,7 +205,10 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) */ return spin_trylock(&qdisc->seqlock); } - return !__test_and_set_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); + if (READ_ONCE(qdisc->running)) + return false; + WRITE_ONCE(qdisc->running, true); + return true; } static inline void qdisc_run_end(struct Qdisc *qdisc) @@ -229,7 +226,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) &qdisc->state))) __netif_schedule(qdisc); } else { - __clear_bit(__QDISC_STATE2_RUNNING, &qdisc->state2); + WRITE_ONCE(qdisc->running, false); } } |
