| author    | Eric Dumazet <edumazet@google.com>                     | 2025-11-21 11:32:55 +0300 |
|-----------|--------------------------------------------------------|---------------------------|
| committer | Paolo Abeni <pabeni@redhat.com>                        | 2025-11-25 18:10:32 +0300 |
| commit    | 191ff13e42a7b7824fec5b2ed84fd6481356754d (patch)       |                           |
| tree      | ddd4c7cdcd4ca88654791f78ade112b11432f9a7 /include      |                           |
| parent    | 0170d7f47c8bb0311bc802bad52245c045f151fe (diff)        |                           |
| download  | linux-191ff13e42a7b7824fec5b2ed84fd6481356754d.tar.xz  |                           |
net_sched: add qdisc_dequeue_drop() helper
Some qdiscs, like cake, codel and fq_codel, might drop packets
in their dequeue() method.
This is currently problematic because dequeue() runs with
the qdisc spinlock held. Freeing skbs can be extremely expensive.
Add a qdisc_dequeue_drop() helper and a new TCQ_F_DEQUEUE_DROPS
flag so that these qdiscs can opt in to deferring the skb frees
until after the qdisc spinlock is released.
Gating this behind the TCQ_F_DEQUEUE_DROPS flag avoids penalizing
other qdiscs with an extra cache line miss.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20251121083256.674562-14-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
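
For illustration, here is a minimal sketch of how a qdisc could opt in to this mechanism. Only TCQ_F_DEQUEUE_DROPS, qdisc_dequeue_drop(), __qdisc_dequeue_head() and SKB_DROP_REASON_QDISC_DROP are real kernel symbols; everything named example_* and the some_aqm_says_drop() predicate are hypothetical:

```c
/* Hypothetical opting-in qdisc sketch: example_* names and
 * some_aqm_says_drop() are illustrative, not from this patch.
 */
static int example_init(struct Qdisc *sch, struct nlattr *opt,
			struct netlink_ext_ack *extack)
{
	sch->flags |= TCQ_F_DEQUEUE_DROPS;	/* opt in to deferred frees */
	return 0;
}

static struct sk_buff *example_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;

	while ((skb = __qdisc_dequeue_head(&sch->q)) != NULL) {
		if (some_aqm_says_drop(skb)) {
			/* Chain the skb on q->to_free instead of calling
			 * kfree_skb() while the qdisc spinlock is held;
			 * qdisc_run_end() hands the list back to the caller.
			 */
			qdisc_dequeue_drop(sch, skb,
					   SKB_DROP_REASON_QDISC_DROP);
			continue;
		}
		return skb;
	}
	return NULL;
}
```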
Diffstat (limited to 'include')
| -rw-r--r-- | include/net/pkt_sched.h   | 5  |
| -rw-r--r-- | include/net/sch_generic.h | 30 |

2 files changed, 30 insertions(+), 5 deletions(-)
```diff
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 4678db45832a..e703c507d0da 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -114,12 +114,13 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 
 void __qdisc_run(struct Qdisc *q);
 
-static inline void qdisc_run(struct Qdisc *q)
+static inline struct sk_buff *qdisc_run(struct Qdisc *q)
 {
 	if (qdisc_run_begin(q)) {
 		__qdisc_run(q);
-		qdisc_run_end(q);
+		return qdisc_run_end(q);
 	}
+	return NULL;
 }
 
 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b8092d0378a0..c3a7268b567e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -88,6 +88,8 @@ struct Qdisc {
 #define TCQ_F_INVISIBLE		0x80 /* invisible by default in dump */
 #define TCQ_F_NOLOCK		0x100 /* qdisc does not require locking */
 #define TCQ_F_OFFLOADED		0x200 /* qdisc is offloaded to HW */
+#define TCQ_F_DEQUEUE_DROPS	0x400 /* ->dequeue() can drop packets in q->to_free */
+
 	u32			limit;
 	const struct Qdisc_ops	*ops;
 	struct qdisc_size_table	__rcu *stab;
@@ -119,6 +121,8 @@ struct Qdisc {
 
 	/* Note : we only change qstats.backlog in fast path. */
 	struct gnet_stats_queue	qstats;
+
+	struct sk_buff		*to_free;
 
 	__cacheline_group_end(Qdisc_write);
 
@@ -218,8 +222,10 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 	return true;
 }
 
-static inline void qdisc_run_end(struct Qdisc *qdisc)
+static inline struct sk_buff *qdisc_run_end(struct Qdisc *qdisc)
 {
+	struct sk_buff *to_free = NULL;
+
 	if (qdisc->flags & TCQ_F_NOLOCK) {
 		spin_unlock(&qdisc->seqlock);
 
@@ -232,9 +238,16 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
 		if (unlikely(test_bit(__QDISC_STATE_MISSED,
 				      &qdisc->state)))
 			__netif_schedule(qdisc);
-	} else {
-		WRITE_ONCE(qdisc->running, false);
+		return NULL;
+	}
+
+	if (qdisc->flags & TCQ_F_DEQUEUE_DROPS) {
+		to_free = qdisc->to_free;
+		if (to_free)
+			qdisc->to_free = NULL;
 	}
+	WRITE_ONCE(qdisc->running, false);
+	return to_free;
 }
 
 static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
@@ -1116,6 +1129,17 @@ static inline void tcf_kfree_skb_list(struct sk_buff *skb)
 	}
 }
 
+static inline void qdisc_dequeue_drop(struct Qdisc *q, struct sk_buff *skb,
+				      enum skb_drop_reason reason)
+{
+	DEBUG_NET_WARN_ON_ONCE(!(q->flags & TCQ_F_DEQUEUE_DROPS));
+	DEBUG_NET_WARN_ON_ONCE(q->flags & TCQ_F_NOLOCK);
+
+	tcf_set_drop_reason(skb, reason);
+	skb->next = q->to_free;
+	q->to_free = skb;
+}
+
 /* Instead of calling kfree_skb() while root qdisc lock is held,
  * queue the skb for future freeing at end of __dev_xmit_skb()
  */
```
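
On the caller side, the list returned by qdisc_run() (via qdisc_run_end()) is meant to be freed once the root qdisc lock has been dropped, per the comment above tcf_kfree_skb_list() in the diff. A simplified sketch of that pattern follows; it is not the exact __dev_xmit_skb() code, and example_run_and_free() is a hypothetical name:

```c
/* Simplified caller-side pattern (not the exact __dev_xmit_skb() code):
 * run the qdisc under the root lock, then free any skbs that dequeue()
 * dropped only after the lock has been released.
 */
static void example_run_and_free(struct Qdisc *q, spinlock_t *root_lock)
{
	struct sk_buff *to_free;

	spin_lock(root_lock);
	to_free = qdisc_run(q);		/* may return skbs chained on q->to_free */
	spin_unlock(root_lock);

	if (unlikely(to_free))
		tcf_kfree_skb_list(to_free);	/* frees the whole chain, with drop reasons */
}
```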
