diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2026-05-14 03:52:57 +0300 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-05-14 03:52:57 +0300 |
| commit | 3803065cd6b0630d4161d86aa04e2d1db0f3a0b5 (patch) | |
| tree | a4a35048f0e03eec9b69ff8d08b1a0881a03c188 /include/linux | |
| parent | 8ebd24a7822cbae25beeafba49b2159d6a68a5f2 (diff) | |
| parent | 1d6e569b7d0c0b2736636749e4be0a27f3cefcb3 (diff) | |
| download | linux-3803065cd6b0630d4161d86aa04e2d1db0f3a0b5.tar.xz | |
Merge branch 'tun-tap-vhost-net-apply-qdisc-backpressure-on-full-ptr_ring-to-reduce-tx-drops'
Simon Schippers says:
====================
tun/tap & vhost-net: apply qdisc backpressure on full ptr_ring to reduce TX drops
This patch series deals with tun/tap & vhost-net, which drop incoming
SKBs whenever their internal ptr_ring buffer is full. Instead, with this
patch series, the associated netdev queue is stopped - but only when a
qdisc is attached. If no qdisc is present the existing behavior is
preserved. The XDP transmit path is not affected. This patch series
touches tun/tap and vhost-net, as they share common logic and must be
updated together. Modifying only one of them would break the other.
By applying proper backpressure, this change allows the connected qdisc to
operate correctly, as reported in [1], and significantly improves
performance in real-world scenarios, as demonstrated in our paper [2]. For
example, we observed a 36% TCP throughput improvement for an OpenVPN
connection between Germany and the USA.
Synthetic pktgen benchmarks indicate a slight regression, but packet
loss is reduced to near zero. Pktgen benchmarks are provided per commit,
with the final commit showing the overall performance.
[1] Link: https://unix.stackexchange.com/questions/762935/traffic-shaping-ineffective-on-tun-device
[2] Link: https://cni.etit.tu-dortmund.de/storages/cni-etit/r/Research/Publications/2025/Gebauer_2025_VTCFall/Gebauer_VTCFall2025_AuthorsVersion.pdf
====================
Link: https://patch.msgid.link/20260510151529.43895-1-simon.schippers@tu-dortmund.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/if_tun.h | 3 | ||||
| -rw-r--r-- | include/linux/ptr_ring.h | 20 |
2 files changed, 21 insertions, 2 deletions
```diff
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 80166eb62f41..5f3e206c7a73 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -22,6 +22,7 @@ struct tun_msg_ctl {
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
 struct ptr_ring *tun_get_tx_ring(struct file *file);
+void tun_wake_queue(struct file *file, int consumed);
 
 static inline bool tun_is_xdp_frame(void *ptr)
 {
@@ -55,6 +56,8 @@ static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
 	return ERR_PTR(-EINVAL);
 }
 
+static inline void tun_wake_queue(struct file *f, int consumed) {}
+
 static inline bool tun_is_xdp_frame(void *ptr)
 {
 	return false;
diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index d2c3629bbe45..c95e891903f0 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -98,13 +98,29 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r)
 
 /* Note: callers invoking this in a loop must use a compiler barrier,
  * for example cpu_relax(). Callers must hold producer_lock.
+ */
+static inline int __ptr_ring_check_produce(struct ptr_ring *r)
+{
+	if (unlikely(!r->size))
+		return -EINVAL;
+
+	if (data_race(r->queue[r->producer]))
+		return -ENOSPC;
+
+	return 0;
+}
+
+/* Note: callers invoking this in a loop must use a compiler barrier,
+ * for example cpu_relax(). Callers must hold producer_lock.
  * Callers are responsible for making sure pointer that is being queued
  * points to a valid data.
  */
 static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
 {
-	if (unlikely(!r->size) || data_race(r->queue[r->producer]))
-		return -ENOSPC;
+	int p = __ptr_ring_check_produce(r);
+
+	if (p)
+		return p;
 
 	/* Make sure the pointer we are storing points to a valid data. */
 	/* Pairs with the dependency ordering in __ptr_ring_consume. */
```
