diff options
| author | Jakub Kicinski <kuba@kernel.org> | 2026-06-09 02:00:12 +0300 |
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2026-06-09 02:00:12 +0300 |
| commit | 1e127c94fa11cd55c8495c4b13bb255094683b4c (patch) | |
| tree | a93d922195a5a47123e7af3a50c18b761bd92c56 | |
| parent | 1bf20cc62a54f95db32529871534751fb6b1b73c (diff) | |
| parent | b016022b127fc2949f79c350817c458a060314e4 (diff) | |
| download | linux-1e127c94fa11cd55c8495c4b13bb255094683b4c.tar.xz | |
Merge branch 'so_txtime-improvements'
Willem de Bruijn says:
====================
SO_TXTIME improvements
FQ targets monotonic timestamps as generated by the TCP stack.
SO_TXTIME was added later, and it can send skbs with timestamps
against other clocks. It is now possible to detect these through the
skb tstamp_type field.
Make FQ robust by converting these timestamps to the monotonic clock (patch 2).
This also requires checking for out-of-bounds values. Prefer to do
this at the source, when parsing SCM_TXTIME (patch 1). Checks in
the hot path are still needed, however, to handle timestamps set by BPF programs.
Extend the so_txtime selftest to handle this new case (patch 3).
v1: https://lore.kernel.org/20260603190243.2789335-1-willemdebruijn.kernel@gmail.com
====================
Link: https://patch.msgid.link/20260604194221.3319080-1-willemdebruijn.kernel@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
| -rw-r--r-- | net/core/sock.c | 32 | ||||
| -rw-r--r-- | net/sched/sch_fq.c | 43 | ||||
| -rwxr-xr-x | tools/testing/selftests/drivers/net/so_txtime.py | 18 |
3 files changed, 83 insertions, 10 deletions
diff --git a/net/core/sock.c b/net/core/sock.c index 4315409c22db..4a8a16793e16 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3045,12 +3045,42 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, sockc->tsflags |= tsflags; break; case SCM_TXTIME: + { + ktime_t tmin; + u64 txtime; + if (!sock_flag(sk, SOCK_TXTIME)) return -EINVAL; if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64))) return -EINVAL; - sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg)); + + txtime = get_unaligned((u64 *)CMSG_DATA(cmsg)); + + /* Allow sending without a delivery time: zero special case */ + if (!txtime) { + sockc->transmit_time = 0; + break; + } + + switch (sk->sk_clockid) { + case CLOCK_MONOTONIC: + tmin = 1; + break; + case CLOCK_REALTIME: + tmin = max(ktime_mono_to_real(0), 1); + break; + case CLOCK_TAI: + tmin = max(ktime_mono_to_any(0, TK_OFFS_TAI), 1); + break; + default: + tmin = 1; + WARN_ON_ONCE(1); + break; + } + + sockc->transmit_time = max_t(ktime_t, txtime, tmin); break; + } case SCM_TS_OPT_ID: if (sk_is_tcp(sk)) return -EINVAL; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 33783c9f8e16..7cae082a9847 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -537,10 +537,10 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb) rb_insert_color(&skb->rbnode, &flow->t_root); } -static bool fq_packet_beyond_horizon(const struct sk_buff *skb, +static bool fq_packet_beyond_horizon(ktime_t time_to_send, const struct fq_sched_data *q, u64 now) { - return unlikely((s64)skb->tstamp > (s64)(now + q->horizon)); + return unlikely((s64)time_to_send > (s64)(now + q->horizon)); } static void fq_flow_adjust_timer(struct fq_sched_data *q, struct fq_flow *flow, @@ -561,6 +561,36 @@ static void fq_flow_adjust_timer(struct fq_sched_data *q, struct fq_flow *flow, } } +static ktime_t fq_skb_tstamp_to_mono(struct sk_buff *skb) +{ + const ktime_t mono_max = NSEC_PER_SEC * TIME_UPTIME_SEC_MAX; + + if (likely(skb->tstamp_type == 
SKB_CLOCK_MONOTONIC)) + return max(skb->tstamp, 1); + + if (skb->tstamp_type == SKB_CLOCK_TAI) + return max(ktime_sub(skb->tstamp, ktime_mono_to_any(0, TK_OFFS_TAI)), 1); + + if (likely(skb->tstamp > mono_max)) + return max(ktime_sub(skb->tstamp, ktime_mono_to_real(0)), 1); + + /* Handle BPF programs setting skb->stamp but not tstamp_type */ + net_warn_ratelimited("fq: likely mono tstamp with tstamp_type 0\n"); + + skb->tstamp_type = SKB_CLOCK_MONOTONIC; + return max(skb->tstamp, 1); +} + +static void fq_mono_to_skb_tstamp(struct sk_buff *skb, ktime_t time_to_send) +{ + if (skb->tstamp_type == SKB_CLOCK_MONOTONIC) + skb->tstamp = time_to_send; + else if (skb->tstamp_type == SKB_CLOCK_REALTIME) + skb->tstamp = ktime_mono_to_real(time_to_send); + else + skb->tstamp = ktime_mono_to_any(time_to_send, TK_OFFS_TAI); +} + static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -579,17 +609,20 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (!skb->tstamp) { fq_skb_cb(skb)->time_to_send = now; } else { + ktime_t time_to_send = fq_skb_tstamp_to_mono(skb); + /* Check if packet timestamp is too far in the future. 
*/ - if (fq_packet_beyond_horizon(skb, q, now)) { + if (fq_packet_beyond_horizon(time_to_send, q, now)) { if (q->horizon_drop) { q->stat_horizon_drops++; return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_HORIZON_LIMIT); } q->stat_horizon_caps++; - skb->tstamp = now + q->horizon; + time_to_send = now + q->horizon; + fq_mono_to_skb_tstamp(skb, time_to_send); } - fq_skb_cb(skb)->time_to_send = skb->tstamp; + fq_skb_cb(skb)->time_to_send = (u64)time_to_send; } f = fq_classify(sch, skb, now); diff --git a/tools/testing/selftests/drivers/net/so_txtime.py b/tools/testing/selftests/drivers/net/so_txtime.py index 5d4388bfc6dd..b7be4cabbec2 100755 --- a/tools/testing/selftests/drivers/net/so_txtime.py +++ b/tools/testing/selftests/drivers/net/so_txtime.py @@ -46,7 +46,7 @@ def _qdisc_setup(ifname, qdisc, optargs=""): tc(f"qdisc replace dev {ifname} root {qdisc} {optargs}") -def _test_variants_mono(): +def _test_variants_fq(): for ipver in ["4", "6"]: for testcase in [ ["no_delay", "a,-1", "a,-1"], @@ -59,13 +59,20 @@ def _test_variants_mono(): yield KsftNamedVariant(name, ipver, testcase[1], testcase[2]) -@ksft_variants(_test_variants_mono()) -def test_so_txtime_mono(cfg, ipver, args_tx, args_rx): +@ksft_variants(_test_variants_fq()) +def test_so_txtime_fq_mono(cfg, ipver, args_tx, args_rx): """Run all variants of monotonic (fq) tests.""" _qdisc_setup(cfg.ifname, "fq") test_so_txtime(cfg, "mono", ipver, args_tx, args_rx, True) +@ksft_variants(_test_variants_fq()) +def test_so_txtime_fq_tai(cfg, ipver, args_tx, args_rx): + """Run all variants of fq tests, but pass CLOCK_TAI to test conversion.""" + _qdisc_setup(cfg.ifname, "fq") + test_so_txtime(cfg, "tai", ipver, args_tx, args_rx, True) + + def _test_variants_etf(): for ipver in ["4", "6"]: for testcase in [ @@ -95,7 +102,10 @@ def test_so_txtime_etf(cfg, ipver, args_tx, args_rx, expect_fail): def main() -> None: """Boilerplate ksft main.""" with NetDrvEpEnv(__file__) as cfg: - ksft_run([test_so_txtime_mono, 
test_so_txtime_etf], args=(cfg,)) + ksft_run( + [test_so_txtime_fq_mono, test_so_txtime_fq_tai, test_so_txtime_etf], + args=(cfg,), + ) ksft_exit() |
