summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2026-06-09 02:00:12 +0300
committerJakub Kicinski <kuba@kernel.org>2026-06-09 02:00:12 +0300
commit1e127c94fa11cd55c8495c4b13bb255094683b4c (patch)
treea93d922195a5a47123e7af3a50c18b761bd92c56
parent1bf20cc62a54f95db32529871534751fb6b1b73c (diff)
parentb016022b127fc2949f79c350817c458a060314e4 (diff)
downloadlinux-1e127c94fa11cd55c8495c4b13bb255094683b4c.tar.xz
Merge branch 'so_txtime-improvements'
Willem de Bruijn says:

====================
SO_TXTIME improvements

FQ targets monotonic timestamps as generated by the TCP stack. But SO_TXTIME was later added, which can send skbs with timestamps against other clocks.

It is now possible to detect these through skb tstamp_type. Make FQ robust by converting these timestamps for use in FQ (patch 2).

This also requires testing against out-of-bounds values. Prefer to do this at the source, when parsing SCM_TXTIME (patch 1). But, tests in the hot path are still needed, to handle BPF sources.

Extend the so_txtime selftest to handle this new case (patch 3).

v1: https://lore.kernel.org/20260603190243.2789335-1-willemdebruijn.kernel@gmail.com
====================

Link: https://patch.msgid.link/20260604194221.3319080-1-willemdebruijn.kernel@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--net/core/sock.c32
-rw-r--r--net/sched/sch_fq.c43
-rwxr-xr-xtools/testing/selftests/drivers/net/so_txtime.py18
3 files changed, 83 insertions, 10 deletions
diff --git a/net/core/sock.c b/net/core/sock.c
index 4315409c22db..4a8a16793e16 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3045,12 +3045,42 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
sockc->tsflags |= tsflags;
break;
case SCM_TXTIME:
+ {
+ ktime_t tmin;
+ u64 txtime;
+
if (!sock_flag(sk, SOCK_TXTIME))
return -EINVAL;
if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
return -EINVAL;
- sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
+
+ txtime = get_unaligned((u64 *)CMSG_DATA(cmsg));
+
+ /* Allow sending without a delivery time: zero special case */
+ if (!txtime) {
+ sockc->transmit_time = 0;
+ break;
+ }
+
+ switch (sk->sk_clockid) {
+ case CLOCK_MONOTONIC:
+ tmin = 1;
+ break;
+ case CLOCK_REALTIME:
+ tmin = max(ktime_mono_to_real(0), 1);
+ break;
+ case CLOCK_TAI:
+ tmin = max(ktime_mono_to_any(0, TK_OFFS_TAI), 1);
+ break;
+ default:
+ tmin = 1;
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ sockc->transmit_time = max_t(ktime_t, txtime, tmin);
break;
+ }
case SCM_TS_OPT_ID:
if (sk_is_tcp(sk))
return -EINVAL;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 33783c9f8e16..7cae082a9847 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -537,10 +537,10 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
rb_insert_color(&skb->rbnode, &flow->t_root);
}
-static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
+static bool fq_packet_beyond_horizon(ktime_t time_to_send,
const struct fq_sched_data *q, u64 now)
{
- return unlikely((s64)skb->tstamp > (s64)(now + q->horizon));
+ return unlikely((s64)time_to_send > (s64)(now + q->horizon));
}
static void fq_flow_adjust_timer(struct fq_sched_data *q, struct fq_flow *flow,
@@ -561,6 +561,36 @@ static void fq_flow_adjust_timer(struct fq_sched_data *q, struct fq_flow *flow,
}
}
+static ktime_t fq_skb_tstamp_to_mono(struct sk_buff *skb)
+{
+ const ktime_t mono_max = NSEC_PER_SEC * TIME_UPTIME_SEC_MAX;
+
+ if (likely(skb->tstamp_type == SKB_CLOCK_MONOTONIC))
+ return max(skb->tstamp, 1);
+
+ if (skb->tstamp_type == SKB_CLOCK_TAI)
+ return max(ktime_sub(skb->tstamp, ktime_mono_to_any(0, TK_OFFS_TAI)), 1);
+
+ if (likely(skb->tstamp > mono_max))
+ return max(ktime_sub(skb->tstamp, ktime_mono_to_real(0)), 1);
+
+ /* Handle BPF programs setting skb->tstamp but not tstamp_type */
+ net_warn_ratelimited("fq: likely mono tstamp with tstamp_type 0\n");
+
+ skb->tstamp_type = SKB_CLOCK_MONOTONIC;
+ return max(skb->tstamp, 1);
+}
+
+static void fq_mono_to_skb_tstamp(struct sk_buff *skb, ktime_t time_to_send)
+{
+ if (skb->tstamp_type == SKB_CLOCK_MONOTONIC)
+ skb->tstamp = time_to_send;
+ else if (skb->tstamp_type == SKB_CLOCK_REALTIME)
+ skb->tstamp = ktime_mono_to_real(time_to_send);
+ else
+ skb->tstamp = ktime_mono_to_any(time_to_send, TK_OFFS_TAI);
+}
+
static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
@@ -579,17 +609,20 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (!skb->tstamp) {
fq_skb_cb(skb)->time_to_send = now;
} else {
+ ktime_t time_to_send = fq_skb_tstamp_to_mono(skb);
+
/* Check if packet timestamp is too far in the future. */
- if (fq_packet_beyond_horizon(skb, q, now)) {
+ if (fq_packet_beyond_horizon(time_to_send, q, now)) {
if (q->horizon_drop) {
q->stat_horizon_drops++;
return qdisc_drop_reason(skb, sch, to_free,
QDISC_DROP_HORIZON_LIMIT);
}
q->stat_horizon_caps++;
- skb->tstamp = now + q->horizon;
+ time_to_send = now + q->horizon;
+ fq_mono_to_skb_tstamp(skb, time_to_send);
}
- fq_skb_cb(skb)->time_to_send = skb->tstamp;
+ fq_skb_cb(skb)->time_to_send = (u64)time_to_send;
}
f = fq_classify(sch, skb, now);
diff --git a/tools/testing/selftests/drivers/net/so_txtime.py b/tools/testing/selftests/drivers/net/so_txtime.py
index 5d4388bfc6dd..b7be4cabbec2 100755
--- a/tools/testing/selftests/drivers/net/so_txtime.py
+++ b/tools/testing/selftests/drivers/net/so_txtime.py
@@ -46,7 +46,7 @@ def _qdisc_setup(ifname, qdisc, optargs=""):
tc(f"qdisc replace dev {ifname} root {qdisc} {optargs}")
-def _test_variants_mono():
+def _test_variants_fq():
for ipver in ["4", "6"]:
for testcase in [
["no_delay", "a,-1", "a,-1"],
@@ -59,13 +59,20 @@ def _test_variants_mono():
yield KsftNamedVariant(name, ipver, testcase[1], testcase[2])
-@ksft_variants(_test_variants_mono())
-def test_so_txtime_mono(cfg, ipver, args_tx, args_rx):
+@ksft_variants(_test_variants_fq())
+def test_so_txtime_fq_mono(cfg, ipver, args_tx, args_rx):
"""Run all variants of monotonic (fq) tests."""
_qdisc_setup(cfg.ifname, "fq")
test_so_txtime(cfg, "mono", ipver, args_tx, args_rx, True)
+@ksft_variants(_test_variants_fq())
+def test_so_txtime_fq_tai(cfg, ipver, args_tx, args_rx):
+ """Run all variants of fq tests, but pass CLOCK_TAI to test conversion."""
+ _qdisc_setup(cfg.ifname, "fq")
+ test_so_txtime(cfg, "tai", ipver, args_tx, args_rx, True)
+
+
def _test_variants_etf():
for ipver in ["4", "6"]:
for testcase in [
@@ -95,7 +102,10 @@ def test_so_txtime_etf(cfg, ipver, args_tx, args_rx, expect_fail):
def main() -> None:
"""Boilerplate ksft main."""
with NetDrvEpEnv(__file__) as cfg:
- ksft_run([test_so_txtime_mono, test_so_txtime_etf], args=(cfg,))
+ ksft_run(
+ [test_so_txtime_fq_mono, test_so_txtime_fq_tai, test_so_txtime_etf],
+ args=(cfg,),
+ )
ksft_exit()