summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Keita Morisaki <kmta1236@gmail.com>, 2026-03-04 14:15:17 +0300
committer: Jakub Kicinski <kuba@kernel.org>, 2026-03-06 05:30:40 +0300
commit: cfcceb7a39fc10a6f896af8229bf81d96acb22cc (patch)
tree: 38b528f6b48a385fbc82603231187f4f38cc00de
parent: 752941e3faf6be26c6b5a118e37bdbaea2b97171 (diff)
download: linux-cfcceb7a39fc10a6f896af8229bf81d96acb22cc.tar.xz
tcp: shrink per-packet memset in __tcp_transmit_skb()
Use struct_group() to group the three fields in tcp_out_options that are read unconditionally by tcp_options_write() and bpf_skops_write_hdr_opt() (mss, bpf_opt_len, num_sack_blocks), then replace the full-struct memset with a targeted memset of only that group.

struct tcp_out_options is 40 bytes without MPTCP and 96 bytes with CONFIG_MPTCP=y (typical distro config). Every remaining field is either assigned before first use by tcp_established_options()/tcp_syn_options(), or gated behind its OPTION_* flag in tcp_options_write().

This memset runs on every transmitted TCP packet, so shrinking it from 96 (or 40) bytes to 4 bytes reduces per-packet overhead on the hot path.

Assembly comparison (x86-64, GCC 13, CONFIG_MPTCP=y):

  Before: rep stos zeroing 96 bytes (5 instructions, 12 8-byte stores)
  After:  movl $0x0 zeroing 4 bytes (1 instruction, 1 store)

Also add opts->options = 0 at the top of tcp_syn_options(), which already used |= without a prior clear. tcp_established_options() already clears opts->options at its top.

Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Keita Morisaki <kmta1236@gmail.com>
Acked-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260304111517.2088694-1-kmta1236@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- net/ipv4/tcp_output.c | 15
1 files changed, 11 insertions, 4 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 46bd48cf776a..f0ebcc7e2871 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -429,14 +429,19 @@ static void smc_options_write(__be32 *ptr, u16 *options)
}
struct tcp_out_options {
+ /* Following group is cleared in __tcp_transmit_skb() */
+ struct_group(cleared,
+ u16 mss; /* 0 to disable */
+ u8 bpf_opt_len; /* length of BPF hdr option */
+ u8 num_sack_blocks; /* number of SACK blocks to include */
+ );
+
+ /* Caution: following fields are not cleared in __tcp_transmit_skb() */
u16 options; /* bit field of OPTION_* */
- u16 mss; /* 0 to disable */
u8 ws; /* window scale, 0 to disable */
- u8 num_sack_blocks; /* number of SACK blocks to include */
u8 num_accecn_fields:7, /* number of AccECN fields needed */
use_synack_ecn_bytes:1; /* Use synack_ecn_bytes or not */
u8 hash_size; /* bytes in hash_location */
- u8 bpf_opt_len; /* length of BPF hdr option */
__u8 *hash_location; /* temporary pointer, overloaded */
__u32 tsval, tsecr; /* need to include OPTION_TS */
struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
@@ -965,6 +970,8 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
struct tcp_fastopen_request *fastopen = tp->fastopen_req;
bool timestamps;
+ opts->options = 0;
+
/* Better than switch (key.type) as it has static branches */
if (tcp_key_is_md5(key)) {
timestamps = false;
@@ -1565,7 +1572,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
inet = inet_sk(sk);
tcb = TCP_SKB_CB(skb);
- memset(&opts, 0, sizeof(opts));
+ memset(&opts.cleared, 0, sizeof(opts.cleared));
tcp_get_current_key(sk, &key);
if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {