summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorKuniyuki Iwashima <kuniyu@google.com>2025-10-15 02:54:56 +0300
committerMartin KaFai Lau <martin.lau@kernel.org>2025-10-16 22:04:47 +0300
commitb46ab63181ff973ddce44ebc9ac24b269d42f481 (patch)
treef0dd1c7eb51f2db2cb3fdc9f33ed0b2306957c93 /include
parent7c268eaeec6388b7bee36aef3fb5e62c9222ad3b (diff)
downloadlinux-b46ab63181ff973ddce44ebc9ac24b269d42f481.tar.xz
net: Introduce net.core.bypass_prot_mem sysctl.
If a socket has sk->sk_bypass_prot_mem flagged, the socket opts out of the global protocol memory accounting. Let's control the flag by a new sysctl knob. The flag is written once during socket(2) and is inherited to child sockets. Tested with a script that creates local socket pairs and send()s a bunch of data without recv()ing. Setup: # mkdir /sys/fs/cgroup/test # echo $$ >> /sys/fs/cgroup/test/cgroup.procs # sysctl -q net.ipv4.tcp_mem="1000 1000 1000" # ulimit -n 524288 Without net.core.bypass_prot_mem, charged to tcp_mem & memcg # python3 pressure.py & # cat /sys/fs/cgroup/test/memory.stat | grep sock sock 22642688 <-------------------------------------- charged to memcg # cat /proc/net/sockstat| grep TCP TCP: inuse 2006 orphan 0 tw 0 alloc 2008 mem 5376 <-- charged to tcp_mem # ss -tn | head -n 5 State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 2000 0 127.0.0.1:34479 127.0.0.1:53188 ESTAB 2000 0 127.0.0.1:34479 127.0.0.1:49972 ESTAB 2000 0 127.0.0.1:34479 127.0.0.1:53868 ESTAB 2000 0 127.0.0.1:34479 127.0.0.1:53554 # nstat | grep Pressure || echo no pressure TcpExtTCPMemoryPressures 1 0.0 With net.core.bypass_prot_mem=1, charged to memcg only: # sysctl -q net.core.bypass_prot_mem=1 # python3 pressure.py & # cat /sys/fs/cgroup/test/memory.stat | grep sock sock 2757468160 <------------------------------------ charged to memcg # cat /proc/net/sockstat | grep TCP TCP: inuse 2006 orphan 0 tw 0 alloc 2008 mem 0 <- NOT charged to tcp_mem # ss -tn | head -n 5 State Recv-Q Send-Q Local Address:Port Peer Address:Port ESTAB 111000 0 127.0.0.1:36019 127.0.0.1:49026 ESTAB 110000 0 127.0.0.1:36019 127.0.0.1:45630 ESTAB 110000 0 127.0.0.1:36019 127.0.0.1:44870 ESTAB 111000 0 127.0.0.1:36019 127.0.0.1:45274 # nstat | grep Pressure || echo no pressure no pressure Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com> Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org> Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev> Reviewed-by: Eric Dumazet <edumazet@google.com> Acked-by: Roman Gushchin <roman.gushchin@linux.dev> Link: https://patch.msgid.link/20251014235604.3057003-4-kuniyu@google.com
Diffstat (limited to 'include')
-rw-r--r--include/net/netns/core.h1
1 files changed, 1 insertions, 0 deletions
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index cb9c3e4cd738..9ef3d70e5e9c 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -17,6 +17,7 @@ struct netns_core {
int sysctl_optmem_max;
u8 sysctl_txrehash;
u8 sysctl_tstamp_allow_data;
+ u8 sysctl_bypass_prot_mem;
#ifdef CONFIG_PROC_FS
struct prot_inuse __percpu *prot_inuse;