summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author Martin KaFai Lau <martin.lau@kernel.org> 2025-10-16 22:15:10 +0300
committer Martin KaFai Lau <martin.lau@kernel.org> 2025-10-16 22:15:10 +0300
commit 03de843bd0806184505f1e8099ff4ca9a8665dbb (patch)
tree c001971e2e6dedd6609541d8f4e925e9dd1ae6e4 /include
parent 55db64ddd6a12c5157a61419a11a18fc727e8286 (diff)
parent 5f941dd87b0a82dd690821c6e0f427db87a4453b (diff)
download linux-03de843bd0806184505f1e8099ff4ca9a8665dbb.tar.xz
Merge branch 'bpf-allow-opt-out-from-sk-sk_prot-memory_allocated'
Kuniyuki Iwashima says:

====================
bpf: Allow opt-out from sk->sk_prot->memory_allocated.

This series allows opting out of the global per-protocol memory
accounting if socket is configured as such by sysctl or BPF prog.

This series is the successor of the series below [0], but the changes
now fall in net and bpf subsystems only.

I discussed with Roman Gushchin offlist, and he suggested not mixing
two independent subsystems and it would be cleaner not to depend on
memcg.

So, sk->sk_memcg and memcg code are no longer touched, and instead we
use another hole near sk->sk_prot to store a flag for the pure net
opt-out feature.

Overview of the series:

  patch 1 is misc cleanup
  patch 2 allows opt-out from sk->sk_prot->memory_allocated
  patch 3 introduces net.core.bypass_prot_mem
  patch 4 & 5 supports flagging sk->sk_bypass_prot_mem via bpf_setsockopt()
  patch 6 is selftest

Thank you very much for all your help, Shakeel, Roman, Martin, and Eric!

[0]: https://lore.kernel.org/bpf/20250920000751.2091731-1-kuniyu@google.com/

Changes:
  v2:
    * Patch 2:
      * Fill kdoc for skc_bypass_prot_mem
    * Patch 6
      * Fix server fd leak in tcp_create_sockets()
      * Avoid close(0) in check_bypass()

  v1: https://lore.kernel.org/bpf/20251007001120.2661442-1-kuniyu@google.com/
====================

Link: https://patch.msgid.link/20251014235604.3057003-1-kuniyu@google.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Diffstat (limited to 'include')
-rw-r--r-- include/net/netns/core.h | 1
-rw-r--r-- include/net/proto_memory.h | 3
-rw-r--r-- include/net/sock.h | 3
-rw-r--r-- include/net/tcp.h | 3
-rw-r--r-- include/uapi/linux/bpf.h | 2
5 files changed, 12 insertions, 0 deletions
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index cb9c3e4cd738..9ef3d70e5e9c 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -17,6 +17,7 @@ struct netns_core {
int sysctl_optmem_max;
u8 sysctl_txrehash;
u8 sysctl_tstamp_allow_data;
+ u8 sysctl_bypass_prot_mem;
#ifdef CONFIG_PROC_FS
struct prot_inuse __percpu *prot_inuse;
diff --git a/include/net/proto_memory.h b/include/net/proto_memory.h
index 8e91a8fa31b5..ad6d703ce6fe 100644
--- a/include/net/proto_memory.h
+++ b/include/net/proto_memory.h
@@ -35,6 +35,9 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
mem_cgroup_sk_under_memory_pressure(sk))
return true;
+ if (sk->sk_bypass_prot_mem)
+ return false;
+
return !!READ_ONCE(*sk->sk_prot->memory_pressure);
}
diff --git a/include/net/sock.h b/include/net/sock.h
index 30ac2eb4ef9b..415e7381aa50 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -118,6 +118,7 @@ typedef __u64 __bitwise __addrpair;
* @skc_reuseport: %SO_REUSEPORT setting
* @skc_ipv6only: socket is IPV6 only
* @skc_net_refcnt: socket is using net ref counting
+ * @skc_bypass_prot_mem: bypass the per-protocol memory accounting for skb
* @skc_bound_dev_if: bound device index if != 0
* @skc_bind_node: bind hash linkage for various protocol lookup tables
* @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol
@@ -174,6 +175,7 @@ struct sock_common {
unsigned char skc_reuseport:1;
unsigned char skc_ipv6only:1;
unsigned char skc_net_refcnt:1;
+ unsigned char skc_bypass_prot_mem:1;
int skc_bound_dev_if;
union {
struct hlist_node skc_bind_node;
@@ -381,6 +383,7 @@ struct sock {
#define sk_reuseport __sk_common.skc_reuseport
#define sk_ipv6only __sk_common.skc_ipv6only
#define sk_net_refcnt __sk_common.skc_net_refcnt
+#define sk_bypass_prot_mem __sk_common.skc_bypass_prot_mem
#define sk_bound_dev_if __sk_common.skc_bound_dev_if
#define sk_bind_node __sk_common.skc_bind_node
#define sk_prot __sk_common.skc_prot
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1e547138f4fb..439e327fdbfa 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -303,6 +303,9 @@ static inline bool tcp_under_memory_pressure(const struct sock *sk)
mem_cgroup_sk_under_memory_pressure(sk))
return true;
+ if (sk->sk_bypass_prot_mem)
+ return false;
+
return READ_ONCE(tcp_memory_pressure);
}
/*
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6829936d33f5..6eb75ad900b1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -7200,6 +7200,8 @@ enum {
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */
+ SK_BPF_BYPASS_PROT_MEM = 1010, /* Get or Set sk->sk_bypass_prot_mem */
+
};
enum {