summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorStanislav Fomichev <sdf@google.com>2021-01-15 19:34:59 +0300
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2023-07-27 09:43:37 +0300
commit08f61a34913558e06576e7b6318bf583f273c1be (patch)
treeae954551c1da61a585ad5c6508ca49a96e7b3b9e /net
parentc62e2ac02e288b7e0039037e8e2e9759dafe8acf (diff)
downloadlinux-08f61a34913558e06576e7b6318bf583f273c1be.tar.xz
bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE
[ Upstream commit 9cacf81f8161111db25f98e78a7a0e32ae142b3f ] Add custom implementation of getsockopt hook for TCP_ZEROCOPY_RECEIVE. We skip generic hooks for TCP_ZEROCOPY_RECEIVE and have a custom call in do_tcp_getsockopt using the on-stack data. This removes 3% overhead for locking/unlocking the socket. Without this patch: 3.38% 0.07% tcp_mmap [kernel.kallsyms] [k] __cgroup_bpf_run_filter_getsockopt | --3.30%--__cgroup_bpf_run_filter_getsockopt | --0.81%--__kmalloc With the patch applied: 0.52% 0.12% tcp_mmap [kernel.kallsyms] [k] __cgroup_bpf_run_filter_getsockopt_kern Note, exporting uapi/tcp.h requires removing netinet/tcp.h from test_progs.h because those headers have confliciting definitions. Signed-off-by: Stanislav Fomichev <sdf@google.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Martin KaFai Lau <kafai@fb.com> Link: https://lore.kernel.org/bpf/20210115163501.805133-2-sdf@google.com Stable-dep-of: 2598619e012c ("sctp: add bpf_bypass_getsockopt proto callback") Signed-off-by: Sasha Levin <sashal@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/tcp.c14
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv6/tcp_ipv6.c1
-rw-r--r--net/socket.c3
4 files changed, 19 insertions, 0 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 82abbf192985..cc42ceadc112 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3970,6 +3970,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return -EFAULT;
lock_sock(sk);
err = tcp_zerocopy_receive(sk, &zc);
+ err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname,
+ &zc, &len, err);
release_sock(sk);
if (len >= offsetofend(struct tcp_zerocopy_receive, err))
goto zerocopy_rcv_sk_err;
@@ -4004,6 +4006,18 @@ zerocopy_rcv_out:
return 0;
}
+bool tcp_bpf_bypass_getsockopt(int level, int optname)
+{
+ /* TCP do_tcp_getsockopt has optimized getsockopt implementation
+ * to avoid extra socket lock for TCP_ZEROCOPY_RECEIVE.
+ */
+ if (level == SOL_TCP && optname == TCP_ZEROCOPY_RECEIVE)
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL(tcp_bpf_bypass_getsockopt);
+
int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
int __user *optlen)
{
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 270b20e0907c..d62d5d7764ad 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2805,6 +2805,7 @@ struct proto tcp_prot = {
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
+ .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
.keepalive = tcp_set_keepalive,
.recvmsg = tcp_recvmsg,
.sendmsg = tcp_sendmsg,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index fe29bc66aeac..5392aebd48f1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2135,6 +2135,7 @@ struct proto tcpv6_prot = {
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
+ .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
.keepalive = tcp_set_keepalive,
.recvmsg = tcp_recvmsg,
.sendmsg = tcp_sendmsg,
diff --git a/net/socket.c b/net/socket.c
index 84223419da86..f2172b756c0f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2137,6 +2137,9 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
return __sys_setsockopt(fd, level, optname, optval, optlen);
}
+INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
+ int optname));
+
/*
* Get a socket option. Because we don't know the option lengths we have
* to pass a user mode parameter for the protocols to sort out.