summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2024-03-29 18:28:50 +0300
committerJakub Kicinski <kuba@kernel.org>2024-03-29 18:28:51 +0300
commitda493dbb1f2a156a1b6d8d8a447f2c3affe43678 (patch)
tree0c59686980a7ef30db7510b22fc5245b052b35ae /tools
parent50e2907ef8bb52cf80ecde9eec5c4dac07177146 (diff)
parent2aa0cff26ed53bc8d4855292b501759435ffdd38 (diff)
downloadlinux-da493dbb1f2a156a1b6d8d8a447f2c3affe43678.tar.xz
Merge branch 'af_unix-rework-gc'
Kuniyuki Iwashima says: ==================== af_unix: Rework GC. When we pass a file descriptor to an AF_UNIX socket via SCM_RIGTHS, the underlying struct file of the inflight fd gets its refcount bumped. If the fd is of an AF_UNIX socket, we need to track it in case it forms cyclic references. Let's say we send a fd of AF_UNIX socket A to B and vice versa and close() both sockets. When created, each socket's struct file initially has one reference. After the fd exchange, both refcounts are bumped up to 2. Then, close() decreases both to 1. From this point on, no one can touch the file/socket. However, the struct file has one refcount and thus never calls the release() function of the AF_UNIX socket. That's why we need to track all inflight AF_UNIX sockets and run garbage collection. This series replaces the current GC implementation that locks each inflight socket's receive queue and requires trickiness in other places. The new GC does not lock each socket's queue to minimise its effect and tries to be lightweight if there is no cyclic reference or no update in the shape of the inflight fd graph. The new implementation is based on Tarjan's Strongly Connected Components algorithm, and we will consider each inflight AF_UNIX socket as a vertex and its file descriptor as an edge in a directed graph. For the details, please see each patch. patch 1 - 3 : Add struct to express inflight socket graphs patch 4 : Optimse inflight fd counting patch 5 - 6 : Group SCC possibly forming a cycle patch 7 - 8 : Support embryo socket patch 9 - 11 : Make GC lightweight patch 12 - 13 : Detect dead cycle references patch 14 : Replace GC algorithm patch 15 : selftest After this series is applied, we can remove the two ugly tricks for race, scm_fp_dup() in unix_attach_fds() and spin_lock dance in unix_peek_fds() as done in patch 14/15 of v1. Also, we will add cond_resched_lock() in __unix_gc() and convert it to use a dedicated kthread instead of global system workqueue as suggested by Paolo in a v4 thread. v4: https://lore.kernel.org/netdev/20240301022243.73908-1-kuniyu@amazon.com/ v3: https://lore.kernel.org/netdev/20240223214003.17369-1-kuniyu@amazon.com/ v2: https://lore.kernel.org/netdev/20240216210556.65913-1-kuniyu@amazon.com/ v1: https://lore.kernel.org/netdev/20240203030058.60750-1-kuniyu@amazon.com/ ==================== Link: https://lore.kernel.org/r/20240325202425.60930-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c286
3 files changed, 288 insertions, 1 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 2f9d378edec3..d996a0ab0765 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -31,6 +31,7 @@ reuseport_dualstack
rxtimestamp
sctp_hello
scm_pidfd
+scm_rights
sk_bind_sendto_listen
sk_connect_zero_addr
socket
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 221c387a7d7f..3b83c797650d 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,4 @@
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd
+TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd scm_rights
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c
new file mode 100644
index 000000000000..bab606c9f1eb
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_rights.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(scm_rights)
+{
+ int fd[16];
+};
+
+FIXTURE_VARIANT(scm_rights)
+{
+ char name[16];
+ int type;
+ int flags;
+ bool test_listener;
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, dgram)
+{
+ .name = "UNIX ",
+ .type = SOCK_DGRAM,
+ .flags = 0,
+ .test_listener = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = true,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = true,
+};
+
+static int count_sockets(struct __test_metadata *_metadata,
+ const FIXTURE_VARIANT(scm_rights) *variant)
+{
+ int sockets = -1, len, ret;
+ char *line = NULL;
+ size_t unused;
+ FILE *f;
+
+ f = fopen("/proc/net/protocols", "r");
+ ASSERT_NE(NULL, f);
+
+ len = strlen(variant->name);
+
+ while (getline(&line, &unused, f) != -1) {
+ int unused2;
+
+ if (strncmp(line, variant->name, len))
+ continue;
+
+ ret = sscanf(line + len, "%d %d", &unused2, &sockets);
+ ASSERT_EQ(2, ret);
+
+ break;
+ }
+
+ free(line);
+
+ ret = fclose(f);
+ ASSERT_EQ(0, ret);
+
+ return sockets;
+}
+
+FIXTURE_SETUP(scm_rights)
+{
+ int ret;
+
+ ret = unshare(CLONE_NEWNET);
+ ASSERT_EQ(0, ret);
+
+ ret = count_sockets(_metadata, variant);
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(scm_rights)
+{
+ int ret;
+
+ sleep(1);
+
+ ret = count_sockets(_metadata, variant);
+ ASSERT_EQ(0, ret);
+}
+
+static void create_listeners(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ int n)
+{
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ };
+ socklen_t addrlen;
+ int i, ret;
+
+ for (i = 0; i < n * 2; i += 2) {
+ self->fd[i] = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, self->fd[i]);
+
+ addrlen = sizeof(addr.sun_family);
+ ret = bind(self->fd[i], (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(0, ret);
+
+ ret = listen(self->fd[i], -1);
+ ASSERT_EQ(0, ret);
+
+ addrlen = sizeof(addr);
+ ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen);
+ ASSERT_EQ(0, ret);
+
+ self->fd[i + 1] = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, self->fd[i + 1]);
+
+ ret = connect(self->fd[i + 1], (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(0, ret);
+ }
+}
+
+static void create_socketpairs(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
+ int n)
+{
+ int i, ret;
+
+ ASSERT_GE(sizeof(self->fd) / sizeof(int), n);
+
+ for (i = 0; i < n * 2; i += 2) {
+ ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i);
+ ASSERT_EQ(0, ret);
+ }
+}
+
+static void __create_sockets(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
+ int n)
+{
+ if (variant->test_listener)
+ create_listeners(_metadata, self, n);
+ else
+ create_socketpairs(_metadata, self, variant, n);
+}
+
+static void __close_sockets(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ int n)
+{
+ int i, ret;
+
+ ASSERT_GE(sizeof(self->fd) / sizeof(int), n);
+
+ for (i = 0; i < n * 2; i++) {
+ ret = close(self->fd[i]);
+ ASSERT_EQ(0, ret);
+ }
+}
+
+void __send_fd(struct __test_metadata *_metadata,
+ const FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
+ int inflight, int receiver)
+{
+#define MSG "nop"
+#define MSGLEN 3
+ struct {
+ struct cmsghdr cmsghdr;
+ int fd[2];
+ } cmsg = {
+ .cmsghdr = {
+ .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)),
+ .cmsg_level = SOL_SOCKET,
+ .cmsg_type = SCM_RIGHTS,
+ },
+ .fd = {
+ self->fd[inflight * 2],
+ self->fd[inflight * 2],
+ },
+ };
+ struct iovec iov = {
+ .iov_base = MSG,
+ .iov_len = MSGLEN,
+ };
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &cmsg,
+ .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)),
+ };
+ int ret;
+
+ ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags);
+ ASSERT_EQ(MSGLEN, ret);
+}
+
+#define create_sockets(n) \
+ __create_sockets(_metadata, self, variant, n)
+#define close_sockets(n) \
+ __close_sockets(_metadata, self, n)
+#define send_fd(inflight, receiver) \
+ __send_fd(_metadata, self, variant, inflight, receiver)
+
+TEST_F(scm_rights, self_ref)
+{
+ create_sockets(2);
+
+ send_fd(0, 0);
+
+ send_fd(1, 1);
+
+ close_sockets(2);
+}
+
+TEST_F(scm_rights, triangle)
+{
+ create_sockets(6);
+
+ send_fd(0, 1);
+ send_fd(1, 2);
+ send_fd(2, 0);
+
+ send_fd(3, 4);
+ send_fd(4, 5);
+ send_fd(5, 3);
+
+ close_sockets(6);
+}
+
+TEST_F(scm_rights, cross_edge)
+{
+ create_sockets(8);
+
+ send_fd(0, 1);
+ send_fd(1, 2);
+ send_fd(2, 0);
+ send_fd(1, 3);
+ send_fd(3, 2);
+
+ send_fd(4, 5);
+ send_fd(5, 6);
+ send_fd(6, 4);
+ send_fd(5, 7);
+ send_fd(7, 6);
+
+ close_sockets(8);
+}
+
+TEST_HARNESS_MAIN