diff options
Diffstat (limited to 'tools/testing/vsock')
-rw-r--r-- | tools/testing/vsock/Makefile | 1 | ||||
-rw-r--r-- | tools/testing/vsock/README | 15 | ||||
-rw-r--r-- | tools/testing/vsock/timeout.c | 18 | ||||
-rw-r--r-- | tools/testing/vsock/timeout.h | 1 | ||||
-rw-r--r-- | tools/testing/vsock/util.c | 245 | ||||
-rw-r--r-- | tools/testing/vsock/util.h | 41 | ||||
-rw-r--r-- | tools/testing/vsock/vsock_test.c | 852 |
7 files changed, 1076 insertions, 97 deletions
diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile index 6e0b4e95e230..88211fd132d2 100644 --- a/tools/testing/vsock/Makefile +++ b/tools/testing/vsock/Makefile @@ -5,6 +5,7 @@ vsock_test: vsock_test.o vsock_test_zerocopy.o timeout.o control.o util.o msg_ze vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o vsock_perf: vsock_perf.o msg_zerocopy_common.o +vsock_test: LDLIBS = -lpthread vsock_uring_test: LDLIBS = -luring vsock_uring_test: control.o util.o vsock_uring_test.o timeout.o msg_zerocopy_common.o diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README index 84ee217ba8ee..680ce666ceb5 100644 --- a/tools/testing/vsock/README +++ b/tools/testing/vsock/README @@ -36,6 +36,21 @@ Invoke test binaries in both directions as follows: --control-port=1234 \ --peer-cid=3 +Some tests are designed to produce kernel memory leaks. Leaks detection, +however, is deferred to Kernel Memory Leak Detector. It is recommended to enable +kmemleak (CONFIG_DEBUG_KMEMLEAK=y) and explicitly trigger a scan after each test +suite run, e.g. + + # echo clear > /sys/kernel/debug/kmemleak + # $TEST_BINARY ... + # echo "wait for any grace periods" && sleep 2 + # echo scan > /sys/kernel/debug/kmemleak + # echo "wait for kmemleak" && sleep 5 + # echo scan > /sys/kernel/debug/kmemleak + # cat /sys/kernel/debug/kmemleak + +For more information see Documentation/dev-tools/kmemleak.rst. + vsock_perf utility ------------------- 'vsock_perf' is a simple tool to measure vsock performance. It works in diff --git a/tools/testing/vsock/timeout.c b/tools/testing/vsock/timeout.c index 44aee49b6cee..1453d38e08bb 100644 --- a/tools/testing/vsock/timeout.c +++ b/tools/testing/vsock/timeout.c @@ -21,6 +21,7 @@ #include <stdbool.h> #include <unistd.h> #include <stdio.h> +#include <time.h> #include "timeout.h" static volatile bool timeout; @@ -28,6 +29,8 @@ static volatile bool timeout; /* SIGALRM handler function. Do not use sleep(2), alarm(2), or * setitimer(2) while using this API - they may interfere with each * other. + * + * If you need to sleep, please use timeout_sleep() provided by this API. */ void sigalrm(int signo) { @@ -58,3 +61,18 @@ void timeout_end(void) alarm(0); timeout = false; } + +/* Sleep in a timeout section. + * + * nanosleep(2) can be used with this API since POSIX.1 explicitly + * specifies that it does not interact with signals. + */ +int timeout_usleep(useconds_t usec) +{ + struct timespec ts = { + .tv_sec = usec / 1000000, + .tv_nsec = (usec % 1000000) * 1000, + }; + + return nanosleep(&ts, NULL); +} diff --git a/tools/testing/vsock/timeout.h b/tools/testing/vsock/timeout.h index ecb7c840e65a..1c3fcad87a49 100644 --- a/tools/testing/vsock/timeout.h +++ b/tools/testing/vsock/timeout.h @@ -11,5 +11,6 @@ void sigalrm(int signo); void timeout_begin(unsigned int seconds); void timeout_check(const char *operation); void timeout_end(void); +int timeout_usleep(useconds_t usec); #endif /* TIMEOUT_H */ diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 34e9dac0a105..7b861a8e997a 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -7,6 +7,7 @@ * Author: Stefan Hajnoczi <stefanha@redhat.com> */ +#include <ctype.h> #include <errno.h> #include <stdio.h> #include <stdint.h> @@ -16,12 +17,17 @@ #include <unistd.h> #include <assert.h> #include <sys/epoll.h> +#include <sys/ioctl.h> #include <sys/mman.h> +#include <linux/sockios.h> #include "timeout.h" #include "control.h" #include "util.h" +#define KALLSYMS_PATH "/proc/kallsyms" +#define KALLSYMS_LINE_LEN 512 + /* Install signal handlers */ void init_signals(void) { @@ -96,41 +102,110 @@ void vsock_wait_remote_close(int fd) close(epollfd); } -/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */ -int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type) +/* Wait until ioctl gives an expected int value. + * Return false if the op is not supported. + */ +bool vsock_ioctl_int(int fd, unsigned long op, int expected) { - struct sockaddr_vm sa_client = { - .svm_family = AF_VSOCK, - .svm_cid = VMADDR_CID_ANY, - .svm_port = bind_port, - }; - struct sockaddr_vm sa_server = { + int actual, ret; + char name[32]; + + snprintf(name, sizeof(name), "ioctl(%lu)", op); + + timeout_begin(TIMEOUT); + do { + ret = ioctl(fd, op, &actual); + if (ret < 0) { + if (errno == EOPNOTSUPP || errno == ENOTTY) + break; + + perror(name); + exit(EXIT_FAILURE); + } + timeout_check(name); + } while (actual != expected); + timeout_end(); + + return ret >= 0; +} + +/* Wait until transport reports no data left to be sent. + * Return false if transport does not implement the unsent_bytes() callback. + */ +bool vsock_wait_sent(int fd) +{ + return vsock_ioctl_int(fd, SIOCOUTQ, 0); +} + +/* Create socket <type>, bind to <cid, port>. + * Return the file descriptor, or -1 on error. + */ +int vsock_bind_try(unsigned int cid, unsigned int port, int type) +{ + struct sockaddr_vm sa = { .svm_family = AF_VSOCK, .svm_cid = cid, .svm_port = port, }; + int fd, saved_errno; - int client_fd, ret; - - client_fd = socket(AF_VSOCK, type, 0); - if (client_fd < 0) { + fd = socket(AF_VSOCK, type, 0); + if (fd < 0) { perror("socket"); exit(EXIT_FAILURE); } - if (bind(client_fd, (struct sockaddr *)&sa_client, sizeof(sa_client))) { + if (bind(fd, (struct sockaddr *)&sa, sizeof(sa))) { + saved_errno = errno; + close(fd); + errno = saved_errno; + fd = -1; + } + + return fd; +} + +/* Create socket <type>, bind to <cid, port> and return the file descriptor. */ +int vsock_bind(unsigned int cid, unsigned int port, int type) +{ + int fd; + + fd = vsock_bind_try(cid, port, type); + if (fd < 0) { perror("bind"); exit(EXIT_FAILURE); } + return fd; +} + +int vsock_connect_fd(int fd, unsigned int cid, unsigned int port) +{ + struct sockaddr_vm sa = { + .svm_family = AF_VSOCK, + .svm_cid = cid, + .svm_port = port, + }; + int ret; + timeout_begin(TIMEOUT); do { - ret = connect(client_fd, (struct sockaddr *)&sa_server, sizeof(sa_server)); + ret = connect(fd, (struct sockaddr *)&sa, sizeof(sa)); timeout_check("connect"); } while (ret < 0 && errno == EINTR); timeout_end(); - if (ret < 0) { + return ret; +} + +/* Bind to <bind_port>, connect to <cid, port> and return the file descriptor. */ +int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type) +{ + int client_fd; + + client_fd = vsock_bind(VMADDR_CID_ANY, bind_port, type); + + if (vsock_connect_fd(client_fd, cid, port)) { perror("connect"); exit(EXIT_FAILURE); } @@ -141,17 +216,6 @@ int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_po /* Connect to <cid, port> and return the file descriptor. */ int vsock_connect(unsigned int cid, unsigned int port, int type) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = port, - .svm_cid = cid, - }, - }; - int ret; int fd; control_expectln("LISTENING"); @@ -162,20 +226,14 @@ int vsock_connect(unsigned int cid, unsigned int port, int type) exit(EXIT_FAILURE); } - timeout_begin(TIMEOUT); - do { - ret = connect(fd, &addr.sa, sizeof(addr.svm)); - timeout_check("connect"); - } while (ret < 0 && errno == EINTR); - timeout_end(); - - if (ret < 0) { + if (vsock_connect_fd(fd, cid, port)) { int old_errno = errno; close(fd); fd = -1; errno = old_errno; } + return fd; } @@ -192,28 +250,9 @@ int vsock_seqpacket_connect(unsigned int cid, unsigned int port) /* Listen on <cid, port> and return the file descriptor. */ static int vsock_listen(unsigned int cid, unsigned int port, int type) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = port, - .svm_cid = cid, - }, - }; int fd; - fd = socket(AF_VSOCK, type, 0); - if (fd < 0) { - perror("socket"); - exit(EXIT_FAILURE); - } - - if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { - perror("bind"); - exit(EXIT_FAILURE); - } + fd = vsock_bind(cid, port, type); if (listen(fd, 1) < 0) { perror("listen"); @@ -401,7 +440,7 @@ void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret) */ void send_byte(int fd, int expected_ret, int flags) { - const uint8_t byte = 'A'; + static const uint8_t byte = 'A'; send_buf(fd, &byte, sizeof(byte), flags, expected_ret); } @@ -420,7 +459,7 @@ void recv_byte(int fd, int expected_ret, int flags) recv_buf(fd, &byte, sizeof(byte), flags, expected_ret); if (byte != 'A') { - fprintf(stderr, "unexpected byte read %c\n", byte); + fprintf(stderr, "unexpected byte read 0x%02x\n", byte); exit(EXIT_FAILURE); } } @@ -486,8 +525,7 @@ void list_tests(const struct test_case *test_cases) exit(EXIT_FAILURE); } -void skip_test(struct test_case *test_cases, size_t test_cases_len, - const char *test_id_str) +static unsigned long parse_test_id(const char *test_id_str, size_t test_cases_len) { unsigned long test_id; char *endptr = NULL; @@ -505,9 +543,35 @@ void skip_test(struct test_case *test_cases, size_t test_cases_len, exit(EXIT_FAILURE); } + return test_id; +} + +void skip_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str) +{ + unsigned long test_id = parse_test_id(test_id_str, test_cases_len); test_cases[test_id].skip = true; } +void pick_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str) +{ + static bool skip_all = true; + unsigned long test_id; + + if (skip_all) { + unsigned long i; + + for (i = 0; i < test_cases_len; ++i) + test_cases[i].skip = true; + + skip_all = false; + } + + test_id = parse_test_id(test_id_str, test_cases_len); + test_cases[test_id].skip = false; +} + unsigned long hash_djb2(const void *data, size_t len) { unsigned long hash = 5381; @@ -793,3 +857,68 @@ void enable_so_zerocopy_check(int fd) setsockopt_int_check(fd, SOL_SOCKET, SO_ZEROCOPY, 1, "setsockopt SO_ZEROCOPY"); } + +void enable_so_linger(int fd, int timeout) +{ + struct linger optval = { + .l_onoff = 1, + .l_linger = timeout + }; + + if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) { + perror("setsockopt(SO_LINGER)"); + exit(EXIT_FAILURE); + } +} + +static int __get_transports(void) +{ + char buf[KALLSYMS_LINE_LEN]; + const char *ksym; + int ret = 0; + FILE *f; + + f = fopen(KALLSYMS_PATH, "r"); + if (!f) { + perror("Can't open " KALLSYMS_PATH); + exit(EXIT_FAILURE); + } + + while (fgets(buf, sizeof(buf), f)) { + char *match; + int i; + + assert(buf[strlen(buf) - 1] == '\n'); + + for (i = 0; i < TRANSPORT_NUM; ++i) { + if (ret & BIT(i)) + continue; + + /* Match should be followed by '\t' or '\n'. + * See kallsyms.c:s_show(). + */ + ksym = transport_ksyms[i]; + match = strstr(buf, ksym); + if (match && isspace(match[strlen(ksym)])) { + ret |= BIT(i); + break; + } + } + } + + fclose(f); + return ret; +} + +/* Return integer with TRANSPORT_* bit set for every (known) registered vsock + * transport. + */ +int get_transports(void) +{ + static int tr = -1; + + if (tr == -1) + tr = __get_transports(); + + return tr; +} diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index ba84d296d8b7..142c02a6834a 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -3,8 +3,40 @@ #define UTIL_H #include <sys/socket.h> +#include <linux/bitops.h> +#include <linux/kernel.h> #include <linux/vm_sockets.h> +/* All known vsock transports, see callers of vsock_core_register() */ +#define KNOWN_TRANSPORTS(x) \ + x(LOOPBACK, "loopback") \ + x(VIRTIO, "virtio") \ + x(VHOST, "vhost") \ + x(VMCI, "vmci") \ + x(HYPERV, "hvs") + +enum transport { + TRANSPORT_COUNTER_BASE = __COUNTER__ + 1, + #define x(name, symbol) \ + TRANSPORT_##name = BIT(__COUNTER__ - TRANSPORT_COUNTER_BASE), + KNOWN_TRANSPORTS(x) + TRANSPORT_NUM = __COUNTER__ - TRANSPORT_COUNTER_BASE, + #undef x +}; + +static const char * const transport_ksyms[] = { + #define x(name, symbol) "d " symbol "_transport", + KNOWN_TRANSPORTS(x) + #undef x +}; + +static_assert(ARRAY_SIZE(transport_ksyms) == TRANSPORT_NUM); +static_assert(BITS_PER_TYPE(int) >= TRANSPORT_NUM); + +#define TRANSPORTS_G2H (TRANSPORT_VIRTIO | TRANSPORT_VMCI | TRANSPORT_HYPERV) +#define TRANSPORTS_H2G (TRANSPORT_VHOST | TRANSPORT_VMCI) +#define TRANSPORTS_LOCAL (TRANSPORT_LOOPBACK) + /* Tests can either run as the client or the server */ enum test_mode { TEST_MODE_UNSET, @@ -39,10 +71,13 @@ struct test_case { void init_signals(void); unsigned int parse_cid(const char *str); unsigned int parse_port(const char *str); +int vsock_connect_fd(int fd, unsigned int cid, unsigned int port); int vsock_connect(unsigned int cid, unsigned int port, int type); int vsock_accept(unsigned int cid, unsigned int port, struct sockaddr_vm *clientaddrp, int type); int vsock_stream_connect(unsigned int cid, unsigned int port); +int vsock_bind_try(unsigned int cid, unsigned int port, int type); +int vsock_bind(unsigned int cid, unsigned int port, int type); int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type); int vsock_seqpacket_connect(unsigned int cid, unsigned int port); @@ -52,6 +87,8 @@ int vsock_stream_listen(unsigned int cid, unsigned int port); int vsock_seqpacket_accept(unsigned int cid, unsigned int port, struct sockaddr_vm *clientaddrp); void vsock_wait_remote_close(int fd); +bool vsock_ioctl_int(int fd, unsigned long op, int expected); +bool vsock_wait_sent(int fd); void send_buf(int fd, const void *buf, size_t len, int flags, ssize_t expected_ret); void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret); @@ -62,6 +99,8 @@ void run_tests(const struct test_case *test_cases, void list_tests(const struct test_case *test_cases); void skip_test(struct test_case *test_cases, size_t test_cases_len, const char *test_id_str); +void pick_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str); unsigned long hash_djb2(const void *data, size_t len); size_t iovec_bytes(const struct iovec *iov, size_t iovnum); unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum); @@ -75,4 +114,6 @@ void setsockopt_int_check(int fd, int level, int optname, int val, void setsockopt_timeval_check(int fd, int level, int optname, struct timeval val, char const *errmsg); void enable_so_zerocopy_check(int fd); +void enable_so_linger(int fd, int timeout); +int get_transports(void); #endif /* UTIL_H */ diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 48f17641ca50..d4517386e551 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -21,6 +21,9 @@ #include <poll.h> #include <signal.h> #include <sys/ioctl.h> +#include <linux/time64.h> +#include <pthread.h> +#include <fcntl.h> #include <linux/sockios.h> #include "vsock_test_zerocopy.h" @@ -28,6 +31,10 @@ #include "control.h" #include "util.h" +/* Basic messages for control_writeulong(), control_readulong() */ +#define CONTROL_CONTINUE 1 +#define CONTROL_DONE 0 + static void test_stream_connection_reset(const struct test_opts *opts) { union { @@ -108,24 +115,9 @@ static void test_stream_bind_only_client(const struct test_opts *opts) static void test_stream_bind_only_server(const struct test_opts *opts) { - union { - struct sockaddr sa; - struct sockaddr_vm svm; - } addr = { - .svm = { - .svm_family = AF_VSOCK, - .svm_port = opts->peer_port, - .svm_cid = VMADDR_CID_ANY, - }, - }; int fd; - fd = socket(AF_VSOCK, SOCK_STREAM, 0); - - if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) { - perror("bind"); - exit(EXIT_FAILURE); - } + fd = vsock_bind(VMADDR_CID_ANY, opts->peer_port, SOCK_STREAM); /* Notify the client that the server is ready */ control_writeln("BIND"); @@ -559,7 +551,7 @@ static time_t current_nsec(void) exit(EXIT_FAILURE); } - return (ts.tv_sec * 1000000000ULL) + ts.tv_nsec; + return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec; } #define RCVTIMEO_TIMEOUT_SEC 1 @@ -599,7 +591,7 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts) } read_overhead_ns = current_nsec() - read_enter_ns - - 1000000000ULL * RCVTIMEO_TIMEOUT_SEC; + NSEC_PER_SEC * RCVTIMEO_TIMEOUT_SEC; if (read_overhead_ns > READ_OVERHEAD_NSEC) { fprintf(stderr, @@ -1068,18 +1060,39 @@ static void sigpipe(int signo) have_sigpipe = 1; } +#define SEND_SLEEP_USEC (10 * 1000) + static void test_stream_check_sigpipe(int fd) { ssize_t res; have_sigpipe = 0; - res = send(fd, "A", 1, 0); - if (res != -1) { - fprintf(stderr, "expected send(2) failure, got %zi\n", res); - exit(EXIT_FAILURE); + /* When the other peer calls shutdown(SHUT_RD), there is a chance that + * the send() call could occur before the message carrying the close + * information arrives over the transport. In such cases, the send() + * might still succeed. To avoid this race, let's retry the send() call + * a few times, ensuring the test is more reliable. + */ + timeout_begin(TIMEOUT); + while(1) { + res = send(fd, "A", 1, 0); + if (res == -1 && errno != EINTR) + break; + + /* Sleep a little before trying again to avoid flooding the + * other peer and filling its receive buffer, causing + * false-negative. + */ + timeout_usleep(SEND_SLEEP_USEC); + timeout_check("send"); } + timeout_end(); + if (errno != EPIPE) { + fprintf(stderr, "unexpected send(2) errno %d\n", errno); + exit(EXIT_FAILURE); + } if (!have_sigpipe) { fprintf(stderr, "SIGPIPE expected\n"); exit(EXIT_FAILURE); @@ -1087,12 +1100,21 @@ static void test_stream_check_sigpipe(int fd) have_sigpipe = 0; - res = send(fd, "A", 1, MSG_NOSIGNAL); - if (res != -1) { - fprintf(stderr, "expected send(2) failure, got %zi\n", res); - exit(EXIT_FAILURE); + timeout_begin(TIMEOUT); + while(1) { + res = send(fd, "A", 1, MSG_NOSIGNAL); + if (res == -1 && errno != EINTR) + break; + + timeout_usleep(SEND_SLEEP_USEC); + timeout_check("send"); } + timeout_end(); + if (errno != EPIPE) { + fprintf(stderr, "unexpected send(2) errno %d\n", errno); + exit(EXIT_FAILURE); + } if (have_sigpipe) { fprintf(stderr, "SIGPIPE not expected\n"); exit(EXIT_FAILURE); @@ -1260,7 +1282,7 @@ static void test_unsent_bytes_server(const struct test_opts *opts, int type) static void test_unsent_bytes_client(const struct test_opts *opts, int type) { unsigned char buf[MSG_BUF_IOCTL_LEN]; - int ret, fd, sock_bytes_unsent; + int fd; fd = vsock_connect(opts->peer_cid, opts->peer_port, type); if (fd < 0) { @@ -1274,21 +1296,63 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type) send_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); control_expectln("RECEIVED"); - ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); - if (ret < 0) { - if (errno == EOPNOTSUPP) { - fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); - } else { - perror("ioctl"); - exit(EXIT_FAILURE); - } - } else if (ret == 0 && sock_bytes_unsent != 0) { - fprintf(stderr, - "Unexpected 'SIOCOUTQ' value, expected 0, got %i\n", - sock_bytes_unsent); + /* SIOCOUTQ isn't guaranteed to instantly track sent data. Even though + * the "RECEIVED" message means that the other side has received the + * data, there can be a delay in our kernel before updating the "unsent + * bytes" counter. vsock_wait_sent() will repeat SIOCOUTQ until it + * returns 0. + */ + if (!vsock_wait_sent(fd)) + fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); + + close(fd); +} + +static void test_unread_bytes_server(const struct test_opts *opts, int type) +{ + unsigned char buf[MSG_BUF_IOCTL_LEN]; + int client_fd; + + client_fd = vsock_accept(VMADDR_CID_ANY, opts->peer_port, NULL, type); + if (client_fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < sizeof(buf); i++) + buf[i] = rand() & 0xFF; + + send_buf(client_fd, buf, sizeof(buf), 0, sizeof(buf)); + control_writeln("SENT"); + + close(client_fd); +} + +static void test_unread_bytes_client(const struct test_opts *opts, int type) +{ + unsigned char buf[MSG_BUF_IOCTL_LEN]; + int fd; + + fd = vsock_connect(opts->peer_cid, opts->peer_port, type); + if (fd < 0) { + perror("connect"); exit(EXIT_FAILURE); } + control_expectln("SENT"); + /* The data has arrived but has not been read. The expected is + * MSG_BUF_IOCTL_LEN. + */ + if (!vsock_ioctl_int(fd, SIOCINQ, MSG_BUF_IOCTL_LEN)) { + fprintf(stderr, "Test skipped, SIOCINQ not supported.\n"); + goto out; + } + + recv_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); + /* All data has been consumed, so the expected is 0. */ + vsock_ioctl_int(fd, SIOCINQ, 0); + +out: close(fd); } @@ -1312,6 +1376,26 @@ static void test_seqpacket_unsent_bytes_server(const struct test_opts *opts) test_unsent_bytes_server(opts, SOCK_SEQPACKET); } +static void test_stream_unread_bytes_client(const struct test_opts *opts) +{ + test_unread_bytes_client(opts, SOCK_STREAM); +} + +static void test_stream_unread_bytes_server(const struct test_opts *opts) +{ + test_unread_bytes_server(opts, SOCK_STREAM); +} + +static void test_seqpacket_unread_bytes_client(const struct test_opts *opts) +{ + test_unread_bytes_client(opts, SOCK_SEQPACKET); +} + +static void test_seqpacket_unread_bytes_server(const struct test_opts *opts) +{ + test_unread_bytes_server(opts, SOCK_SEQPACKET); +} + #define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128) /* This define is the same as in 'include/linux/virtio_vsock.h': * it is used to decide when to send credit update message during @@ -1473,6 +1557,636 @@ static void test_stream_cred_upd_on_set_rcvlowat(const struct test_opts *opts) test_stream_credit_update_test(opts, false); } +/* The goal of test leak_acceptq is to stress the race between connect() and + * close(listener). Implementation of client/server loops boils down to: + * + * client server + * ------ ------ + * write(CONTINUE) + * expect(CONTINUE) + * listen() + * write(LISTENING) + * expect(LISTENING) + * connect() close() + */ +#define ACCEPTQ_LEAK_RACE_TIMEOUT 2 /* seconds */ + +static void test_stream_leak_acceptq_client(const struct test_opts *opts) +{ + time_t tout; + int fd; + + tout = current_nsec() + ACCEPTQ_LEAK_RACE_TIMEOUT * NSEC_PER_SEC; + do { + control_writeulong(CONTROL_CONTINUE); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd >= 0) + close(fd); + } while (current_nsec() < tout); + + control_writeulong(CONTROL_DONE); +} + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ +static void test_stream_leak_acceptq_server(const struct test_opts *opts) +{ + int fd; + + while (control_readulong() == CONTROL_CONTINUE) { + fd = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port); + control_writeln("LISTENING"); + close(fd); + } +} + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ +static void test_stream_msgzcopy_leak_errq_client(const struct test_opts *opts) +{ + struct pollfd fds = { 0 }; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + send_byte(fd, 1, MSG_ZEROCOPY); + + fds.fd = fd; + fds.events = 0; + if (poll(&fds, 1, -1) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_msgzcopy_leak_errq_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + recv_byte(fd, 1, 0); + vsock_wait_remote_close(fd); + close(fd); +} + +/* Test msgzcopy_leak_zcskb is meant to exercise sendmsg() error handling path, + * that might leak an skb. The idea is to fail virtio_transport_init_zcopy_skb() + * by hitting net.core.optmem_max limit in sock_omalloc(), specifically + * + * vsock_connectible_sendmsg + * virtio_transport_stream_enqueue + * virtio_transport_send_pkt_info + * virtio_transport_init_zcopy_skb + * . msg_zerocopy_realloc + * . msg_zerocopy_alloc + * . sock_omalloc + * . sk_omem_alloc + size > sysctl_optmem_max + * return -ENOMEM + * + * We abuse the implementation detail of net/socket.c:____sys_sendmsg(). + * sk_omem_alloc can be precisely bumped by sock_kmalloc(), as it is used to + * fetch user-provided control data. + * + * While this approach works for now, it relies on assumptions regarding the + * implementation and configuration (for example, order of net.core.optmem_max + * can not exceed MAX_PAGE_ORDER), which may not hold in the future. A more + * resilient testing could be implemented by leveraging the Fault injection + * framework (CONFIG_FAULT_INJECTION), e.g. + * + * client# echo N > /sys/kernel/debug/failslab/ignore-gfp-wait + * client# echo 0 > /sys/kernel/debug/failslab/verbose + * + * void client(const struct test_opts *opts) + * { + * char buf[16]; + * int f, s, i; + * + * f = open("/proc/self/fail-nth", O_WRONLY); + * + * for (i = 1; i < 32; i++) { + * control_writeulong(CONTROL_CONTINUE); + * + * s = vsock_stream_connect(opts->peer_cid, opts->peer_port); + * enable_so_zerocopy_check(s); + * + * sprintf(buf, "%d", i); + * write(f, buf, strlen(buf)); + * + * send(s, &(char){ 0 }, 1, MSG_ZEROCOPY); + * + * write(f, "0", 1); + * close(s); + * } + * + * control_writeulong(CONTROL_DONE); + * close(f); + * } + * + * void server(const struct test_opts *opts) + * { + * int fd; + * + * while (control_readulong() == CONTROL_CONTINUE) { + * fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + * vsock_wait_remote_close(fd); + * close(fd); + * } + * } + * + * Refer to Documentation/fault-injection/fault-injection.rst. + */ +#define MAX_PAGE_ORDER 10 /* usually */ +#define PAGE_SIZE 4096 + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. */ +static void test_stream_msgzcopy_leak_zcskb_client(const struct test_opts *opts) +{ + size_t optmem_max, ctl_len, chunk_size; + struct msghdr msg = { 0 }; + struct iovec iov; + char *chunk; + int fd, res; + FILE *f; + + f = fopen("/proc/sys/net/core/optmem_max", "r"); + if (!f) { + perror("fopen(optmem_max)"); + exit(EXIT_FAILURE); + } + + if (fscanf(f, "%zu", &optmem_max) != 1) { + fprintf(stderr, "fscanf(optmem_max) failed\n"); + exit(EXIT_FAILURE); + } + + fclose(f); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + + ctl_len = optmem_max - 1; + if (ctl_len > PAGE_SIZE << MAX_PAGE_ORDER) { + fprintf(stderr, "Try with net.core.optmem_max = 100000\n"); + exit(EXIT_FAILURE); + } + + chunk_size = CMSG_SPACE(ctl_len); + chunk = malloc(chunk_size); + if (!chunk) { + perror("malloc"); + exit(EXIT_FAILURE); + } + memset(chunk, 0, chunk_size); + + iov.iov_base = &(char){ 0 }; + iov.iov_len = 1; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = chunk; + msg.msg_controllen = ctl_len; + + errno = 0; + res = sendmsg(fd, &msg, MSG_ZEROCOPY); + if (res >= 0 || errno != ENOMEM) { + fprintf(stderr, "Expected ENOMEM, got errno=%d res=%d\n", + errno, res); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + +#define MAX_PORT_RETRIES 24 /* net/vmw_vsock/af_vsock.c */ + +static bool test_stream_transport_uaf(int cid) +{ + int sockets[MAX_PORT_RETRIES]; + struct sockaddr_vm addr; + socklen_t alen; + int fd, i, c; + bool ret; + + /* Probe for a transport by attempting a local CID bind. Unavailable + * transport (or more specifically: an unsupported transport/CID + * combination) results in EADDRNOTAVAIL, other errnos are fatal. + */ + fd = vsock_bind_try(cid, VMADDR_PORT_ANY, SOCK_STREAM); + if (fd < 0) { + if (errno != EADDRNOTAVAIL) { + perror("Unexpected bind() errno"); + exit(EXIT_FAILURE); + } + + return false; + } + + alen = sizeof(addr); + if (getsockname(fd, (struct sockaddr *)&addr, &alen)) { + perror("getsockname"); + exit(EXIT_FAILURE); + } + + /* Drain the autobind pool; see __vsock_bind_connectible(). */ + for (i = 0; i < MAX_PORT_RETRIES; ++i) + sockets[i] = vsock_bind(cid, ++addr.svm_port, SOCK_STREAM); + + close(fd); + + /* Setting SOCK_NONBLOCK makes connect() return soon after + * (re-)assigning the transport. We are not connecting to anything + * anyway, so there is no point entering the main loop in + * vsock_connect(); waiting for timeout, checking for signals, etc. + */ + fd = socket(AF_VSOCK, SOCK_STREAM | SOCK_NONBLOCK, 0); + if (fd < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + /* Assign transport, while failing to autobind. Autobind pool was + * drained, so EADDRNOTAVAIL coming from __vsock_bind_connectible() is + * expected. + * + * One exception is ENODEV which is thrown by vsock_assign_transport(), + * i.e. before vsock_auto_bind(), when the only transport loaded is + * vhost. + */ + if (!connect(fd, (struct sockaddr *)&addr, alen)) { + fprintf(stderr, "Unexpected connect() success\n"); + exit(EXIT_FAILURE); + } + if (errno == ENODEV && cid == VMADDR_CID_HOST) { + ret = false; + goto cleanup; + } + if (errno != EADDRNOTAVAIL) { + perror("Unexpected connect() errno"); + exit(EXIT_FAILURE); + } + + /* Reassign transport, triggering old transport release and + * (potentially) unbinding of an unbound socket. + * + * Vulnerable system may crash now. + */ + for (c = VMADDR_CID_HYPERVISOR; c <= VMADDR_CID_HOST + 1; ++c) { + if (c != cid) { + addr.svm_cid = c; + (void)connect(fd, (struct sockaddr *)&addr, alen); + } + } + + ret = true; +cleanup: + close(fd); + while (i--) + close(sockets[i]); + + return ret; +} + +/* Test attempts to trigger a transport release for an unbound socket. This can + * lead to a reference count mishandling. + */ +static void test_stream_transport_uaf_client(const struct test_opts *opts) +{ + bool tested = false; + int cid, tr; + + for (cid = VMADDR_CID_HYPERVISOR; cid <= VMADDR_CID_HOST + 1; ++cid) + tested |= test_stream_transport_uaf(cid); + + tr = get_transports(); + if (!tr) + fprintf(stderr, "No transports detected\n"); + else if (tr == TRANSPORT_VIRTIO) + fprintf(stderr, "Setup unsupported: sole virtio transport\n"); + else if (!tested) + fprintf(stderr, "No transports tested\n"); +} + +static void test_stream_connect_retry_client(const struct test_opts *opts) +{ + int fd; + + fd = socket(AF_VSOCK, SOCK_STREAM, 0); + if (fd < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + if (!vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) { + fprintf(stderr, "Unexpected connect() #1 success\n"); + exit(EXIT_FAILURE); + } + + control_writeln("LISTEN"); + control_expectln("LISTENING"); + + if (vsock_connect_fd(fd, opts->peer_cid, opts->peer_port)) { + perror("connect() #2"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_connect_retry_server(const struct test_opts *opts) +{ + int fd; + + control_expectln("LISTEN"); + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + +#define TRANSPORT_CHANGE_TIMEOUT 2 /* seconds */ + +static void *test_stream_transport_change_thread(void *vargp) +{ + pid_t *pid = (pid_t *)vargp; + int ret; + + ret = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); + if (ret) { + fprintf(stderr, "pthread_setcanceltype: %d\n", ret); + exit(EXIT_FAILURE); + } + + while (true) { + if (kill(*pid, SIGUSR1) < 0) { + perror("kill"); + exit(EXIT_FAILURE); + } + } + return NULL; +} + +static void test_transport_change_signal_handler(int signal) +{ + /* We need a custom handler for SIGUSR1 as the default one terminates the process. */ +} + +static void test_stream_transport_change_client(const struct test_opts *opts) +{ + __sighandler_t old_handler; + pid_t pid = getpid(); + pthread_t thread_id; + time_t tout; + int ret, tr; + + tr = get_transports(); + + /* Print a warning if there is a G2H transport loaded. + * This is on a best effort basis because VMCI can be either G2H and H2G, and there is + * no easy way to understand it. + * The bug we are testing only appears when G2H transports are not loaded. + * This is because `vsock_assign_transport`, when using CID 0, assigns a G2H transport + * to vsk->transport. If none is available it is set to NULL, causing the null-ptr-deref. + */ + if (tr & TRANSPORTS_G2H) + fprintf(stderr, "G2H Transport detected. This test will not fail.\n"); + + old_handler = signal(SIGUSR1, test_transport_change_signal_handler); + if (old_handler == SIG_ERR) { + perror("signal"); + exit(EXIT_FAILURE); + } + + ret = pthread_create(&thread_id, NULL, test_stream_transport_change_thread, &pid); + if (ret) { + fprintf(stderr, "pthread_create: %d\n", ret); + exit(EXIT_FAILURE); + } + + control_expectln("LISTENING"); + + tout = current_nsec() + TRANSPORT_CHANGE_TIMEOUT * NSEC_PER_SEC; + do { + struct sockaddr_vm sa = { + .svm_family = AF_VSOCK, + .svm_cid = opts->peer_cid, + .svm_port = opts->peer_port, + }; + bool send_control = false; + int s; + + s = socket(AF_VSOCK, SOCK_STREAM, 0); + if (s < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + ret = connect(s, (struct sockaddr *)&sa, sizeof(sa)); + /* The connect can fail due to signals coming from the thread, + * or because the receiver connection queue is full. + * Ignoring also the latter case because there is no way + * of synchronizing client's connect and server's accept when + * connect(s) are constantly being interrupted by signals. + */ + if (ret == -1 && (errno != EINTR && errno != ECONNRESET)) { + perror("connect"); + exit(EXIT_FAILURE); + } + + /* Notify the server if the connect() is successful or the + * receiver connection queue is full, so it will do accept() + * to drain it. + */ + if (!ret || errno == ECONNRESET) + send_control = true; + + /* Set CID to 0 cause a transport change. */ + sa.svm_cid = 0; + + /* There is a case where this will not fail: + * if the previous connect() is interrupted while the + * connection request is already sent, this second + * connect() will wait for the response. + */ + ret = connect(s, (struct sockaddr *)&sa, sizeof(sa)); + if (!ret || errno == ECONNRESET) + send_control = true; + + close(s); + + if (send_control) + control_writeulong(CONTROL_CONTINUE); + + } while (current_nsec() < tout); + + control_writeulong(CONTROL_DONE); + + ret = pthread_cancel(thread_id); + if (ret) { + fprintf(stderr, "pthread_cancel: %d\n", ret); + exit(EXIT_FAILURE); + } + + ret = pthread_join(thread_id, NULL); + if (ret) { + fprintf(stderr, "pthread_join: %d\n", ret); + exit(EXIT_FAILURE); + } + + if (signal(SIGUSR1, old_handler) == SIG_ERR) { + perror("signal"); + exit(EXIT_FAILURE); + } +} + +static void test_stream_transport_change_server(const struct test_opts *opts) +{ + int s = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port); + + /* Set the socket to be nonblocking because connects that have been interrupted + * (EINTR) can fill the receiver's accept queue anyway, leading to connect failure. + * As of today (6.15) in such situation there is no way to understand, from the + * client side, if the connection has been queued in the server or not. + */ + if (fcntl(s, F_SETFL, fcntl(s, F_GETFL, 0) | O_NONBLOCK) < 0) { + perror("fcntl"); + exit(EXIT_FAILURE); + } + control_writeln("LISTENING"); + + while (control_readulong() == CONTROL_CONTINUE) { + /* Must accept the connection, otherwise the `listen` + * queue will fill up and new connections will fail. + * There can be more than one queued connection, + * clear them all. + */ + while (true) { + int client = accept(s, NULL, NULL); + + if (client < 0) { + if (errno == EAGAIN) + break; + + perror("accept"); + exit(EXIT_FAILURE); + } + + close(client); + } + } + + close(s); +} + +static void test_stream_linger_client(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_linger(fd, 1); + close(fd); +} + +static void test_stream_linger_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + +/* Half of the default to not risk timing out the control channel */ +#define LINGER_TIMEOUT (TIMEOUT / 2) + +static void test_stream_nolinger_client(const struct test_opts *opts) +{ + bool waited; + time_t ns; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_linger(fd, LINGER_TIMEOUT); + send_byte(fd, 1, 0); /* Left unread to expose incorrect behaviour. */ + waited = vsock_wait_sent(fd); + + ns = current_nsec(); + close(fd); + ns = current_nsec() - ns; + + if (!waited) { + fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); + } else if (DIV_ROUND_UP(ns, NSEC_PER_SEC) >= LINGER_TIMEOUT) { + fprintf(stderr, "Unexpected lingering\n"); + exit(EXIT_FAILURE); + } + + control_writeln("DONE"); +} + +static void test_stream_nolinger_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + control_expectln("DONE"); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1603,6 +2317,55 @@ static struct test_case test_cases[] = { .run_client = test_seqpacket_unsent_bytes_client, .run_server = test_seqpacket_unsent_bytes_server, }, + { + .name = "SOCK_STREAM leak accept queue", + .run_client = test_stream_leak_acceptq_client, + .run_server = test_stream_leak_acceptq_server, + }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY leak MSG_ERRQUEUE", + .run_client = test_stream_msgzcopy_leak_errq_client, + .run_server = test_stream_msgzcopy_leak_errq_server, + }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY leak completion skb", + .run_client = test_stream_msgzcopy_leak_zcskb_client, + .run_server = test_stream_msgzcopy_leak_zcskb_server, + }, + { + .name = "SOCK_STREAM transport release use-after-free", + .run_client = test_stream_transport_uaf_client, + }, + { + .name = "SOCK_STREAM retry failed connect()", + .run_client = test_stream_connect_retry_client, + .run_server = test_stream_connect_retry_server, + }, + { + .name = "SOCK_STREAM SO_LINGER null-ptr-deref", + .run_client = test_stream_linger_client, + .run_server = test_stream_linger_server, + }, + { + .name = "SOCK_STREAM SO_LINGER close() on unread", + .run_client = test_stream_nolinger_client, + .run_server = test_stream_nolinger_server, + }, + { + .name = "SOCK_STREAM transport change null-ptr-deref", + .run_client = test_stream_transport_change_client, + .run_server = test_stream_transport_change_server, + }, + { + .name = "SOCK_STREAM ioctl(SIOCINQ) functionality", + .run_client = test_stream_unread_bytes_client, + .run_server = test_stream_unread_bytes_server, + }, + { + .name = "SOCK_SEQPACKET ioctl(SIOCINQ) functionality", + .run_client = test_seqpacket_unread_bytes_client, + .run_server = test_seqpacket_unread_bytes_server, + }, {}, }; @@ -1644,6 +2407,11 @@ static const struct option longopts[] = { .val = 's', }, { + .name = "pick", + .has_arg = required_argument, + .val = 't', + }, + { .name = "help", .has_arg = no_argument, .val = '?', @@ -1680,6 +2448,8 @@ static void usage(void) " --peer-cid <cid> CID of the other side\n" " --peer-port <port> AF_VSOCK port used for the test [default: %d]\n" " --list List of tests that will be executed\n" + " --pick <test_id> Test ID to execute selectively;\n" + " use multiple --pick options to select more tests\n" " --skip <test_id> Test ID to skip;\n" " use multiple --skip options to skip more tests\n", DEFAULT_PEER_PORT @@ -1736,6 +2506,10 @@ int main(int argc, char **argv) skip_test(test_cases, ARRAY_SIZE(test_cases) - 1, optarg); break; + case 't': + pick_test(test_cases, ARRAY_SIZE(test_cases) - 1, + optarg); + break; case '?': default: usage(); |