diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-02 23:37:55 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-08-02 23:37:55 +0300 |
commit | 42df1cbf6a4726934cc5dac12bf263aa73c49fa3 (patch) | |
tree | 129e32104dccc660c3c8fca03b8bba418e572d09 /tools | |
parent | 98e247464088a11ce2328a214fdb87d4c06f8db6 (diff) | |
parent | 14b146b688ad9593f5eee93d51a34d09a47e50b5 (diff) | |
download | linux-42df1cbf6a4726934cc5dac12bf263aa73c49fa3.tar.xz |
Merge tag 'for-5.20/io_uring-zerocopy-send-2022-07-29' of git://git.kernel.dk/linux-block
Pull io_uring zerocopy support from Jens Axboe:
"This adds support for efficient support for zerocopy sends through
io_uring. Both ipv4 and ipv6 is supported, as well as both TCP and
UDP.
The core network changes to support this is in a stable branch from
Jakub that both io_uring and net-next has pulled in, and the io_uring
changes are layered on top of that.
All of the work has been done by Pavel"
* tag 'for-5.20/io_uring-zerocopy-send-2022-07-29' of git://git.kernel.dk/linux-block: (34 commits)
io_uring: notification completion optimisation
io_uring: export req alloc from core
io_uring/net: use unsigned for flags
io_uring/net: make page accounting more consistent
io_uring/net: checks errors of zc mem accounting
io_uring/net: improve io_get_notif_slot types
selftests/io_uring: test zerocopy send
io_uring: enable managed frags with register buffers
io_uring: add zc notification flush requests
io_uring: rename IORING_OP_FILES_UPDATE
io_uring: flush notifiers after sendzc
io_uring: sendzc with fixed buffers
io_uring: allow to pass addr into sendzc
io_uring: account locked pages for non-fixed zc
io_uring: wire send zc request type
io_uring: add notification slot registration
io_uring: add rsrc referencing for notifiers
io_uring: complete notifiers in tw
io_uring: cache struct io_notif
io_uring: add zc notification infrastructure
...
Diffstat (limited to 'tools')
-rw-r--r-- | tools/testing/selftests/net/Makefile | 1 | ||||
-rw-r--r-- | tools/testing/selftests/net/io_uring_zerocopy_tx.c | 605 | ||||
-rwxr-xr-x | tools/testing/selftests/net/io_uring_zerocopy_tx.sh | 131 |
3 files changed, 737 insertions, 0 deletions
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index db05b3764b77..9a4b30bd3a9e 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -59,6 +59,7 @@ TEST_GEN_FILES += toeplitz TEST_GEN_FILES += cmsg_sender TEST_GEN_FILES += stress_reuseport_listen TEST_PROGS += test_vxlan_vnifiltering.sh +TEST_GEN_FILES += io_uring_zerocopy_tx TEST_FILES := settings diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c new file mode 100644 index 000000000000..9d64c560a2d6 --- /dev/null +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c @@ -0,0 +1,605 @@ +/* SPDX-License-Identifier: MIT */ +/* based on linux-kernel/tools/testing/selftests/net/msg_zerocopy.c */ +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <arpa/inet.h> +#include <linux/errqueue.h> +#include <linux/if_packet.h> +#include <linux/io_uring.h> +#include <linux/ipv6.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/wait.h> + +#define NOTIF_TAG 0xfffffffULL +#define NONZC_TAG 0 +#define ZC_TAG 1 + +enum { + MODE_NONZC = 0, + MODE_ZC = 1, + MODE_ZC_FIXED = 2, + MODE_MIXED = 3, +}; + +static bool cfg_flush = false; +static bool cfg_cork = false; +static int cfg_mode = MODE_ZC_FIXED; +static int cfg_nr_reqs = 8; +static int cfg_family = PF_UNSPEC; +static int cfg_payload_len; +static int cfg_port = 8000; +static int cfg_runtime_ms = 4200; + +static socklen_t cfg_alen; +static struct sockaddr_storage cfg_dst_addr; + +static char payload[IP_MAXPACKET] __attribute__((aligned(4096))); + +struct io_sq_ring { + unsigned *head; + unsigned *tail; + unsigned *ring_mask; + unsigned *ring_entries; + unsigned *flags; + unsigned *array; +}; + +struct io_cq_ring { + unsigned *head; + unsigned *tail; + unsigned *ring_mask; + unsigned *ring_entries; + struct io_uring_cqe *cqes; +}; + +struct io_uring_sq { + unsigned *khead; + unsigned *ktail; + unsigned *kring_mask; + unsigned *kring_entries; + unsigned *kflags; + unsigned *kdropped; + unsigned *array; + struct io_uring_sqe *sqes; + + unsigned sqe_head; + unsigned sqe_tail; + + size_t ring_sz; +}; + +struct io_uring_cq { + unsigned *khead; + unsigned *ktail; + unsigned *kring_mask; + unsigned *kring_entries; + unsigned *koverflow; + struct io_uring_cqe *cqes; + + size_t ring_sz; +}; + +struct io_uring { + struct io_uring_sq sq; + struct io_uring_cq cq; + int ring_fd; +}; + +#ifdef __alpha__ +# ifndef __NR_io_uring_setup +# define __NR_io_uring_setup 535 +# endif +# ifndef __NR_io_uring_enter +# define __NR_io_uring_enter 536 +# endif +# ifndef __NR_io_uring_register +# define __NR_io_uring_register 537 +# endif +#else /* !__alpha__ */ +# ifndef __NR_io_uring_setup +# define __NR_io_uring_setup 425 +# endif +# ifndef __NR_io_uring_enter +# define __NR_io_uring_enter 426 +# endif +# ifndef __NR_io_uring_register +# define __NR_io_uring_register 427 +# endif +#endif + +#if defined(__x86_64) || defined(__i386__) +#define read_barrier() __asm__ __volatile__("":::"memory") +#define write_barrier() __asm__ __volatile__("":::"memory") +#else + +#define read_barrier() __sync_synchronize() +#define write_barrier() __sync_synchronize() +#endif + +static int io_uring_setup(unsigned int entries, struct io_uring_params *p) +{ + return syscall(__NR_io_uring_setup, entries, p); +} + +static int io_uring_enter(int fd, unsigned int to_submit, + unsigned int min_complete, + unsigned int flags, sigset_t *sig) +{ + return syscall(__NR_io_uring_enter, fd, to_submit, min_complete, + flags, sig, _NSIG / 8); +} + +static int io_uring_register_buffers(struct io_uring *ring, + const struct iovec *iovecs, + unsigned nr_iovecs) +{ + int ret; + + ret = syscall(__NR_io_uring_register, ring->ring_fd, + IORING_REGISTER_BUFFERS, iovecs, nr_iovecs); + return (ret < 0) ? -errno : ret; +} + +static int io_uring_register_notifications(struct io_uring *ring, + unsigned nr, + struct io_uring_notification_slot *slots) +{ + int ret; + struct io_uring_notification_register r = { + .nr_slots = nr, + .data = (unsigned long)slots, + }; + + ret = syscall(__NR_io_uring_register, ring->ring_fd, + IORING_REGISTER_NOTIFIERS, &r, sizeof(r)); + return (ret < 0) ? -errno : ret; +} + +static int io_uring_mmap(int fd, struct io_uring_params *p, + struct io_uring_sq *sq, struct io_uring_cq *cq) +{ + size_t size; + void *ptr; + int ret; + + sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned); + ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING); + if (ptr == MAP_FAILED) + return -errno; + sq->khead = ptr + p->sq_off.head; + sq->ktail = ptr + p->sq_off.tail; + sq->kring_mask = ptr + p->sq_off.ring_mask; + sq->kring_entries = ptr + p->sq_off.ring_entries; + sq->kflags = ptr + p->sq_off.flags; + sq->kdropped = ptr + p->sq_off.dropped; + sq->array = ptr + p->sq_off.array; + + size = p->sq_entries * sizeof(struct io_uring_sqe); + sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES); + if (sq->sqes == MAP_FAILED) { + ret = -errno; +err: + munmap(sq->khead, sq->ring_sz); + return ret; + } + + cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe); + ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING); + if (ptr == MAP_FAILED) { + ret = -errno; + munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe)); + goto err; + } + cq->khead = ptr + p->cq_off.head; + cq->ktail = ptr + p->cq_off.tail; + cq->kring_mask = ptr + p->cq_off.ring_mask; + cq->kring_entries = ptr + p->cq_off.ring_entries; + cq->koverflow = ptr + p->cq_off.overflow; + cq->cqes = ptr + p->cq_off.cqes; + return 0; +} + +static int io_uring_queue_init(unsigned entries, struct io_uring *ring, + unsigned flags) +{ + struct io_uring_params p; + int fd, ret; + + memset(ring, 0, sizeof(*ring)); + memset(&p, 0, sizeof(p)); + p.flags = flags; + + fd = io_uring_setup(entries, &p); + if (fd < 0) + return fd; + ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq); + if (!ret) + ring->ring_fd = fd; + else + close(fd); + return ret; +} + +static int io_uring_submit(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + const unsigned mask = *sq->kring_mask; + unsigned ktail, submitted, to_submit; + int ret; + + read_barrier(); + if (*sq->khead != *sq->ktail) { + submitted = *sq->kring_entries; + goto submit; + } + if (sq->sqe_head == sq->sqe_tail) + return 0; + + ktail = *sq->ktail; + to_submit = sq->sqe_tail - sq->sqe_head; + for (submitted = 0; submitted < to_submit; submitted++) { + read_barrier(); + sq->array[ktail++ & mask] = sq->sqe_head++ & mask; + } + if (!submitted) + return 0; + + if (*sq->ktail != ktail) { + write_barrier(); + *sq->ktail = ktail; + write_barrier(); + } +submit: + ret = io_uring_enter(ring->ring_fd, submitted, 0, + IORING_ENTER_GETEVENTS, NULL); + return ret < 0 ? -errno : ret; +} + +static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd, + const void *buf, size_t len, int flags) +{ + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = (__u8) IORING_OP_SEND; + sqe->fd = sockfd; + sqe->addr = (unsigned long) buf; + sqe->len = len; + sqe->msg_flags = (__u32) flags; +} + +static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd, + const void *buf, size_t len, int flags, + unsigned slot_idx, unsigned zc_flags) +{ + io_uring_prep_send(sqe, sockfd, buf, len, flags); + sqe->opcode = (__u8) IORING_OP_SENDZC_NOTIF; + sqe->notification_idx = slot_idx; + sqe->ioprio = zc_flags; +} + +static struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + + if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries) + return NULL; + return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask]; +} + +static int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr) +{ + struct io_uring_cq *cq = &ring->cq; + const unsigned mask = *cq->kring_mask; + unsigned head = *cq->khead; + int ret; + + *cqe_ptr = NULL; + do { + read_barrier(); + if (head != *cq->ktail) { + *cqe_ptr = &cq->cqes[head & mask]; + break; + } + ret = io_uring_enter(ring->ring_fd, 0, 1, + IORING_ENTER_GETEVENTS, NULL); + if (ret < 0) + return -errno; + } while (1); + + return 0; +} + +static inline void io_uring_cqe_seen(struct io_uring *ring) +{ + *(&ring->cq)->khead += 1; + write_barrier(); +} + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static void do_setsockopt(int fd, int level, int optname, int val) +{ + if (setsockopt(fd, level, optname, &val, sizeof(val))) + error(1, errno, "setsockopt %d.%d: %d", level, optname, val); +} + +static int do_setup_tx(int domain, int type, int protocol) +{ + int fd; + + fd = socket(domain, type, protocol); + if (fd == -1) + error(1, errno, "socket t"); + + do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21); + + if (connect(fd, (void *) &cfg_dst_addr, cfg_alen)) + error(1, errno, "connect"); + return fd; +} + +static void do_tx(int domain, int type, int protocol) +{ + struct io_uring_notification_slot b[1] = {{.tag = NOTIF_TAG}}; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + unsigned long packets = 0, bytes = 0; + struct io_uring ring; + struct iovec iov; + uint64_t tstop; + int i, fd, ret; + int compl_cqes = 0; + + fd = do_setup_tx(domain, type, protocol); + + ret = io_uring_queue_init(512, &ring, 0); + if (ret) + error(1, ret, "io_uring: queue init"); + + ret = io_uring_register_notifications(&ring, 1, b); + if (ret) + error(1, ret, "io_uring: tx ctx registration"); + + iov.iov_base = payload; + iov.iov_len = cfg_payload_len; + + ret = io_uring_register_buffers(&ring, &iov, 1); + if (ret) + error(1, ret, "io_uring: buffer registration"); + + tstop = gettimeofday_ms() + cfg_runtime_ms; + do { + if (cfg_cork) + do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1); + + for (i = 0; i < cfg_nr_reqs; i++) { + unsigned zc_flags = 0; + unsigned buf_idx = 0; + unsigned slot_idx = 0; + unsigned mode = cfg_mode; + unsigned msg_flags = 0; + + if (cfg_mode == MODE_MIXED) + mode = rand() % 3; + + sqe = io_uring_get_sqe(&ring); + + if (mode == MODE_NONZC) { + io_uring_prep_send(sqe, fd, payload, + cfg_payload_len, msg_flags); + sqe->user_data = NONZC_TAG; + } else { + if (cfg_flush) { + zc_flags |= IORING_RECVSEND_NOTIF_FLUSH; + compl_cqes++; + } + io_uring_prep_sendzc(sqe, fd, payload, + cfg_payload_len, + msg_flags, slot_idx, zc_flags); + if (mode == MODE_ZC_FIXED) { + sqe->ioprio |= IORING_RECVSEND_FIXED_BUF; + sqe->buf_index = buf_idx; + } + sqe->user_data = ZC_TAG; + } + } + + ret = io_uring_submit(&ring); + if (ret != cfg_nr_reqs) + error(1, ret, "submit"); + + for (i = 0; i < cfg_nr_reqs; i++) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) + error(1, ret, "wait cqe"); + + if (cqe->user_data == NOTIF_TAG) { + compl_cqes--; + i--; + } else if (cqe->user_data != NONZC_TAG && + cqe->user_data != ZC_TAG) { + error(1, cqe->res, "invalid user_data"); + } else if (cqe->res <= 0 && cqe->res != -EAGAIN) { + error(1, cqe->res, "send failed"); + } else { + if (cqe->res > 0) { + packets++; + bytes += cqe->res; + } + /* failed requests don't flush */ + if (cfg_flush && + cqe->res <= 0 && + cqe->user_data == ZC_TAG) + compl_cqes--; + } + io_uring_cqe_seen(&ring); + } + if (cfg_cork) + do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); + } while (gettimeofday_ms() < tstop); + + if (close(fd)) + error(1, errno, "close"); + + fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n", + packets, bytes >> 20, + packets / (cfg_runtime_ms / 1000), + (bytes >> 20) / (cfg_runtime_ms / 1000)); + + while (compl_cqes) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) + error(1, ret, "wait cqe"); + io_uring_cqe_seen(&ring); + compl_cqes--; + } +} + +static void do_test(int domain, int type, int protocol) +{ + int i; + + for (i = 0; i < IP_MAXPACKET; i++) + payload[i] = 'a' + (i % 26); + do_tx(domain, type, protocol); +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s [-f] [-n<N>] [-z0] [-s<payload size>] " + "(-4|-6) [-t<time s>] -D<dst_ip> udp", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + const int max_payload_len = sizeof(payload) - + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) - + 40 /* max tcp options */; + struct sockaddr_in6 *addr6 = (void *) &cfg_dst_addr; + struct sockaddr_in *addr4 = (void *) &cfg_dst_addr; + char *daddr = NULL; + int c; + + if (argc <= 1) + usage(argv[0]); + cfg_payload_len = max_payload_len; + + while ((c = getopt(argc, argv, "46D:p:s:t:n:fc:m:")) != -1) { + switch (c) { + case '4': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + break; + case '6': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET6; + cfg_alen = sizeof(struct sockaddr_in6); + break; + case 'D': + daddr = optarg; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 's': + cfg_payload_len = strtoul(optarg, NULL, 0); + break; + case 't': + cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000; + break; + case 'n': + cfg_nr_reqs = strtoul(optarg, NULL, 0); + break; + case 'f': + cfg_flush = 1; + break; + case 'c': + cfg_cork = strtol(optarg, NULL, 0); + break; + case 'm': + cfg_mode = strtol(optarg, NULL, 0); + break; + } + } + + switch (cfg_family) { + case PF_INET: + memset(addr4, 0, sizeof(*addr4)); + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (daddr && + inet_pton(AF_INET, daddr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", daddr); + break; + case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (daddr && + inet_pton(AF_INET6, daddr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", daddr); + break; + default: + error(1, 0, "illegal domain"); + } + + if (cfg_payload_len > max_payload_len) + error(1, 0, "-s: payload exceeds max (%d)", max_payload_len); + if (cfg_mode == MODE_NONZC && cfg_flush) + error(1, 0, "-f: only zerocopy modes support notifications"); + if (optind != argc - 1) + usage(argv[0]); +} + +int main(int argc, char **argv) +{ + const char *cfg_test = argv[argc - 1]; + + parse_opts(argc, argv); + + if (!strcmp(cfg_test, "tcp")) + do_test(cfg_family, SOCK_STREAM, 0); + else if (!strcmp(cfg_test, "udp")) + do_test(cfg_family, SOCK_DGRAM, 0); + else + error(1, 0, "unknown cfg_test %s", cfg_test); + return 0; +} diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh new file mode 100755 index 000000000000..6a65e4437640 --- /dev/null +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh @@ -0,0 +1,131 @@ +#!/bin/bash +# +# Send data between two processes across namespaces +# Run twice: once without and once with zerocopy + +set -e + +readonly DEV="veth0" +readonly DEV_MTU=65535 +readonly BIN_TX="./io_uring_zerocopy_tx" +readonly BIN_RX="./msg_zerocopy" + +readonly RAND="$(mktemp -u XXXXXX)" +readonly NSPREFIX="ns-${RAND}" +readonly NS1="${NSPREFIX}1" +readonly NS2="${NSPREFIX}2" + +readonly SADDR4='192.168.1.1' +readonly DADDR4='192.168.1.2' +readonly SADDR6='fd::1' +readonly DADDR6='fd::2' + +readonly path_sysctl_mem="net.core.optmem_max" + +# No arguments: automated test +if [[ "$#" -eq "0" ]]; then + IPs=( "4" "6" ) + protocols=( "tcp" "udp" ) + + for IP in "${IPs[@]}"; do + for proto in "${protocols[@]}"; do + for mode in $(seq 1 3); do + $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 + $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -f + $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -c -f + done + done + done + + echo "OK. All tests passed" + exit 0 +fi + +# Argument parsing +if [[ "$#" -lt "2" ]]; then + echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>" + exit 1 +fi + +readonly IP="$1" +shift +readonly TXMODE="$1" +shift +readonly EXTRA_ARGS="$@" + +# Argument parsing: configure addresses +if [[ "${IP}" == "4" ]]; then + readonly SADDR="${SADDR4}" + readonly DADDR="${DADDR4}" +elif [[ "${IP}" == "6" ]]; then + readonly SADDR="${SADDR6}" + readonly DADDR="${DADDR6}" +else + echo "Invalid IP version ${IP}" + exit 1 +fi + +# Argument parsing: select receive mode +# +# This differs from send mode for +# - packet: use raw recv, because packet receives skb clones +# - raw_hdrinc: use raw recv, because hdrincl is a tx-only option +case "${TXMODE}" in +'packet' | 'packet_dgram' | 'raw_hdrincl') + RXMODE='raw' + ;; +*) + RXMODE="${TXMODE}" + ;; +esac + +# Start of state changes: install cleanup handler +save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})" + +cleanup() { + ip netns del "${NS2}" + ip netns del "${NS1}" + sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}" +} + +trap cleanup EXIT + +# Configure system settings +sysctl -w -q "${path_sysctl_mem}=1000000" + +# Create virtual ethernet pair between network namespaces +ip netns add "${NS1}" +ip netns add "${NS2}" + +ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ + peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" + +# Bring the devices up +ip -netns "${NS1}" link set "${DEV}" up +ip -netns "${NS2}" link set "${DEV}" up + +# Set fixed MAC addresses on the devices +ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02 +ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06 + +# Add fixed IP addresses to the devices +ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}" +ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" +ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad +ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad + +# Optionally disable sg or csum offload to test edge cases +# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off + +do_test() { + local readonly ARGS="$1" + + echo "ipv${IP} ${TXMODE} ${ARGS}" + ip netns exec "${NS2}" "${BIN_RX}" "-${IP}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" -r "${RXMODE}" & + sleep 0.2 + ip netns exec "${NS1}" "${BIN_TX}" "-${IP}" -t 1 -D "${DADDR}" ${ARGS} "${TXMODE}" + wait +} + +do_test "${EXTRA_ARGS}" +echo ok |