diff options
author | David S. Miller <davem@davemloft.net> | 2016-10-23 00:06:09 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-10-23 00:06:09 +0300 |
commit | 67dc15967303271657e37373fd590093bd095caf (patch) | |
tree | 8aebb2b1bf64250bcfaa35c865099a480bf6c8ac | |
parent | a10b91b8b81c29b87ff5a6d58c1402898337b956 (diff) | |
parent | 3c2c3c16aaf6a83902beb6063488e2c907de62e9 (diff) | |
download | linux-67dc15967303271657e37373fd590093bd095caf.tar.xz |
Merge branch 'bpf-numa-id'
Daniel Borkmann says:
====================
Add BPF numa id helper
This patch set adds a helper for retrieving current numa node
id and a test case for SO_REUSEPORT.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/bpf.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 6 | ||||
-rw-r--r-- | kernel/bpf/core.c | 1 | ||||
-rw-r--r-- | kernel/bpf/helpers.c | 12 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 2 | ||||
-rw-r--r-- | net/core/filter.c | 2 | ||||
-rw-r--r-- | tools/testing/selftests/net/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/net/Makefile | 11 | ||||
-rw-r--r-- | tools/testing/selftests/net/reuseport_bpf_numa.c | 255 |
9 files changed, 287 insertions, 4 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c201017b5730..edcd96ded8aa 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -319,6 +319,7 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; +extern const struct bpf_func_proto bpf_get_numa_node_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; extern const struct bpf_func_proto bpf_ktime_get_ns_proto; extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f09c70b97eca..374ef582ae18 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -426,6 +426,12 @@ enum bpf_func_id { */ BPF_FUNC_set_hash_invalid, + /** + * bpf_get_numa_node_id() + * Returns the id of the current NUMA node. + */ + BPF_FUNC_get_numa_node_id, + __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index aa6d98154106..82a04143368e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1043,6 +1043,7 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; +const struct bpf_func_proto bpf_get_numa_node_id_proto __weak; const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 39918402e6e9..045cbe673356 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -13,6 +13,7 @@ #include <linux/rcupdate.h> #include <linux/random.h> #include <linux/smp.h> +#include <linux/topology.h> #include <linux/ktime.h> #include <linux/sched.h> #include <linux/uidgid.h> @@ -92,6 +93,17 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = { .ret_type = RET_INTEGER, }; +BPF_CALL_0(bpf_get_numa_node_id) +{ + return numa_node_id(); +} + +const struct bpf_func_proto bpf_get_numa_node_id_proto = { + .func = bpf_get_numa_node_id, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + BPF_CALL_0(bpf_ktime_get_ns) { /* NMI safe access to clock monotonic */ diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 5dcb99281259..fa77311dadb2 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -422,6 +422,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) return bpf_get_trace_printk_proto(); case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_get_numa_node_id: + return &bpf_get_numa_node_id_proto; case BPF_FUNC_perf_event_read: return &bpf_perf_event_read_proto; case BPF_FUNC_probe_write_user: diff --git a/net/core/filter.c b/net/core/filter.c index 00351cdf7d0c..cd9e2ba66b0e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2492,6 +2492,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) return &bpf_get_prandom_u32_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_raw_smp_processor_id_proto; + case BPF_FUNC_get_numa_node_id: + return &bpf_get_numa_node_id_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 0840684deb7d..afe109e5508a 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -3,4 +3,5 @@ psock_fanout psock_tpacket reuseport_bpf reuseport_bpf_cpu +reuseport_bpf_numa reuseport_dualstack diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 0e5340742620..e24e4c82542e 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -1,14 +1,17 @@ # Makefile for net selftests -CFLAGS = -Wall -O2 -g - +CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ -NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf reuseport_bpf_cpu reuseport_dualstack +NET_PROGS = socket +NET_PROGS += psock_fanout psock_tpacket +NET_PROGS += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa +NET_PROGS += reuseport_dualstack all: $(NET_PROGS) +reuseport_bpf_numa: LDFLAGS += -lnuma %: %.c - $(CC) $(CFLAGS) -o $@ $^ + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh TEST_FILES := $(NET_PROGS) diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c new file mode 100644 index 000000000000..6f20bc9ff627 --- /dev/null +++ b/tools/testing/selftests/net/reuseport_bpf_numa.c @@ -0,0 +1,255 @@ +/* + * Test functionality of BPF filters with SO_REUSEPORT. Same test as + * in reuseport_bpf_cpu, only as one socket per NUMA node. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <linux/filter.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/unistd.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <unistd.h> +#include <numa.h> + +static const int PORT = 8888; + +static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto) +{ + struct sockaddr_storage addr; + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + size_t i; + int opt; + + switch (family) { + case AF_INET: + addr4 = (struct sockaddr_in *)&addr; + addr4->sin_family = AF_INET; + addr4->sin_addr.s_addr = htonl(INADDR_ANY); + addr4->sin_port = htons(PORT); + break; + case AF_INET6: + addr6 = (struct sockaddr_in6 *)&addr; + addr6->sin6_family = AF_INET6; + addr6->sin6_addr = in6addr_any; + addr6->sin6_port = htons(PORT); + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + for (i = 0; i < len; ++i) { + rcv_fd[i] = socket(family, proto, 0); + if (rcv_fd[i] < 0) + error(1, errno, "failed to create receive socket"); + + opt = 1; + if (setsockopt(rcv_fd[i], SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT"); + + if (bind(rcv_fd[i], (struct sockaddr *)&addr, sizeof(addr))) + error(1, errno, "failed to bind receive socket"); + + if (proto == SOCK_STREAM && listen(rcv_fd[i], len * 10)) + error(1, errno, "failed to listen on receive port"); + } +} + +static void attach_bpf(int fd) +{ + static char bpf_log_buf[65536]; + static const char bpf_license[] = ""; + + int bpf_fd; + const struct bpf_insn prog[] = { + /* R0 = bpf_get_numa_node_id() */ + { BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_numa_node_id }, + /* return R0 */ + { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 } + }; + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insn_cnt = sizeof(prog) / sizeof(prog[0]); + attr.insns = (unsigned long) &prog; + attr.license = (unsigned long) &bpf_license; + attr.log_buf = (unsigned long) &bpf_log_buf; + attr.log_size = sizeof(bpf_log_buf); + attr.log_level = 1; + + bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + if (bpf_fd < 0) + error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf); + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd, + sizeof(bpf_fd))) + error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF"); + + close(bpf_fd); +} + +static void send_from_node(int node_id, int family, int proto) +{ + struct sockaddr_storage saddr, daddr; + struct sockaddr_in *saddr4, *daddr4; + struct sockaddr_in6 *saddr6, *daddr6; + int fd; + + switch (family) { + case AF_INET: + saddr4 = (struct sockaddr_in *)&saddr; + saddr4->sin_family = AF_INET; + saddr4->sin_addr.s_addr = htonl(INADDR_ANY); + saddr4->sin_port = 0; + + daddr4 = (struct sockaddr_in *)&daddr; + daddr4->sin_family = AF_INET; + daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + daddr4->sin_port = htons(PORT); + break; + case AF_INET6: + saddr6 = (struct sockaddr_in6 *)&saddr; + saddr6->sin6_family = AF_INET6; + saddr6->sin6_addr = in6addr_any; + saddr6->sin6_port = 0; + + daddr6 = (struct sockaddr_in6 *)&daddr; + daddr6->sin6_family = AF_INET6; + daddr6->sin6_addr = in6addr_loopback; + daddr6->sin6_port = htons(PORT); + break; + default: + error(1, 0, "Unsupported family %d", family); + } + + if (numa_run_on_node(node_id) < 0) + error(1, errno, "failed to pin to node"); + + fd = socket(family, proto, 0); + if (fd < 0) + error(1, errno, "failed to create send socket"); + + if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr))) + error(1, errno, "failed to bind send socket"); + + if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr))) + error(1, errno, "failed to connect send socket"); + + if (send(fd, "a", 1, 0) < 0) + error(1, errno, "failed to send message"); + + close(fd); +} + +static +void receive_on_node(int *rcv_fd, int len, int epfd, int node_id, int proto) +{ + struct epoll_event ev; + int i, fd; + char buf[8]; + + i = epoll_wait(epfd, &ev, 1, -1); + if (i < 0) + error(1, errno, "epoll_wait failed"); + + if (proto == SOCK_STREAM) { + fd = accept(ev.data.fd, NULL, NULL); + if (fd < 0) + error(1, errno, "failed to accept"); + i = recv(fd, buf, sizeof(buf), 0); + close(fd); + } else { + i = recv(ev.data.fd, buf, sizeof(buf), 0); + } + + if (i < 0) + error(1, errno, "failed to recv"); + + for (i = 0; i < len; ++i) + if (ev.data.fd == rcv_fd[i]) + break; + if (i == len) + error(1, 0, "failed to find socket"); + fprintf(stderr, "send node %d, receive socket %d\n", node_id, i); + if (node_id != i) + error(1, 0, "node id/receive socket mismatch"); +} + +static void test(int *rcv_fd, int len, int family, int proto) +{ + struct epoll_event ev; + int epfd, node; + + build_rcv_group(rcv_fd, len, family, proto); + attach_bpf(rcv_fd[0]); + + epfd = epoll_create(1); + if (epfd < 0) + error(1, errno, "failed to create epoll"); + for (node = 0; node < len; ++node) { + ev.events = EPOLLIN; + ev.data.fd = rcv_fd[node]; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fd[node], &ev)) + error(1, errno, "failed to register sock epoll"); + } + + /* Forward iterate */ + for (node = 0; node < len; ++node) { + send_from_node(node, family, proto); + receive_on_node(rcv_fd, len, epfd, node, proto); + } + + /* Reverse iterate */ + for (node = len - 1; node >= 0; --node) { + send_from_node(node, family, proto); + receive_on_node(rcv_fd, len, epfd, node, proto); + } + + close(epfd); + for (node = 0; node < len; ++node) + close(rcv_fd[node]); +} + +int main(void) +{ + int *rcv_fd, nodes; + + if (numa_available() < 0) + error(1, errno, "no numa api support"); + + nodes = numa_max_node() + 1; + + rcv_fd = calloc(nodes, sizeof(int)); + if (!rcv_fd) + error(1, 0, "failed to allocate array"); + + fprintf(stderr, "---- IPv4 UDP ----\n"); + test(rcv_fd, nodes, AF_INET, SOCK_DGRAM); + + fprintf(stderr, "---- IPv6 UDP ----\n"); + test(rcv_fd, nodes, AF_INET6, SOCK_DGRAM); + + fprintf(stderr, "---- IPv4 TCP ----\n"); + test(rcv_fd, nodes, AF_INET, SOCK_STREAM); + + fprintf(stderr, "---- IPv6 TCP ----\n"); + test(rcv_fd, nodes, AF_INET6, SOCK_STREAM); + + free(rcv_fd); + + fprintf(stderr, "SUCCESS\n"); + return 0; +} |