From 900de4ac4953fcf5e25ea45bea5bb28e797b1420 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 19 Jun 2019 17:27:42 +0100 Subject: libbpf: fix spelling mistake "conflictling" -> "conflicting" There are several spelling mistakes in pr_warning messages. Fix these. Signed-off-by: Colin Ian King Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4259c9f0cfe7..68f45a96769f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1169,7 +1169,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, pr_debug("map '%s': found key_size = %u.\n", map_name, sz); if (map->def.key_size && map->def.key_size != sz) { - pr_warning("map '%s': conflictling key size %u != %u.\n", + pr_warning("map '%s': conflicting key size %u != %u.\n", map_name, map->def.key_size, sz); return -EINVAL; } @@ -1197,7 +1197,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, pr_debug("map '%s': found key [%u], sz = %lld.\n", map_name, t->type, sz); if (map->def.key_size && map->def.key_size != sz) { - pr_warning("map '%s': conflictling key size %u != %lld.\n", + pr_warning("map '%s': conflicting key size %u != %lld.\n", map_name, map->def.key_size, sz); return -EINVAL; } @@ -1212,7 +1212,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, pr_debug("map '%s': found value_size = %u.\n", map_name, sz); if (map->def.value_size && map->def.value_size != sz) { - pr_warning("map '%s': conflictling value size %u != %u.\n", + pr_warning("map '%s': conflicting value size %u != %u.\n", map_name, map->def.value_size, sz); return -EINVAL; } @@ -1240,7 +1240,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, pr_debug("map '%s': found value [%u], sz = %lld.\n", map_name, t->type, sz); if (map->def.value_size && map->def.value_size != sz) { - pr_warning("map '%s': conflictling value size %u != %lld.\n", + pr_warning("map '%s': conflicting value size %u != %lld.\n", map_name, map->def.value_size, sz); return -EINVAL; } -- cgit v1.2.3 From 88091ff56b71d73c5577ebefcd4f0f721a359077 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 20 Jun 2019 11:23:23 +0900 Subject: selftests, bpf: Add test for veth native XDP Add a test case for veth native XDP. It checks if XDP_PASS, XDP_TX and XDP_REDIRECT work properly. $ cd tools/testing/selftests/bpf $ make \ TEST_CUSTOM_PROGS= \ TEST_GEN_PROGS= \ TEST_GEN_PROGS_EXTENDED= \ TEST_PROGS_EXTENDED= \ TEST_PROGS="test_xdp_veth.sh" \ run_tests TAP version 13 1..1 # selftests: bpf: test_xdp_veth.sh # PING 10.1.1.33 (10.1.1.33) 56(84) bytes of data. # 64 bytes from 10.1.1.33: icmp_seq=1 ttl=64 time=0.073 ms # # --- 10.1.1.33 ping statistics --- # 1 packets transmitted, 1 received, 0% packet loss, time 0ms # rtt min/avg/max/mdev = 0.073/0.073/0.073/0.000 ms # selftests: xdp_veth [PASS] ok 1 selftests: bpf: test_xdp_veth.sh Signed-off-by: Toshiaki Makita Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 1 + .../testing/selftests/bpf/progs/xdp_redirect_map.c | 31 ++++++ tools/testing/selftests/bpf/progs/xdp_tx.c | 12 +++ tools/testing/selftests/bpf/test_xdp_veth.sh | 118 +++++++++++++++++++++ 4 files changed, 162 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/xdp_redirect_map.c create mode 100644 tools/testing/selftests/bpf/progs/xdp_tx.c create mode 100755 tools/testing/selftests/bpf/test_xdp_veth.sh (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index fb5ce43e28b3..f2dbe2043067 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -46,6 +46,7 @@ TEST_PROGS := test_kmod.sh \ test_libbpf.sh \ test_xdp_redirect.sh \ test_xdp_meta.sh \ + test_xdp_veth.sh \ test_offload.py \ test_sock_addr.sh \ test_tunnel.sh \ diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c new file mode 100644 index 000000000000..e87a985b9df9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") tx_port = { + .type = BPF_MAP_TYPE_DEVMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 8, +}; + +SEC("redirect_map_0") +int xdp_redirect_map_0(struct xdp_md *xdp) +{ + return bpf_redirect_map(&tx_port, 0, 0); +} + +SEC("redirect_map_1") +int xdp_redirect_map_1(struct xdp_md *xdp) +{ + return bpf_redirect_map(&tx_port, 1, 0); +} + +SEC("redirect_map_2") +int xdp_redirect_map_2(struct xdp_md *xdp) +{ + return bpf_redirect_map(&tx_port, 2, 0); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c new file mode 100644 index 000000000000..57912e7c94b0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_tx.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "bpf_helpers.h" + +SEC("tx") +int xdp_tx(struct xdp_md *xdp) +{ + return XDP_TX; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh new file mode 100755 index 000000000000..ba8ffcdaac30 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_veth.sh @@ -0,0 +1,118 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Create 3 namespaces with 3 veth peers, and +# forward packets in-between using native XDP +# +# XDP_TX +# NS1(veth11) NS2(veth22) NS3(veth33) +# | | | +# | | | +# (veth1, (veth2, (veth3, +# id:111) id:122) id:133) +# ^ | ^ | ^ | +# | | XDP_REDIRECT | | XDP_REDIRECT | | +# | ------------------ ------------------ | +# ----------------------------------------- +# XDP_REDIRECT + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +TESTNAME=xdp_veth +BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts) +BPF_DIR=$BPF_FS/test_$TESTNAME + +_cleanup() +{ + set +e + ip link del veth1 2> /dev/null + ip link del veth2 2> /dev/null + ip link del veth3 2> /dev/null + ip netns del ns1 2> /dev/null + ip netns del ns2 2> /dev/null + ip netns del ns3 2> /dev/null + rm -rf $BPF_DIR 2> /dev/null +} + +cleanup_skip() +{ + echo "selftests: $TESTNAME [SKIP]" + _cleanup + + exit $ksft_skip +} + +cleanup() +{ + if [ "$?" = 0 ]; then + echo "selftests: $TESTNAME [PASS]" + else + echo "selftests: $TESTNAME [FAILED]" + fi + _cleanup +} + +if [ $(id -u) -ne 0 ]; then + echo "selftests: $TESTNAME [SKIP] Need root privileges" + exit $ksft_skip +fi + +if ! ip link set dev lo xdp off > /dev/null 2>&1; then + echo "selftests: $TESTNAME [SKIP] Could not run test without the ip xdp support" + exit $ksft_skip +fi + +if [ -z "$BPF_FS" ]; then + echo "selftests: $TESTNAME [SKIP] Could not run test without bpffs mounted" + exit $ksft_skip +fi + +if ! bpftool version > /dev/null 2>&1; then + echo "selftests: $TESTNAME [SKIP] Could not run test without bpftool" + exit $ksft_skip +fi + +set -e + +trap cleanup_skip EXIT + +ip netns add ns1 +ip netns add ns2 +ip netns add ns3 + +ip link add veth1 index 111 type veth peer name veth11 netns ns1 +ip link add veth2 index 122 type veth peer name veth22 netns ns2 +ip link add veth3 index 133 type veth peer name veth33 netns ns3 + +ip link set veth1 up +ip link set veth2 up +ip link set veth3 up + +ip -n ns1 addr add 10.1.1.11/24 dev veth11 +ip -n ns3 addr add 10.1.1.33/24 dev veth33 + +ip -n ns1 link set dev veth11 up +ip -n ns2 link set dev veth22 up +ip -n ns3 link set dev veth33 up + +mkdir $BPF_DIR +bpftool prog loadall \ + xdp_redirect_map.o $BPF_DIR/progs type xdp \ + pinmaps $BPF_DIR/maps +bpftool map update pinned $BPF_DIR/maps/tx_port key 0 0 0 0 value 122 0 0 0 +bpftool map update pinned $BPF_DIR/maps/tx_port key 1 0 0 0 value 133 0 0 0 +bpftool map update pinned $BPF_DIR/maps/tx_port key 2 0 0 0 value 111 0 0 0 +ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0 +ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1 +ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2 + +ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy +ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx +ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy + +trap cleanup EXIT + +ip netns exec ns1 ping -c 1 -W 1 10.1.1.33 + +exit 0 -- cgit v1.2.3 From 950649791b8366a4b89f570053d37bba1eb21c02 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Wed, 26 Jun 2019 13:38:37 +0300 Subject: libbpf: fix max() type mismatch for 32bit It fixes build error for 32bit caused by type mismatch size_t/unsigned long. Fixes: bf82927125dd ("libbpf: refactor map initialization") Acked-by: Song Liu Acked-by: Andrii Nakryiko Signed-off-by: Ivan Khoronzhuk Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 68f45a96769f..5186b7710430 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -778,7 +778,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) if (obj->nr_maps < obj->maps_cap) return &obj->maps[obj->nr_maps++]; - new_cap = max(4ul, obj->maps_cap * 3 / 2); + new_cap = max((size_t)4, obj->maps_cap * 3 / 2); new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps)); if (!new_maps) { pr_warning("alloc maps for object failed\n"); -- cgit v1.2.3 From 6bcc617f842cdb5f3d66b3a3f06614a62fab992b Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 25 Jun 2019 15:56:28 -0700 Subject: selftests/bpf: build tests with debug info Non-BPF (user land) part of selftests is built without debug info making occasional debugging with gdb terrible. Build with debug info always. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f2dbe2043067..faeb8220daa2 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -15,7 +15,7 @@ LLC ?= llc LLVM_OBJCOPY ?= llvm-objcopy LLVM_READELF ?= llvm-readelf BTF_PAHOLE ?= pahole -CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \ +CFLAGS += -g -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \ -Dbpf_prog_load=bpf_prog_test_load \ -Dbpf_load_program=bpf_test_load_program LDLIBS += -lcap -lelf -lrt -lpthread -- cgit v1.2.3 From 2640d3c8123223e0a205b2a25a446df6f072b3ea Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 26 Jun 2019 17:35:25 +0300 Subject: xsk: Add getsockopt XDP_OPTIONS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make it possible for the application to determine whether the AF_XDP socket is running in zero-copy mode. To achieve this, add a new getsockopt option XDP_OPTIONS that returns flags. The only flag supported for now is the zero-copy mode indicator. Signed-off-by: Maxim Mikityanskiy Signed-off-by: Tariq Toukan Acked-by: Saeed Mahameed Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- include/uapi/linux/if_xdp.h | 8 ++++++++ net/xdp/xsk.c | 20 ++++++++++++++++++++ tools/include/uapi/linux/if_xdp.h | 8 ++++++++ 3 files changed, 36 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index caed8b1614ff..faaa5ca2a117 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -46,6 +46,7 @@ struct xdp_mmap_offsets { #define XDP_UMEM_FILL_RING 5 #define XDP_UMEM_COMPLETION_RING 6 #define XDP_STATISTICS 7 +#define XDP_OPTIONS 8 struct xdp_umem_reg { __u64 addr; /* Start of packet data area */ @@ -60,6 +61,13 @@ struct xdp_statistics { __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ }; +struct xdp_options { + __u32 flags; +}; + +/* Flags for the flags field of struct xdp_options */ +#define XDP_OPTIONS_ZEROCOPY (1 << 0) + /* Pgoff for mmaping the rings */ #define XDP_PGOFF_RX_RING 0 #define XDP_PGOFF_TX_RING 0x80000000 diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index b68a380f50b3..35ca531ac74e 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -650,6 +650,26 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname, return 0; } + case XDP_OPTIONS: + { + struct xdp_options opts = {}; + + if (len < sizeof(opts)) + return -EINVAL; + + mutex_lock(&xs->mutex); + if (xs->zc) + opts.flags |= XDP_OPTIONS_ZEROCOPY; + mutex_unlock(&xs->mutex); + + len = sizeof(opts); + if (copy_to_user(optval, &opts, len)) + return -EFAULT; + if (put_user(len, optlen)) + return -EFAULT; + + return 0; + } default: break; } diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index caed8b1614ff..faaa5ca2a117 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -46,6 +46,7 @@ struct xdp_mmap_offsets { #define XDP_UMEM_FILL_RING 5 #define XDP_UMEM_COMPLETION_RING 6 #define XDP_STATISTICS 7 +#define XDP_OPTIONS 8 struct xdp_umem_reg { __u64 addr; /* Start of packet data area */ @@ -60,6 +61,13 @@ struct xdp_statistics { __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ }; +struct xdp_options { + __u32 flags; +}; + +/* Flags for the flags field of struct xdp_options */ +#define XDP_OPTIONS_ZEROCOPY (1 << 0) + /* Pgoff for mmaping the rings */ #define XDP_PGOFF_RX_RING 0 #define XDP_PGOFF_TX_RING 0x80000000 -- cgit v1.2.3 From 2761ed4b6e192820760d5ba913834b2ba05fd08c Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 26 Jun 2019 17:35:26 +0300 Subject: libbpf: Support getsockopt XDP_OPTIONS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Query XDP_OPTIONS in libbpf to determine if the zero-copy mode is active or not. Signed-off-by: Maxim Mikityanskiy Signed-off-by: Tariq Toukan Acked-by: Saeed Mahameed Acked-by: Björn Töpel Signed-off-by: Daniel Borkmann --- tools/lib/bpf/xsk.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 7ef6293b4fd7..bf15a80a37c2 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -65,6 +65,7 @@ struct xsk_socket { int xsks_map_fd; __u32 queue_id; char ifname[IFNAMSIZ]; + bool zc; }; struct xsk_nl_info { @@ -480,6 +481,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, void *rx_map = NULL, *tx_map = NULL; struct sockaddr_xdp sxdp = {}; struct xdp_mmap_offsets off; + struct xdp_options opts; struct xsk_socket *xsk; socklen_t optlen; int err; @@ -597,6 +599,16 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, } xsk->prog_fd = -1; + + optlen = sizeof(opts); + err = getsockopt(xsk->fd, SOL_XDP, XDP_OPTIONS, &opts, &optlen); + if (err) { + err = -errno; + goto out_mmap_tx; + } + + xsk->zc = opts.flags & XDP_OPTIONS_ZEROCOPY; + if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { err = xsk_setup_xdp_prog(xsk); if (err) -- cgit v1.2.3 From 123e8da1d33042a83cedb530fb5efd64f32ce594 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 26 Jun 2019 17:35:27 +0300 Subject: xsk: Change the default frame size to 4096 and allow controlling it The typical XDP memory scheme is one packet per page. Change the AF_XDP frame size in libbpf to 4096, which is the page size on x86, to allow libbpf to be used with the drivers with the packet-per-page scheme. Add a command line option -f to xdpsock to allow to specify a custom frame size. Signed-off-by: Maxim Mikityanskiy Signed-off-by: Tariq Toukan Acked-by: Saeed Mahameed Signed-off-by: Daniel Borkmann --- samples/bpf/xdpsock_user.c | 44 ++++++++++++++++++++++++++++---------------- tools/lib/bpf/xsk.h | 2 +- 2 files changed, 29 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 0f5eb0d7f2df..93eaaf7239b2 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -68,6 +68,7 @@ static int opt_queue; static int opt_poll; static int opt_interval = 1; static u32 opt_xdp_bind_flags; +static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; static __u32 prog_id; struct xsk_umem_info { @@ -276,6 +277,12 @@ static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr) static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) { struct xsk_umem_info *umem; + struct xsk_umem_config cfg = { + .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, + .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, + .frame_size = opt_xsk_frame_size, + .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM, + }; int ret; umem = calloc(1, sizeof(*umem)); @@ -283,7 +290,7 @@ static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) exit_with_error(errno); ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq, - NULL); + &cfg); if (ret) exit_with_error(-ret); @@ -323,11 +330,9 @@ static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem) &idx); if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) exit_with_error(-ret); - for (i = 0; - i < XSK_RING_PROD__DEFAULT_NUM_DESCS * - XSK_UMEM__DEFAULT_FRAME_SIZE; - i += XSK_UMEM__DEFAULT_FRAME_SIZE) - *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) = i; + for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++) + *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) = + i * opt_xsk_frame_size; xsk_ring_prod__submit(&xsk->umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS); @@ -346,6 +351,7 @@ static struct option long_options[] = { {"interval", required_argument, 0, 'n'}, {"zero-copy", no_argument, 0, 'z'}, {"copy", no_argument, 0, 'c'}, + {"frame-size", required_argument, 0, 'f'}, {0, 0, 0, 0} }; @@ -365,8 +371,9 @@ static void usage(const char *prog) " -n, --interval=n Specify statistics update interval (default 1 sec).\n" " -z, --zero-copy Force zero-copy mode.\n" " -c, --copy Force copy mode.\n" + " -f, --frame-size=n Set the frame size (must be a power of two, default is %d).\n" "\n"; - fprintf(stderr, str, prog); + fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE); exit(EXIT_FAILURE); } @@ -377,7 +384,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "Frtli:q:psSNn:cz", long_options, + c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:", long_options, &option_index); if (c == -1) break; @@ -420,6 +427,9 @@ static void parse_command_line(int argc, char **argv) case 'F': opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; break; + case 'f': + opt_xsk_frame_size = atoi(optarg); + break; default: usage(basename(argv[0])); } @@ -432,6 +442,11 @@ static void parse_command_line(int argc, char **argv) usage(basename(argv[0])); } + if (opt_xsk_frame_size & (opt_xsk_frame_size - 1)) { + fprintf(stderr, "--frame-size=%d is not a power of two\n", + opt_xsk_frame_size); + usage(basename(argv[0])); + } } static void kick_tx(struct xsk_socket_info *xsk) @@ -583,8 +598,7 @@ static void tx_only(struct xsk_socket_info *xsk) for (i = 0; i < BATCH_SIZE; i++) { xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr - = (frame_nb + i) << - XSK_UMEM__DEFAULT_FRAME_SHIFT; + = (frame_nb + i) * opt_xsk_frame_size; xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len = sizeof(pkt_data) - 1; } @@ -661,21 +675,19 @@ int main(int argc, char **argv) } ret = posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */ - NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE); + NUM_FRAMES * opt_xsk_frame_size); if (ret) exit_with_error(ret); /* Create sockets... */ - umem = xsk_configure_umem(bufs, - NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE); + umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size); xsks[num_socks++] = xsk_configure_socket(umem); if (opt_bench == BENCH_TXONLY) { int i; - for (i = 0; i < NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE; - i += XSK_UMEM__DEFAULT_FRAME_SIZE) - (void)gen_eth_frame(umem, i); + for (i = 0; i < NUM_FRAMES; i++) + (void)gen_eth_frame(umem, i * opt_xsk_frame_size); } signal(SIGINT, int_exit); diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h index 82ea71a0f3ec..833a6e60d065 100644 --- a/tools/lib/bpf/xsk.h +++ b/tools/lib/bpf/xsk.h @@ -167,7 +167,7 @@ LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk); #define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 #define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 -#define XSK_UMEM__DEFAULT_FRAME_SHIFT 11 /* 2048 bytes */ +#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */ #define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT) #define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0 -- cgit v1.2.3 From aa6ab6471e525ce6842e95a565f10b56fd6b4d5c Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 27 Jun 2019 13:38:48 -0700 Subject: bpf: sync bpf.h to tools/ Export new prog type and hook points to the libbpf. Cc: Andrii Nakryiko Cc: Martin Lau Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b077507efa3f..a396b516a2b2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -170,6 +170,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_FLOW_DISSECTOR, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, + BPF_PROG_TYPE_CGROUP_SOCKOPT, }; enum bpf_attach_type { @@ -194,6 +195,8 @@ enum bpf_attach_type { BPF_CGROUP_SYSCTL, BPF_CGROUP_UDP4_RECVMSG, BPF_CGROUP_UDP6_RECVMSG, + BPF_CGROUP_GETSOCKOPT, + BPF_CGROUP_SETSOCKOPT, __MAX_BPF_ATTACH_TYPE }; @@ -3541,4 +3544,15 @@ struct bpf_sysctl { */ }; +struct bpf_sockopt { + __bpf_md_ptr(struct bpf_sock *, sk); + __bpf_md_ptr(void *, optval); + __bpf_md_ptr(void *, optval_end); + + __s32 level; + __s32 optname; + __s32 optlen; + __s32 retval; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 4cdbfb59c44a0df58ab321e4ddd9710cd0823584 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 27 Jun 2019 13:38:49 -0700 Subject: libbpf: support sockopt hooks Make libbpf aware of new sockopt hooks so it can derive prog type and hook point from the section names. Cc: Andrii Nakryiko Cc: Martin Lau Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 5 +++++ tools/lib/bpf/libbpf_probes.c | 1 + 2 files changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5186b7710430..6e6ebef11ba3 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2646,6 +2646,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_CGROUP_SYSCTL: + case BPF_PROG_TYPE_CGROUP_SOCKOPT: return false; case BPF_PROG_TYPE_KPROBE: default: @@ -3604,6 +3605,10 @@ static const struct { BPF_CGROUP_UDP6_RECVMSG), BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL), + BPF_EAPROG_SEC("cgroup/getsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, + BPF_CGROUP_GETSOCKOPT), + BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT, + BPF_CGROUP_SETSOCKOPT), }; #undef BPF_PROG_SEC_IMPL diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 6635a31a7a16..ace1a0708d99 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -101,6 +101,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_CGROUP_SYSCTL: + case BPF_PROG_TYPE_CGROUP_SOCKOPT: default: break; } -- cgit v1.2.3 From 47ac90bbce5b9f7c1c600a4c81fcfd78cb674798 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 27 Jun 2019 13:38:50 -0700 Subject: selftests/bpf: test sockopt section name Add tests that make sure libbpf section detection works. Cc: Andrii Nakryiko Cc: Martin Lau Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_section_names.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/test_section_names.c index dee2f2eceb0f..29833aeaf0de 100644 --- a/tools/testing/selftests/bpf/test_section_names.c +++ b/tools/testing/selftests/bpf/test_section_names.c @@ -134,6 +134,16 @@ static struct sec_name_test tests[] = { {0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL}, {0, BPF_CGROUP_SYSCTL}, }, + { + "cgroup/getsockopt", + {0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT}, + {0, BPF_CGROUP_GETSOCKOPT}, + }, + { + "cgroup/setsockopt", + {0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT}, + {0, BPF_CGROUP_SETSOCKOPT}, + }, }; static int test_prog_type_by_name(const struct sec_name_test *test) -- cgit v1.2.3 From 9ec8a4c9489dc6fac324f8adb76fc182be119389 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 27 Jun 2019 13:38:51 -0700 Subject: selftests/bpf: add sockopt test Add sockopt selftests: * require proper expected_attach_type * enforce context field read/write access * test bpf_sockopt_handled handler * test EPERM * test limiting optlen from getsockopt * test out-of-bounds access v9: * add tests for setsockopt argument mangling v7: * remove return 2; test retval=0 and optlen=-1 v3: * use DW for optval{,_end} loads v2: * use return code 2 for kernel bypass Cc: Andrii Nakryiko Cc: Martin Lau Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/.gitignore | 1 + tools/testing/selftests/bpf/Makefile | 3 +- tools/testing/selftests/bpf/test_sockopt.c | 1021 ++++++++++++++++++++++++++++ 3 files changed, 1024 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/test_sockopt.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 7470327edcfe..3fe92601223d 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -39,3 +39,4 @@ libbpf.so.* test_hashmap test_btf_dump xdping +test_sockopt diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index faeb8220daa2..7f2210402e57 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -26,7 +26,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage test_select_reuseport test_section_names \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ - test_btf_dump test_cgroup_attach xdping + test_btf_dump test_cgroup_attach xdping test_sockopt BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) TEST_GEN_FILES = $(BPF_OBJ_FILES) @@ -103,6 +103,7 @@ $(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c $(OUTPUT)/test_cgroup_attach: cgroup_helpers.c +$(OUTPUT)/test_sockopt: cgroup_helpers.c .PHONY: force diff --git a/tools/testing/selftests/bpf/test_sockopt.c b/tools/testing/selftests/bpf/test_sockopt.c new file mode 100644 index 000000000000..23bd0819382d --- /dev/null +++ b/tools/testing/selftests/bpf/test_sockopt.c @@ -0,0 +1,1021 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "bpf_rlimit.h" +#include "bpf_util.h" +#include "cgroup_helpers.h" + +#define CG_PATH "/sockopt" + +static char bpf_log_buf[4096]; +static bool verbose; + +enum sockopt_test_error { + OK = 0, + DENY_LOAD, + DENY_ATTACH, + EPERM_GETSOCKOPT, + EFAULT_GETSOCKOPT, + EPERM_SETSOCKOPT, + EFAULT_SETSOCKOPT, +}; + +static struct sockopt_test { + const char *descr; + const struct bpf_insn insns[64]; + enum bpf_attach_type attach_type; + enum bpf_attach_type expected_attach_type; + + int set_optname; + int set_level; + const char set_optval[64]; + socklen_t set_optlen; + + int get_optname; + int get_level; + const char get_optval[64]; + socklen_t get_optlen; + socklen_t get_optlen_ret; + + enum sockopt_test_error error; +} tests[] = { + + /* ==================== getsockopt ==================== */ + + { + .descr = "getsockopt: no expected_attach_type", + .insns = { + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = 0, + .error = DENY_LOAD, + }, + { + .descr = "getsockopt: wrong expected_attach_type", + .insns = { + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + .error = DENY_ATTACH, + }, + { + .descr = "getsockopt: bypass bpf hook", + .insns = { + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + + .get_level = SOL_IP, + .set_level = SOL_IP, + + .get_optname = IP_TOS, + .set_optname = IP_TOS, + + .set_optval = { 1 << 3 }, + .set_optlen = 1, + + .get_optval = { 1 << 3 }, + .get_optlen = 1, + }, + { + .descr = "getsockopt: return EPERM from bpf hook", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + + .get_level = SOL_IP, + .get_optname = IP_TOS, + + .get_optlen = 1, + .error = EPERM_GETSOCKOPT, + }, + { + .descr = "getsockopt: no optval bounds check, deny loading", + .insns = { + /* r6 = ctx->optval */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, optval)), + + /* ctx->optval[0] = 0x80 */ + BPF_MOV64_IMM(BPF_REG_0, 0x80), + BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_0, 0), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + .error = DENY_LOAD, + }, + { + .descr = "getsockopt: read ctx->level", + .insns = { + /* r6 = ctx->level */ + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, level)), + + /* if (ctx->level == 123) { */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4), + /* ctx->retval = 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, retval)), + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + /* } else { */ + /* return 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + /* } */ + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + + .get_level = 123, + + .get_optlen = 1, + }, + { + .descr = "getsockopt: deny writing to ctx->level", + .insns = { + /* ctx->level = 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, level)), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + + .error = DENY_LOAD, + }, + { + .descr = "getsockopt: read ctx->optname", + .insns = { + /* r6 = ctx->optname */ + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, optname)), + + /* if (ctx->optname == 123) { */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4), + /* ctx->retval = 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, retval)), + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + /* } else { */ + /* return 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + /* } */ + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + + .get_optname = 123, + + .get_optlen = 1, + }, + { + .descr = "getsockopt: read ctx->retval", + .insns = { + /* r6 = ctx->retval */ + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, retval)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + + .get_level = SOL_IP, + .get_optname = IP_TOS, + .get_optlen = 1, + }, + { + .descr = "getsockopt: deny writing to ctx->optname", + .insns = { + /* ctx->optname = 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, optname)), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + + .error = DENY_LOAD, + }, + { + .descr = "getsockopt: read ctx->optlen", + .insns = { + /* r6 = ctx->optlen */ + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, optlen)), + + /* if (ctx->optlen == 64) { */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 64, 4), + /* ctx->retval = 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, retval)), + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + /* } else { */ + /* return 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + /* } */ + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + + .get_optlen = 64, + }, + { + .descr = "getsockopt: deny bigger ctx->optlen", + .insns = { + /* ctx->optlen = 65 */ + BPF_MOV64_IMM(BPF_REG_0, 65), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, optlen)), + + /* ctx->retval = 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, retval)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + + .get_optlen = 64, + + .error = EFAULT_GETSOCKOPT, + }, + { + .descr = "getsockopt: deny arbitrary ctx->retval", + .insns = { + /* ctx->retval = 123 */ + BPF_MOV64_IMM(BPF_REG_0, 123), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, retval)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + + .get_optlen = 64, + + .error = EFAULT_GETSOCKOPT, + }, + { + .descr = "getsockopt: support smaller ctx->optlen", + .insns = { + /* ctx->optlen = 32 */ + BPF_MOV64_IMM(BPF_REG_0, 32), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, optlen)), + /* ctx->retval = 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, retval)), + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + + .get_optlen = 64, + .get_optlen_ret = 32, + }, + { + .descr = "getsockopt: deny writing to ctx->optval", + .insns = { + /* ctx->optval = 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, optval)), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + + .error = DENY_LOAD, + }, + { + .descr = "getsockopt: deny writing to ctx->optval_end", + .insns = { + /* ctx->optval_end = 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, optval_end)), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + + .error = DENY_LOAD, + }, + { + .descr = "getsockopt: rewrite value", + .insns = { + /* r6 = ctx->optval */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, optval)), + /* r2 = ctx->optval */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), + /* r6 = ctx->optval + 1 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + + /* r7 = ctx->optval_end */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sockopt, optval_end)), + + /* if (ctx->optval + 1 <= ctx->optval_end) { */ + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1), + /* ctx->optval[0] = 0xF0 */ + BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xF0), + /* } */ + + /* ctx->retval = 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, retval)), + + /* return 1*/ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + + .get_level = SOL_IP, + .get_optname = IP_TOS, + + .get_optval = { 0xF0 }, + .get_optlen = 1, + }, + + /* ==================== setsockopt ==================== */ + + { + .descr = "setsockopt: no expected_attach_type", + .insns = { + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = 0, + .error = DENY_LOAD, + }, + { + .descr = "setsockopt: wrong expected_attach_type", + .insns = { + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_GETSOCKOPT, + .error = DENY_ATTACH, + }, + { + .descr = "setsockopt: bypass bpf hook", + .insns = { + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .get_level = SOL_IP, + .set_level = SOL_IP, + + .get_optname = IP_TOS, + .set_optname = IP_TOS, + + .set_optval = { 1 << 3 }, + .set_optlen = 1, + + .get_optval = { 1 << 3 }, + .get_optlen = 1, + }, + { + .descr = "setsockopt: return EPERM from bpf hook", + .insns = { + /* return 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .set_level = SOL_IP, + .set_optname = IP_TOS, + + .set_optlen = 1, + .error = EPERM_SETSOCKOPT, + }, + { + .descr = "setsockopt: no optval bounds check, deny loading", + .insns = { + /* r6 = ctx->optval */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, optval)), + + /* r0 = ctx->optval[0] */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + .error = DENY_LOAD, + }, + { + .descr = "setsockopt: read ctx->level", + .insns = { + /* r6 = ctx->level */ + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, level)), + + /* if (ctx->level == 123) { */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4), + /* ctx->optlen = -1 */ + BPF_MOV64_IMM(BPF_REG_0, -1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, optlen)), + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + /* } else { */ + /* return 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + /* } */ + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .set_level = 123, + + .set_optlen = 1, + }, + { + .descr = "setsockopt: allow changing ctx->level", + .insns = { + /* ctx->level = SOL_IP */ + BPF_MOV64_IMM(BPF_REG_0, SOL_IP), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, level)), + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .get_level = SOL_IP, + .set_level = 234, /* should be rewritten to SOL_IP */ + + .get_optname = IP_TOS, + .set_optname = IP_TOS, + + .set_optval = { 1 << 3 }, + .set_optlen = 1, + .get_optval = { 1 << 3 }, + .get_optlen = 1, + }, + { + .descr = "setsockopt: read ctx->optname", + .insns = { + /* r6 = ctx->optname */ + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, optname)), + + /* if (ctx->optname == 123) { */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 123, 4), + /* ctx->optlen = -1 */ + BPF_MOV64_IMM(BPF_REG_0, -1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, optlen)), + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + /* } else { */ + /* return 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + /* } */ + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .set_optname = 123, + + .set_optlen = 1, + }, + { + .descr = "setsockopt: allow changing ctx->optname", + .insns = { + /* ctx->optname = IP_TOS */ + BPF_MOV64_IMM(BPF_REG_0, IP_TOS), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, optname)), + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .get_level = SOL_IP, + .set_level = SOL_IP, + + .get_optname = IP_TOS, + .set_optname = 456, /* should be rewritten to IP_TOS */ + + .set_optval = { 1 << 3 }, + .set_optlen = 1, + .get_optval = { 1 << 3 }, + .get_optlen = 1, + }, + { + .descr = "setsockopt: read ctx->optlen", + .insns = { + /* r6 = ctx->optlen */ + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, optlen)), + + /* if (ctx->optlen == 64) { */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 64, 4), + /* ctx->optlen = -1 */ + BPF_MOV64_IMM(BPF_REG_0, -1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, optlen)), + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + /* } else { */ + /* return 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + /* } */ + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .set_optlen = 64, + }, + { + .descr = "setsockopt: ctx->optlen == -1 is ok", + .insns = { + /* ctx->optlen = -1 */ + BPF_MOV64_IMM(BPF_REG_0, -1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, optlen)), + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .set_optlen = 64, + }, + { + .descr = "setsockopt: deny ctx->optlen < 0 (except -1)", + .insns = { + /* ctx->optlen = -2 */ + BPF_MOV64_IMM(BPF_REG_0, -2), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, optlen)), + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .set_optlen = 4, + + .error = EFAULT_SETSOCKOPT, + }, + { + .descr = "setsockopt: deny ctx->optlen > input optlen", + .insns = { + /* ctx->optlen = 65 */ + BPF_MOV64_IMM(BPF_REG_0, 65), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, optlen)), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .set_optlen = 64, + + .error = EFAULT_SETSOCKOPT, + }, + { + .descr = "setsockopt: allow changing ctx->optlen within bounds", + .insns = { + /* r6 = ctx->optval */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, optval)), + /* r2 = ctx->optval */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), + /* r6 = ctx->optval + 1 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + + /* r7 = ctx->optval_end */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sockopt, optval_end)), + + /* if (ctx->optval + 1 <= ctx->optval_end) { */ + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1), + /* ctx->optval[0] = 1 << 3 */ + BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 1 << 3), + /* } */ + + /* ctx->optlen = 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, optlen)), + + /* return 1*/ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .get_level = SOL_IP, + .set_level = SOL_IP, + + .get_optname = IP_TOS, + .set_optname = IP_TOS, + + .set_optval = { 1, 1, 1, 1 }, + .set_optlen = 4, + .get_optval = { 1 << 3 }, + .get_optlen = 1, + }, + { + .descr = "setsockopt: deny write ctx->retval", + .insns = { + /* ctx->retval = 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, retval)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .error = DENY_LOAD, + }, + { + .descr = "setsockopt: deny read ctx->retval", + .insns = { + /* r6 = ctx->retval */ + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, retval)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .error = DENY_LOAD, + }, + { + .descr = "setsockopt: deny writing to ctx->optval", + .insns = { + /* ctx->optval = 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, optval)), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .error = DENY_LOAD, + }, + { + .descr = "setsockopt: deny writing to ctx->optval_end", + .insns = { + /* ctx->optval_end = 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct bpf_sockopt, optval_end)), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .error = DENY_LOAD, + }, + { + .descr = "setsockopt: allow IP_TOS <= 128", + .insns = { + /* r6 = ctx->optval */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, optval)), + /* r7 = ctx->optval + 1 */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1), + + /* r8 = ctx->optval_end */ + BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_1, + offsetof(struct bpf_sockopt, optval_end)), + + /* if (ctx->optval + 1 <= ctx->optval_end) { */ + BPF_JMP_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 4), + + /* r9 = ctx->optval[0] */ + BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_6, 0), + + /* if (ctx->optval[0] < 128) */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_9, 128, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + /* } */ + + /* } else { */ + BPF_MOV64_IMM(BPF_REG_0, 0), + /* } */ + + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .get_level = SOL_IP, + .set_level = SOL_IP, + + .get_optname = IP_TOS, + .set_optname = IP_TOS, + + .set_optval = { 0x80 }, + .set_optlen = 1, + .get_optval = { 0x80 }, + .get_optlen = 1, + }, + { + .descr = "setsockopt: deny IP_TOS > 128", + .insns = { + /* r6 = ctx->optval */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, optval)), + /* r7 = ctx->optval + 1 */ + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1), + + /* r8 = ctx->optval_end */ + BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_1, + offsetof(struct bpf_sockopt, optval_end)), + + /* if (ctx->optval + 1 <= ctx->optval_end) { */ + BPF_JMP_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 4), + + /* r9 = ctx->optval[0] */ + BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_6, 0), + + /* if (ctx->optval[0] < 128) */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_9, 128, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_A(1), + /* } */ + + /* } else { */ + BPF_MOV64_IMM(BPF_REG_0, 0), + /* } */ + + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .get_level = SOL_IP, + .set_level = SOL_IP, + + .get_optname = IP_TOS, + .set_optname = IP_TOS, + + .set_optval = { 0x81 }, + .set_optlen = 1, + .get_optval = { 0x00 }, + .get_optlen = 1, + + .error = EPERM_SETSOCKOPT, + }, +}; + +static int load_prog(const struct bpf_insn *insns, + enum bpf_attach_type expected_attach_type) +{ + struct bpf_load_program_attr attr = { + .prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT, + .expected_attach_type = expected_attach_type, + .insns = insns, + .license = "GPL", + .log_level = 2, + }; + int fd; + + for (; + insns[attr.insns_cnt].code != (BPF_JMP | BPF_EXIT); + attr.insns_cnt++) { + } + attr.insns_cnt++; + + fd = bpf_load_program_xattr(&attr, bpf_log_buf, sizeof(bpf_log_buf)); + if (verbose && fd < 0) + fprintf(stderr, "%s\n", bpf_log_buf); + + return fd; +} + +static int run_test(int cgroup_fd, struct sockopt_test *test) +{ + int sock_fd, err, prog_fd; + void *optval = NULL; + int ret = 0; + + prog_fd = load_prog(test->insns, test->expected_attach_type); + if (prog_fd < 0) { + if (test->error == DENY_LOAD) + return 0; + + log_err("Failed to load BPF program"); + return -1; + } + + err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0); + if (err < 0) { + if (test->error == DENY_ATTACH) + goto close_prog_fd; + + log_err("Failed to attach BPF program"); + ret = -1; + goto close_prog_fd; + } + + sock_fd = socket(AF_INET, SOCK_STREAM, 0); + if (sock_fd < 0) { + log_err("Failed to create AF_INET socket"); + ret = -1; + goto detach_prog; + } + + if (test->set_optlen) { + err = setsockopt(sock_fd, test->set_level, test->set_optname, + test->set_optval, test->set_optlen); + if (err) { + if (errno == EPERM && test->error == EPERM_SETSOCKOPT) + goto close_sock_fd; + if (errno == EFAULT && test->error == EFAULT_SETSOCKOPT) + goto free_optval; + + log_err("Failed to call setsockopt"); + ret = -1; + goto close_sock_fd; + } + } + + if (test->get_optlen) { + optval = malloc(test->get_optlen); + socklen_t optlen = test->get_optlen; + socklen_t expected_get_optlen = test->get_optlen_ret ?: + test->get_optlen; + + err = getsockopt(sock_fd, test->get_level, test->get_optname, + optval, &optlen); + if (err) { + if (errno == EPERM && test->error == EPERM_GETSOCKOPT) + goto free_optval; + if (errno == EFAULT && test->error == EFAULT_GETSOCKOPT) + goto free_optval; + + log_err("Failed to call getsockopt"); + ret = -1; + goto free_optval; + } + + if (optlen != expected_get_optlen) { + errno = 0; + log_err("getsockopt returned unexpected optlen"); + ret = -1; + goto free_optval; + } + + if (memcmp(optval, test->get_optval, optlen) != 0) { + errno = 0; + log_err("getsockopt returned unexpected optval"); + ret = -1; + goto free_optval; + } + } + + ret = test->error != OK; + +free_optval: + free(optval); +close_sock_fd: + close(sock_fd); +detach_prog: + bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type); +close_prog_fd: + close(prog_fd); + return ret; +} + +int main(int args, char **argv) +{ + int err = EXIT_FAILURE, error_cnt = 0; + int cgroup_fd, i; + + if (setup_cgroup_environment()) + goto cleanup_obj; + + cgroup_fd = create_and_get_cgroup(CG_PATH); + if (cgroup_fd < 0) + goto cleanup_cgroup_env; + + if (join_cgroup(CG_PATH)) + goto cleanup_cgroup; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + int err = run_test(cgroup_fd, &tests[i]); + + if (err) + error_cnt++; + + printf("#%d %s: %s\n", i, err ? "FAIL" : "PASS", + tests[i].descr); + } + + printf("Summary: %ld PASSED, %d FAILED\n", + ARRAY_SIZE(tests) - error_cnt, error_cnt); + err = error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; + +cleanup_cgroup: + close(cgroup_fd); +cleanup_cgroup_env: + cleanup_cgroup_environment(); +cleanup_obj: + return err; +} -- cgit v1.2.3 From 8a027dc0d8f5d421eba953a7116aea991459b202 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 27 Jun 2019 13:38:52 -0700 Subject: selftests/bpf: add sockopt test that exercises sk helpers socktop test that introduces new SOL_CUSTOM sockopt level and stores whatever users sets in sk storage. Whenever getsockopt is called, the original value is retrieved. v9: * SO_SNDBUF example to override user-supplied buffer v7: * use retval=0 and optlen-1 v6: * test 'ret=1' use-case as well (Alexei Starovoitov) v4: * don't call bpf_sk_fullsock helper v3: * drop (__u8 *)(long) casts for optval{,_end} v2: * new test Cc: Andrii Nakryiko Cc: Martin Lau Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/.gitignore | 1 + tools/testing/selftests/bpf/Makefile | 3 +- tools/testing/selftests/bpf/progs/sockopt_sk.c | 111 +++++++++++++ tools/testing/selftests/bpf/test_sockopt_sk.c | 214 +++++++++++++++++++++++++ 4 files changed, 328 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/sockopt_sk.c create mode 100644 tools/testing/selftests/bpf/test_sockopt_sk.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 3fe92601223d..8ac076c311d4 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -40,3 +40,4 @@ test_hashmap test_btf_dump xdping test_sockopt +test_sockopt_sk diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 7f2210402e57..57ae4e168ace 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -26,7 +26,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage test_select_reuseport test_section_names \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ - test_btf_dump test_cgroup_attach xdping test_sockopt + test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) TEST_GEN_FILES = $(BPF_OBJ_FILES) @@ -104,6 +104,7 @@ $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c $(OUTPUT)/test_cgroup_attach: cgroup_helpers.c $(OUTPUT)/test_sockopt: cgroup_helpers.c +$(OUTPUT)/test_sockopt_sk: cgroup_helpers.c .PHONY: force diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c new file mode 100644 index 000000000000..076122c898e9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; + +#define SOL_CUSTOM 0xdeadbeef + +struct sockopt_sk { + __u8 val; +}; + +struct bpf_map_def SEC("maps") socket_storage_map = { + .type = BPF_MAP_TYPE_SK_STORAGE, + .key_size = sizeof(int), + .value_size = sizeof(struct sockopt_sk), + .map_flags = BPF_F_NO_PREALLOC, +}; +BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct sockopt_sk); + +SEC("cgroup/getsockopt") +int _getsockopt(struct bpf_sockopt *ctx) +{ + __u8 *optval_end = ctx->optval_end; + __u8 *optval = ctx->optval; + struct sockopt_sk *storage; + + if (ctx->level == SOL_IP && ctx->optname == IP_TOS) + /* Not interested in SOL_IP:IP_TOS; + * let next BPF program in the cgroup chain or kernel + * handle it. + */ + return 1; + + if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { + /* Not interested in SOL_SOCKET:SO_SNDBUF; + * let next BPF program in the cgroup chain or kernel + * handle it. + */ + return 1; + } + + if (ctx->level != SOL_CUSTOM) + return 0; /* EPERM, deny everything except custom level */ + + if (optval + 1 > optval_end) + return 0; /* EPERM, bounds check */ + + storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 0; /* EPERM, couldn't get sk storage */ + + if (!ctx->retval) + return 0; /* EPERM, kernel should not have handled + * SOL_CUSTOM, something is wrong! + */ + ctx->retval = 0; /* Reset system call return value to zero */ + + optval[0] = storage->val; + ctx->optlen = 1; + + return 1; +} + +SEC("cgroup/setsockopt") +int _setsockopt(struct bpf_sockopt *ctx) +{ + __u8 *optval_end = ctx->optval_end; + __u8 *optval = ctx->optval; + struct sockopt_sk *storage; + + if (ctx->level == SOL_IP && ctx->optname == IP_TOS) + /* Not interested in SOL_IP:IP_TOS; + * let next BPF program in the cgroup chain or kernel + * handle it. + */ + return 1; + + if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { + /* Overwrite SO_SNDBUF value */ + + if (optval + sizeof(__u32) > optval_end) + return 0; /* EPERM, bounds check */ + + *(__u32 *)optval = 0x55AA; + ctx->optlen = 4; + + return 1; + } + + if (ctx->level != SOL_CUSTOM) + return 0; /* EPERM, deny everything except custom level */ + + if (optval + 1 > optval_end) + return 0; /* EPERM, bounds check */ + + storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 0; /* EPERM, couldn't get sk storage */ + + storage->val = optval[0]; + ctx->optlen = -1; /* BPF has consumed this option, don't call kernel + * setsockopt handler. + */ + + return 1; +} diff --git a/tools/testing/selftests/bpf/test_sockopt_sk.c b/tools/testing/selftests/bpf/test_sockopt_sk.c new file mode 100644 index 000000000000..12e79ed075ce --- /dev/null +++ b/tools/testing/selftests/bpf/test_sockopt_sk.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "bpf_rlimit.h" +#include "bpf_util.h" +#include "cgroup_helpers.h" + +#define CG_PATH "/sockopt" + +#define SOL_CUSTOM 0xdeadbeef + +static int getsetsockopt(void) +{ + int fd, err; + char buf[4] = {}; + socklen_t optlen; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) { + log_err("Failed to create socket"); + return -1; + } + + /* IP_TOS - BPF bypass */ + + buf[0] = 0x08; + err = setsockopt(fd, SOL_IP, IP_TOS, buf, 1); + if (err) { + log_err("Failed to call setsockopt(IP_TOS)"); + goto err; + } + + buf[0] = 0x00; + optlen = 1; + err = getsockopt(fd, SOL_IP, IP_TOS, buf, &optlen); + if (err) { + log_err("Failed to call getsockopt(IP_TOS)"); + goto err; + } + + if (buf[0] != 0x08) { + log_err("Unexpected getsockopt(IP_TOS) buf[0] 0x%02x != 0x08", + buf[0]); + goto err; + } + + /* IP_TTL - EPERM */ + + buf[0] = 1; + err = setsockopt(fd, SOL_IP, IP_TTL, buf, 1); + if (!err || errno != EPERM) { + log_err("Unexpected success from setsockopt(IP_TTL)"); + goto err; + } + + /* SOL_CUSTOM - handled by BPF */ + + buf[0] = 0x01; + err = setsockopt(fd, SOL_CUSTOM, 0, buf, 1); + if (err) { + log_err("Failed to call setsockopt"); + goto err; + } + + buf[0] = 0x00; + optlen = 4; + err = getsockopt(fd, SOL_CUSTOM, 0, buf, &optlen); + if (err) { + log_err("Failed to call getsockopt"); + goto err; + } + + if (optlen != 1) { + log_err("Unexpected optlen %d != 1", optlen); + goto err; + } + if (buf[0] != 0x01) { + log_err("Unexpected buf[0] 0x%02x != 0x01", buf[0]); + goto err; + } + + /* SO_SNDBUF is overwritten */ + + buf[0] = 0x01; + buf[1] = 0x01; + buf[2] = 0x01; + buf[3] = 0x01; + err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, buf, 4); + if (err) { + log_err("Failed to call setsockopt(SO_SNDBUF)"); + goto err; + } + + buf[0] = 0x00; + buf[1] = 0x00; + buf[2] = 0x00; + buf[3] = 0x00; + optlen = 4; + err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, buf, &optlen); + if (err) { + log_err("Failed to call getsockopt(SO_SNDBUF)"); + goto err; + } + + if (*(__u32 *)buf != 0x55AA*2) { + log_err("Unexpected getsockopt(SO_SNDBUF) 0x%x != 0x55AA*2", + *(__u32 *)buf); + goto err; + } + + close(fd); + return 0; +err: + close(fd); + return -1; +} + +static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title) +{ + enum bpf_attach_type attach_type; + enum bpf_prog_type prog_type; + struct bpf_program *prog; + int err; + + err = libbpf_prog_type_by_name(title, &prog_type, &attach_type); + if (err) { + log_err("Failed to deduct types for %s BPF program", title); + return -1; + } + + prog = bpf_object__find_program_by_title(obj, title); + if (!prog) { + log_err("Failed to find %s BPF program", title); + return -1; + } + + err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, + attach_type, 0); + if (err) { + log_err("Failed to attach %s BPF program", title); + return -1; + } + + return 0; +} + +static int run_test(int cgroup_fd) +{ + struct bpf_prog_load_attr attr = { + .file = "./sockopt_sk.o", + }; + struct bpf_object *obj; + int ignored; + int err; + + err = bpf_prog_load_xattr(&attr, &obj, &ignored); + if (err) { + log_err("Failed to load BPF object"); + return -1; + } + + err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt"); + if (err) + goto close_bpf_object; + + err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt"); + if (err) + goto close_bpf_object; + + err = getsetsockopt(); + +close_bpf_object: + bpf_object__close(obj); + return err; +} + +int main(int args, char **argv) +{ + int cgroup_fd; + int err = EXIT_SUCCESS; + + if (setup_cgroup_environment()) + goto cleanup_obj; + + cgroup_fd = create_and_get_cgroup(CG_PATH); + if (cgroup_fd < 0) + goto cleanup_cgroup_env; + + if (join_cgroup(CG_PATH)) + goto cleanup_cgroup; + + if (run_test(cgroup_fd)) + err = EXIT_FAILURE; + + printf("test_sockopt_sk: %s\n", + err == EXIT_SUCCESS ? "PASSED" : "FAILED"); + +cleanup_cgroup: + close(cgroup_fd); +cleanup_cgroup_env: + cleanup_cgroup_environment(); +cleanup_obj: + return err; +} -- cgit v1.2.3 From 65b4414a05ebf51966c4a28a2a1b156ab5a01b0f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 27 Jun 2019 13:38:53 -0700 Subject: selftests/bpf: add sockopt test that exercises BPF_F_ALLOW_MULTI sockopt test that verifies chaining behavior. v9: * setsockopt chaining example v7: * rework the test to verify cgroup getsockopt chaining Cc: Andrii Nakryiko Cc: Martin Lau Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/.gitignore | 1 + tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/progs/sockopt_multi.c | 71 ++++ tools/testing/selftests/bpf/test_sockopt_multi.c | 374 ++++++++++++++++++++++ 4 files changed, 449 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/sockopt_multi.c create mode 100644 tools/testing/selftests/bpf/test_sockopt_multi.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 8ac076c311d4..a2f7f79c7908 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -41,3 +41,4 @@ test_btf_dump xdping test_sockopt test_sockopt_sk +test_sockopt_multi diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 57ae4e168ace..de1754a8f5fe 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -26,7 +26,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage test_select_reuseport test_section_names \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ - test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk + test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk \ + test_sockopt_multi BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) TEST_GEN_FILES = $(BPF_OBJ_FILES) @@ -105,6 +106,7 @@ $(OUTPUT)/test_sysctl: cgroup_helpers.c $(OUTPUT)/test_cgroup_attach: cgroup_helpers.c $(OUTPUT)/test_sockopt: cgroup_helpers.c $(OUTPUT)/test_sockopt_sk: cgroup_helpers.c +$(OUTPUT)/test_sockopt_multi: cgroup_helpers.c .PHONY: force diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c new file mode 100644 index 000000000000..4afd2595c08e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; + +SEC("cgroup/getsockopt/child") +int _getsockopt_child(struct bpf_sockopt *ctx) +{ + __u8 *optval_end = ctx->optval_end; + __u8 *optval = ctx->optval; + + if (ctx->level != SOL_IP || ctx->optname != IP_TOS) + return 1; + + if (optval + 1 > optval_end) + return 0; /* EPERM, bounds check */ + + if (optval[0] != 0x80) + return 0; /* EPERM, unexpected optval from the kernel */ + + ctx->retval = 0; /* Reset system call return value to zero */ + + optval[0] = 0x90; + ctx->optlen = 1; + + return 1; +} + +SEC("cgroup/getsockopt/parent") +int _getsockopt_parent(struct bpf_sockopt *ctx) +{ + __u8 *optval_end = ctx->optval_end; + __u8 *optval = ctx->optval; + + if (ctx->level != SOL_IP || ctx->optname != IP_TOS) + return 1; + + if (optval + 1 > optval_end) + return 0; /* EPERM, bounds check */ + + if (optval[0] != 0x90) + return 0; /* EPERM, unexpected optval from the kernel */ + + ctx->retval = 0; /* Reset system call return value to zero */ + + optval[0] = 0xA0; + ctx->optlen = 1; + + return 1; +} + +SEC("cgroup/setsockopt") +int _setsockopt(struct bpf_sockopt *ctx) +{ + __u8 *optval_end = ctx->optval_end; + __u8 *optval = ctx->optval; + + if (ctx->level != SOL_IP || ctx->optname != IP_TOS) + return 1; + + if (optval + 1 > optval_end) + return 0; /* EPERM, bounds check */ + + optval[0] += 0x10; + ctx->optlen = 1; + + return 1; +} diff --git a/tools/testing/selftests/bpf/test_sockopt_multi.c b/tools/testing/selftests/bpf/test_sockopt_multi.c new file mode 100644 index 000000000000..4be3441db867 --- /dev/null +++ b/tools/testing/selftests/bpf/test_sockopt_multi.c @@ -0,0 +1,374 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "bpf_rlimit.h" +#include "bpf_util.h" +#include "cgroup_helpers.h" + +static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title) +{ + enum bpf_attach_type attach_type; + enum bpf_prog_type prog_type; + struct bpf_program *prog; + int err; + + err = libbpf_prog_type_by_name(title, &prog_type, &attach_type); + if (err) { + log_err("Failed to deduct types for %s BPF program", title); + return -1; + } + + prog = bpf_object__find_program_by_title(obj, title); + if (!prog) { + log_err("Failed to find %s BPF program", title); + return -1; + } + + err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, + attach_type, BPF_F_ALLOW_MULTI); + if (err) { + log_err("Failed to attach %s BPF program", title); + return -1; + } + + return 0; +} + +static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title) +{ + enum bpf_attach_type attach_type; + enum bpf_prog_type prog_type; + struct bpf_program *prog; + int err; + + err = libbpf_prog_type_by_name(title, &prog_type, &attach_type); + if (err) + return -1; + + prog = bpf_object__find_program_by_title(obj, title); + if (!prog) + return -1; + + err = bpf_prog_detach2(bpf_program__fd(prog), cgroup_fd, + attach_type); + if (err) + return -1; + + return 0; +} + +static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, + int cg_child, int sock_fd) +{ + socklen_t optlen; + __u8 buf; + int err; + + /* Set IP_TOS to the expected value (0x80). */ + + buf = 0x80; + err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1); + if (err < 0) { + log_err("Failed to call setsockopt(IP_TOS)"); + goto detach; + } + + buf = 0x00; + optlen = 1; + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (err) { + log_err("Failed to call getsockopt(IP_TOS)"); + goto detach; + } + + if (buf != 0x80) { + log_err("Unexpected getsockopt 0x%x != 0x80 without BPF", buf); + err = -1; + goto detach; + } + + /* Attach child program and make sure it returns new value: + * - kernel: -> 0x80 + * - child: 0x80 -> 0x90 + */ + + err = prog_attach(obj, cg_child, "cgroup/getsockopt/child"); + if (err) + goto detach; + + buf = 0x00; + optlen = 1; + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (err) { + log_err("Failed to call getsockopt(IP_TOS)"); + goto detach; + } + + if (buf != 0x90) { + log_err("Unexpected getsockopt 0x%x != 0x90", buf); + err = -1; + goto detach; + } + + /* Attach parent program and make sure it returns new value: + * - kernel: -> 0x80 + * - child: 0x80 -> 0x90 + * - parent: 0x90 -> 0xA0 + */ + + err = prog_attach(obj, cg_parent, "cgroup/getsockopt/parent"); + if (err) + goto detach; + + buf = 0x00; + optlen = 1; + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (err) { + log_err("Failed to call getsockopt(IP_TOS)"); + goto detach; + } + + if (buf != 0xA0) { + log_err("Unexpected getsockopt 0x%x != 0xA0", buf); + err = -1; + goto detach; + } + + /* Setting unexpected initial sockopt should return EPERM: + * - kernel: -> 0x40 + * - child: unexpected 0x40, EPERM + * - parent: unexpected 0x40, EPERM + */ + + buf = 0x40; + if (setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1) < 0) { + log_err("Failed to call setsockopt(IP_TOS)"); + goto detach; + } + + buf = 0x00; + optlen = 1; + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (!err) { + log_err("Unexpected success from getsockopt(IP_TOS)"); + goto detach; + } + + /* Detach child program and make sure we still get EPERM: + * - kernel: -> 0x40 + * - parent: unexpected 0x40, EPERM + */ + + err = prog_detach(obj, cg_child, "cgroup/getsockopt/child"); + if (err) { + log_err("Failed to detach child program"); + goto detach; + } + + buf = 0x00; + optlen = 1; + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (!err) { + log_err("Unexpected success from getsockopt(IP_TOS)"); + goto detach; + } + + /* Set initial value to the one the parent program expects: + * - kernel: -> 0x90 + * - parent: 0x90 -> 0xA0 + */ + + buf = 0x90; + err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1); + if (err < 0) { + log_err("Failed to call setsockopt(IP_TOS)"); + goto detach; + } + + buf = 0x00; + optlen = 1; + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (err) { + log_err("Failed to call getsockopt(IP_TOS)"); + goto detach; + } + + if (buf != 0xA0) { + log_err("Unexpected getsockopt 0x%x != 0xA0", buf); + err = -1; + goto detach; + } + +detach: + prog_detach(obj, cg_child, "cgroup/getsockopt/child"); + prog_detach(obj, cg_parent, "cgroup/getsockopt/parent"); + + return err; +} + +static int run_setsockopt_test(struct bpf_object *obj, int cg_parent, + int cg_child, int sock_fd) +{ + socklen_t optlen; + __u8 buf; + int err; + + /* Set IP_TOS to the expected value (0x80). */ + + buf = 0x80; + err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1); + if (err < 0) { + log_err("Failed to call setsockopt(IP_TOS)"); + goto detach; + } + + buf = 0x00; + optlen = 1; + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (err) { + log_err("Failed to call getsockopt(IP_TOS)"); + goto detach; + } + + if (buf != 0x80) { + log_err("Unexpected getsockopt 0x%x != 0x80 without BPF", buf); + err = -1; + goto detach; + } + + /* Attach child program and make sure it adds 0x10. */ + + err = prog_attach(obj, cg_child, "cgroup/setsockopt"); + if (err) + goto detach; + + buf = 0x80; + err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1); + if (err < 0) { + log_err("Failed to call setsockopt(IP_TOS)"); + goto detach; + } + + buf = 0x00; + optlen = 1; + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (err) { + log_err("Failed to call getsockopt(IP_TOS)"); + goto detach; + } + + if (buf != 0x80 + 0x10) { + log_err("Unexpected getsockopt 0x%x != 0x80 + 0x10", buf); + err = -1; + goto detach; + } + + /* Attach parent program and make sure it adds another 0x10. */ + + err = prog_attach(obj, cg_parent, "cgroup/setsockopt"); + if (err) + goto detach; + + buf = 0x80; + err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1); + if (err < 0) { + log_err("Failed to call setsockopt(IP_TOS)"); + goto detach; + } + + buf = 0x00; + optlen = 1; + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (err) { + log_err("Failed to call getsockopt(IP_TOS)"); + goto detach; + } + + if (buf != 0x80 + 2 * 0x10) { + log_err("Unexpected getsockopt 0x%x != 0x80 + 2 * 0x10", buf); + err = -1; + goto detach; + } + +detach: + prog_detach(obj, cg_child, "cgroup/setsockopt"); + prog_detach(obj, cg_parent, "cgroup/setsockopt"); + + return err; +} + +int main(int argc, char **argv) +{ + struct bpf_prog_load_attr attr = { + .file = "./sockopt_multi.o", + }; + int cg_parent = -1, cg_child = -1; + struct bpf_object *obj = NULL; + int sock_fd = -1; + int err = -1; + int ignored; + + if (setup_cgroup_environment()) { + log_err("Failed to setup cgroup environment\n"); + goto out; + } + + cg_parent = create_and_get_cgroup("/parent"); + if (cg_parent < 0) { + log_err("Failed to create cgroup /parent\n"); + goto out; + } + + cg_child = create_and_get_cgroup("/parent/child"); + if (cg_child < 0) { + log_err("Failed to create cgroup /parent/child\n"); + goto out; + } + + if (join_cgroup("/parent/child")) { + log_err("Failed to join cgroup /parent/child\n"); + goto out; + } + + err = bpf_prog_load_xattr(&attr, &obj, &ignored); + if (err) { + log_err("Failed to load BPF object"); + goto out; + } + + sock_fd = socket(AF_INET, SOCK_STREAM, 0); + if (sock_fd < 0) { + log_err("Failed to create socket"); + goto out; + } + + if (run_getsockopt_test(obj, cg_parent, cg_child, sock_fd)) + err = -1; + printf("test_sockopt_multi: getsockopt %s\n", + err ? "FAILED" : "PASSED"); + + if (run_setsockopt_test(obj, cg_parent, cg_child, sock_fd)) + err = -1; + printf("test_sockopt_multi: setsockopt %s\n", + err ? "FAILED" : "PASSED"); + +out: + close(sock_fd); + bpf_object__close(obj); + close(cg_child); + close(cg_parent); + + printf("test_sockopt_multi: %s\n", err ? "FAILED" : "PASSED"); + return err ? EXIT_FAILURE : EXIT_SUCCESS; +} -- cgit v1.2.3 From f6d08d9d8543c8ee494b307804b28e2750ffedb9 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 27 Jun 2019 13:38:55 -0700 Subject: bpftool: support cgroup sockopt Support sockopt prog type and cgroup hooks in the bpftool. Cc: Andrii Nakryiko Cc: Martin Lau Acked-by: Jakub Kicinski Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 7 +++++-- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 3 ++- tools/bpf/bpftool/bash-completion/bpftool | 9 ++++++--- tools/bpf/bpftool/cgroup.c | 5 ++++- tools/bpf/bpftool/main.h | 1 + tools/bpf/bpftool/prog.c | 3 ++- 6 files changed, 20 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index d80fdde79c22..585f270c2d25 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -29,7 +29,8 @@ CGROUP COMMANDS | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } | *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** | | **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** | -| **sendmsg4** | **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** } +| **sendmsg4** | **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | +| **getsockopt** | **setsockopt** } | *ATTACH_FLAGS* := { **multi** | **override** } DESCRIPTION @@ -90,7 +91,9 @@ DESCRIPTION an unconnected udp4 socket (since 5.2); **recvmsg6** call to recvfrom(2), recvmsg(2), recvmmsg(2) for an unconnected udp6 socket (since 5.2); - **sysctl** sysctl access (since 5.2). + **sysctl** sysctl access (since 5.2); + **getsockopt** call to getsockopt (since 5.3); + **setsockopt** call to setsockopt (since 5.3). **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* Detach *PROG* from the cgroup *CGROUP* and attach type diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 55dd06517a3b..1df637f85f94 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -40,7 +40,8 @@ PROG COMMANDS | **lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** | | **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** | | **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** | -| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** +| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** | +| **cgroup/getsockopt** | **cgroup/setsockopt** | } | *ATTACH_TYPE* := { | **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector** diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index a17e84c67498..ba37095e1f62 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -379,7 +379,8 @@ _bpftool() cgroup/sendmsg4 cgroup/sendmsg6 \ cgroup/recvmsg4 cgroup/recvmsg6 \ cgroup/post_bind4 cgroup/post_bind6 \ - cgroup/sysctl" -- \ + cgroup/sysctl cgroup/getsockopt \ + cgroup/setsockopt" -- \ "$cur" ) ) return 0 ;; @@ -689,7 +690,8 @@ _bpftool() attach|detach) local ATTACH_TYPES='ingress egress sock_create sock_ops \ device bind4 bind6 post_bind4 post_bind6 connect4 \ - connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl' + connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl \ + getsockopt setsockopt' local ATTACH_FLAGS='multi override' local PROG_TYPE='id pinned tag' case $prev in @@ -699,7 +701,8 @@ _bpftool() ;; ingress|egress|sock_create|sock_ops|device|bind4|bind6|\ post_bind4|post_bind6|connect4|connect6|sendmsg4|\ - sendmsg6|recvmsg4|recvmsg6|sysctl) + sendmsg6|recvmsg4|recvmsg6|sysctl|getsockopt|\ + setsockopt) COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ "$cur" ) ) return 0 diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 73ec8ea33fb4..390b89a224f1 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -26,7 +26,8 @@ " sock_ops | device | bind4 | bind6 |\n" \ " post_bind4 | post_bind6 | connect4 |\n" \ " connect6 | sendmsg4 | sendmsg6 |\n" \ - " recvmsg4 | recvmsg6 | sysctl }" + " recvmsg4 | recvmsg6 | sysctl |\n" \ + " getsockopt | setsockopt }" static const char * const attach_type_strings[] = { [BPF_CGROUP_INET_INGRESS] = "ingress", @@ -45,6 +46,8 @@ static const char * const attach_type_strings[] = { [BPF_CGROUP_SYSCTL] = "sysctl", [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4", [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6", + [BPF_CGROUP_GETSOCKOPT] = "getsockopt", + [BPF_CGROUP_SETSOCKOPT] = "setsockopt", [__MAX_BPF_ATTACH_TYPE] = NULL, }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 28a2a5857e14..9c5d9c80f71e 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -74,6 +74,7 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", + [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", }; extern const char * const map_type_name[]; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index f1a831f05010..9b0db5d14e31 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1071,7 +1071,8 @@ static int do_help(int argc, char **argv) " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n" " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n" " cgroup/sendmsg4 | cgroup/sendmsg6 | cgroup/recvmsg4 |\n" - " cgroup/recvmsg6 }\n" + " cgroup/recvmsg6 | cgroup/getsockopt |\n" + " cgroup/setsockopt }\n" " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" " flow_dissector }\n" " " HELP_SPEC_OPTIONS "\n" -- cgit v1.2.3 From 2d6dbb9a65f4001f2878512078394c11301994f3 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 27 Jun 2019 18:12:33 -0700 Subject: selftests/bpf: fix -Wstrict-aliasing in test_sockopt_sk.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's use union with u8[4] and u32 members for sockopt buffer, that should fix any possible aliasing issues. test_sockopt_sk.c: In function ‘getsetsockopt’: test_sockopt_sk.c:115:2: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] if (*(__u32 *)buf != 0x55AA*2) { ^~ test_sockopt_sk.c:116:3: warning: dereferencing type-punned pointer will break strict-aliasing rules [-Wstrict-aliasing] log_err("Unexpected getsockopt(SO_SNDBUF) 0x%x != 0x55AA*2", ^~~~~~~ Fixes: 8a027dc0d8f5 ("selftests/bpf: add sockopt test that exercises sk helpers") Reported-by: Alexei Starovoitov Signed-off-by: Stanislav Fomichev Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_sockopt_sk.c | 51 +++++++++++++-------------- 1 file changed, 24 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/test_sockopt_sk.c b/tools/testing/selftests/bpf/test_sockopt_sk.c index 12e79ed075ce..036b652e5ca9 100644 --- a/tools/testing/selftests/bpf/test_sockopt_sk.c +++ b/tools/testing/selftests/bpf/test_sockopt_sk.c @@ -22,7 +22,10 @@ static int getsetsockopt(void) { int fd, err; - char buf[4] = {}; + union { + char u8[4]; + __u32 u32; + } buf = {}; socklen_t optlen; fd = socket(AF_INET, SOCK_STREAM, 0); @@ -33,31 +36,31 @@ static int getsetsockopt(void) /* IP_TOS - BPF bypass */ - buf[0] = 0x08; - err = setsockopt(fd, SOL_IP, IP_TOS, buf, 1); + buf.u8[0] = 0x08; + err = setsockopt(fd, SOL_IP, IP_TOS, &buf, 1); if (err) { log_err("Failed to call setsockopt(IP_TOS)"); goto err; } - buf[0] = 0x00; + buf.u8[0] = 0x00; optlen = 1; - err = getsockopt(fd, SOL_IP, IP_TOS, buf, &optlen); + err = getsockopt(fd, SOL_IP, IP_TOS, &buf, &optlen); if (err) { log_err("Failed to call getsockopt(IP_TOS)"); goto err; } - if (buf[0] != 0x08) { + if (buf.u8[0] != 0x08) { log_err("Unexpected getsockopt(IP_TOS) buf[0] 0x%02x != 0x08", - buf[0]); + buf.u8[0]); goto err; } /* IP_TTL - EPERM */ - buf[0] = 1; - err = setsockopt(fd, SOL_IP, IP_TTL, buf, 1); + buf.u8[0] = 1; + err = setsockopt(fd, SOL_IP, IP_TTL, &buf, 1); if (!err || errno != EPERM) { log_err("Unexpected success from setsockopt(IP_TTL)"); goto err; @@ -65,16 +68,16 @@ static int getsetsockopt(void) /* SOL_CUSTOM - handled by BPF */ - buf[0] = 0x01; - err = setsockopt(fd, SOL_CUSTOM, 0, buf, 1); + buf.u8[0] = 0x01; + err = setsockopt(fd, SOL_CUSTOM, 0, &buf, 1); if (err) { log_err("Failed to call setsockopt"); goto err; } - buf[0] = 0x00; + buf.u32 = 0x00; optlen = 4; - err = getsockopt(fd, SOL_CUSTOM, 0, buf, &optlen); + err = getsockopt(fd, SOL_CUSTOM, 0, &buf, &optlen); if (err) { log_err("Failed to call getsockopt"); goto err; @@ -84,37 +87,31 @@ static int getsetsockopt(void) log_err("Unexpected optlen %d != 1", optlen); goto err; } - if (buf[0] != 0x01) { - log_err("Unexpected buf[0] 0x%02x != 0x01", buf[0]); + if (buf.u8[0] != 0x01) { + log_err("Unexpected buf[0] 0x%02x != 0x01", buf.u8[0]); goto err; } /* SO_SNDBUF is overwritten */ - buf[0] = 0x01; - buf[1] = 0x01; - buf[2] = 0x01; - buf[3] = 0x01; - err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, buf, 4); + buf.u32 = 0x01010101; + err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf, 4); if (err) { log_err("Failed to call setsockopt(SO_SNDBUF)"); goto err; } - buf[0] = 0x00; - buf[1] = 0x00; - buf[2] = 0x00; - buf[3] = 0x00; + buf.u32 = 0x00; optlen = 4; - err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, buf, &optlen); + err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf, &optlen); if (err) { log_err("Failed to call getsockopt(SO_SNDBUF)"); goto err; } - if (*(__u32 *)buf != 0x55AA*2) { + if (buf.u32 != 0x55AA*2) { log_err("Unexpected getsockopt(SO_SNDBUF) 0x%x != 0x55AA*2", - *(__u32 *)buf); + buf.u32); goto err; } -- cgit v1.2.3 From cdfc7f888c2a355b01308e97c6df108f1c2b64e8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 2 Jul 2019 08:16:20 -0700 Subject: libbpf: fix GCC8 warning for strncpy GCC8 started emitting warning about using strncpy with number of bytes exactly equal destination size, which is generally unsafe, as can lead to non-zero terminated string being copied. Use IFNAMSIZ - 1 as number of bytes to ensure name is always zero-terminated. Signed-off-by: Andrii Nakryiko Cc: Magnus Karlsson Acked-by: Yonghong Song Acked-by: Magnus Karlsson Signed-off-by: Daniel Borkmann --- tools/lib/bpf/xsk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index bf15a80a37c2..b33740221b7e 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -327,7 +327,8 @@ static int xsk_get_max_queues(struct xsk_socket *xsk) channels.cmd = ETHTOOL_GCHANNELS; ifr.ifr_data = (void *)&channels; - strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ); + strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1); + ifr.ifr_name[IFNAMSIZ - 1] = '\0'; err = ioctl(fd, SIOCETHTOOL, &ifr); if (err && errno != EOPNOTSUPP) { ret = -errno; -- cgit v1.2.3 From 33bae185f74d49a0d7b1bfaafb8e959efce0f243 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 2 Jul 2019 18:25:31 +0800 Subject: bpf, libbpf, smatch: Fix potential NULL pointer dereference Based on the following report from Smatch, fix the potential NULL pointer dereference check: tools/lib/bpf/libbpf.c:3493 bpf_prog_load_xattr() warn: variable dereferenced before check 'attr' (see line 3483) 3479 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, 3480 struct bpf_object **pobj, int *prog_fd) 3481 { 3482 struct bpf_object_open_attr open_attr = { 3483 .file = attr->file, 3484 .prog_type = attr->prog_type, ^^^^^^ 3485 }; At the head of function, it directly access 'attr' without checking if it's NULL pointer. This patch moves the values assignment after validating 'attr' and 'attr->file'. Signed-off-by: Leo Yan Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6e6ebef11ba3..4907997289e9 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3872,10 +3872,7 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type, int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, struct bpf_object **pobj, int *prog_fd) { - struct bpf_object_open_attr open_attr = { - .file = attr->file, - .prog_type = attr->prog_type, - }; + struct bpf_object_open_attr open_attr = {}; struct bpf_program *prog, *first_prog = NULL; enum bpf_attach_type expected_attach_type; enum bpf_prog_type prog_type; @@ -3888,6 +3885,9 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, if (!attr->file) return -EINVAL; + open_attr.file = attr->file; + open_attr.prog_type = attr->prog_type; + obj = bpf_object__open_xattr(&open_attr); if (IS_ERR_OR_NULL(obj)) return -ENOENT; -- cgit v1.2.3 From d2f5bbbc350050895d9f33c2744a61f9e0af1caa Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 2 Jul 2019 20:26:51 +0200 Subject: selftests: bpf: standardize to static __always_inline The progs for bpf selftests use several different notations to force function inlining. Standardize to what most of them use, static __always_inline. Suggested-by: Song Liu Signed-off-by: Jiri Benc Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/progs/pyperf.h | 9 +++--- tools/testing/selftests/bpf/progs/strobemeta.h | 36 ++++++++++++---------- tools/testing/selftests/bpf/progs/test_jhash.h | 3 +- tools/testing/selftests/bpf/progs/test_seg6_loop.c | 23 +++++++------- .../selftests/bpf/progs/test_verif_scale2.c | 2 +- 5 files changed, 38 insertions(+), 35 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 6b0781391be5..abf6224649be 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -75,8 +75,7 @@ typedef struct { void* co_name; // PyCodeObject.co_name } FrameData; -static inline __attribute__((__always_inline__)) void* -get_thread_state(void* tls_base, PidData* pidData) +static __always_inline void *get_thread_state(void *tls_base, PidData *pidData) { void* thread_state; int key; @@ -87,8 +86,8 @@ get_thread_state(void* tls_base, PidData* pidData) return thread_state; } -static inline __attribute__((__always_inline__)) bool -get_frame_data(void* frame_ptr, PidData* pidData, FrameData* frame, Symbol* symbol) +static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData, + FrameData *frame, Symbol *symbol) { // read data from PyFrameObject bpf_probe_read(&frame->f_back, @@ -161,7 +160,7 @@ struct bpf_elf_map SEC("maps") stackmap = { .max_elem = 1000, }; -static inline __attribute__((__always_inline__)) int __on_event(struct pt_regs *ctx) +static __always_inline int __on_event(struct pt_regs *ctx) { uint64_t pid_tgid = bpf_get_current_pid_tgid(); pid_t pid = (pid_t)(pid_tgid >> 32); diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 1ff73f60a3e4..553bc3b62e89 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -266,8 +266,8 @@ struct tls_index { uint64_t offset; }; -static inline __attribute__((always_inline)) -void *calc_location(struct strobe_value_loc *loc, void *tls_base) +static __always_inline void *calc_location(struct strobe_value_loc *loc, + void *tls_base) { /* * tls_mode value is: @@ -327,10 +327,10 @@ void *calc_location(struct strobe_value_loc *loc, void *tls_base) : NULL; } -static inline __attribute__((always_inline)) -void read_int_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base, - struct strobe_value_generic *value, - struct strobemeta_payload *data) +static __always_inline void read_int_var(struct strobemeta_cfg *cfg, + size_t idx, void *tls_base, + struct strobe_value_generic *value, + struct strobemeta_payload *data) { void *location = calc_location(&cfg->int_locs[idx], tls_base); if (!location) @@ -342,10 +342,11 @@ void read_int_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base, data->int_vals_set_mask |= (1 << idx); } -static inline __attribute__((always_inline)) -uint64_t read_str_var(struct strobemeta_cfg* cfg, size_t idx, void *tls_base, - struct strobe_value_generic *value, - struct strobemeta_payload *data, void *payload) +static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, + size_t idx, void *tls_base, + struct strobe_value_generic *value, + struct strobemeta_payload *data, + void *payload) { void *location; uint32_t len; @@ -371,10 +372,11 @@ uint64_t read_str_var(struct strobemeta_cfg* cfg, size_t idx, void *tls_base, return len; } -static inline __attribute__((always_inline)) -void *read_map_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base, - struct strobe_value_generic *value, - struct strobemeta_payload* data, void *payload) +static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, + size_t idx, void *tls_base, + struct strobe_value_generic *value, + struct strobemeta_payload *data, + void *payload) { struct strobe_map_descr* descr = &data->map_descrs[idx]; struct strobe_map_raw map; @@ -435,9 +437,9 @@ void *read_map_var(struct strobemeta_cfg *cfg, size_t idx, void *tls_base, * read_strobe_meta returns NULL, if no metadata was read; otherwise returns * pointer to *right after* payload ends */ -static inline __attribute__((always_inline)) -void *read_strobe_meta(struct task_struct* task, - struct strobemeta_payload* data) { +static __always_inline void *read_strobe_meta(struct task_struct *task, + struct strobemeta_payload *data) +{ pid_t pid = bpf_get_current_pid_tgid() >> 32; struct strobe_value_generic value = {0}; struct strobemeta_cfg *cfg; diff --git a/tools/testing/selftests/bpf/progs/test_jhash.h b/tools/testing/selftests/bpf/progs/test_jhash.h index 3d12c11a8d47..c300734d26f6 100644 --- a/tools/testing/selftests/bpf/progs/test_jhash.h +++ b/tools/testing/selftests/bpf/progs/test_jhash.h @@ -1,9 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook +#include typedef unsigned int u32; -static __attribute__((always_inline)) u32 rol32(u32 word, unsigned int shift) +static __always_inline u32 rol32(u32 word, unsigned int shift) { return (word << shift) | (word >> ((-shift) & 31)); } diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c index 463964d79f73..1dbe1d4d467e 100644 --- a/tools/testing/selftests/bpf/progs/test_seg6_loop.c +++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c @@ -54,7 +54,7 @@ struct sr6_tlv_t { unsigned char value[0]; } BPF_PACKET_HEADER; -static __attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb) +static __always_inline struct ip6_srh_t *get_srh(struct __sk_buff *skb) { void *cursor, *data_end; struct ip6_srh_t *srh; @@ -88,9 +88,9 @@ static __attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff return srh; } -static __attribute__((always_inline)) -int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad, - uint32_t old_pad, uint32_t pad_off) +static __always_inline int update_tlv_pad(struct __sk_buff *skb, + uint32_t new_pad, uint32_t old_pad, + uint32_t pad_off) { int err; @@ -118,10 +118,11 @@ int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad, return 0; } -static __attribute__((always_inline)) -int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh, - uint32_t *tlv_off, uint32_t *pad_size, - uint32_t *pad_off) +static __always_inline int is_valid_tlv_boundary(struct __sk_buff *skb, + struct ip6_srh_t *srh, + uint32_t *tlv_off, + uint32_t *pad_size, + uint32_t *pad_off) { uint32_t srh_off, cur_off; int offset_valid = 0; @@ -177,9 +178,9 @@ int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh, return 0; } -static __attribute__((always_inline)) -int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off, - struct sr6_tlv_t *itlv, uint8_t tlv_size) +static __always_inline int add_tlv(struct __sk_buff *skb, + struct ip6_srh_t *srh, uint32_t tlv_off, + struct sr6_tlv_t *itlv, uint8_t tlv_size) { uint32_t srh_off = (char *)srh - (char *)(long)skb->data; uint8_t len_remaining, new_pad; diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c index 77830693eccb..9897150ed516 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c @@ -2,7 +2,7 @@ // Copyright (c) 2019 Facebook #include #include "bpf_helpers.h" -#define ATTR __attribute__((always_inline)) +#define ATTR __always_inline #include "test_jhash.h" SEC("scale90_inline") -- cgit v1.2.3 From 692cbaa99fe446ed30bfb7e53fb2b4cc518331a1 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 2 Jul 2019 09:14:00 -0700 Subject: bpf/tools: sync bpf.h Sync new bpf_tcp_sock fields and new BPF_PROG_TYPE_SOCK_OPS RTT callback. Cc: Eric Dumazet Cc: Priyaranjan Jha Cc: Yuchung Cheng Cc: Soheil Hassas Yeganeh Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/include/uapi/linux/bpf.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a396b516a2b2..cecf42c871d4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1767,6 +1767,7 @@ union bpf_attr { * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out) * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission) * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change) + * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT) * * Therefore, this function can be used to clear a callback flag by * setting the appropriate bit to zero. e.g. to disable the RTO @@ -3069,6 +3070,12 @@ struct bpf_tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. */ + __u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups + * total number of DSACK blocks received + */ + __u32 delivered; /* Total data packets delivered incl. rexmits */ + __u32 delivered_ce; /* Like the above but only ECE marked packets */ + __u32 icsk_retransmits; /* Number of unrecovered [RTO] timeouts */ }; struct bpf_sock_tuple { @@ -3311,7 +3318,8 @@ struct bpf_sock_ops { #define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) #define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) #define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently +#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently * supported cb flags */ @@ -3366,6 +3374,8 @@ enum { BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after * socket transition to LISTEN state. */ + BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. + */ }; /* List of TCP states. There is a build check in net/ipv4/tcp.c to detect -- cgit v1.2.3 From b55873984dab0eda6db00d19db4c43dce5c11d98 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 2 Jul 2019 09:14:01 -0700 Subject: selftests/bpf: test BPF_SOCK_OPS_RTT_CB Make sure the callback is invoked for syn-ack and data packet. Cc: Eric Dumazet Cc: Priyaranjan Jha Cc: Yuchung Cheng Cc: Soheil Hassas Yeganeh Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 3 +- tools/testing/selftests/bpf/progs/tcp_rtt.c | 61 +++++++ tools/testing/selftests/bpf/test_tcp_rtt.c | 254 ++++++++++++++++++++++++++++ 3 files changed, 317 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/tcp_rtt.c create mode 100644 tools/testing/selftests/bpf/test_tcp_rtt.c (limited to 'tools') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index de1754a8f5fe..2620406a53ec 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -27,7 +27,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_cgroup_storage test_select_reuseport test_section_names \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk \ - test_sockopt_multi + test_sockopt_multi test_tcp_rtt BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) TEST_GEN_FILES = $(BPF_OBJ_FILES) @@ -107,6 +107,7 @@ $(OUTPUT)/test_cgroup_attach: cgroup_helpers.c $(OUTPUT)/test_sockopt: cgroup_helpers.c $(OUTPUT)/test_sockopt_sk: cgroup_helpers.c $(OUTPUT)/test_sockopt_multi: cgroup_helpers.c +$(OUTPUT)/test_tcp_rtt: cgroup_helpers.c .PHONY: force diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c new file mode 100644 index 000000000000..233bdcb1659e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "bpf_helpers.h" + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; + +struct tcp_rtt_storage { + __u32 invoked; + __u32 dsack_dups; + __u32 delivered; + __u32 delivered_ce; + __u32 icsk_retransmits; +}; + +struct bpf_map_def SEC("maps") socket_storage_map = { + .type = BPF_MAP_TYPE_SK_STORAGE, + .key_size = sizeof(int), + .value_size = sizeof(struct tcp_rtt_storage), + .map_flags = BPF_F_NO_PREALLOC, +}; +BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct tcp_rtt_storage); + +SEC("sockops") +int _sockops(struct bpf_sock_ops *ctx) +{ + struct tcp_rtt_storage *storage; + struct bpf_tcp_sock *tcp_sk; + int op = (int) ctx->op; + struct bpf_sock *sk; + + sk = ctx->sk; + if (!sk) + return 1; + + storage = bpf_sk_storage_get(&socket_storage_map, sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 1; + + if (op == BPF_SOCK_OPS_TCP_CONNECT_CB) { + bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_RTT_CB_FLAG); + return 1; + } + + if (op != BPF_SOCK_OPS_RTT_CB) + return 1; + + tcp_sk = bpf_tcp_sock(sk); + if (!tcp_sk) + return 1; + + storage->invoked++; + + storage->dsack_dups = tcp_sk->dsack_dups; + storage->delivered = tcp_sk->delivered; + storage->delivered_ce = tcp_sk->delivered_ce; + storage->icsk_retransmits = tcp_sk->icsk_retransmits; + + return 1; +} diff --git a/tools/testing/selftests/bpf/test_tcp_rtt.c b/tools/testing/selftests/bpf/test_tcp_rtt.c new file mode 100644 index 000000000000..90c3862f74a8 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcp_rtt.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "bpf_rlimit.h" +#include "bpf_util.h" +#include "cgroup_helpers.h" + +#define CG_PATH "/tcp_rtt" + +struct tcp_rtt_storage { + __u32 invoked; + __u32 dsack_dups; + __u32 delivered; + __u32 delivered_ce; + __u32 icsk_retransmits; +}; + +static void send_byte(int fd) +{ + char b = 0x55; + + if (write(fd, &b, sizeof(b)) != 1) + error(1, errno, "Failed to send single byte"); +} + +static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked, + __u32 dsack_dups, __u32 delivered, __u32 delivered_ce, + __u32 icsk_retransmits) +{ + int err = 0; + struct tcp_rtt_storage val; + + if (bpf_map_lookup_elem(map_fd, &client_fd, &val) < 0) + error(1, errno, "Failed to read socket storage"); + + if (val.invoked != invoked) { + log_err("%s: unexpected bpf_tcp_sock.invoked %d != %d", + msg, val.invoked, invoked); + err++; + } + + if (val.dsack_dups != dsack_dups) { + log_err("%s: unexpected bpf_tcp_sock.dsack_dups %d != %d", + msg, val.dsack_dups, dsack_dups); + err++; + } + + if (val.delivered != delivered) { + log_err("%s: unexpected bpf_tcp_sock.delivered %d != %d", + msg, val.delivered, delivered); + err++; + } + + if (val.delivered_ce != delivered_ce) { + log_err("%s: unexpected bpf_tcp_sock.delivered_ce %d != %d", + msg, val.delivered_ce, delivered_ce); + err++; + } + + if (val.icsk_retransmits != icsk_retransmits) { + log_err("%s: unexpected bpf_tcp_sock.icsk_retransmits %d != %d", + msg, val.icsk_retransmits, icsk_retransmits); + err++; + } + + return err; +} + +static int connect_to_server(int server_fd) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int fd; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) { + log_err("Failed to create client socket"); + return -1; + } + + if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { + log_err("Failed to get server addr"); + goto out; + } + + if (connect(fd, (const struct sockaddr *)&addr, len) < 0) { + log_err("Fail to connect to server"); + goto out; + } + + return fd; + +out: + close(fd); + return -1; +} + +static int run_test(int cgroup_fd, int server_fd) +{ + struct bpf_prog_load_attr attr = { + .prog_type = BPF_PROG_TYPE_SOCK_OPS, + .file = "./tcp_rtt.o", + .expected_attach_type = BPF_CGROUP_SOCK_OPS, + }; + struct bpf_object *obj; + struct bpf_map *map; + int client_fd; + int prog_fd; + int map_fd; + int err; + + err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); + if (err) { + log_err("Failed to load BPF object"); + return -1; + } + + map = bpf_map__next(NULL, obj); + map_fd = bpf_map__fd(map); + + err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0); + if (err) { + log_err("Failed to attach BPF program"); + goto close_bpf_object; + } + + client_fd = connect_to_server(server_fd); + if (client_fd < 0) { + err = -1; + goto close_bpf_object; + } + + err += verify_sk(map_fd, client_fd, "syn-ack", + /*invoked=*/1, + /*dsack_dups=*/0, + /*delivered=*/1, + /*delivered_ce=*/0, + /*icsk_retransmits=*/0); + + send_byte(client_fd); + + err += verify_sk(map_fd, client_fd, "first payload byte", + /*invoked=*/2, + /*dsack_dups=*/0, + /*delivered=*/2, + /*delivered_ce=*/0, + /*icsk_retransmits=*/0); + + close(client_fd); + +close_bpf_object: + bpf_object__close(obj); + return err; +} + +static int start_server(void) +{ + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + }; + int fd; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) { + log_err("Failed to create server socket"); + return -1; + } + + if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) { + log_err("Failed to bind socket"); + close(fd); + return -1; + } + + return fd; +} + +static void *server_thread(void *arg) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int fd = *(int *)arg; + int client_fd; + + if (listen(fd, 1) < 0) + error(1, errno, "Failed to listed on socket"); + + client_fd = accept(fd, (struct sockaddr *)&addr, &len); + if (client_fd < 0) + error(1, errno, "Failed to accept client"); + + /* Wait for the next connection (that never arrives) + * to keep this thread alive to prevent calling + * close() on client_fd. + */ + if (accept(fd, (struct sockaddr *)&addr, &len) >= 0) + error(1, errno, "Unexpected success in second accept"); + + close(client_fd); + + return NULL; +} + +int main(int args, char **argv) +{ + int server_fd, cgroup_fd; + int err = EXIT_SUCCESS; + pthread_t tid; + + if (setup_cgroup_environment()) + goto cleanup_obj; + + cgroup_fd = create_and_get_cgroup(CG_PATH); + if (cgroup_fd < 0) + goto cleanup_cgroup_env; + + if (join_cgroup(CG_PATH)) + goto cleanup_cgroup; + + server_fd = start_server(); + if (server_fd < 0) { + err = EXIT_FAILURE; + goto cleanup_cgroup; + } + + pthread_create(&tid, NULL, server_thread, (void *)&server_fd); + + if (run_test(cgroup_fd, server_fd)) + err = EXIT_FAILURE; + + close(server_fd); + + printf("test_sockopt_sk: %s\n", + err == EXIT_SUCCESS ? "PASSED" : "FAILED"); + +cleanup_cgroup: + close(cgroup_fd); +cleanup_cgroup_env: + cleanup_cgroup_environment(); +cleanup_obj: + return err; +} -- cgit v1.2.3