From 97c33610acc596f318f3ec8d644ca70aeac30a7f Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Tue, 17 Apr 2018 10:25:20 +0800 Subject: samples/bpf: correct comment in sock_example.c The program runs against loopback interface "lo", not "eth0". Correct the comment. Signed-off-by: Wang Sheng-Hui Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- samples/bpf/sock_example.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c index 6fc6e193ef1b..33a637507c00 100644 --- a/samples/bpf/sock_example.c +++ b/samples/bpf/sock_example.c @@ -9,10 +9,10 @@ * if (value) * (*(u64*)value) += 1; * - * - attaches this program to eth0 raw socket + * - attaches this program to loopback interface "lo" raw socket * * - every second user space reads map[tcp], map[udp], map[icmp] to see - * how many packets of given protocol were seen on eth0 + * how many packets of given protocol were seen on "lo" */ #include #include -- cgit v1.2.3 From 8de0e8ba973f710346f61e52b86df199b20d23b8 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 17 Apr 2018 16:08:06 +0200 Subject: samples/bpf: fix xdp_monitor user output for tracepoint exception The variable rec_i contains an XDP action code not an error. Thus, using err2str() was wrong, it should have been action2str(). 
Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- samples/bpf/xdp_monitor_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'samples/bpf') diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index eec14520d513..894bc64c2cac 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -330,7 +330,7 @@ static void stats_print(struct stats_record *stats_rec, pps = calc_pps_u64(r, p, t); if (pps > 0) printf(fmt1, "Exception", i, - 0.0, pps, err2str(rec_i)); + 0.0, pps, action2str(rec_i)); } pps = calc_pps_u64(&rec->total, &prev->total, t); if (pps > 0) -- cgit v1.2.3 From c6ffd1ff785675c4a572c79f0e55ba5735edbaa0 Mon Sep 17 00:00:00 2001 From: "Nikita V. Shirokov" Date: Tue, 17 Apr 2018 21:42:23 -0700 Subject: bpf: add bpf_xdp_adjust_tail sample prog adding bpf's sample program which is using bpf_xdp_adjust_tail helper by generating ICMPv4 "packet too big" message if ingress packet's size is bigger than 600 bytes Signed-off-by: Nikita V. 
Shirokov Signed-off-by: Daniel Borkmann --- samples/bpf/Makefile | 4 + samples/bpf/xdp_adjust_tail_kern.c | 152 ++++++++++++++++++++++++++++++ samples/bpf/xdp_adjust_tail_user.c | 142 ++++++++++++++++++++++++++++ tools/testing/selftests/bpf/bpf_helpers.h | 2 + 4 files changed, 300 insertions(+) create mode 100644 samples/bpf/xdp_adjust_tail_kern.c create mode 100644 samples/bpf/xdp_adjust_tail_user.c (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 4d6a6edd4bf6..aa8c392e2e52 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -44,6 +44,7 @@ hostprogs-y += xdp_monitor hostprogs-y += xdp_rxq_info hostprogs-y += syscall_tp hostprogs-y += cpustat +hostprogs-y += xdp_adjust_tail # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o @@ -95,6 +96,7 @@ xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o +xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -148,6 +150,7 @@ always += xdp_rxq_info_kern.o always += xdp2skb_meta_kern.o always += syscall_tp_kern.o always += cpustat_kern.o +always += xdp_adjust_tail_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -193,6 +196,7 @@ HOSTLOADLIBES_xdp_monitor += -lelf HOSTLOADLIBES_xdp_rxq_info += -lelf HOSTLOADLIBES_syscall_tp += -lelf HOSTLOADLIBES_cpustat += -lelf +HOSTLOADLIBES_xdp_adjust_tail += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c new file mode 100644 index 000000000000..411fdb21f8bc --- /dev/null +++ 
b/samples/bpf/xdp_adjust_tail_kern.c @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program shows how to use bpf_xdp_adjust_tail() by + * generating ICMPv4 "packet to big" (unreachable/ df bit set frag needed + * to be more preice in case of v4)" where receiving packets bigger then + * 600 bytes. + */ +#define KBUILD_MODNAME "foo" +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +#define DEFAULT_TTL 64 +#define MAX_PCKT_SIZE 600 +#define ICMP_TOOBIG_SIZE 98 +#define ICMP_TOOBIG_PAYLOAD_SIZE 92 + +struct bpf_map_def SEC("maps") icmpcnt = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 1, +}; + +static __always_inline void count_icmp(void) +{ + u64 key = 0; + u64 *icmp_count; + + icmp_count = bpf_map_lookup_elem(&icmpcnt, &key); + if (icmp_count) + *icmp_count += 1; +} + +static __always_inline void swap_mac(void *data, struct ethhdr *orig_eth) +{ + struct ethhdr *eth; + + eth = data; + memcpy(eth->h_source, orig_eth->h_dest, ETH_ALEN); + memcpy(eth->h_dest, orig_eth->h_source, ETH_ALEN); + eth->h_proto = orig_eth->h_proto; +} + +static __always_inline __u16 csum_fold_helper(__u32 csum) +{ + return ~((csum & 0xffff) + (csum >> 16)); +} + +static __always_inline void ipv4_csum(void *data_start, int data_size, + __u32 *csum) +{ + *csum = bpf_csum_diff(0, 0, data_start, data_size, *csum); + *csum = csum_fold_helper(*csum); +} + +static __always_inline int send_icmp4_too_big(struct xdp_md *xdp) +{ + int headroom = (int)sizeof(struct iphdr) + (int)sizeof(struct icmphdr); + + if (bpf_xdp_adjust_head(xdp, 0 - headroom)) + return XDP_DROP; + void *data = (void *)(long)xdp->data; + void *data_end = (void 
*)(long)xdp->data_end; + + if (data + (ICMP_TOOBIG_SIZE + headroom) > data_end) + return XDP_DROP; + + struct iphdr *iph, *orig_iph; + struct icmphdr *icmp_hdr; + struct ethhdr *orig_eth; + __u32 csum = 0; + __u64 off = 0; + + orig_eth = data + headroom; + swap_mac(data, orig_eth); + off += sizeof(struct ethhdr); + iph = data + off; + off += sizeof(struct iphdr); + icmp_hdr = data + off; + off += sizeof(struct icmphdr); + orig_iph = data + off; + icmp_hdr->type = ICMP_DEST_UNREACH; + icmp_hdr->code = ICMP_FRAG_NEEDED; + icmp_hdr->un.frag.mtu = htons(MAX_PCKT_SIZE-sizeof(struct ethhdr)); + icmp_hdr->checksum = 0; + ipv4_csum(icmp_hdr, ICMP_TOOBIG_PAYLOAD_SIZE, &csum); + icmp_hdr->checksum = csum; + iph->ttl = DEFAULT_TTL; + iph->daddr = orig_iph->saddr; + iph->saddr = orig_iph->daddr; + iph->version = 4; + iph->ihl = 5; + iph->protocol = IPPROTO_ICMP; + iph->tos = 0; + iph->tot_len = htons( + ICMP_TOOBIG_SIZE + headroom - sizeof(struct ethhdr)); + iph->check = 0; + csum = 0; + ipv4_csum(iph, sizeof(struct iphdr), &csum); + iph->check = csum; + count_icmp(); + return XDP_TX; +} + + +static __always_inline int handle_ipv4(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + int pckt_size = data_end - data; + int offset; + + if (pckt_size > MAX_PCKT_SIZE) { + offset = pckt_size - ICMP_TOOBIG_SIZE; + if (bpf_xdp_adjust_tail(xdp, 0 - offset)) + return XDP_PASS; + return send_icmp4_too_big(xdp); + } + return XDP_PASS; +} + +SEC("xdp_icmp") +int _xdp_icmp(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct ethhdr *eth = data; + __u16 h_proto; + + if (eth + 1 > data_end) + return XDP_DROP; + + h_proto = eth->h_proto; + + if (h_proto == htons(ETH_P_IP)) + return handle_ipv4(xdp); + else + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c new file mode 
100644 index 000000000000..f621a541b574 --- /dev/null +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_load.h" +#include "libbpf.h" +#include "bpf_util.h" + +#define STATS_INTERVAL_S 2U + +static int ifindex = -1; +static __u32 xdp_flags; + +static void int_exit(int sig) +{ + if (ifindex > -1) + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + exit(0); +} + +/* simple "icmp packet too big sent" counter + */ +static void poll_stats(unsigned int kill_after_s) +{ + time_t started_at = time(NULL); + __u64 value = 0; + int key = 0; + + + while (!kill_after_s || time(NULL) - started_at <= kill_after_s) { + sleep(STATS_INTERVAL_S); + + assert(bpf_map_lookup_elem(map_fd[0], &key, &value) == 0); + + printf("icmp \"packet too big\" sent: %10llu pkts\n", value); + } +} + +static void usage(const char *cmd) +{ + printf("Start a XDP prog which send ICMP \"packet too big\" \n" + "messages if ingress packet is bigger then MAX_SIZE bytes\n"); + printf("Usage: %s [...]\n", cmd); + printf(" -i Interface Index\n"); + printf(" -T Default: 0 (forever)\n"); + printf(" -S use skb-mode\n"); + printf(" -N enforce native mode\n"); + printf(" -h Display this help\n"); +} + +int main(int argc, char **argv) +{ + unsigned char opt_flags[256] = {}; + unsigned int kill_after_s = 0; + const char *optstr = "i:T:SNh"; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + char filename[256]; + int opt; + int i; + + + for (i = 0; i < strlen(optstr); i++) + if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z') + opt_flags[(unsigned char)optstr[i]] = 1; + + while ((opt = 
getopt(argc, argv, optstr)) != -1) { + + switch (opt) { + case 'i': + ifindex = atoi(optarg); + break; + case 'T': + kill_after_s = atoi(optarg); + break; + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; + default: + usage(argv[0]); + return 1; + } + opt_flags[opt] = 0; + } + + for (i = 0; i < strlen(optstr); i++) { + if (opt_flags[(unsigned int)optstr[i]]) { + fprintf(stderr, "Missing argument -%c\n", optstr[i]); + usage(argv[0]); + return 1; + } + } + + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)"); + return 1; + } + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + if (!prog_fd[0]) { + printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + + if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { + printf("link set xdp fd failed\n"); + return 1; + } + + poll_stats(kill_after_s); + + bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); + + return 0; +} diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 50c607014b22..9271576bdc8f 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -132,6 +132,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag (void *) BPF_FUNC_l3_csum_replace; static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = (void *) BPF_FUNC_l4_csum_replace; +static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) = + (void *) BPF_FUNC_csum_diff; static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = (void *) BPF_FUNC_skb_under_cgroup; static int (*bpf_skb_change_head)(void *, int len, int flags) = -- cgit v1.2.3 From 29a36f9eef30b7f008b722a753a84b314f981037 Mon Sep 17 
00:00:00 2001 From: Eyal Birger Date: Tue, 24 Apr 2018 17:50:30 +0300 Subject: samples/bpf: extend test_tunnel_bpf.sh with xfrm state test Add a test for fetching xfrm state parameters from a tc program running on ingress. Signed-off-by: Eyal Birger Signed-off-by: Daniel Borkmann --- samples/bpf/tcbpf2_kern.c | 16 +++++++ samples/bpf/test_tunnel_bpf.sh | 71 +++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 25 ++++++++++- tools/testing/selftests/bpf/bpf_helpers.h | 4 +- 4 files changed, 114 insertions(+), 2 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c index 9a8db7bd6db4..fa260c750fb1 100644 --- a/samples/bpf/tcbpf2_kern.c +++ b/samples/bpf/tcbpf2_kern.c @@ -593,4 +593,20 @@ int _ip6ip6_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +SEC("xfrm_get_state") +int _xfrm_get_state(struct __sk_buff *skb) +{ + struct bpf_xfrm_state x; + char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n"; + int ret; + + ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0); + if (ret < 0) + return TC_ACT_OK; + + bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi), + bpf_ntohl(x.remote_ipv4)); + return TC_ACT_OK; +} + char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh index c265863ccdf9..9c534dc07b36 100755 --- a/samples/bpf/test_tunnel_bpf.sh +++ b/samples/bpf/test_tunnel_bpf.sh @@ -155,6 +155,57 @@ function add_ipip_tunnel { ip addr add dev $DEV 10.1.1.200/24 } +function setup_xfrm_tunnel { + auth=0x$(printf '1%.0s' {1..40}) + enc=0x$(printf '2%.0s' {1..32}) + spi_in_to_out=0x1 + spi_out_to_in=0x2 + # in namespace + # in -> out + ip netns exec at_ns0 \ + ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ + spi $spi_in_to_out reqid 1 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip netns exec at_ns0 \ + ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \ + tmpl src 172.16.1.100 dst 
172.16.1.200 proto esp reqid 1 \ + mode tunnel + # out -> in + ip netns exec at_ns0 \ + ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ + spi $spi_out_to_in reqid 2 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip netns exec at_ns0 \ + ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \ + tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ + mode tunnel + # address & route + ip netns exec at_ns0 \ + ip addr add dev veth0 10.1.1.100/32 + ip netns exec at_ns0 \ + ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \ + src 10.1.1.100 + + # out of namespace + # in -> out + ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ + spi $spi_in_to_out reqid 1 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \ + tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ + mode tunnel + # out -> in + ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ + spi $spi_out_to_in reqid 2 mode tunnel \ + auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc + ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \ + tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ + mode tunnel + # address & route + ip addr add dev veth1 10.1.1.200/32 + ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200 +} + function attach_bpf { DEV=$1 SET_TUNNEL=$2 @@ -278,6 +329,22 @@ function test_ipip { cleanup } +function test_xfrm_tunnel { + config_device + tcpdump -nei veth1 ip & + output=$(mktemp) + cat /sys/kernel/debug/tracing/trace_pipe | tee $output & + setup_xfrm_tunnel + tc qdisc add dev veth1 clsact + tc filter add dev veth1 proto ip ingress bpf da obj tcbpf2_kern.o \ + sec xfrm_get_state + ip netns exec at_ns0 ping -c 1 10.1.1.200 + grep "reqid 1" $output + grep "spi 0x1" $output + grep "remote ip 0xac100164" $output + cleanup +} + function cleanup { set +ex pkill iperf @@ -291,6 +358,8 @@ function cleanup 
{ ip link del geneve11 ip link del erspan11 ip link del ip6erspan11 + ip x s flush + ip x p flush pkill tcpdump pkill cat set -ex @@ -316,4 +385,6 @@ echo "Testing GENEVE tunnel..." test_geneve echo "Testing IPIP tunnel..." test_ipip +echo "Testing IPSec tunnel..." +test_xfrm_tunnel echo "*** PASS ***" diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7f7fbb9d0253..5841ed41b30c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -774,6 +774,15 @@ union bpf_attr { * @xdp_md: pointer to xdp_md * @delta: A negative integer to be added to xdp_md.data_end * Return: 0 on success or negative on error + * + * int bpf_skb_get_xfrm_state(skb, index, xfrm_state, size, flags) + * retrieve XFRM state + * @skb: pointer to skb + * @index: index of the xfrm state in the secpath + * @key: pointer to 'struct bpf_xfrm_state' + * @size: size of 'struct bpf_xfrm_state' + * @flags: room for future extensions + * Return: 0 on success or negative error */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -841,7 +850,8 @@ union bpf_attr { FN(msg_cork_bytes), \ FN(msg_pull_data), \ FN(bind), \ - FN(xdp_adjust_tail), + FN(xdp_adjust_tail), \ + FN(skb_get_xfrm_state), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -946,6 +956,19 @@ struct bpf_tunnel_key { __u32 tunnel_label; }; +/* user accessible mirror of in-kernel xfrm_state. + * new fields can only be added to the end of this structure + */ +struct bpf_xfrm_state { + __u32 reqid; + __u32 spi; /* Stored in network byte order */ + __u16 family; + union { + __u32 remote_ipv4; /* Stored in network byte order */ + __u32 remote_ipv6[4]; /* Stored in network byte order */ + }; +}; + /* Generic BPF return codes which all BPF program types may support. 
* The values are binary compatible with their TC_ACT_* counter-part to * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 9271576bdc8f..69d7b918e66a 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -98,7 +98,9 @@ static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = (void *) BPF_FUNC_bind; static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = (void *) BPF_FUNC_xdp_adjust_tail; - +static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state, + int size, int flags) = + (void *) BPF_FUNC_skb_get_xfrm_state; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions -- cgit v1.2.3 From b05cd74043236e7427a71fc7c59deee4588a7c91 Mon Sep 17 00:00:00 2001 From: William Tu Date: Thu, 26 Apr 2018 14:01:40 -0700 Subject: samples/bpf: remove the bpf tunnel testsuite. 
Move the testsuite to selftests/bpf/{test_tunnel_kern.c, test_tunnel.sh} Signed-off-by: William Tu Signed-off-by: Daniel Borkmann --- samples/bpf/Makefile | 1 - samples/bpf/tcbpf2_kern.c | 612 ----------------------------------------- samples/bpf/test_tunnel_bpf.sh | 390 -------------------------- 3 files changed, 1003 deletions(-) delete mode 100644 samples/bpf/tcbpf2_kern.c delete mode 100755 samples/bpf/test_tunnel_bpf.sh (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index aa8c392e2e52..b853581592fd 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -114,7 +114,6 @@ always += sock_flags_kern.o always += test_probe_write_user_kern.o always += trace_output_kern.o always += tcbpf1_kern.o -always += tcbpf2_kern.o always += tc_l2_redirect_kern.o always += lathist_kern.o always += offwaketime_kern.o diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c deleted file mode 100644 index fa260c750fb1..000000000000 --- a/samples/bpf/tcbpf2_kern.c +++ /dev/null @@ -1,612 +0,0 @@ -/* Copyright (c) 2016 VMware - * Copyright (c) 2016 Facebook - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- */ -#define KBUILD_MODNAME "foo" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "bpf_helpers.h" -#include "bpf_endian.h" - -#define _htonl __builtin_bswap32 -#define ERROR(ret) do {\ - char fmt[] = "ERROR line:%d ret:%d\n";\ - bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \ - } while(0) - -struct geneve_opt { - __be16 opt_class; - u8 type; - u8 length:5; - u8 r3:1; - u8 r2:1; - u8 r1:1; - u8 opt_data[8]; /* hard-coded to 8 byte */ -}; - -struct vxlan_metadata { - u32 gbp; -}; - -SEC("gre_set_tunnel") -int _gre_set_tunnel(struct __sk_buff *skb) -{ - int ret; - struct bpf_tunnel_key key; - - __builtin_memset(&key, 0x0, sizeof(key)); - key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ - key.tunnel_id = 2; - key.tunnel_tos = 0; - key.tunnel_ttl = 64; - - ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), - BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - return TC_ACT_OK; -} - -SEC("gre_get_tunnel") -int _gre_get_tunnel(struct __sk_buff *skb) -{ - int ret; - struct bpf_tunnel_key key; - char fmt[] = "key %d remote ip 0x%x\n"; - - ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4); - return TC_ACT_OK; -} - -SEC("ip6gretap_set_tunnel") -int _ip6gretap_set_tunnel(struct __sk_buff *skb) -{ - struct bpf_tunnel_key key; - int ret; - - __builtin_memset(&key, 0x0, sizeof(key)); - key.remote_ipv6[3] = _htonl(0x11); /* ::11 */ - key.tunnel_id = 2; - key.tunnel_tos = 0; - key.tunnel_ttl = 64; - key.tunnel_label = 0xabcde; - - ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), - BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX | - BPF_F_SEQ_NUMBER); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - return TC_ACT_OK; -} - -SEC("ip6gretap_get_tunnel") -int _ip6gretap_get_tunnel(struct __sk_buff *skb) -{ - char fmt[] 
= "key %d remote ip6 ::%x label %x\n"; - struct bpf_tunnel_key key; - int ret; - - ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), - BPF_F_TUNINFO_IPV6); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv6[3], key.tunnel_label); - - return TC_ACT_OK; -} - -SEC("erspan_set_tunnel") -int _erspan_set_tunnel(struct __sk_buff *skb) -{ - struct bpf_tunnel_key key; - struct erspan_metadata md; - int ret; - - __builtin_memset(&key, 0x0, sizeof(key)); - key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ - key.tunnel_id = 2; - key.tunnel_tos = 0; - key.tunnel_ttl = 64; - - ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - __builtin_memset(&md, 0, sizeof(md)); -#ifdef ERSPAN_V1 - md.version = 1; - md.u.index = bpf_htonl(123); -#else - u8 direction = 1; - u8 hwid = 7; - - md.version = 2; - md.u.md2.dir = direction; - md.u.md2.hwid = hwid & 0xf; - md.u.md2.hwid_upper = (hwid >> 4) & 0x3; -#endif - - ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - return TC_ACT_OK; -} - -SEC("erspan_get_tunnel") -int _erspan_get_tunnel(struct __sk_buff *skb) -{ - char fmt[] = "key %d remote ip 0x%x erspan version %d\n"; - struct bpf_tunnel_key key; - struct erspan_metadata md; - u32 index; - int ret; - - ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv4, md.version); - -#ifdef ERSPAN_V1 - char fmt2[] = "\tindex %x\n"; - - index = bpf_ntohl(md.u.index); - bpf_trace_printk(fmt2, sizeof(fmt2), index); -#else - char fmt2[] = "\tdirection %d hwid %x timestamp %u\n"; - - bpf_trace_printk(fmt2, sizeof(fmt2), - md.u.md2.dir, - 
(md.u.md2.hwid_upper << 4) + md.u.md2.hwid, - bpf_ntohl(md.u.md2.timestamp)); -#endif - - return TC_ACT_OK; -} - -SEC("ip4ip6erspan_set_tunnel") -int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb) -{ - struct bpf_tunnel_key key; - struct erspan_metadata md; - int ret; - - __builtin_memset(&key, 0x0, sizeof(key)); - key.remote_ipv6[3] = _htonl(0x11); - key.tunnel_id = 2; - key.tunnel_tos = 0; - key.tunnel_ttl = 64; - - ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), - BPF_F_TUNINFO_IPV6); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - __builtin_memset(&md, 0, sizeof(md)); - -#ifdef ERSPAN_V1 - md.u.index = htonl(123); - md.version = 1; -#else - u8 direction = 0; - u8 hwid = 17; - - md.version = 2; - md.u.md2.dir = direction; - md.u.md2.hwid = hwid & 0xf; - md.u.md2.hwid_upper = (hwid >> 4) & 0x3; -#endif - - ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - return TC_ACT_OK; -} - -SEC("ip4ip6erspan_get_tunnel") -int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb) -{ - char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n"; - struct bpf_tunnel_key key; - struct erspan_metadata md; - u32 index; - int ret; - - ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv4, md.version); - -#ifdef ERSPAN_V1 - char fmt2[] = "\tindex %x\n"; - - index = bpf_ntohl(md.u.index); - bpf_trace_printk(fmt2, sizeof(fmt2), index); -#else - char fmt2[] = "\tdirection %d hwid %x timestamp %u\n"; - - bpf_trace_printk(fmt2, sizeof(fmt2), - md.u.md2.dir, - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, - bpf_ntohl(md.u.md2.timestamp)); -#endif - - return TC_ACT_OK; -} - -SEC("vxlan_set_tunnel") -int _vxlan_set_tunnel(struct __sk_buff *skb) -{ - 
int ret; - struct bpf_tunnel_key key; - struct vxlan_metadata md; - - __builtin_memset(&key, 0x0, sizeof(key)); - key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ - key.tunnel_id = 2; - key.tunnel_tos = 0; - key.tunnel_ttl = 64; - - ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */ - ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - return TC_ACT_OK; -} - -SEC("vxlan_get_tunnel") -int _vxlan_get_tunnel(struct __sk_buff *skb) -{ - int ret; - struct bpf_tunnel_key key; - struct vxlan_metadata md; - char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n"; - - ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv4, md.gbp); - - return TC_ACT_OK; -} - -SEC("geneve_set_tunnel") -int _geneve_set_tunnel(struct __sk_buff *skb) -{ - int ret, ret2; - struct bpf_tunnel_key key; - struct geneve_opt gopt; - - __builtin_memset(&key, 0x0, sizeof(key)); - key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ - key.tunnel_id = 2; - key.tunnel_tos = 0; - key.tunnel_ttl = 64; - - __builtin_memset(&gopt, 0x0, sizeof(gopt)); - gopt.opt_class = 0x102; /* Open Virtual Networking (OVN) */ - gopt.type = 0x08; - gopt.r1 = 0; - gopt.r2 = 0; - gopt.r3 = 0; - gopt.length = 2; /* 4-byte multiple */ - *(int *) &gopt.opt_data = 0xdeadbeef; - - ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt)); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - return TC_ACT_OK; -} - -SEC("geneve_get_tunnel") -int 
_geneve_get_tunnel(struct __sk_buff *skb) -{ - int ret; - struct bpf_tunnel_key key; - struct geneve_opt gopt; - char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n"; - - ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt)); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - bpf_trace_printk(fmt, sizeof(fmt), - key.tunnel_id, key.remote_ipv4, gopt.opt_class); - return TC_ACT_OK; -} - -SEC("ipip_set_tunnel") -int _ipip_set_tunnel(struct __sk_buff *skb) -{ - struct bpf_tunnel_key key = {}; - void *data = (void *)(long)skb->data; - struct iphdr *iph = data; - struct tcphdr *tcp = data + sizeof(*iph); - void *data_end = (void *)(long)skb->data_end; - int ret; - - /* single length check */ - if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { - ERROR(1); - return TC_ACT_SHOT; - } - - key.tunnel_ttl = 64; - if (iph->protocol == IPPROTO_ICMP) { - key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ - } else { - if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) - return TC_ACT_SHOT; - - if (tcp->dest == htons(5200)) - key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ - else if (tcp->dest == htons(5201)) - key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */ - else - return TC_ACT_SHOT; - } - - ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - return TC_ACT_OK; -} - -SEC("ipip_get_tunnel") -int _ipip_get_tunnel(struct __sk_buff *skb) -{ - int ret; - struct bpf_tunnel_key key; - char fmt[] = "remote ip 0x%x\n"; - - ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4); - return TC_ACT_OK; -} - -SEC("ipip6_set_tunnel") -int _ipip6_set_tunnel(struct __sk_buff *skb) -{ - struct bpf_tunnel_key key = {}; - void *data = (void *)(long)skb->data; - struct iphdr *iph = data; - 
struct tcphdr *tcp = data + sizeof(*iph); - void *data_end = (void *)(long)skb->data_end; - int ret; - - /* single length check */ - if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { - ERROR(1); - return TC_ACT_SHOT; - } - - key.remote_ipv6[0] = _htonl(0x2401db00); - key.tunnel_ttl = 64; - - if (iph->protocol == IPPROTO_ICMP) { - key.remote_ipv6[3] = _htonl(1); - } else { - if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) { - ERROR(iph->protocol); - return TC_ACT_SHOT; - } - - if (tcp->dest == htons(5200)) { - key.remote_ipv6[3] = _htonl(1); - } else if (tcp->dest == htons(5201)) { - key.remote_ipv6[3] = _htonl(2); - } else { - ERROR(tcp->dest); - return TC_ACT_SHOT; - } - } - - ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - return TC_ACT_OK; -} - -SEC("ipip6_get_tunnel") -int _ipip6_get_tunnel(struct __sk_buff *skb) -{ - int ret; - struct bpf_tunnel_key key; - char fmt[] = "remote ip6 %x::%x\n"; - - ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]), - _htonl(key.remote_ipv6[3])); - return TC_ACT_OK; -} - -SEC("ip6ip6_set_tunnel") -int _ip6ip6_set_tunnel(struct __sk_buff *skb) -{ - struct bpf_tunnel_key key = {}; - void *data = (void *)(long)skb->data; - struct ipv6hdr *iph = data; - struct tcphdr *tcp = data + sizeof(*iph); - void *data_end = (void *)(long)skb->data_end; - int ret; - - /* single length check */ - if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { - ERROR(1); - return TC_ACT_SHOT; - } - - key.remote_ipv6[0] = _htonl(0x2401db00); - key.tunnel_ttl = 64; - - if (iph->nexthdr == NEXTHDR_ICMP) { - key.remote_ipv6[3] = _htonl(1); - } else { - if (iph->nexthdr != NEXTHDR_TCP) { - ERROR(iph->nexthdr); - return TC_ACT_SHOT; - } - - if (tcp->dest == htons(5200)) { - key.remote_ipv6[3] = _htonl(1); - } else if (tcp->dest 
== htons(5201)) { - key.remote_ipv6[3] = _htonl(2); - } else { - ERROR(tcp->dest); - return TC_ACT_SHOT; - } - } - - ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - return TC_ACT_OK; -} - -SEC("ip6ip6_get_tunnel") -int _ip6ip6_get_tunnel(struct __sk_buff *skb) -{ - int ret; - struct bpf_tunnel_key key; - char fmt[] = "remote ip6 %x::%x\n"; - - ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } - - bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]), - _htonl(key.remote_ipv6[3])); - return TC_ACT_OK; -} - -SEC("xfrm_get_state") -int _xfrm_get_state(struct __sk_buff *skb) -{ - struct bpf_xfrm_state x; - char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n"; - int ret; - - ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0); - if (ret < 0) - return TC_ACT_OK; - - bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi), - bpf_ntohl(x.remote_ipv4)); - return TC_ACT_OK; -} - -char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh deleted file mode 100755 index 9c534dc07b36..000000000000 --- a/samples/bpf/test_tunnel_bpf.sh +++ /dev/null @@ -1,390 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# In Namespace 0 (at_ns0) using native tunnel -# Overlay IP: 10.1.1.100 -# local 192.16.1.100 remote 192.16.1.200 -# veth0 IP: 172.16.1.100, tunnel dev 00 - -# Out of Namespace using BPF set/get on lwtunnel -# Overlay IP: 10.1.1.200 -# local 172.16.1.200 remote 172.16.1.100 -# veth1 IP: 172.16.1.200, tunnel dev 11 - -function config_device { - ip netns add at_ns0 - ip link add veth0 type veth peer name veth1 - ip link set veth0 netns at_ns0 - ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip link set dev veth1 up mtu 1500 - ip addr add dev veth1 172.16.1.200/24 -} - 
-function add_gre_tunnel { - # in namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local 172.16.1.100 remote 172.16.1.200 - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - - # out of namespace - ip link add dev $DEV type $TYPE key 2 external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -function add_ip6gretap_tunnel { - - # assign ipv6 address - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip addr add dev veth1 ::22/96 - ip link set dev veth1 up - - # in namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \ - local ::11 remote ::22 - - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96 - ip netns exec at_ns0 ip link set dev $DEV_NS up - - # out of namespace - ip link add dev $DEV type $TYPE external - ip addr add dev $DEV 10.1.1.200/24 - ip addr add dev $DEV fc80::200/24 - ip link set dev $DEV up -} - -function add_erspan_tunnel { - # in namespace - if [ "$1" == "v1" ]; then - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local 172.16.1.100 remote 172.16.1.200 \ - erspan_ver 1 erspan 123 - else - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local 172.16.1.100 remote 172.16.1.200 \ - erspan_ver 2 erspan_dir egress erspan_hwid 3 - fi - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - - # out of namespace - ip link add dev $DEV type $TYPE external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -function add_ip6erspan_tunnel { - - # assign ipv6 address - ip netns exec at_ns0 ip addr add ::11/96 dev veth0 - ip netns exec at_ns0 ip link set dev veth0 up - ip addr add dev veth1 ::22/96 - ip link set dev veth1 up - - # in namespace - if [ "$1" == 
"v1" ]; then - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local ::11 remote ::22 \ - erspan_ver 1 erspan 123 - else - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE seq key 2 \ - local ::11 remote ::22 \ - erspan_ver 2 erspan_dir egress erspan_hwid 7 - fi - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 ip link set dev $DEV_NS up - - # out of namespace - ip link add dev $DEV type $TYPE external - ip addr add dev $DEV 10.1.1.200/24 - ip link set dev $DEV up -} - -function add_vxlan_tunnel { - # Set static ARP entry here because iptables set-mark works - # on L3 packet, as a result not applying to ARP packets, - # causing errors at get_tunnel_{key/opt}. - - # in namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE id 2 dstport 4789 gbp remote 172.16.1.200 - ip netns exec at_ns0 ip link set dev $DEV_NS address 52:54:00:d9:01:00 up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00 - ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF - - # out of namespace - ip link add dev $DEV type $TYPE external gbp dstport 4789 - ip link set dev $DEV address 52:54:00:d9:02:00 up - ip addr add dev $DEV 10.1.1.200/24 - arp -s 10.1.1.100 52:54:00:d9:01:00 -} - -function add_geneve_tunnel { - # in namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE id 2 dstport 6081 remote 172.16.1.200 - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 - - # out of namespace - ip link add dev $DEV type $TYPE dstport 6081 external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -function add_ipip_tunnel { - # in namespace - ip netns exec at_ns0 \ - ip link add dev $DEV_NS type $TYPE local 172.16.1.100 remote 172.16.1.200 - ip netns exec at_ns0 ip link set dev $DEV_NS up - ip netns exec at_ns0 ip addr add dev $DEV_NS 
10.1.1.100/24 - - # out of namespace - ip link add dev $DEV type $TYPE external - ip link set dev $DEV up - ip addr add dev $DEV 10.1.1.200/24 -} - -function setup_xfrm_tunnel { - auth=0x$(printf '1%.0s' {1..40}) - enc=0x$(printf '2%.0s' {1..32}) - spi_in_to_out=0x1 - spi_out_to_in=0x2 - # in namespace - # in -> out - ip netns exec at_ns0 \ - ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ - spi $spi_in_to_out reqid 1 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip netns exec at_ns0 \ - ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \ - tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ - mode tunnel - # out -> in - ip netns exec at_ns0 \ - ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ - spi $spi_out_to_in reqid 2 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip netns exec at_ns0 \ - ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \ - tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ - mode tunnel - # address & route - ip netns exec at_ns0 \ - ip addr add dev veth0 10.1.1.100/32 - ip netns exec at_ns0 \ - ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \ - src 10.1.1.100 - - # out of namespace - # in -> out - ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ - spi $spi_in_to_out reqid 1 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \ - tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ - mode tunnel - # out -> in - ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ - spi $spi_out_to_in reqid 2 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \ - tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ - mode tunnel - # address & route - ip addr add dev veth1 10.1.1.200/32 - ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 
10.1.1.200 -} - -function attach_bpf { - DEV=$1 - SET_TUNNEL=$2 - GET_TUNNEL=$3 - tc qdisc add dev $DEV clsact - tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL - tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL -} - -function test_gre { - TYPE=gretap - DEV_NS=gretap00 - DEV=gretap11 - config_device - add_gre_tunnel - attach_bpf $DEV gre_set_tunnel gre_get_tunnel - ping -c 1 10.1.1.100 - ip netns exec at_ns0 ping -c 1 10.1.1.200 - cleanup -} - -function test_ip6gre { - TYPE=ip6gre - DEV_NS=ip6gre00 - DEV=ip6gre11 - config_device - # reuse the ip6gretap function - add_ip6gretap_tunnel - attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel - # underlay - ping6 -c 4 ::11 - # overlay: ipv4 over ipv6 - ip netns exec at_ns0 ping -c 1 10.1.1.200 - ping -c 1 10.1.1.100 - # overlay: ipv6 over ipv6 - ip netns exec at_ns0 ping6 -c 1 fc80::200 - cleanup -} - -function test_ip6gretap { - TYPE=ip6gretap - DEV_NS=ip6gretap00 - DEV=ip6gretap11 - config_device - add_ip6gretap_tunnel - attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel - # underlay - ping6 -c 4 ::11 - # overlay: ipv4 over ipv6 - ip netns exec at_ns0 ping -i .2 -c 1 10.1.1.200 - ping -c 1 10.1.1.100 - # overlay: ipv6 over ipv6 - ip netns exec at_ns0 ping6 -c 1 fc80::200 - cleanup -} - -function test_erspan { - TYPE=erspan - DEV_NS=erspan00 - DEV=erspan11 - config_device - add_erspan_tunnel $1 - attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel - ping -c 1 10.1.1.100 - ip netns exec at_ns0 ping -c 1 10.1.1.200 - cleanup -} - -function test_ip6erspan { - TYPE=ip6erspan - DEV_NS=ip6erspan00 - DEV=ip6erspan11 - config_device - add_ip6erspan_tunnel $1 - attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel - ping6 -c 3 ::11 - ip netns exec at_ns0 ping -c 1 10.1.1.200 - cleanup -} - -function test_vxlan { - TYPE=vxlan - DEV_NS=vxlan00 - DEV=vxlan11 - config_device - add_vxlan_tunnel - attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel - ping -c 1 
10.1.1.100 - ip netns exec at_ns0 ping -c 1 10.1.1.200 - cleanup -} - -function test_geneve { - TYPE=geneve - DEV_NS=geneve00 - DEV=geneve11 - config_device - add_geneve_tunnel - attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel - ping -c 1 10.1.1.100 - ip netns exec at_ns0 ping -c 1 10.1.1.200 - cleanup -} - -function test_ipip { - TYPE=ipip - DEV_NS=ipip00 - DEV=ipip11 - config_device - tcpdump -nei veth1 & - cat /sys/kernel/debug/tracing/trace_pipe & - add_ipip_tunnel - ethtool -K veth1 gso off gro off rx off tx off - ip link set dev veth1 mtu 1500 - attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel - ping -c 1 10.1.1.100 - ip netns exec at_ns0 ping -c 1 10.1.1.200 - ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null - sleep 0.2 - iperf -c 10.1.1.100 -n 5k -p 5200 - cleanup -} - -function test_xfrm_tunnel { - config_device - tcpdump -nei veth1 ip & - output=$(mktemp) - cat /sys/kernel/debug/tracing/trace_pipe | tee $output & - setup_xfrm_tunnel - tc qdisc add dev veth1 clsact - tc filter add dev veth1 proto ip ingress bpf da obj tcbpf2_kern.o \ - sec xfrm_get_state - ip netns exec at_ns0 ping -c 1 10.1.1.200 - grep "reqid 1" $output - grep "spi 0x1" $output - grep "remote ip 0xac100164" $output - cleanup -} - -function cleanup { - set +ex - pkill iperf - ip netns delete at_ns0 - ip link del veth1 - ip link del ipip11 - ip link del gretap11 - ip link del ip6gre11 - ip link del ip6gretap11 - ip link del vxlan11 - ip link del geneve11 - ip link del erspan11 - ip link del ip6erspan11 - ip x s flush - ip x p flush - pkill tcpdump - pkill cat - set -ex -} - -trap cleanup 0 2 3 6 9 -cleanup -echo "Testing GRE tunnel..." -test_gre -echo "Testing IP6GRE tunnel..." -test_ip6gre -echo "Testing IP6GRETAP tunnel..." -test_ip6gretap -echo "Testing ERSPAN tunnel..." -test_erspan v1 -test_erspan v2 -echo "Testing IP6ERSPAN tunnel..." -test_ip6erspan v1 -test_ip6erspan v2 -echo "Testing VXLAN tunnel..." -test_vxlan -echo "Testing GENEVE tunnel..." 
-test_geneve -echo "Testing IPIP tunnel..." -test_ipip -echo "Testing IPSec tunnel..." -test_xfrm_tunnel -echo "*** PASS ***" -- cgit v1.2.3 From c0885f61bbb6a89c35397d3a8fe49c35822cde81 Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Wed, 25 Apr 2018 10:07:13 +0800 Subject: samples, bpf: remove redundant ret assignment in bpf_load_program() 2 redundant ret assignments removed: * 'ret = 1' before the logic 'if (data_maps)', and if any errors jump to label 'done'. No 'ret = 1' needed before the error jump. * After the '/* load programs */' part, if everything goes well, then the BPF code will be loaded and 'ret' set to 0 by load_and_attach(). If something goes wrong, 'ret' is set to non-zero, and the redundant 'ret = 0' after the for clause will make the error skipped. For example, if some BPF code cannot provide supported program types in ELF SEC("unknown"), the for clause will not call load_and_attach() to load the BPF code. 1 should be returned to callers instead of 0. Signed-off-by: Wang Sheng-Hui Signed-off-by: Daniel Borkmann --- samples/bpf/bpf_load.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index bebe4188b4b3..feca497d6afd 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -549,7 +549,6 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) if (nr_maps < 0) { printf("Error: Failed loading ELF maps (errno:%d):%s\n", nr_maps, strerror(-nr_maps)); - ret = 1; goto done; } if (load_maps(map_data, nr_maps, fixup_map)) @@ -615,7 +614,6 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) } } - ret = 0; done: close(fd); return ret; -- cgit v1.2.3 From 28dbf861deacb0321604bf1c5e1ccc34dd215669 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 28 Apr 2018 22:28:13 -0700 Subject: samples/bpf: move common-purpose trace functions to selftests There is no functionality change in this patch. 
The common-purpose trace functions, including perf_event polling and ksym lookup, are moved from trace_output_user.c and bpf_load.c to selftests/bpf/trace_helpers.c so that these functions can be reused later in selftests. Acked-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- samples/bpf/Makefile | 11 +- samples/bpf/bpf_load.c | 63 ---------- samples/bpf/bpf_load.h | 7 -- samples/bpf/offwaketime_user.c | 1 + samples/bpf/sampleip_user.c | 1 + samples/bpf/spintest_user.c | 1 + samples/bpf/trace_event_user.c | 1 + samples/bpf/trace_output_user.c | 110 ++--------------- tools/testing/selftests/bpf/trace_helpers.c | 180 ++++++++++++++++++++++++++++ tools/testing/selftests/bpf/trace_helpers.h | 23 ++++ 10 files changed, 223 insertions(+), 175 deletions(-) create mode 100644 tools/testing/selftests/bpf/trace_helpers.c create mode 100644 tools/testing/selftests/bpf/trace_helpers.h (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index b853581592fd..5e31770ac087 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -49,6 +49,7 @@ hostprogs-y += xdp_adjust_tail # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o +TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o test_lru_dist-objs := test_lru_dist.o $(LIBBPF) sock_example-objs := sock_example.o $(LIBBPF) @@ -65,10 +66,10 @@ tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o -trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o +trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o $(TRACE_HELPERS) lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o -offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o 
-spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o +offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o $(TRACE_HELPERS) +spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o $(TRACE_HELPERS) map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o @@ -82,8 +83,8 @@ xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o -trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o -sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o +trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o $(TRACE_HELPERS) +sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o $(TRACE_HELPERS) tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index feca497d6afd..a27ef3c42e4e 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -648,66 +648,3 @@ void read_trace_pipe(void) } } } - -#define MAX_SYMS 300000 -static struct ksym syms[MAX_SYMS]; -static int sym_cnt; - -static int ksym_cmp(const void *p1, const void *p2) -{ - return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; -} - -int load_kallsyms(void) -{ - FILE *f = fopen("/proc/kallsyms", "r"); - char func[256], buf[256]; - char symbol; - void *addr; - int i = 0; - - if (!f) - return -ENOENT; - - while (!feof(f)) { - if (!fgets(buf, sizeof(buf), f)) - break; - if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) - break; - if (!addr) - continue; - syms[i].addr = (long) addr; - syms[i].name = strdup(func); - i++; - } - sym_cnt = i; - qsort(syms, sym_cnt, sizeof(struct ksym), 
ksym_cmp); - return 0; -} - -struct ksym *ksym_search(long key) -{ - int start = 0, end = sym_cnt; - int result; - - while (start < end) { - size_t mid = start + (end - start) / 2; - - result = key - syms[mid].addr; - if (result < 0) - end = mid; - else if (result > 0) - start = mid + 1; - else - return &syms[mid]; - } - - if (start >= 1 && syms[start - 1].addr < key && - key < syms[start].addr) - /* valid ksym */ - return &syms[start - 1]; - - /* out of range. return _stext */ - return &syms[0]; -} - diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index 453c200b389b..2c3d0b448632 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -54,12 +54,5 @@ int load_bpf_file(char *path); int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map); void read_trace_pipe(void); -struct ksym { - long addr; - char *name; -}; - -int load_kallsyms(void); -struct ksym *ksym_search(long key); int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); #endif diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c index 512f87a5fd20..f06063af9fcb 100644 --- a/samples/bpf/offwaketime_user.c +++ b/samples/bpf/offwaketime_user.c @@ -17,6 +17,7 @@ #include #include "libbpf.h" #include "bpf_load.h" +#include "trace_helpers.h" #define PRINT_RAW_ADDR 0 diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c index 4ed690b907ff..60c2b73d1b4d 100644 --- a/samples/bpf/sampleip_user.c +++ b/samples/bpf/sampleip_user.c @@ -22,6 +22,7 @@ #include "libbpf.h" #include "bpf_load.h" #include "perf-sys.h" +#include "trace_helpers.h" #define DEFAULT_FREQ 99 #define DEFAULT_SECS 5 diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c index 3d736219a31c..8d3e9cfa1909 100644 --- a/samples/bpf/spintest_user.c +++ b/samples/bpf/spintest_user.c @@ -7,6 +7,7 @@ #include #include "libbpf.h" #include "bpf_load.h" +#include "trace_helpers.h" int main(int ac, char **argv) { diff --git a/samples/bpf/trace_event_user.c 
b/samples/bpf/trace_event_user.c index 56f7a259a7c9..1fa1becfa641 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -21,6 +21,7 @@ #include "libbpf.h" #include "bpf_load.h" #include "perf-sys.h" +#include "trace_helpers.h" #define SAMPLE_FREQ 50 diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index ccca1e348017..5e78c2ecd08d 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -21,100 +21,10 @@ #include "libbpf.h" #include "bpf_load.h" #include "perf-sys.h" +#include "trace_helpers.h" static int pmu_fd; -int page_size; -int page_cnt = 8; -volatile struct perf_event_mmap_page *header; - -typedef void (*print_fn)(void *data, int size); - -static int perf_event_mmap(int fd) -{ - void *base; - int mmap_size; - - page_size = getpagesize(); - mmap_size = page_size * (page_cnt + 1); - - base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (base == MAP_FAILED) { - printf("mmap err\n"); - return -1; - } - - header = base; - return 0; -} - -static int perf_event_poll(int fd) -{ - struct pollfd pfd = { .fd = fd, .events = POLLIN }; - - return poll(&pfd, 1, 1000); -} - -struct perf_event_sample { - struct perf_event_header header; - __u32 size; - char data[]; -}; - -static void perf_event_read(print_fn fn) -{ - __u64 data_tail = header->data_tail; - __u64 data_head = header->data_head; - __u64 buffer_size = page_cnt * page_size; - void *base, *begin, *end; - char buf[256]; - - asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ - if (data_head == data_tail) - return; - - base = ((char *)header) + page_size; - - begin = base + data_tail % buffer_size; - end = base + data_head % buffer_size; - - while (begin != end) { - struct perf_event_sample *e; - - e = begin; - if (begin + e->header.size > base + buffer_size) { - long len = base + buffer_size - begin; - - assert(len < e->header.size); - memcpy(buf, begin, len); - memcpy(buf + len, 
base, e->header.size - len); - e = (void *) buf; - begin = base + e->header.size - len; - } else if (begin + e->header.size == base + buffer_size) { - begin = base; - } else { - begin += e->header.size; - } - - if (e->header.type == PERF_RECORD_SAMPLE) { - fn(e->data, e->size); - } else if (e->header.type == PERF_RECORD_LOST) { - struct { - struct perf_event_header header; - __u64 id; - __u64 lost; - } *lost = (void *) e; - printf("lost %lld events\n", lost->lost); - } else { - printf("unknown event type=%d size=%d\n", - e->header.type, e->header.size); - } - } - - __sync_synchronize(); /* smp_mb() */ - header->data_tail = data_head; -} - static __u64 time_get_ns(void) { struct timespec ts; @@ -127,7 +37,7 @@ static __u64 start_time; #define MAX_CNT 100000ll -static void print_bpf_output(void *data, int size) +static int print_bpf_output(void *data, int size) { static __u64 cnt; struct { @@ -138,7 +48,7 @@ static void print_bpf_output(void *data, int size) if (e->cookie != 0x12345678) { printf("BUG pid %llx cookie %llx sized %d\n", e->pid, e->cookie, size); - kill(0, SIGINT); + return PERF_EVENT_ERROR; } cnt++; @@ -146,8 +56,10 @@ static void print_bpf_output(void *data, int size) if (cnt == MAX_CNT) { printf("recv %lld events per sec\n", MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); - kill(0, SIGINT); + return PERF_EVENT_DONE; } + + return PERF_EVENT_CONT; } static void test_bpf_perf_event(void) @@ -170,6 +82,7 @@ int main(int argc, char **argv) { char filename[256]; FILE *f; + int ret; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); @@ -187,10 +100,7 @@ int main(int argc, char **argv) (void) f; start_time = time_get_ns(); - for (;;) { - perf_event_poll(pmu_fd); - perf_event_read(print_bpf_output); - } - - return 0; + ret = perf_event_poller(pmu_fd, print_bpf_output); + kill(0, SIGINT); + return ret; } diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c new file mode 100644 index 
000000000000..ad025bd75f1c --- /dev/null +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "trace_helpers.h" + +#define MAX_SYMS 300000 +static struct ksym syms[MAX_SYMS]; +static int sym_cnt; + +static int ksym_cmp(const void *p1, const void *p2) +{ + return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; +} + +int load_kallsyms(void) +{ + FILE *f = fopen("/proc/kallsyms", "r"); + char func[256], buf[256]; + char symbol; + void *addr; + int i = 0; + + if (!f) + return -ENOENT; + + while (!feof(f)) { + if (!fgets(buf, sizeof(buf), f)) + break; + if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) + break; + if (!addr) + continue; + syms[i].addr = (long) addr; + syms[i].name = strdup(func); + i++; + } + sym_cnt = i; + qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); + return 0; +} + +struct ksym *ksym_search(long key) +{ + int start = 0, end = sym_cnt; + int result; + + while (start < end) { + size_t mid = start + (end - start) / 2; + + result = key - syms[mid].addr; + if (result < 0) + end = mid; + else if (result > 0) + start = mid + 1; + else + return &syms[mid]; + } + + if (start >= 1 && syms[start - 1].addr < key && + key < syms[start].addr) + /* valid ksym */ + return &syms[start - 1]; + + /* out of range. 
return _stext */ + return &syms[0]; +} + +static int page_size; +static int page_cnt = 8; +static volatile struct perf_event_mmap_page *header; + +int perf_event_mmap(int fd) +{ + void *base; + int mmap_size; + + page_size = getpagesize(); + mmap_size = page_size * (page_cnt + 1); + + base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (base == MAP_FAILED) { + printf("mmap err\n"); + return -1; + } + + header = base; + return 0; +} + +static int perf_event_poll(int fd) +{ + struct pollfd pfd = { .fd = fd, .events = POLLIN }; + + return poll(&pfd, 1, 1000); +} + +struct perf_event_sample { + struct perf_event_header header; + __u32 size; + char data[]; +}; + +static int perf_event_read(perf_event_print_fn fn) +{ + __u64 data_tail = header->data_tail; + __u64 data_head = header->data_head; + __u64 buffer_size = page_cnt * page_size; + void *base, *begin, *end; + char buf[256]; + int ret; + + asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ + if (data_head == data_tail) + return PERF_EVENT_CONT; + + base = ((char *)header) + page_size; + + begin = base + data_tail % buffer_size; + end = base + data_head % buffer_size; + + while (begin != end) { + struct perf_event_sample *e; + + e = begin; + if (begin + e->header.size > base + buffer_size) { + long len = base + buffer_size - begin; + + assert(len < e->header.size); + memcpy(buf, begin, len); + memcpy(buf + len, base, e->header.size - len); + e = (void *) buf; + begin = base + e->header.size - len; + } else if (begin + e->header.size == base + buffer_size) { + begin = base; + } else { + begin += e->header.size; + } + + if (e->header.type == PERF_RECORD_SAMPLE) { + ret = fn(e->data, e->size); + if (ret != PERF_EVENT_CONT) + return ret; + } else if (e->header.type == PERF_RECORD_LOST) { + struct { + struct perf_event_header header; + __u64 id; + __u64 lost; + } *lost = (void *) e; + printf("lost %lld events\n", lost->lost); + } else { + printf("unknown event type=%d 
size=%d\n", + e->header.type, e->header.size); + } + } + + __sync_synchronize(); /* smp_mb() */ + header->data_tail = data_head; + return PERF_EVENT_CONT; +} + +int perf_event_poller(int fd, perf_event_print_fn output_fn) +{ + int ret; + + for (;;) { + perf_event_poll(fd); + ret = perf_event_read(output_fn); + if (ret != PERF_EVENT_CONT) + return ret; + } + + return PERF_EVENT_DONE; +} diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h new file mode 100644 index 000000000000..fe3eefd21e86 --- /dev/null +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TRACE_HELPER_H +#define __TRACE_HELPER_H + +struct ksym { + long addr; + char *name; +}; + +int load_kallsyms(void); +struct ksym *ksym_search(long key); + +typedef int (*perf_event_print_fn)(void *data, int size); + +/* return code for perf_event_print_fn */ +#define PERF_EVENT_DONE 0 +#define PERF_EVENT_ERROR -1 +#define PERF_EVENT_CONT -2 + +int perf_event_mmap(int fd); +/* return PERF_EVENT_DONE or PERF_EVENT_ERROR */ +int perf_event_poller(int fd, perf_event_print_fn output_fn); +#endif -- cgit v1.2.3 From 34745aed515c1d6040110ff82378056533518eb6 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sun, 29 Apr 2018 19:27:48 -0700 Subject: samples/bpf: fix kprobe attachment issue on x64 Commit d5a00528b58c ("syscalls/core, syscalls/x86: Rename struct pt_regs-based sys_*() to __x64_sys_*()") renamed a lot of syscall function sys_*() to __x64_sys_*(). This caused several kprobe based samples/bpf tests failing. This patch fixed the problem in bpf_load.c. For x86_64 architecture, function name __x64_sys_*() will be first used for kprobe event creation. If the creation is successful, it will be used. Otherwise, function name sys_*() will be used for kprobe event creation. 
Fixes: d5a00528b58c ("syscalls/core, syscalls/x86: Rename struct pt_regs-based sys_*() to __x64_sys_*()") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- samples/bpf/bpf_load.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index a27ef3c42e4e..da9bccfaf391 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -145,6 +145,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) } if (is_kprobe || is_kretprobe) { + bool need_normal_check = true; + const char *event_prefix = ""; + if (is_kprobe) event += 7; else @@ -158,18 +161,33 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) if (isdigit(*event)) return populate_prog_array(event, fd); - snprintf(buf, sizeof(buf), - "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", - is_kprobe ? 'p' : 'r', event, event); - err = system(buf); - if (err < 0) { - printf("failed to create kprobe '%s' error '%s'\n", - event, strerror(errno)); - return -1; +#ifdef __x86_64__ + if (strncmp(event, "sys_", 4) == 0) { + snprintf(buf, sizeof(buf), + "echo '%c:__x64_%s __x64_%s' >> /sys/kernel/debug/tracing/kprobe_events", + is_kprobe ? 'p' : 'r', event, event); + err = system(buf); + if (err >= 0) { + need_normal_check = false; + event_prefix = "__x64_"; + } + } +#endif + if (need_normal_check) { + snprintf(buf, sizeof(buf), + "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events", + is_kprobe ? 
'p' : 'r', event, event); + err = system(buf); + if (err < 0) { + printf("failed to create kprobe '%s' error '%s'\n", + event, strerror(errno)); + return -1; + } } strcpy(buf, DEBUGFS); strcat(buf, "events/kprobes/"); + strcat(buf, event_prefix); strcat(buf, event); strcat(buf, "/id"); } else if (is_tracepoint) { -- cgit v1.2.3 From b4b8faa1ded7a3bb34db374c692a51cea29f9080 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Wed, 2 May 2018 13:01:36 +0200 Subject: samples/bpf: sample application and documentation for AF_XDP sockets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a sample application for AF_XDP sockets. The application supports three different modes of operation: rxdrop, txonly and l2fwd. To show-case a simple round-robin load-balancing between a set of sockets in an xskmap, set the RR_LB compile time define option to 1 in "xdpsock.h". v2: The entries variable was calculated twice in {umem,xq}_nb_avail. Co-authored-by: Björn Töpel Signed-off-by: Björn Töpel Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov --- Documentation/networking/af_xdp.rst | 297 +++++++++++ Documentation/networking/index.rst | 1 + samples/bpf/Makefile | 4 + samples/bpf/xdpsock.h | 11 + samples/bpf/xdpsock_kern.c | 56 +++ samples/bpf/xdpsock_user.c | 948 ++++++++++++++++++++++++++++++++++++ 6 files changed, 1317 insertions(+) create mode 100644 Documentation/networking/af_xdp.rst create mode 100644 samples/bpf/xdpsock.h create mode 100644 samples/bpf/xdpsock_kern.c create mode 100644 samples/bpf/xdpsock_user.c (limited to 'samples/bpf') diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst new file mode 100644 index 000000000000..91928d9ee4bf --- /dev/null +++ b/Documentation/networking/af_xdp.rst @@ -0,0 +1,297 @@ +.. 
SPDX-License-Identifier: GPL-2.0 + +====== +AF_XDP +====== + +Overview +======== + +AF_XDP is an address family that is optimized for high performance +packet processing. + +This document assumes that the reader is familiar with BPF and XDP. If +not, the Cilium project has an excellent reference guide at +http://cilium.readthedocs.io/en/doc-1.0/bpf/. + +Using the XDP_REDIRECT action from an XDP program, the program can +redirect ingress frames to other XDP enabled netdevs, using the +bpf_redirect_map() function. AF_XDP sockets enable the possibility for +XDP programs to redirect frames to a memory buffer in a user-space +application. + +An AF_XDP socket (XSK) is created with the normal socket() +syscall. Associated with each XSK are two rings: the RX ring and the +TX ring. A socket can receive packets on the RX ring and it can send +packets on the TX ring. These rings are registered and sized with the +setsockopts XDP_RX_RING and XDP_TX_RING, respectively. It is mandatory +to have at least one of these rings for each socket. An RX or TX +descriptor ring points to a data buffer in a memory area called a +UMEM. RX and TX can share the same UMEM so that a packet does not have +to be copied between RX and TX. Moreover, if a packet needs to be kept +for a while due to a possible retransmit, the descriptor that points +to that packet can be changed to point to another and reused right +away. This again avoids copying data. + +The UMEM consists of a number of equally sized frames and each frame +has a unique frame id. A descriptor in one of the rings references a +frame by referencing its frame id. The user space allocates memory for +this UMEM using whatever means it feels is most appropriate (malloc, +mmap, huge pages, etc). This memory area is then registered with the +kernel using the new setsockopt XDP_UMEM_REG. The UMEM also has two +rings: the FILL ring and the COMPLETION ring. 
The fill ring is used by +the application to send down frame ids for the kernel to fill in with +RX packet data. References to these frames will then appear in the RX +ring once each packet has been received. The completion ring, on the +other hand, contains frame ids that the kernel has transmitted +completely and can now be used again by user space, for either TX or +RX. Thus, the frame ids appearing in the completion ring are ids that +were previously transmitted using the TX ring. In summary, the RX and +FILL rings are used for the RX path and the TX and COMPLETION rings +are used for the TX path. + +The socket is then finally bound with a bind() call to a device and a +specific queue id on that device, and it is not until bind is +completed that traffic starts to flow. + +The UMEM can be shared between processes, if desired. If a process +wants to do this, it simply skips the registration of the UMEM and its +corresponding two rings, sets the XDP_SHARED_UMEM flag in the bind +call and submits the XSK of the process it would like to share UMEM +with as well as its own newly created XSK socket. The new process will +then receive frame id references in its own RX ring that point to this +shared UMEM. Note that since the ring structures are single-consumer / +single-producer (for performance reasons), the new process has to +create its own socket with associated RX and TX rings, since it cannot +share this with the other process. This is also the reason that there +is only one set of FILL and COMPLETION rings per UMEM. It is the +responsibility of a single process to handle the UMEM. + +How are packets then distributed from an XDP program to the XSKs? There +is a BPF map called XSKMAP (or BPF_MAP_TYPE_XSKMAP in full). The +user-space application can place an XSK at an arbitrary place in this +map. 
The XDP program can then redirect a packet to a specific index in +this map and at this point XDP validates that the XSK in that map was +indeed bound to that device and ring number. If not, the packet is +dropped. If the map is empty at that index, the packet is also +dropped. This also means that it is currently mandatory to have an XDP +program loaded (and one XSK in the XSKMAP) to be able to get any +traffic to user space through the XSK. + +AF_XDP can operate in two different modes: XDP_SKB and XDP_DRV. If the +driver does not have support for XDP, or XDP_SKB is explicitly chosen +when loading the XDP program, XDP_SKB mode is employed that uses SKBs +together with the generic XDP support and copies out the data to user +space. This is a fallback mode that works for any network device. On the other +hand, if the driver has support for XDP, it will be used by the AF_XDP +code to provide better performance, but there is still a copy of the +data into user space. + +Concepts +======== + +In order to use an AF_XDP socket, a number of associated objects need +to be set up. + +Jonathan Corbet has also written an excellent article on LWN, +"Accelerating networking with AF_XDP". It can be found at +https://lwn.net/Articles/750845/. + +UMEM +---- + +UMEM is a region of virtually contiguous memory, divided into +equal-sized frames. A UMEM is associated with a netdev and a specific +queue id of that netdev. It is created and configured (frame size, +frame headroom, start address and size) by using the XDP_UMEM_REG +setsockopt system call. A UMEM is bound to a netdev and queue id, via +the bind() system call. + +An AF_XDP socket is linked to a single UMEM, but one UMEM can have +multiple AF_XDP sockets. To share a UMEM created via one socket A, +the next socket B can do this by setting the XDP_SHARED_UMEM flag in +struct sockaddr_xdp member sxdp_flags, and passing the file descriptor +of A to struct sockaddr_xdp member sxdp_shared_umem_fd. 
+ +The UMEM has two single-producer/single-consumer rings that are used +to transfer ownership of UMEM frames between the kernel and the +user-space application. + +Rings +----- + +There are four different kinds of rings: Fill, Completion, RX and +TX. All rings are single-producer/single-consumer, so the user-space +application needs explicit synchronization if multiple +processes/threads are reading/writing to them. + +The UMEM uses two rings: Fill and Completion. Each socket associated +with the UMEM must have an RX queue, TX queue or both. Say that there +is a setup with four sockets (all doing TX and RX). Then there will be +one Fill ring, one Completion ring, four TX rings and four RX rings. + +The rings are head(producer)/tail(consumer) based rings. A producer +writes the data ring at the index pointed out by struct xdp_ring +producer member, and then increases the producer index. A consumer reads +the data ring at the index pointed out by struct xdp_ring consumer +member, and then increases the consumer index. + +The rings are configured and created via the _RING setsockopt system +calls and mmapped to user-space using the appropriate offset to mmap() +(XDP_PGOFF_RX_RING, XDP_PGOFF_TX_RING, XDP_UMEM_PGOFF_FILL_RING and +XDP_UMEM_PGOFF_COMPLETION_RING). + +The size of each ring needs to be a power of two. + +UMEM Fill Ring +~~~~~~~~~~~~~~ + +The Fill ring is used to transfer ownership of UMEM frames from +user-space to kernel-space. The UMEM indices are passed in the +ring. As an example, if the UMEM is 64k and each frame is 4k, then the +UMEM has 16 frames and can pass indices between 0 and 15. + +Frames passed to the kernel are used for the ingress path (RX rings). + +The user application produces UMEM indices to this ring. + +UMEM Completion Ring +~~~~~~~~~~~~~~~~~~~~ + +The Completion Ring is used to transfer ownership of UMEM frames from +kernel-space to user-space. Just like the Fill ring, UMEM indices are +used. 
+ +Frames passed from the kernel to user-space are frames that have been +sent (TX ring) and can be used by user-space again. + +The user application consumes UMEM indices from this ring. + + +RX Ring +~~~~~~~ + +The RX ring is the receiving side of a socket. Each entry in the ring +is a struct xdp_desc descriptor. The descriptor contains the UMEM index +(idx), the length of the data (len) and the offset into the frame +(offset). + +If no frames have been passed to the kernel via the Fill ring, no +descriptors will (or can) appear on the RX ring. + +The user application consumes struct xdp_desc descriptors from this +ring. + +TX Ring +~~~~~~~ + +The TX ring is used to send frames. The struct xdp_desc descriptor is +filled (index, length and offset) and passed into the ring. + +To start the transfer a sendmsg() system call is required. This might +be relaxed in the future. + +The user application produces struct xdp_desc descriptors to this +ring. + +XSKMAP / BPF_MAP_TYPE_XSKMAP +---------------------------- + +On the XDP side there is a BPF map type BPF_MAP_TYPE_XSKMAP (XSKMAP) that +is used in conjunction with bpf_redirect_map() to pass the ingress +frame to a socket. + +The user application inserts the socket into the map, via the bpf() +system call. + +Note that if an XDP program tries to redirect to a socket that does +not match the queue configuration and netdev, the frame will be +dropped. E.g. an AF_XDP socket is bound to netdev eth0 and +queue 17. Only the XDP program executing for eth0 and queue 17 will +successfully pass data to the socket. Please refer to the sample +application in samples/bpf/ for an example. + +Usage +===== + +In order to use AF_XDP sockets there are two parts needed. The +user-space application and the XDP program. For a complete setup and +usage example, please refer to the sample application. The user-space +side is xdpsock_user.c and the XDP side xdpsock_kern.c. 
+ +Naive ring dequeue and enqueue could look like this:: + + // typedef struct xdp_rxtx_ring RING; + // typedef struct xdp_umem_ring RING; + + // typedef struct xdp_desc RING_TYPE; + // typedef __u32 RING_TYPE; + + int dequeue_one(RING *ring, RING_TYPE *item) + { + __u32 entries = ring->ptrs.producer - ring->ptrs.consumer; + + if (entries == 0) + return -1; + + // read-barrier! + + *item = ring->desc[ring->ptrs.consumer & (RING_SIZE - 1)]; + ring->ptrs.consumer++; + return 0; + } + + int enqueue_one(RING *ring, const RING_TYPE *item) + { + u32 free_entries = RING_SIZE - (ring->ptrs.producer - ring->ptrs.consumer); + + if (free_entries == 0) + return -1; + + ring->desc[ring->ptrs.producer & (RING_SIZE - 1)] = *item; + + // write-barrier! + + ring->ptrs.producer++; + return 0; + } + + +For a more optimized version, please refer to the sample application. + +Sample application +================== + +There is a xdpsock benchmarking/test application included that +demonstrates how to use AF_XDP sockets with both private and shared +UMEMs. Say that you would like your UDP traffic from port 4242 to end +up in queue 16, that we will enable AF_XDP on. Here, we use ethtool +for this:: + + ethtool -N p3p2 rx-flow-hash udp4 fn + ethtool -N p3p2 flow-type udp4 src-port 4242 dst-port 4242 \ + action 16 + +Running the rxdrop benchmark in XDP_DRV mode can then be done +using:: + + samples/bpf/xdpsock -i p3p2 -q 16 -r -N + +For XDP_SKB mode, use the switch "-S" instead of "-N" and all options +can be displayed with "-h", as usual. + +Credits +======= + +- Björn Töpel (AF_XDP core) +- Magnus Karlsson (AF_XDP core) +- Alexander Duyck +- Alexei Starovoitov +- Daniel Borkmann +- Jesper Dangaard Brouer +- John Fastabend +- Jonathan Corbet (LWN coverage) +- Michael S. 
Tsirkin +- Qi Z Zhang +- Willem de Bruijn + diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index f204eaff657d..cbd9bdd4a79e 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -6,6 +6,7 @@ Contents: .. toctree:: :maxdepth: 2 + af_xdp batman-adv can dpaa2/index diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 5e31770ac087..8e0c7fb6d7cc 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -45,6 +45,7 @@ hostprogs-y += xdp_rxq_info hostprogs-y += syscall_tp hostprogs-y += cpustat hostprogs-y += xdp_adjust_tail +hostprogs-y += xdpsock # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o @@ -98,6 +99,7 @@ xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o +xdpsock-objs := bpf_load.o $(LIBBPF) xdpsock_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -151,6 +153,7 @@ always += xdp2skb_meta_kern.o always += syscall_tp_kern.o always += cpustat_kern.o always += xdp_adjust_tail_kern.o +always += xdpsock_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -197,6 +200,7 @@ HOSTLOADLIBES_xdp_rxq_info += -lelf HOSTLOADLIBES_syscall_tp += -lelf HOSTLOADLIBES_cpustat += -lelf HOSTLOADLIBES_xdp_adjust_tail += -lelf +HOSTLOADLIBES_xdpsock += -lelf -pthread # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/xdpsock.h b/samples/bpf/xdpsock.h new file mode 100644 index 000000000000..533ab81adfa1 --- /dev/null +++ b/samples/bpf/xdpsock.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef XDPSOCK_H_ +#define XDPSOCK_H_ + +/* Power-of-2 number of sockets 
*/ +#define MAX_SOCKS 4 + +/* Round-robin receive */ +#define RR_LB 0 + +#endif /* XDPSOCK_H_ */ diff --git a/samples/bpf/xdpsock_kern.c b/samples/bpf/xdpsock_kern.c new file mode 100644 index 000000000000..d8806c41362e --- /dev/null +++ b/samples/bpf/xdpsock_kern.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#define KBUILD_MODNAME "foo" +#include +#include "bpf_helpers.h" + +#include "xdpsock.h" + +struct bpf_map_def SEC("maps") qidconf_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") xsks_map = { + .type = BPF_MAP_TYPE_XSKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 4, +}; + +struct bpf_map_def SEC("maps") rr_map = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(unsigned int), + .max_entries = 1, +}; + +SEC("xdp_sock") +int xdp_sock_prog(struct xdp_md *ctx) +{ + int *qidconf, key = 0, idx; + unsigned int *rr; + + qidconf = bpf_map_lookup_elem(&qidconf_map, &key); + if (!qidconf) + return XDP_ABORTED; + + if (*qidconf != ctx->rx_queue_index) + return XDP_PASS; + +#if RR_LB /* NB! RR_LB is configured in xdpsock.h */ + rr = bpf_map_lookup_elem(&rr_map, &key); + if (!rr) + return XDP_ABORTED; + + *rr = (*rr + 1) & (MAX_SOCKS - 1); + idx = *rr; +#else + idx = 0; +#endif + + return bpf_redirect_map(&xsks_map, idx, 0); +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c new file mode 100644 index 000000000000..4b8a7cf3e63b --- /dev/null +++ b/samples/bpf/xdpsock_user.c @@ -0,0 +1,948 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2017 - 2018 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_load.h" +#include "bpf_util.h" +#include "libbpf.h" + +#include "xdpsock.h" + +#ifndef SOL_XDP +#define SOL_XDP 283 +#endif + +#ifndef AF_XDP +#define AF_XDP 44 +#endif + +#ifndef PF_XDP +#define PF_XDP AF_XDP +#endif + +#define NUM_FRAMES 131072 +#define FRAME_HEADROOM 0 +#define FRAME_SIZE 2048 +#define NUM_DESCS 1024 +#define BATCH_SIZE 16 + +#define FQ_NUM_DESCS 1024 +#define CQ_NUM_DESCS 1024 + +#define DEBUG_HEXDUMP 0 + +typedef __u32 u32; + +static unsigned long prev_time; + +enum benchmark_type { + BENCH_RXDROP = 0, + BENCH_TXONLY = 1, + BENCH_L2FWD = 2, +}; + +static enum benchmark_type opt_bench = BENCH_RXDROP; +static u32 opt_xdp_flags; +static const char *opt_if = ""; +static int opt_ifindex; +static int opt_queue; +static int opt_poll; +static int opt_shared_packet_buffer; +static int opt_interval = 1; + +struct xdp_umem_uqueue { + u32 cached_prod; + u32 cached_cons; + u32 mask; + u32 size; + struct xdp_umem_ring *ring; +}; + +struct xdp_umem { + char (*frames)[FRAME_SIZE]; + struct xdp_umem_uqueue fq; + struct xdp_umem_uqueue cq; + int fd; +}; + +struct xdp_uqueue { + u32 cached_prod; + u32 cached_cons; + u32 mask; + u32 size; + struct xdp_rxtx_ring *ring; +}; + +struct xdpsock { + struct xdp_uqueue rx; + struct xdp_uqueue tx; + int sfd; + struct xdp_umem *umem; + u32 outstanding_tx; + unsigned long rx_npkts; + unsigned long tx_npkts; + unsigned long prev_rx_npkts; + unsigned long prev_tx_npkts; +}; + +#define MAX_SOCKS 4 +static int num_socks; 
+struct xdpsock *xsks[MAX_SOCKS]; + +static unsigned long get_nsecs(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_sec * 1000000000UL + ts.tv_nsec; +} + +static void dump_stats(void); + +#define lassert(expr) \ + do { \ + if (!(expr)) { \ + fprintf(stderr, "%s:%s:%i: Assertion failed: " \ + #expr ": errno: %d/\"%s\"\n", \ + __FILE__, __func__, __LINE__, \ + errno, strerror(errno)); \ + dump_stats(); \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +#define barrier() __asm__ __volatile__("": : :"memory") +#define u_smp_rmb() barrier() +#define u_smp_wmb() barrier() +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +static const char pkt_data[] = + "\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00" + "\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14" + "\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b" + "\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa"; + +static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb) +{ + u32 free_entries = q->size - (q->cached_prod - q->cached_cons); + + if (free_entries >= nb) + return free_entries; + + /* Refresh the local tail pointer */ + q->cached_cons = q->ring->ptrs.consumer; + + return q->size - (q->cached_prod - q->cached_cons); +} + +static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs) +{ + u32 free_entries = q->cached_cons - q->cached_prod; + + if (free_entries >= ndescs) + return free_entries; + + /* Refresh the local tail pointer */ + q->cached_cons = q->ring->ptrs.consumer + q->size; + return q->cached_cons - q->cached_prod; +} + +static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb) +{ + u32 entries = q->cached_prod - q->cached_cons; + + if (entries == 0) { + q->cached_prod = q->ring->ptrs.producer; + entries = q->cached_prod - q->cached_cons; + } + + return (entries > nb) ? 
nb : entries; +} + +static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs) +{ + u32 entries = q->cached_prod - q->cached_cons; + + if (entries == 0) { + q->cached_prod = q->ring->ptrs.producer; + entries = q->cached_prod - q->cached_cons; + } + + return (entries > ndescs) ? ndescs : entries; +} + +static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq, + struct xdp_desc *d, + size_t nb) +{ + u32 i; + + if (umem_nb_free(fq, nb) < nb) + return -ENOSPC; + + for (i = 0; i < nb; i++) { + u32 idx = fq->cached_prod++ & fq->mask; + + fq->ring->desc[idx] = d[i].idx; + } + + u_smp_wmb(); + + fq->ring->ptrs.producer = fq->cached_prod; + + return 0; +} + +static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d, + size_t nb) +{ + u32 i; + + if (umem_nb_free(fq, nb) < nb) + return -ENOSPC; + + for (i = 0; i < nb; i++) { + u32 idx = fq->cached_prod++ & fq->mask; + + fq->ring->desc[idx] = d[i]; + } + + u_smp_wmb(); + + fq->ring->ptrs.producer = fq->cached_prod; + + return 0; +} + +static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq, + u32 *d, size_t nb) +{ + u32 idx, i, entries = umem_nb_avail(cq, nb); + + u_smp_rmb(); + + for (i = 0; i < entries; i++) { + idx = cq->cached_cons++ & cq->mask; + d[i] = cq->ring->desc[idx]; + } + + if (entries > 0) { + u_smp_wmb(); + + cq->ring->ptrs.consumer = cq->cached_cons; + } + + return entries; +} + +static inline void *xq_get_data(struct xdpsock *xsk, __u32 idx, __u32 off) +{ + lassert(idx < NUM_FRAMES); + return &xsk->umem->frames[idx][off]; +} + +static inline int xq_enq(struct xdp_uqueue *uq, + const struct xdp_desc *descs, + unsigned int ndescs) +{ + struct xdp_rxtx_ring *r = uq->ring; + unsigned int i; + + if (xq_nb_free(uq, ndescs) < ndescs) + return -ENOSPC; + + for (i = 0; i < ndescs; i++) { + u32 idx = uq->cached_prod++ & uq->mask; + + r->desc[idx].idx = descs[i].idx; + r->desc[idx].len = descs[i].len; + r->desc[idx].offset = descs[i].offset; + } + + u_smp_wmb(); + + 
r->ptrs.producer = uq->cached_prod; + return 0; +} + +static inline int xq_enq_tx_only(struct xdp_uqueue *uq, + __u32 idx, unsigned int ndescs) +{ + struct xdp_rxtx_ring *q = uq->ring; + unsigned int i; + + if (xq_nb_free(uq, ndescs) < ndescs) + return -ENOSPC; + + for (i = 0; i < ndescs; i++) { + u32 idx = uq->cached_prod++ & uq->mask; + + q->desc[idx].idx = idx + i; + q->desc[idx].len = sizeof(pkt_data) - 1; + q->desc[idx].offset = 0; + } + + u_smp_wmb(); + + q->ptrs.producer = uq->cached_prod; + return 0; +} + +static inline int xq_deq(struct xdp_uqueue *uq, + struct xdp_desc *descs, + int ndescs) +{ + struct xdp_rxtx_ring *r = uq->ring; + unsigned int idx; + int i, entries; + + entries = xq_nb_avail(uq, ndescs); + + u_smp_rmb(); + + for (i = 0; i < entries; i++) { + idx = uq->cached_cons++ & uq->mask; + descs[i] = r->desc[idx]; + } + + if (entries > 0) { + u_smp_wmb(); + + r->ptrs.consumer = uq->cached_cons; + } + + return entries; +} + +static void swap_mac_addresses(void *data) +{ + struct ether_header *eth = (struct ether_header *)data; + struct ether_addr *src_addr = (struct ether_addr *)ð->ether_shost; + struct ether_addr *dst_addr = (struct ether_addr *)ð->ether_dhost; + struct ether_addr tmp; + + tmp = *src_addr; + *src_addr = *dst_addr; + *dst_addr = tmp; +} + +#if DEBUG_HEXDUMP +static void hex_dump(void *pkt, size_t length, const char *prefix) +{ + int i = 0; + const unsigned char *address = (unsigned char *)pkt; + const unsigned char *line = address; + size_t line_size = 32; + unsigned char c; + + printf("length = %zu\n", length); + printf("%s | ", prefix); + while (length-- > 0) { + printf("%02X ", *address++); + if (!(++i % line_size) || (length == 0 && i % line_size)) { + if (length == 0) { + while (i++ % line_size) + printf("__ "); + } + printf(" | "); /* right close */ + while (line < address) { + c = *line++; + printf("%c", (c < 33 || c == 255) ? 
0x2E : c); + } + printf("\n"); + if (length > 0) + printf("%s | ", prefix); + } + } + printf("\n"); +} +#endif + +static size_t gen_eth_frame(char *frame) +{ + memcpy(frame, pkt_data, sizeof(pkt_data) - 1); + return sizeof(pkt_data) - 1; +} + +static struct xdp_umem *xdp_umem_configure(int sfd) +{ + int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS; + struct xdp_umem_reg mr; + struct xdp_umem *umem; + void *bufs; + + umem = calloc(1, sizeof(*umem)); + lassert(umem); + + lassert(posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */ + NUM_FRAMES * FRAME_SIZE) == 0); + + mr.addr = (__u64)bufs; + mr.len = NUM_FRAMES * FRAME_SIZE; + mr.frame_size = FRAME_SIZE; + mr.frame_headroom = FRAME_HEADROOM; + + lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) == 0); + lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size, + sizeof(int)) == 0); + lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size, + sizeof(int)) == 0); + + umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) + + FQ_NUM_DESCS * sizeof(u32), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, sfd, + XDP_UMEM_PGOFF_FILL_RING); + lassert(umem->fq.ring != MAP_FAILED); + + umem->fq.mask = FQ_NUM_DESCS - 1; + umem->fq.size = FQ_NUM_DESCS; + + umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) + + CQ_NUM_DESCS * sizeof(u32), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, sfd, + XDP_UMEM_PGOFF_COMPLETION_RING); + lassert(umem->cq.ring != MAP_FAILED); + + umem->cq.mask = CQ_NUM_DESCS - 1; + umem->cq.size = CQ_NUM_DESCS; + + umem->frames = (char (*)[FRAME_SIZE])bufs; + umem->fd = sfd; + + if (opt_bench == BENCH_TXONLY) { + int i; + + for (i = 0; i < NUM_FRAMES; i++) + (void)gen_eth_frame(&umem->frames[i][0]); + } + + return umem; +} + +static struct xdpsock *xsk_configure(struct xdp_umem *umem) +{ + struct sockaddr_xdp sxdp = {}; + int sfd, ndescs = NUM_DESCS; + struct xdpsock *xsk; + bool shared = true; + u32 i; + + sfd = socket(PF_XDP, SOCK_RAW, 0); + lassert(sfd 
>= 0); + + xsk = calloc(1, sizeof(*xsk)); + lassert(xsk); + + xsk->sfd = sfd; + xsk->outstanding_tx = 0; + + if (!umem) { + shared = false; + xsk->umem = xdp_umem_configure(sfd); + } else { + xsk->umem = umem; + } + + lassert(setsockopt(sfd, SOL_XDP, XDP_RX_RING, + &ndescs, sizeof(int)) == 0); + lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING, + &ndescs, sizeof(int)) == 0); + + /* Rx */ + xsk->rx.ring = mmap(NULL, + sizeof(struct xdp_ring) + + NUM_DESCS * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, sfd, + XDP_PGOFF_RX_RING); + lassert(xsk->rx.ring != MAP_FAILED); + + if (!shared) { + for (i = 0; i < NUM_DESCS / 2; i++) + lassert(umem_fill_to_kernel(&xsk->umem->fq, &i, 1) + == 0); + } + + /* Tx */ + xsk->tx.ring = mmap(NULL, + sizeof(struct xdp_ring) + + NUM_DESCS * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, sfd, + XDP_PGOFF_TX_RING); + lassert(xsk->tx.ring != MAP_FAILED); + + xsk->rx.mask = NUM_DESCS - 1; + xsk->rx.size = NUM_DESCS; + + xsk->tx.mask = NUM_DESCS - 1; + xsk->tx.size = NUM_DESCS; + + sxdp.sxdp_family = PF_XDP; + sxdp.sxdp_ifindex = opt_ifindex; + sxdp.sxdp_queue_id = opt_queue; + if (shared) { + sxdp.sxdp_flags = XDP_SHARED_UMEM; + sxdp.sxdp_shared_umem_fd = umem->fd; + } + + lassert(bind(sfd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0); + + return xsk; +} + +static void print_benchmark(bool running) +{ + const char *bench_str = "INVALID"; + + if (opt_bench == BENCH_RXDROP) + bench_str = "rxdrop"; + else if (opt_bench == BENCH_TXONLY) + bench_str = "txonly"; + else if (opt_bench == BENCH_L2FWD) + bench_str = "l2fwd"; + + printf("%s:%d %s ", opt_if, opt_queue, bench_str); + if (opt_xdp_flags & XDP_FLAGS_SKB_MODE) + printf("xdp-skb "); + else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE) + printf("xdp-drv "); + else + printf(" "); + + if (opt_poll) + printf("poll() "); + + if (running) { + printf("running..."); + fflush(stdout); + } +} + +static void dump_stats(void) +{ + unsigned long 
now = get_nsecs(); + long dt = now - prev_time; + int i; + + prev_time = now; + + for (i = 0; i < num_socks; i++) { + char *fmt = "%-15s %'-11.0f %'-11lu\n"; + double rx_pps, tx_pps; + + rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) * + 1000000000. / dt; + tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) * + 1000000000. / dt; + + printf("\n sock%d@", i); + print_benchmark(false); + printf("\n"); + + printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts", + dt / 1000000000.); + printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts); + printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts); + + xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts; + xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts; + } +} + +static void *poller(void *arg) +{ + (void)arg; + for (;;) { + sleep(opt_interval); + dump_stats(); + } + + return NULL; +} + +static void int_exit(int sig) +{ + (void)sig; + dump_stats(); + bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); + exit(EXIT_SUCCESS); +} + +static struct option long_options[] = { + {"rxdrop", no_argument, 0, 'r'}, + {"txonly", no_argument, 0, 't'}, + {"l2fwd", no_argument, 0, 'l'}, + {"interface", required_argument, 0, 'i'}, + {"queue", required_argument, 0, 'q'}, + {"poll", no_argument, 0, 'p'}, + {"shared-buffer", no_argument, 0, 's'}, + {"xdp-skb", no_argument, 0, 'S'}, + {"xdp-native", no_argument, 0, 'N'}, + {"interval", required_argument, 0, 'n'}, + {0, 0, 0, 0} +}; + +static void usage(const char *prog) +{ + const char *str = + " Usage: %s [OPTIONS]\n" + " Options:\n" + " -r, --rxdrop Discard all incoming packets (default)\n" + " -t, --txonly Only send packets\n" + " -l, --l2fwd MAC swap L2 forwarding\n" + " -i, --interface=n Run on interface n\n" + " -q, --queue=n Use queue n (default 0)\n" + " -p, --poll Use poll syscall\n" + " -s, --shared-buffer Use shared packet buffer\n" + " -S, --xdp-skb=n Use XDP skb-mod\n" + " -N, --xdp-native=n Enfore XDP native mode\n" + " -n, --interval=n Specify statistics update interval (default 1 sec).\n" + "\n"; + 
fprintf(stderr, str, prog); + exit(EXIT_FAILURE); +} + +static void parse_command_line(int argc, char **argv) +{ + int option_index, c; + + opterr = 0; + + for (;;) { + c = getopt_long(argc, argv, "rtli:q:psSNn:", long_options, + &option_index); + if (c == -1) + break; + + switch (c) { + case 'r': + opt_bench = BENCH_RXDROP; + break; + case 't': + opt_bench = BENCH_TXONLY; + break; + case 'l': + opt_bench = BENCH_L2FWD; + break; + case 'i': + opt_if = optarg; + break; + case 'q': + opt_queue = atoi(optarg); + break; + case 's': + opt_shared_packet_buffer = 1; + break; + case 'p': + opt_poll = 1; + break; + case 'S': + opt_xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'N': + opt_xdp_flags |= XDP_FLAGS_DRV_MODE; + break; + case 'n': + opt_interval = atoi(optarg); + break; + default: + usage(basename(argv[0])); + } + } + + opt_ifindex = if_nametoindex(opt_if); + if (!opt_ifindex) { + fprintf(stderr, "ERROR: interface \"%s\" does not exist\n", + opt_if); + usage(basename(argv[0])); + } +} + +static void kick_tx(int fd) +{ + int ret; + + ret = sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0); + if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN) + return; + lassert(0); +} + +static inline void complete_tx_l2fwd(struct xdpsock *xsk) +{ + u32 descs[BATCH_SIZE]; + unsigned int rcvd; + size_t ndescs; + + if (!xsk->outstanding_tx) + return; + + kick_tx(xsk->sfd); + ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? 
BATCH_SIZE : + xsk->outstanding_tx; + + /* re-add completed Tx buffers */ + rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, ndescs); + if (rcvd > 0) { + umem_fill_to_kernel(&xsk->umem->fq, descs, rcvd); + xsk->outstanding_tx -= rcvd; + xsk->tx_npkts += rcvd; + } +} + +static inline void complete_tx_only(struct xdpsock *xsk) +{ + u32 descs[BATCH_SIZE]; + unsigned int rcvd; + + if (!xsk->outstanding_tx) + return; + + kick_tx(xsk->sfd); + + rcvd = umem_complete_from_kernel(&xsk->umem->cq, descs, BATCH_SIZE); + if (rcvd > 0) { + xsk->outstanding_tx -= rcvd; + xsk->tx_npkts += rcvd; + } +} + +static void rx_drop(struct xdpsock *xsk) +{ + struct xdp_desc descs[BATCH_SIZE]; + unsigned int rcvd, i; + + rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE); + if (!rcvd) + return; + + for (i = 0; i < rcvd; i++) { + u32 idx = descs[i].idx; + + lassert(idx < NUM_FRAMES); +#if DEBUG_HEXDUMP + char *pkt; + char buf[32]; + + pkt = xq_get_data(xsk, idx, descs[i].offset); + sprintf(buf, "idx=%d", idx); + hex_dump(pkt, descs[i].len, buf); +#endif + } + + xsk->rx_npkts += rcvd; + + umem_fill_to_kernel_ex(&xsk->umem->fq, descs, rcvd); +} + +static void rx_drop_all(void) +{ + struct pollfd fds[MAX_SOCKS + 1]; + int i, ret, timeout, nfds = 1; + + memset(fds, 0, sizeof(fds)); + + for (i = 0; i < num_socks; i++) { + fds[i].fd = xsks[i]->sfd; + fds[i].events = POLLIN; + timeout = 1000; /* 1sn */ + } + + for (;;) { + if (opt_poll) { + ret = poll(fds, nfds, timeout); + if (ret <= 0) + continue; + } + + for (i = 0; i < num_socks; i++) + rx_drop(xsks[i]); + } +} + +static void tx_only(struct xdpsock *xsk) +{ + int timeout, ret, nfds = 1; + struct pollfd fds[nfds + 1]; + unsigned int idx = 0; + + memset(fds, 0, sizeof(fds)); + fds[0].fd = xsk->sfd; + fds[0].events = POLLOUT; + timeout = 1000; /* 1sn */ + + for (;;) { + if (opt_poll) { + ret = poll(fds, nfds, timeout); + if (ret <= 0) + continue; + + if (fds[0].fd != xsk->sfd || + !(fds[0].revents & POLLOUT)) + continue; + } + + if 
(xq_nb_free(&xsk->tx, BATCH_SIZE) >= BATCH_SIZE) { + lassert(xq_enq_tx_only(&xsk->tx, idx, BATCH_SIZE) == 0); + + xsk->outstanding_tx += BATCH_SIZE; + idx += BATCH_SIZE; + idx %= NUM_FRAMES; + } + + complete_tx_only(xsk); + } +} + +static void l2fwd(struct xdpsock *xsk) +{ + for (;;) { + struct xdp_desc descs[BATCH_SIZE]; + unsigned int rcvd, i; + int ret; + + for (;;) { + complete_tx_l2fwd(xsk); + + rcvd = xq_deq(&xsk->rx, descs, BATCH_SIZE); + if (rcvd > 0) + break; + } + + for (i = 0; i < rcvd; i++) { + char *pkt = xq_get_data(xsk, descs[i].idx, + descs[i].offset); + + swap_mac_addresses(pkt); +#if DEBUG_HEXDUMP + char buf[32]; + u32 idx = descs[i].idx; + + sprintf(buf, "idx=%d", idx); + hex_dump(pkt, descs[i].len, buf); +#endif + } + + xsk->rx_npkts += rcvd; + + ret = xq_enq(&xsk->tx, descs, rcvd); + lassert(ret == 0); + xsk->outstanding_tx += rcvd; + } +} + +int main(int argc, char **argv) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + char xdp_filename[256]; + int i, ret, key = 0; + pthread_t pt; + + parse_command_line(argc, argv); + + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n", + strerror(errno)); + exit(EXIT_FAILURE); + } + + snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]); + + if (load_bpf_file(xdp_filename)) { + fprintf(stderr, "ERROR: load_bpf_file %s\n", bpf_log_buf); + exit(EXIT_FAILURE); + } + + if (!prog_fd[0]) { + fprintf(stderr, "ERROR: load_bpf_file: \"%s\"\n", + strerror(errno)); + exit(EXIT_FAILURE); + } + + if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd[0], opt_xdp_flags) < 0) { + fprintf(stderr, "ERROR: link set xdp fd failed\n"); + exit(EXIT_FAILURE); + } + + ret = bpf_map_update_elem(map_fd[0], &key, &opt_queue, 0); + if (ret) { + fprintf(stderr, "ERROR: bpf_map_update_elem qidconf\n"); + exit(EXIT_FAILURE); + } + + /* Create sockets... 
*/ + xsks[num_socks++] = xsk_configure(NULL); + +#if RR_LB + for (i = 0; i < MAX_SOCKS - 1; i++) + xsks[num_socks++] = xsk_configure(xsks[0]->umem); +#endif + + /* ...and insert them into the map. */ + for (i = 0; i < num_socks; i++) { + key = i; + ret = bpf_map_update_elem(map_fd[1], &key, &xsks[i]->sfd, 0); + if (ret) { + fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i); + exit(EXIT_FAILURE); + } + } + + signal(SIGINT, int_exit); + signal(SIGTERM, int_exit); + signal(SIGABRT, int_exit); + + setlocale(LC_ALL, ""); + + ret = pthread_create(&pt, NULL, poller, NULL); + lassert(ret == 0); + + prev_time = get_nsecs(); + + if (opt_bench == BENCH_RXDROP) + rx_drop_all(); + else if (opt_bench == BENCH_TXONLY) + tx_only(xsks[0]); + else + l2fwd(xsks[0]); + + return 0; +} -- cgit v1.2.3 From fe616055f78457a0b78e0d3693d1ae26f2d7dab3 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 9 May 2018 20:34:27 -0700 Subject: samples/bpf: Add example of ipv4 and ipv6 forwarding in XDP Simple example of fast-path forwarding. It has a serious flaw in not verifying the egress device index supports XDP forwarding. If the egress device does not support it, packets are dropped. Take this only as a simple example of fast-path forwarding. Signed-off-by: David Ahern Acked-by: David S. 
Miller Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- samples/bpf/Makefile | 4 + samples/bpf/xdp_fwd_kern.c | 115 +++++++++++++++++++++++++ samples/bpf/xdp_fwd_user.c | 136 ++++++++++++++++++++++++++++++ tools/testing/selftests/bpf/bpf_helpers.h | 3 + 4 files changed, 258 insertions(+) create mode 100644 samples/bpf/xdp_fwd_kern.c create mode 100644 samples/bpf/xdp_fwd_user.c (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 8e0c7fb6d7cc..28513d6be1bf 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -46,6 +46,7 @@ hostprogs-y += syscall_tp hostprogs-y += cpustat hostprogs-y += xdp_adjust_tail hostprogs-y += xdpsock +hostprogs-y += xdp_fwd # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o @@ -100,6 +101,7 @@ syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o xdpsock-objs := bpf_load.o $(LIBBPF) xdpsock_user.o +xdp_fwd-objs := bpf_load.o $(LIBBPF) xdp_fwd_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -154,6 +156,7 @@ always += syscall_tp_kern.o always += cpustat_kern.o always += xdp_adjust_tail_kern.o always += xdpsock_kern.o +always += xdp_fwd_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -201,6 +204,7 @@ HOSTLOADLIBES_syscall_tp += -lelf HOSTLOADLIBES_cpustat += -lelf HOSTLOADLIBES_xdp_adjust_tail += -lelf HOSTLOADLIBES_xdpsock += -lelf -pthread +HOSTLOADLIBES_xdp_fwd += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c new file mode 100644 index 000000000000..cdf4fc383cc9 --- /dev/null +++ b/samples/bpf/xdp_fwd_kern.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: 
GPL-2.0 +/* Copyright (c) 2017-18 David Ahern + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#define KBUILD_MODNAME "foo" +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_helpers.h" + +#define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF) + +struct bpf_map_def SEC("maps") tx_port = { + .type = BPF_MAP_TYPE_DEVMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 64, +}; + +static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct bpf_fib_lookup fib_params; + struct ethhdr *eth = data; + int out_index; + u16 h_proto; + u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + __builtin_memset(&fib_params, 0, sizeof(fib_params)); + + h_proto = eth->h_proto; + if (h_proto == htons(ETH_P_IP)) { + struct iphdr *iph = data + nh_off; + + if (iph + 1 > data_end) + return XDP_DROP; + + fib_params.family = AF_INET; + fib_params.tos = iph->tos; + fib_params.l4_protocol = iph->protocol; + fib_params.sport = 0; + fib_params.dport = 0; + fib_params.tot_len = ntohs(iph->tot_len); + fib_params.ipv4_src = iph->saddr; + fib_params.ipv4_dst = iph->daddr; + } else if (h_proto == htons(ETH_P_IPV6)) { + struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src; + struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst; + struct ipv6hdr *iph = data + nh_off; + + if (iph + 1 > data_end) + return XDP_DROP; + + fib_params.family = AF_INET6; + fib_params.flowlabel = 
*(__be32 *)iph & IPV6_FLOWINFO_MASK; + fib_params.l4_protocol = iph->nexthdr; + fib_params.sport = 0; + fib_params.dport = 0; + fib_params.tot_len = ntohs(iph->payload_len); + *src = iph->saddr; + *dst = iph->daddr; + } else { + return XDP_PASS; + } + + fib_params.ifindex = ctx->ingress_ifindex; + + out_index = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags); + + /* verify egress index has xdp support + * TO-DO bpf_map_lookup_elem(&tx_port, &key) fails with + * cannot pass map_type 14 into func bpf_map_lookup_elem#1: + * NOTE: without verification that egress index supports XDP + * forwarding packets are dropped. + */ + if (out_index > 0) { + memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); + memcpy(eth->h_source, fib_params.smac, ETH_ALEN); + return bpf_redirect_map(&tx_port, out_index, 0); + } + + return XDP_PASS; +} + +SEC("xdp_fwd") +int xdp_fwd_prog(struct xdp_md *ctx) +{ + return xdp_fwd_flags(ctx, 0); +} + +SEC("xdp_fwd_direct") +int xdp_fwd_direct_prog(struct xdp_md *ctx) +{ + return xdp_fwd_flags(ctx, BPF_FIB_LOOKUP_DIRECT); +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c new file mode 100644 index 000000000000..9c6606f57126 --- /dev/null +++ b/samples/bpf/xdp_fwd_user.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2017-18 David Ahern + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bpf_load.h" +#include "bpf_util.h" +#include "libbpf.h" + + +static int do_attach(int idx, int fd, const char *name) +{ + int err; + + err = bpf_set_link_xdp_fd(idx, fd, 0); + if (err < 0) + printf("ERROR: failed to attach program to %s\n", name); + + return err; +} + +static int do_detach(int idx, const char *name) +{ + int err; + + err = bpf_set_link_xdp_fd(idx, -1, 0); + if (err < 0) + printf("ERROR: failed to detach program from %s\n", name); + + return err; +} + +static void usage(const char *prog) +{ + fprintf(stderr, + "usage: %s [OPTS] interface-list\n" + "\nOPTS:\n" + " -d detach program\n" + " -D direct table lookups (skip fib rules)\n", + prog); +} + +int main(int argc, char **argv) +{ + char filename[PATH_MAX]; + int opt, i, idx, err; + int prog_id = 0; + int attach = 1; + int ret = 0; + + while ((opt = getopt(argc, argv, ":dD")) != -1) { + switch (opt) { + case 'd': + attach = 0; + break; + case 'D': + prog_id = 1; + break; + default: + usage(basename(argv[0])); + return 1; + } + } + + if (optind == argc) { + usage(basename(argv[0])); + return 1; + } + + if (attach) { + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (access(filename, O_RDONLY) < 0) { + printf("error accessing file %s: %s\n", + filename, strerror(errno)); + return 1; + } + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + if (!prog_fd[prog_id]) { + printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + } + if (attach) { + for (i = 1; i < 64; ++i) + bpf_map_update_elem(map_fd[0], &i, &i, 0); + } + + for (i = optind; i < argc; ++i) { + idx = if_nametoindex(argv[i]); + if (!idx) + idx = strtoul(argv[i], NULL, 0); + + if (!idx) { + fprintf(stderr, "Invalid arg\n"); + return 1; + } + if (!attach) { + err = do_detach(idx, argv[i]); + if (err) + ret = err; + } else { + err = do_attach(idx, 
prog_fd[prog_id], argv[i]); + if (err) + ret = err; + } + } + + return ret; +} diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 265f8e0e8ada..2375d06c706b 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -103,6 +103,9 @@ static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state, (void *) BPF_FUNC_skb_get_xfrm_state; static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) = (void *) BPF_FUNC_get_stack; +static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params, + int plen, __u32 flags) = + (void *) BPF_FUNC_fib_lookup; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions -- cgit v1.2.3 From 74662ea5d41683e7ff723c35649b0192a8e6ba8f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 10 May 2018 10:24:38 -0700 Subject: samples: bpf: rename struct bpf_map_def to avoid conflict with libbpf Both tools/lib/bpf/libbpf.h and samples/bpf/bpf_load.h define their own version of struct bpf_map_def. The version in bpf_load.h has more fields. libbpf does not support inner maps and its definition of struct bpf_map_def lacks the related fields. Rename the definition in bpf_load.h (samples/bpf) to avoid conflicts. 
Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- samples/bpf/bpf_load.c | 10 +++++----- samples/bpf/bpf_load.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index da9bccfaf391..a6b290de5632 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -420,7 +420,7 @@ static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx, /* Keeping compatible with ELF maps section changes * ------------------------------------------------ - * The program size of struct bpf_map_def is known by loader + * The program size of struct bpf_load_map_def is known by loader * code, but struct stored in ELF file can be different. * * Unfortunately sym[i].st_size is zero. To calculate the @@ -429,7 +429,7 @@ static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx, * symbols. */ map_sz_elf = data_maps->d_size / nr_maps; - map_sz_copy = sizeof(struct bpf_map_def); + map_sz_copy = sizeof(struct bpf_load_map_def); if (map_sz_elf < map_sz_copy) { /* * Backward compat, loading older ELF file with @@ -448,8 +448,8 @@ static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx, /* Memcpy relevant part of ELF maps data to loader maps */ for (i = 0; i < nr_maps; i++) { + struct bpf_load_map_def *def; unsigned char *addr, *end; - struct bpf_map_def *def; const char *map_name; size_t offset; @@ -464,9 +464,9 @@ static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx, /* Symbol value is offset into ELF maps section data area */ offset = sym[i].st_value; - def = (struct bpf_map_def *)(data_maps->d_buf + offset); + def = (struct bpf_load_map_def *)(data_maps->d_buf + offset); maps[i].elf_offset = offset; - memset(&maps[i].def, 0, sizeof(struct bpf_map_def)); + memset(&maps[i].def, 0, sizeof(struct bpf_load_map_def)); memcpy(&maps[i].def, def, map_sz_copy); /* Verify no newer features 
were requested */ diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index 2c3d0b448632..f9da59bca0cc 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -7,7 +7,7 @@ #define MAX_MAPS 32 #define MAX_PROGS 32 -struct bpf_map_def { +struct bpf_load_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; @@ -21,7 +21,7 @@ struct bpf_map_data { int fd; char *name; size_t elf_offset; - struct bpf_map_def def; + struct bpf_load_map_def def; }; typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx); -- cgit v1.2.3 From 5f9380572b4bb24f60cd492b17331db6ee34a516 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 10 May 2018 10:24:39 -0700 Subject: samples: bpf: compile and link against full libbpf samples/bpf currently cherry-picks object files from tools/lib/bpf to link against. Just compile the full library and link statically against it. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- samples/bpf/Makefile | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 28513d6be1bf..79cdb66a5ea7 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -49,7 +49,7 @@ hostprogs-y += xdpsock hostprogs-y += xdp_fwd # Libbpf dependencies -LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o +LIBBPF := ../../tools/lib/bpf/libbpf.a CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o @@ -74,10 +74,10 @@ offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o $(TRACE_HELPERS) spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o $(TRACE_HELPERS) map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o -test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o -test_cgrp2_attach-objs := $(LIBBPF) 
test_cgrp2_attach.o -test_cgrp2_attach2-objs := $(LIBBPF) test_cgrp2_attach2.o $(CGROUP_HELPERS) -test_cgrp2_sock-objs := $(LIBBPF) test_cgrp2_sock.o +test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o $(LIBBPF) +test_cgrp2_attach-objs := test_cgrp2_attach.o $(LIBBPF) +test_cgrp2_attach2-objs := test_cgrp2_attach2.o $(LIBBPF) $(CGROUP_HELPERS) +test_cgrp2_sock-objs := test_cgrp2_sock.o $(LIBBPF) test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o # reuse xdp1 source intentionally @@ -91,7 +91,7 @@ tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o -per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o +per_socket_stats_example-objs := cookie_uid_helper_example.o $(LIBBPF) xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o @@ -165,6 +165,8 @@ HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include HOSTCFLAGS += -I$(srctree)/tools/perf HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable +HOSTLOADLIBES_test_lru_dist += -lelf +HOSTLOADLIBES_sock_example += -lelf HOSTLOADLIBES_fds_example += -lelf HOSTLOADLIBES_sockex1 += -lelf HOSTLOADLIBES_sockex2 += -lelf @@ -176,6 +178,10 @@ HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex6 += -lelf HOSTLOADLIBES_tracex7 += -lelf +HOSTLOADLIBES_test_cgrp2_array_pin += -lelf +HOSTLOADLIBES_test_cgrp2_attach += -lelf +HOSTLOADLIBES_test_cgrp2_attach2 += -lelf +HOSTLOADLIBES_test_cgrp2_sock += -lelf HOSTLOADLIBES_test_cgrp2_sock2 += -lelf HOSTLOADLIBES_load_sock_ops += -lelf HOSTLOADLIBES_test_probe_write_user += -lelf @@ -195,6 +201,7 
@@ HOSTLOADLIBES_tc_l2_redirect += -l elf HOSTLOADLIBES_lwt_len_hist += -l elf HOSTLOADLIBES_xdp_tx_iptunnel += -lelf HOSTLOADLIBES_test_map_in_map += -lelf +HOSTLOADLIBES_per_socket_stats_example += -lelf HOSTLOADLIBES_xdp_redirect += -lelf HOSTLOADLIBES_xdp_redirect_map += -lelf HOSTLOADLIBES_xdp_redirect_cpu += -lelf @@ -226,7 +233,7 @@ clean: @rm -f *~ $(LIBBPF): FORCE - $(MAKE) -C $(dir $@) $(notdir $@) + $(MAKE) -C $(dir $@) $(obj)/syscall_nrs.s: $(src)/syscall_nrs.c $(call if_changed_dep,cc_s_c) -- cgit v1.2.3 From d0cabbb021bee5c4b831a0235af9534ad07f8d3d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 10 May 2018 10:24:40 -0700 Subject: tools: bpf: move the event reading loop to libbpf There are two copies of event reading loop - in bpftool and trace_helpers "library". Consolidate them and move the code to libbpf. Return codes from trace_helpers are kept, but renamed to include LIBBPF prefix. Suggested-by: Alexei Starovoitov Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- samples/bpf/Makefile | 8 +++ samples/bpf/trace_output_user.c | 6 +- tools/bpf/bpftool/map_perf_ring.c | 66 +++++----------------- tools/lib/bpf/Makefile | 2 +- tools/lib/bpf/libbpf.c | 61 ++++++++++++++++++++ tools/lib/bpf/libbpf.h | 13 +++++ tools/testing/selftests/bpf/Makefile | 2 +- tools/testing/selftests/bpf/test_progs.c | 6 +- tools/testing/selftests/bpf/trace_helpers.c | 87 ++++++++++------------------- tools/testing/selftests/bpf/trace_helpers.h | 11 ++-- 10 files changed, 139 insertions(+), 123 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 79cdb66a5ea7..8ce72d211c3e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -165,6 +165,14 @@ HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include HOSTCFLAGS += -I$(srctree)/tools/perf HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable +HOSTCFLAGS_trace_helpers.o += 
-I$(srctree)/tools/lib/bpf/ + +HOSTCFLAGS_trace_output_user.o += -I$(srctree)/tools/lib/bpf/ +HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/ +HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/ +HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/ +HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/ + HOSTLOADLIBES_test_lru_dist += -lelf HOSTLOADLIBES_sock_example += -lelf HOSTLOADLIBES_fds_example += -lelf diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index 5e78c2ecd08d..da98be721001 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -48,7 +48,7 @@ static int print_bpf_output(void *data, int size) if (e->cookie != 0x12345678) { printf("BUG pid %llx cookie %llx sized %d\n", e->pid, e->cookie, size); - return PERF_EVENT_ERROR; + return LIBBPF_PERF_EVENT_ERROR; } cnt++; @@ -56,10 +56,10 @@ static int print_bpf_output(void *data, int size) if (cnt == MAX_CNT) { printf("recv %lld events per sec\n", MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); - return PERF_EVENT_DONE; + return LIBBPF_PERF_EVENT_DONE; } - return PERF_EVENT_CONT; + return LIBBPF_PERF_EVENT_CONT; } static void test_bpf_perf_event(void) diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 9ae4bb8a2cad..1832100d1b27 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -50,14 +50,15 @@ static void int_exit(int signo) stop = true; } -static void -print_bpf_output(struct event_ring_info *ring, struct perf_event_sample *e) +static enum bpf_perf_event_ret print_bpf_output(void *event, void *priv) { + struct event_ring_info *ring = priv; + struct perf_event_sample *e = event; struct { struct perf_event_header header; __u64 id; __u64 lost; - } *lost = (void *)e; + } *lost = event; if (json_output) { jsonw_start_object(json_wtr); @@ -96,60 +97,23 @@ print_bpf_output(struct event_ring_info *ring, struct perf_event_sample *e) 
e->header.type, e->header.size); } } + + return LIBBPF_PERF_EVENT_CONT; } static void perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len) { - volatile struct perf_event_mmap_page *header = ring->mem; - __u64 buffer_size = MMAP_PAGE_CNT * get_page_size(); - __u64 data_tail = header->data_tail; - __u64 data_head = header->data_head; - void *base, *begin, *end; - - asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ - if (data_head == data_tail) - return; - - base = ((char *)header) + get_page_size(); - - begin = base + data_tail % buffer_size; - end = base + data_head % buffer_size; - - while (begin != end) { - struct perf_event_sample *e; - - e = begin; - if (begin + e->header.size > base + buffer_size) { - long len = base + buffer_size - begin; - - if (*buf_len < e->header.size) { - free(*buf); - *buf = malloc(e->header.size); - if (!*buf) { - fprintf(stderr, - "can't allocate memory"); - stop = true; - return; - } - *buf_len = e->header.size; - } - - memcpy(*buf, begin, len); - memcpy(*buf + len, base, e->header.size - len); - e = (void *)*buf; - begin = base + e->header.size - len; - } else if (begin + e->header.size == base + buffer_size) { - begin = base; - } else { - begin += e->header.size; - } - - print_bpf_output(ring, e); + enum bpf_perf_event_ret ret; + + ret = bpf_perf_event_read_simple(ring->mem, + MMAP_PAGE_CNT * get_page_size(), + get_page_size(), buf, buf_len, + print_bpf_output, ring); + if (ret != LIBBPF_PERF_EVENT_CONT) { + fprintf(stderr, "perf read loop failed with %d\n", ret); + stop = true; } - - __sync_synchronize(); /* smp_mb() */ - header->data_tail = data_head; } static int perf_mmap_size(void) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index e6d5f8d1477f..f3fab4af4260 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -69,7 +69,7 @@ FEATURE_USER = .libbpf FEATURE_TESTS = libelf libelf-getphdrnum libelf-mmap bpf FEATURE_DISPLAY = libelf bpf -INCLUDES = -I. 
-I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi +INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi -I$(srctree)/tools/perf FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) check_feat := 1 diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7bcdca13083a..ce96f1fe3f37 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -2210,3 +2211,63 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, *prog_fd = bpf_program__fd(first_prog); return 0; } + +enum bpf_perf_event_ret +bpf_perf_event_read_simple(void *mem, unsigned long size, + unsigned long page_size, void **buf, size_t *buf_len, + bpf_perf_event_print_t fn, void *priv) +{ + volatile struct perf_event_mmap_page *header = mem; + __u64 data_tail = header->data_tail; + __u64 data_head = header->data_head; + void *base, *begin, *end; + int ret; + + asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ + if (data_head == data_tail) + return LIBBPF_PERF_EVENT_CONT; + + base = ((char *)header) + page_size; + + begin = base + data_tail % size; + end = base + data_head % size; + + while (begin != end) { + struct perf_event_header *ehdr; + + ehdr = begin; + if (begin + ehdr->size > base + size) { + long len = base + size - begin; + + if (*buf_len < ehdr->size) { + free(*buf); + *buf = malloc(ehdr->size); + if (!*buf) { + ret = LIBBPF_PERF_EVENT_ERROR; + break; + } + *buf_len = ehdr->size; + } + + memcpy(*buf, begin, len); + memcpy(*buf + len, base, ehdr->size - len); + ehdr = (void *)*buf; + begin = base + ehdr->size - len; + } else if (begin + ehdr->size == base + size) { + begin = base; + } else { + begin += ehdr->size; + } + + ret = fn(ehdr, priv); + if (ret != LIBBPF_PERF_EVENT_CONT) + break; + + data_tail += ehdr->size; + } + + __sync_synchronize(); /* smp_mb() */ + 
header->data_tail = data_tail; + + return ret; +} diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 197f9ce2248c..ce681097584e 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -267,4 +267,17 @@ int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd); int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); + +enum bpf_perf_event_ret { + LIBBPF_PERF_EVENT_DONE = 0, + LIBBPF_PERF_EVENT_ERROR = -1, + LIBBPF_PERF_EVENT_CONT = -2, +}; + +typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(void *event, + void *priv); +int bpf_perf_event_read_simple(void *mem, unsigned long size, + unsigned long page_size, + void **buf, size_t *buf_len, + bpf_perf_event_print_t fn, void *priv); #endif diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 79d29d6cc719..438d4f93875b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -10,7 +10,7 @@ ifneq ($(wildcard $(GENHDR)),) GENFLAGS := -DHAVE_GENHDR endif -CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include +CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include LDLIBS += -lcap -lelf -lrt -lpthread TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index ed197eef1cfc..f7731973ec68 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1337,12 +1337,12 @@ static int get_stack_print_output(void *data, int size) good_user_stack = true; } if (!good_kern_stack || !good_user_stack) - return PERF_EVENT_ERROR; + return LIBBPF_PERF_EVENT_ERROR; if (cnt == MAX_CNT_RAWTP) - return PERF_EVENT_DONE; + return LIBBPF_PERF_EVENT_DONE; - return PERF_EVENT_CONT; + return LIBBPF_PERF_EVENT_CONT; } static void test_get_stack_raw_tp(void) diff --git 
a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index ad025bd75f1c..8fb4fe8686e4 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -74,7 +74,7 @@ struct ksym *ksym_search(long key) static int page_size; static int page_cnt = 8; -static volatile struct perf_event_mmap_page *header; +static struct perf_event_mmap_page *header; int perf_event_mmap(int fd) { @@ -107,74 +107,47 @@ struct perf_event_sample { char data[]; }; -static int perf_event_read(perf_event_print_fn fn) +static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv) { - __u64 data_tail = header->data_tail; - __u64 data_head = header->data_head; - __u64 buffer_size = page_cnt * page_size; - void *base, *begin, *end; - char buf[256]; + struct perf_event_sample *e = event; + perf_event_print_fn fn = priv; int ret; - asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ - if (data_head == data_tail) - return PERF_EVENT_CONT; - - base = ((char *)header) + page_size; - - begin = base + data_tail % buffer_size; - end = base + data_head % buffer_size; - - while (begin != end) { - struct perf_event_sample *e; - - e = begin; - if (begin + e->header.size > base + buffer_size) { - long len = base + buffer_size - begin; - - assert(len < e->header.size); - memcpy(buf, begin, len); - memcpy(buf + len, base, e->header.size - len); - e = (void *) buf; - begin = base + e->header.size - len; - } else if (begin + e->header.size == base + buffer_size) { - begin = base; - } else { - begin += e->header.size; - } - - if (e->header.type == PERF_RECORD_SAMPLE) { - ret = fn(e->data, e->size); - if (ret != PERF_EVENT_CONT) - return ret; - } else if (e->header.type == PERF_RECORD_LOST) { - struct { - struct perf_event_header header; - __u64 id; - __u64 lost; - } *lost = (void *) e; - printf("lost %lld events\n", lost->lost); - } else { - printf("unknown event type=%d size=%d\n", - 
e->header.type, e->header.size); - } + if (e->header.type == PERF_RECORD_SAMPLE) { + ret = fn(e->data, e->size); + if (ret != LIBBPF_PERF_EVENT_CONT) + return ret; + } else if (e->header.type == PERF_RECORD_LOST) { + struct { + struct perf_event_header header; + __u64 id; + __u64 lost; + } *lost = (void *) e; + printf("lost %lld events\n", lost->lost); + } else { + printf("unknown event type=%d size=%d\n", + e->header.type, e->header.size); } - __sync_synchronize(); /* smp_mb() */ - header->data_tail = data_head; - return PERF_EVENT_CONT; + return LIBBPF_PERF_EVENT_CONT; } int perf_event_poller(int fd, perf_event_print_fn output_fn) { - int ret; + enum bpf_perf_event_ret ret; + void *buf = NULL; + size_t len = 0; for (;;) { perf_event_poll(fd); - ret = perf_event_read(output_fn); - if (ret != PERF_EVENT_CONT) - return ret; + ret = bpf_perf_event_read_simple(header, page_cnt * page_size, + page_size, &buf, &len, + bpf_perf_event_print, + output_fn); + if (ret != LIBBPF_PERF_EVENT_CONT) + break; } + free(buf); - return PERF_EVENT_DONE; + return ret; } diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index fe3eefd21e86..36d90e3b1ea9 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -2,6 +2,8 @@ #ifndef __TRACE_HELPER_H #define __TRACE_HELPER_H +#include + struct ksym { long addr; char *name; @@ -10,14 +12,9 @@ struct ksym { int load_kallsyms(void); struct ksym *ksym_search(long key); -typedef int (*perf_event_print_fn)(void *data, int size); - -/* return code for perf_event_print_fn */ -#define PERF_EVENT_DONE 0 -#define PERF_EVENT_ERROR -1 -#define PERF_EVENT_CONT -2 +typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size); int perf_event_mmap(int fd); -/* return PERF_EVENT_DONE or PERF_EVENT_ERROR */ +/* return LIBBPF_PERF_EVENT_DONE or LIBBPF_PERF_EVENT_ERROR */ int perf_event_poller(int fd, perf_event_print_fn output_fn); #endif -- 
cgit v1.2.3 From be5bca44aa6b37b88e900d5f5f155911d6984d86 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 10 May 2018 10:24:43 -0700 Subject: samples: bpf: convert some XDP samples from bpf_load to libbpf Now that we can use full powers of libbpf in BPF samples, we should perhaps make the simplest XDP programs not depend on bpf_load helpers. This way newcomers will be exposed to the recommended library from the start. Use of bpf_prog_load_xattr() will also make it trivial to later on request offload of the programs by simply adding ifindex to the xattr. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- samples/bpf/Makefile | 8 +++---- samples/bpf/xdp1_user.c | 31 ++++++++++++++++--------- samples/bpf/xdp_adjust_tail_user.c | 36 +++++++++++++++++------------ samples/bpf/xdp_rxq_info_user.c | 46 +++++++++++++++++++++++++------------- 4 files changed, 78 insertions(+), 43 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 8ce72d211c3e..9e255ca4059a 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -79,9 +79,9 @@ test_cgrp2_attach-objs := test_cgrp2_attach.o $(LIBBPF) test_cgrp2_attach2-objs := test_cgrp2_attach2.o $(LIBBPF) $(CGROUP_HELPERS) test_cgrp2_sock-objs := test_cgrp2_sock.o $(LIBBPF) test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o -xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o +xdp1-objs := xdp1_user.o $(LIBBPF) # reuse xdp1 source intentionally -xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o +xdp2-objs := xdp1_user.o $(LIBBPF) xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o @@ -96,10 +96,10 @@ xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o 
xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o -xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o +xdp_rxq_info-objs := xdp_rxq_info_user.o $(LIBBPF) syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o -xdp_adjust_tail-objs := bpf_load.o $(LIBBPF) xdp_adjust_tail_user.o +xdp_adjust_tail-objs := xdp_adjust_tail_user.o $(LIBBPF) xdpsock-objs := bpf_load.o $(LIBBPF) xdpsock_user.o xdp_fwd-objs := bpf_load.o $(LIBBPF) xdp_fwd_user.o diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index b901ee2b3336..b02c531510ed 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -16,9 +16,9 @@ #include #include -#include "bpf_load.h" #include "bpf_util.h" -#include "libbpf.h" +#include "bpf/bpf.h" +#include "bpf/libbpf.h" static int ifindex; static __u32 xdp_flags; @@ -31,7 +31,7 @@ static void int_exit(int sig) /* simple per-protocol drop counter */ -static void poll_stats(int interval) +static void poll_stats(int map_fd, int interval) { unsigned int nr_cpus = bpf_num_possible_cpus(); const unsigned int nr_keys = 256; @@ -47,7 +47,7 @@ static void poll_stats(int interval) for (key = 0; key < nr_keys; key++) { __u64 sum = 0; - assert(bpf_map_lookup_elem(map_fd[0], &key, values) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, values) == 0); for (i = 0; i < nr_cpus; i++) sum += (values[i] - prev[key][i]); if (sum) @@ -71,9 +71,14 @@ static void usage(const char *prog) int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; const char *optstr = "SN"; + int prog_fd, map_fd, opt; + struct bpf_object *obj; + struct bpf_map *map; char filename[256]; - int opt; while ((opt = getopt(argc, argv, optstr)) != -1) { switch (opt) { @@ -102,13 +107,19 @@ int main(int argc, char **argv) ifindex = strtoul(argv[optind], NULL, 0); snprintf(filename, sizeof(filename), 
"%s_kern.o", argv[0]); + prog_load_attr.file = filename; - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + return 1; + + map = bpf_map__next(NULL, obj); + if (!map) { + printf("finding a map in obj file failed\n"); return 1; } + map_fd = bpf_map__fd(map); - if (!prog_fd[0]) { + if (!prog_fd) { printf("load_bpf_file: %s\n", strerror(errno)); return 1; } @@ -116,12 +127,12 @@ int main(int argc, char **argv) signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { + if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { printf("link set xdp fd failed\n"); return 1; } - poll_stats(2); + poll_stats(map_fd, 2); return 0; } diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index f621a541b574..3042ce37dae8 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -18,9 +18,8 @@ #include #include #include -#include "bpf_load.h" -#include "libbpf.h" -#include "bpf_util.h" +#include "bpf/bpf.h" +#include "bpf/libbpf.h" #define STATS_INTERVAL_S 2U @@ -36,7 +35,7 @@ static void int_exit(int sig) /* simple "icmp packet too big sent" counter */ -static void poll_stats(unsigned int kill_after_s) +static void poll_stats(unsigned int map_fd, unsigned int kill_after_s) { time_t started_at = time(NULL); __u64 value = 0; @@ -46,7 +45,7 @@ static void poll_stats(unsigned int kill_after_s) while (!kill_after_s || time(NULL) - started_at <= kill_after_s) { sleep(STATS_INTERVAL_S); - assert(bpf_map_lookup_elem(map_fd[0], &key, &value) == 0); + assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); printf("icmp \"packet too big\" sent: %10llu pkts\n", value); } @@ -66,14 +65,17 @@ static void usage(const char *cmd) int main(int argc, char **argv) { + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; 
unsigned char opt_flags[256] = {}; unsigned int kill_after_s = 0; const char *optstr = "i:T:SNh"; - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + int i, prog_fd, map_fd, opt; + struct bpf_object *obj; + struct bpf_map *map; char filename[256]; - int opt; - int i; - for (i = 0; i < strlen(optstr); i++) if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z') @@ -115,13 +117,19 @@ int main(int argc, char **argv) } snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; + + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + return 1; - if (load_bpf_file(filename)) { - printf("%s", bpf_log_buf); + map = bpf_map__next(NULL, obj); + if (!map) { + printf("finding a map in obj file failed\n"); return 1; } + map_fd = bpf_map__fd(map); - if (!prog_fd[0]) { + if (!prog_fd) { printf("load_bpf_file: %s\n", strerror(errno)); return 1; } @@ -129,12 +137,12 @@ int main(int argc, char **argv) signal(SIGINT, int_exit); signal(SIGTERM, int_exit); - if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { + if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { printf("link set xdp fd failed\n"); return 1; } - poll_stats(kill_after_s); + poll_stats(map_fd, kill_after_s); bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index 478d95412de4..e4e9ba52bff0 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -22,8 +22,8 @@ static const char *__doc__ = " XDP RX-queue info extract example\n\n" #include #include -#include "libbpf.h" -#include "bpf_load.h" +#include "bpf/bpf.h" +#include "bpf/libbpf.h" #include "bpf_util.h" static int ifindex = -1; @@ -32,6 +32,9 @@ static char *ifname; static __u32 xdp_flags; +static struct bpf_map *stats_global_map; +static struct bpf_map *rx_queue_index_map; + /* Exit return codes */ #define EXIT_OK 0 #define EXIT_FAIL 1 @@ -174,7 +177,7 @@ static struct datarec 
*alloc_record_per_cpu(void) static struct record *alloc_record_per_rxq(void) { - unsigned int nr_rxqs = map_data[2].def.max_entries; + unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; struct record *array; size_t size; @@ -190,7 +193,7 @@ static struct record *alloc_record_per_rxq(void) static struct stats_record *alloc_stats_record(void) { - unsigned int nr_rxqs = map_data[2].def.max_entries; + unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; struct stats_record *rec; int i; @@ -210,7 +213,7 @@ static struct stats_record *alloc_stats_record(void) static void free_stats_record(struct stats_record *r) { - unsigned int nr_rxqs = map_data[2].def.max_entries; + unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; int i; for (i = 0; i < nr_rxqs; i++) @@ -254,11 +257,11 @@ static void stats_collect(struct stats_record *rec) { int fd, i, max_rxqs; - fd = map_data[1].fd; /* map: stats_global_map */ + fd = bpf_map__fd(stats_global_map); map_collect_percpu(fd, 0, &rec->stats); - fd = map_data[2].fd; /* map: rx_queue_index_map */ - max_rxqs = map_data[2].def.max_entries; + fd = bpf_map__fd(rx_queue_index_map); + max_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; for (i = 0; i < max_rxqs; i++) map_collect_percpu(fd, i, &rec->rxq[i]); } @@ -304,8 +307,8 @@ static void stats_print(struct stats_record *stats_rec, struct stats_record *stats_prev, int action) { + unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; unsigned int nr_cpus = bpf_num_possible_cpus(); - unsigned int nr_rxqs = map_data[2].def.max_entries; double pps = 0, err = 0; struct record *rec, *prev; double t; @@ -419,31 +422,44 @@ static void stats_poll(int interval, int action) int main(int argc, char **argv) { struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY}; + struct bpf_prog_load_attr prog_load_attr = { + .prog_type = BPF_PROG_TYPE_XDP, + }; + int prog_fd, map_fd, opt, err; bool use_separators = true; struct config cfg = { 0 
}; + struct bpf_object *obj; + struct bpf_map *map; char filename[256]; int longindex = 0; int interval = 2; __u32 key = 0; - int opt, err; char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 }; int action = XDP_PASS; /* Default action */ char *action_str = NULL; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + prog_load_attr.file = filename; if (setrlimit(RLIMIT_MEMLOCK, &r)) { perror("setrlimit(RLIMIT_MEMLOCK)"); return 1; } - if (load_bpf_file(filename)) { - fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf); + if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) + return EXIT_FAIL; + + map = bpf_map__next(NULL, obj); + stats_global_map = bpf_map__next(map, obj); + rx_queue_index_map = bpf_map__next(stats_global_map, obj); + if (!map || !stats_global_map || !rx_queue_index_map) { + printf("finding a map in obj file failed\n"); return EXIT_FAIL; } + map_fd = bpf_map__fd(map); - if (!prog_fd[0]) { + if (!prog_fd) { fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno)); return EXIT_FAIL; } @@ -512,7 +528,7 @@ int main(int argc, char **argv) setlocale(LC_NUMERIC, "en_US"); /* User-side setup ifindex in config_map */ - err = bpf_map_update_elem(map_fd[0], &key, &cfg, 0); + err = bpf_map_update_elem(map_fd, &key, &cfg, 0); if (err) { fprintf(stderr, "Store config failed (err:%d)\n", err); exit(EXIT_FAIL_BPF); @@ -521,7 +537,7 @@ int main(int argc, char **argv) /* Remove XDP program when program is interrupted */ signal(SIGINT, int_exit); - if (bpf_set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) { + if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { fprintf(stderr, "link set xdp fd failed\n"); return EXIT_FAIL_XDP; } -- cgit v1.2.3 From 53ea24c20cea32b1dc70673402b496c4a5291d2d Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Mon, 14 May 2018 17:29:15 +0900 Subject: samples/bpf: xdp_monitor, accept short options Updated optstring parameter for getopt_long() to accept short options. 
Also updated usage() function. Signed-off-by: Prashant Bhole Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- samples/bpf/xdp_monitor_user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index 894bc64c2cac..05ad3f590c91 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -58,7 +58,7 @@ static void usage(char *argv[]) printf(" flag (internal value:%d)", *long_options[i].flag); else - printf("(internal short-option: -%c)", + printf("short-option: -%c", long_options[i].val); printf("\n"); } @@ -594,7 +594,7 @@ int main(int argc, char **argv) snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]); /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "h", + while ((opt = getopt_long(argc, argv, "hDSs:", long_options, &longindex)) != -1) { switch (opt) { case 'D': -- cgit v1.2.3 From 2bf3e2ef425bc2a164f10b554b7db6a8b4090ef4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 May 2018 22:35:02 -0700 Subject: samples: bpf: include bpf/bpf.h instead of local libbpf.h There are two files in the tree called libbpf.h which is becoming problematic. Most samples don't actually need the local libbpf.h they simply include it to get to bpf/bpf.h. Include bpf/bpf.h directly instead. 
Signed-off-by: Jakub Kicinski Acked-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- samples/bpf/bpf_load.c | 2 +- samples/bpf/bpf_load.h | 2 +- samples/bpf/cpustat_user.c | 2 +- samples/bpf/lathist_user.c | 2 +- samples/bpf/load_sock_ops.c | 2 +- samples/bpf/lwt_len_hist_user.c | 2 +- samples/bpf/map_perf_test_user.c | 2 +- samples/bpf/sock_example.h | 1 - samples/bpf/sockex1_user.c | 2 +- samples/bpf/sockex2_user.c | 2 +- samples/bpf/sockex3_user.c | 2 +- samples/bpf/syscall_tp_user.c | 2 +- samples/bpf/tc_l2_redirect_user.c | 2 +- samples/bpf/test_cgrp2_array_pin.c | 2 +- samples/bpf/test_current_task_under_cgroup_user.c | 2 +- samples/bpf/test_lru_dist.c | 2 +- samples/bpf/test_map_in_map_user.c | 2 +- samples/bpf/test_overhead_user.c | 2 +- samples/bpf/test_probe_write_user_user.c | 2 +- samples/bpf/trace_output_user.c | 2 +- samples/bpf/tracex1_user.c | 2 +- samples/bpf/tracex2_user.c | 2 +- samples/bpf/tracex3_user.c | 2 +- samples/bpf/tracex4_user.c | 2 +- samples/bpf/tracex5_user.c | 2 +- samples/bpf/tracex6_user.c | 2 +- samples/bpf/tracex7_user.c | 2 +- samples/bpf/xdp_fwd_user.c | 2 +- samples/bpf/xdp_monitor_user.c | 2 +- samples/bpf/xdp_redirect_cpu_user.c | 2 +- samples/bpf/xdp_redirect_map_user.c | 2 +- samples/bpf/xdp_redirect_user.c | 2 +- samples/bpf/xdp_router_ipv4_user.c | 2 +- samples/bpf/xdp_tx_iptunnel_user.c | 2 +- samples/bpf/xdpsock_user.c | 2 +- 35 files changed, 34 insertions(+), 35 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index a6b290de5632..89161c9ed466 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -24,7 +24,7 @@ #include #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #include "perf-sys.h" diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index f9da59bca0cc..814894a12974 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -2,7 +2,7 @@ #ifndef __BPF_LOAD_H #define __BPF_LOAD_H -#include 
"libbpf.h" +#include <bpf/bpf.h> #define MAX_MAPS 32 #define MAX_PROGS 32 diff --git a/samples/bpf/cpustat_user.c b/samples/bpf/cpustat_user.c index 2b4cd1ae57c5..869a99406dbf 100644 --- a/samples/bpf/cpustat_user.c +++ b/samples/bpf/cpustat_user.c @@ -17,7 +17,7 @@ #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #define MAX_CPU 8 diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c index 6477bad5b4e2..c8e88cc84e61 100644 --- a/samples/bpf/lathist_user.c +++ b/samples/bpf/lathist_user.c @@ -10,7 +10,7 @@ #include #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #define MAX_ENTRIES 20 diff --git a/samples/bpf/load_sock_ops.c b/samples/bpf/load_sock_ops.c index e5da6cf71a3e..8ecb41ea0c03 100644 --- a/samples/bpf/load_sock_ops.c +++ b/samples/bpf/load_sock_ops.c @@ -8,7 +8,7 @@ #include #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #include #include diff --git a/samples/bpf/lwt_len_hist_user.c b/samples/bpf/lwt_len_hist_user.c index 7fcb94c09112..587b68b1f8dd 100644 --- a/samples/bpf/lwt_len_hist_user.c +++ b/samples/bpf/lwt_len_hist_user.c @@ -9,7 +9,7 @@ #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_util.h" #define MAX_INDEX 64 diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 519d9af4b04a..38b7b1a96cc2 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -21,7 +21,7 @@ #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #define TEST_BIT(t) (1U << (t)) diff --git a/samples/bpf/sock_example.h b/samples/bpf/sock_example.h index 772d5dad8465..a27d7579bc73 100644 --- a/samples/bpf/sock_example.h +++ b/samples/bpf/sock_example.h @@ -9,7 +9,6 @@ #include #include #include -#include "libbpf.h" static inline int open_raw_sock(const char *name) { diff --git a/samples/bpf/sockex1_user.c b/samples/bpf/sockex1_user.c index 2be935c2627d..93ec01c56104 100644 --- a/samples/bpf/sockex1_user.c +++ 
b/samples/bpf/sockex1_user.c @@ -2,7 +2,7 @@ #include #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #include "sock_example.h" #include diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c index 44fe0805b087..1d5c6e9a6d27 100644 --- a/samples/bpf/sockex2_user.c +++ b/samples/bpf/sockex2_user.c @@ -2,7 +2,7 @@ #include #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #include "sock_example.h" #include diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index 495ee02e2fb7..5ba3ae9d180b 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -2,7 +2,7 @@ #include #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #include "sock_example.h" #include diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c index 9169d3207f18..1a1d0059a277 100644 --- a/samples/bpf/syscall_tp_user.c +++ b/samples/bpf/syscall_tp_user.c @@ -16,7 +16,7 @@ #include #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" /* This program verifies bpf attachment to tracepoint sys_enter_* and sys_exit_*. 
diff --git a/samples/bpf/tc_l2_redirect_user.c b/samples/bpf/tc_l2_redirect_user.c index 28995a776560..7ec45c3e8f56 100644 --- a/samples/bpf/tc_l2_redirect_user.c +++ b/samples/bpf/tc_l2_redirect_user.c @@ -13,7 +13,7 @@ #include #include -#include "libbpf.h" +#include <bpf/bpf.h> static void usage(void) { diff --git a/samples/bpf/test_cgrp2_array_pin.c b/samples/bpf/test_cgrp2_array_pin.c index 8a1b8b5d8def..242184292f59 100644 --- a/samples/bpf/test_cgrp2_array_pin.c +++ b/samples/bpf/test_cgrp2_array_pin.c @@ -14,7 +14,7 @@ #include #include -#include "libbpf.h" +#include <bpf/bpf.h> static void usage(void) { diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c index 65b5fb51c1db..4be4874ca2bc 100644 --- a/samples/bpf/test_current_task_under_cgroup_user.c +++ b/samples/bpf/test_current_task_under_cgroup_user.c @@ -9,7 +9,7 @@ #include #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #include #include "cgroup_helpers.h" diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c index 73c357142268..eec3e2509ce8 100644 --- a/samples/bpf/test_lru_dist.c +++ b/samples/bpf/test_lru_dist.c @@ -21,7 +21,7 @@ #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_util.h" #define min(a, b) ((a) < (b) ? 
(a) : (b)) diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c index 1aca18539d8d..e308858f7bcf 100644 --- a/samples/bpf/test_map_in_map_user.c +++ b/samples/bpf/test_map_in_map_user.c @@ -13,7 +13,7 @@ #include #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #define PORT_A (map_fd[0]) diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c index e1d35e07a10e..6caf47afa635 100644 --- a/samples/bpf/test_overhead_user.c +++ b/samples/bpf/test_overhead_user.c @@ -19,7 +19,7 @@ #include #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #define MAX_CNT 1000000 diff --git a/samples/bpf/test_probe_write_user_user.c b/samples/bpf/test_probe_write_user_user.c index bf8e3a9f3067..045eb5e30f54 100644 --- a/samples/bpf/test_probe_write_user_user.c +++ b/samples/bpf/test_probe_write_user_user.c @@ -3,7 +3,7 @@ #include #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #include #include diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index da98be721001..4837d73edefe 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -18,7 +18,7 @@ #include #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #include "perf-sys.h" #include "trace_helpers.h" diff --git a/samples/bpf/tracex1_user.c b/samples/bpf/tracex1_user.c index 3dcb475fb135..af8c20608ab5 100644 --- a/samples/bpf/tracex1_user.c +++ b/samples/bpf/tracex1_user.c @@ -2,7 +2,7 @@ #include #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" int main(int ac, char **argv) diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index efb5e61918df..1a81e6a5c2ea 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -7,7 +7,7 @@ #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #include "bpf_util.h" diff --git a/samples/bpf/tracex3_user.c 
b/samples/bpf/tracex3_user.c index fe372239d505..6c6b10f4c3ee 100644 --- a/samples/bpf/tracex3_user.c +++ b/samples/bpf/tracex3_user.c @@ -13,7 +13,7 @@ #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #include "bpf_util.h" diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c index 22c644f1f4c3..14625c898e43 100644 --- a/samples/bpf/tracex4_user.c +++ b/samples/bpf/tracex4_user.c @@ -14,7 +14,7 @@ #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" struct pair { diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c index 4e2774b731f0..c4ab91c89494 100644 --- a/samples/bpf/tracex5_user.c +++ b/samples/bpf/tracex5_user.c @@ -5,7 +5,7 @@ #include #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #include diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c index 89ab8d408474..4bb3c830adb2 100644 --- a/samples/bpf/tracex6_user.c +++ b/samples/bpf/tracex6_user.c @@ -16,7 +16,7 @@ #include #include "bpf_load.h" -#include "libbpf.h" +#include <bpf/bpf.h> #include "perf-sys.h" #define SAMPLE_PERIOD 0x7fffffffffffffffULL diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c index 8a52ac492e8b..ea6dae78f0df 100644 --- a/samples/bpf/tracex7_user.c +++ b/samples/bpf/tracex7_user.c @@ -3,7 +3,7 @@ #include #include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" int main(int argc, char **argv) diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c index 9c6606f57126..a87a2048ed32 100644 --- a/samples/bpf/xdp_fwd_user.c +++ b/samples/bpf/xdp_fwd_user.c @@ -26,7 +26,7 @@ #include "bpf_load.h" #include "bpf_util.h" -#include "libbpf.h" +#include <bpf/bpf.h> static int do_attach(int idx, int fd, const char *name) diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index 05ad3f590c91..bf09b5188acd 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -26,7 +26,7 @@ static const char *__doc_err_only__= 
#include #include -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #include "bpf_util.h" diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 23744a8aaf21..f6efaefd485b 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -28,7 +28,7 @@ static const char *__doc__ = * use bpf/libbpf.h), but cannot as (currently) needed for XDP * attaching to a device via bpf_set_link_xdp_fd() */ -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_load.h" #include "bpf_util.h" diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index 7eae07d7293e..4445e76854b5 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -24,7 +24,7 @@ #include "bpf_load.h" #include "bpf_util.h" -#include "libbpf.h" +#include <bpf/bpf.h> static int ifindex_in; static int ifindex_out; diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index b701b5c21342..81a69e36cb78 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -24,7 +24,7 @@ #include "bpf_load.h" #include "bpf_util.h" -#include "libbpf.h" +#include <bpf/bpf.h> static int ifindex_in; static int ifindex_out; diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index 6296741c1fbd..b2b4dfa776c8 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -16,7 +16,7 @@ #include #include #include "bpf_load.h" -#include "libbpf.h" +#include <bpf/bpf.h> #include #include #include diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index f0a787268a87..a4ccc33adac0 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -18,7 +18,7 @@ #include #include #include "bpf_load.h" -#include "libbpf.h" +#include <bpf/bpf.h> #include "bpf_util.h" #include "xdp_tx_iptunnel_common.h" diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 
4b8a7cf3e63b..7fe60f6f7d53 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -38,7 +38,7 @@ #include "bpf_load.h" #include "bpf_util.h" -#include "libbpf.h" +#include #include "xdpsock.h" -- cgit v1.2.3 From 8d93045077aeede62127b6d6663bfdd31f6240da Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 May 2018 22:35:03 -0700 Subject: samples: bpf: rename libbpf.h to bpf_insn.h The libbpf.h file in samples is clashing with libbpf's header. Since it only includes a subset of filter.h instruction helpers rename it to bpf_insn.h. Drop the unnecessary include of bpf/bpf.h. Signed-off-by: Jakub Kicinski Acked-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- samples/bpf/bpf_insn.h | 197 +++++++++++++++++++++++++++++++ samples/bpf/cookie_uid_helper_example.c | 2 +- samples/bpf/fds_example.c | 4 +- samples/bpf/libbpf.h | 199 -------------------------------- samples/bpf/sock_example.c | 3 +- samples/bpf/test_cgrp2_attach.c | 3 +- samples/bpf/test_cgrp2_attach2.c | 3 +- samples/bpf/test_cgrp2_sock.c | 3 +- samples/bpf/test_cgrp2_sock2.c | 3 +- 9 files changed, 211 insertions(+), 206 deletions(-) create mode 100644 samples/bpf/bpf_insn.h delete mode 100644 samples/bpf/libbpf.h (limited to 'samples/bpf') diff --git a/samples/bpf/bpf_insn.h b/samples/bpf/bpf_insn.h new file mode 100644 index 000000000000..20dc5cefec84 --- /dev/null +++ b/samples/bpf/bpf_insn.h @@ -0,0 +1,197 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* eBPF instruction mini library */ +#ifndef __BPF_INSN_H +#define __BPF_INSN_H + +struct bpf_insn; + +/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ + +#define BPF_ALU64_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_ALU32_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + 
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ + +#define BPF_ALU64_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_ALU32_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Short form of mov, dst_reg = src_reg */ + +#define BPF_MOV64_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_MOV32_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +/* Short form of mov, dst_reg = imm32 */ + +#define BPF_MOV64_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_MOV32_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ +#define BPF_LD_IMM64(DST, IMM) \ + BPF_LD_IMM64_RAW(DST, 0, IMM) + +#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_DW | BPF_IMM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = (__u32) (IMM) }), \ + ((struct bpf_insn) { \ + .code = 0, /* zero is reserved opcode */ \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = ((__u64) (IMM)) >> 32 }) + +#ifndef BPF_PSEUDO_MAP_FD +# define BPF_PSEUDO_MAP_FD 1 +#endif + +/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ +#define BPF_LD_MAP_FD(DST, MAP_FD) \ + BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) + + +/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ + +#define BPF_LD_ABS(SIZE, 
IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Memory load, dst_reg = *(uint *) (src_reg + off16) */ + +#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Memory store, *(uint *) (dst_reg + off16) = src_reg */ + +#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ + +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Memory store, *(uint *) (dst_reg + off16) = imm32 */ + +#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ + +#define BPF_JMP_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ + +#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Raw code statement block */ + +#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = CODE, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = IMM }) + +/* Program exit */ + +#define BPF_EXIT_INSN() \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_EXIT, \ + 
.dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = 0 }) + +#endif diff --git a/samples/bpf/cookie_uid_helper_example.c b/samples/bpf/cookie_uid_helper_example.c index 8eca27e595ae..deb0e3e0324d 100644 --- a/samples/bpf/cookie_uid_helper_example.c +++ b/samples/bpf/cookie_uid_helper_example.c @@ -51,7 +51,7 @@ #include #include #include -#include "libbpf.h" +#include "bpf_insn.h" #define PORT 8888 diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c index e29bd52ff9e8..9854854f05d1 100644 --- a/samples/bpf/fds_example.c +++ b/samples/bpf/fds_example.c @@ -12,8 +12,10 @@ #include #include +#include + +#include "bpf_insn.h" #include "bpf_load.h" -#include "libbpf.h" #include "sock_example.h" #define BPF_F_PIN (1 << 0) diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h deleted file mode 100644 index 18bfee5aab6b..000000000000 --- a/samples/bpf/libbpf.h +++ /dev/null @@ -1,199 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* eBPF mini library */ -#ifndef __LIBBPF_H -#define __LIBBPF_H - -#include - -struct bpf_insn; - -/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ - -#define BPF_ALU64_REG(OP, DST, SRC) \ - ((struct bpf_insn) { \ - .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ - .dst_reg = DST, \ - .src_reg = SRC, \ - .off = 0, \ - .imm = 0 }) - -#define BPF_ALU32_REG(OP, DST, SRC) \ - ((struct bpf_insn) { \ - .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ - .dst_reg = DST, \ - .src_reg = SRC, \ - .off = 0, \ - .imm = 0 }) - -/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ - -#define BPF_ALU64_IMM(OP, DST, IMM) \ - ((struct bpf_insn) { \ - .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ - .dst_reg = DST, \ - .src_reg = 0, \ - .off = 0, \ - .imm = IMM }) - -#define BPF_ALU32_IMM(OP, DST, IMM) \ - ((struct bpf_insn) { \ - .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ - .dst_reg = DST, \ - .src_reg = 0, \ - .off = 0, \ - .imm = IMM }) - -/* Short form of mov, dst_reg = src_reg */ - -#define BPF_MOV64_REG(DST, SRC) \ - ((struct 
bpf_insn) { \ - .code = BPF_ALU64 | BPF_MOV | BPF_X, \ - .dst_reg = DST, \ - .src_reg = SRC, \ - .off = 0, \ - .imm = 0 }) - -#define BPF_MOV32_REG(DST, SRC) \ - ((struct bpf_insn) { \ - .code = BPF_ALU | BPF_MOV | BPF_X, \ - .dst_reg = DST, \ - .src_reg = SRC, \ - .off = 0, \ - .imm = 0 }) - -/* Short form of mov, dst_reg = imm32 */ - -#define BPF_MOV64_IMM(DST, IMM) \ - ((struct bpf_insn) { \ - .code = BPF_ALU64 | BPF_MOV | BPF_K, \ - .dst_reg = DST, \ - .src_reg = 0, \ - .off = 0, \ - .imm = IMM }) - -#define BPF_MOV32_IMM(DST, IMM) \ - ((struct bpf_insn) { \ - .code = BPF_ALU | BPF_MOV | BPF_K, \ - .dst_reg = DST, \ - .src_reg = 0, \ - .off = 0, \ - .imm = IMM }) - -/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ -#define BPF_LD_IMM64(DST, IMM) \ - BPF_LD_IMM64_RAW(DST, 0, IMM) - -#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ - ((struct bpf_insn) { \ - .code = BPF_LD | BPF_DW | BPF_IMM, \ - .dst_reg = DST, \ - .src_reg = SRC, \ - .off = 0, \ - .imm = (__u32) (IMM) }), \ - ((struct bpf_insn) { \ - .code = 0, /* zero is reserved opcode */ \ - .dst_reg = 0, \ - .src_reg = 0, \ - .off = 0, \ - .imm = ((__u64) (IMM)) >> 32 }) - -#ifndef BPF_PSEUDO_MAP_FD -# define BPF_PSEUDO_MAP_FD 1 -#endif - -/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ -#define BPF_LD_MAP_FD(DST, MAP_FD) \ - BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) - - -/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ - -#define BPF_LD_ABS(SIZE, IMM) \ - ((struct bpf_insn) { \ - .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ - .dst_reg = 0, \ - .src_reg = 0, \ - .off = 0, \ - .imm = IMM }) - -/* Memory load, dst_reg = *(uint *) (src_reg + off16) */ - -#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ - ((struct bpf_insn) { \ - .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ - .dst_reg = DST, \ - .src_reg = SRC, \ - .off = OFF, \ - .imm = 0 }) - -/* Memory store, *(uint *) (dst_reg + off16) = src_reg */ - -#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ - ((struct 
bpf_insn) { \ - .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ - .dst_reg = DST, \ - .src_reg = SRC, \ - .off = OFF, \ - .imm = 0 }) - -/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */ - -#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \ - ((struct bpf_insn) { \ - .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \ - .dst_reg = DST, \ - .src_reg = SRC, \ - .off = OFF, \ - .imm = 0 }) - -/* Memory store, *(uint *) (dst_reg + off16) = imm32 */ - -#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ - ((struct bpf_insn) { \ - .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ - .dst_reg = DST, \ - .src_reg = 0, \ - .off = OFF, \ - .imm = IMM }) - -/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ - -#define BPF_JMP_REG(OP, DST, SRC, OFF) \ - ((struct bpf_insn) { \ - .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ - .dst_reg = DST, \ - .src_reg = SRC, \ - .off = OFF, \ - .imm = 0 }) - -/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ - -#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ - ((struct bpf_insn) { \ - .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ - .dst_reg = DST, \ - .src_reg = 0, \ - .off = OFF, \ - .imm = IMM }) - -/* Raw code statement block */ - -#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ - ((struct bpf_insn) { \ - .code = CODE, \ - .dst_reg = DST, \ - .src_reg = SRC, \ - .off = OFF, \ - .imm = IMM }) - -/* Program exit */ - -#define BPF_EXIT_INSN() \ - ((struct bpf_insn) { \ - .code = BPF_JMP | BPF_EXIT, \ - .dst_reg = 0, \ - .src_reg = 0, \ - .off = 0, \ - .imm = 0 }) - -#endif diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c index 33a637507c00..60ec467c78ab 100644 --- a/samples/bpf/sock_example.c +++ b/samples/bpf/sock_example.c @@ -26,7 +26,8 @@ #include #include #include -#include "libbpf.h" +#include +#include "bpf_insn.h" #include "sock_example.h" char bpf_log_buf[BPF_LOG_BUF_SIZE]; diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c index 
4bfcaf93fcf3..20fbd1241db3 100644 --- a/samples/bpf/test_cgrp2_attach.c +++ b/samples/bpf/test_cgrp2_attach.c @@ -28,8 +28,9 @@ #include #include +#include -#include "libbpf.h" +#include "bpf_insn.h" enum { MAP_KEY_PACKETS, diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c index 1af412ec6007..b453e6a161be 100644 --- a/samples/bpf/test_cgrp2_attach2.c +++ b/samples/bpf/test_cgrp2_attach2.c @@ -24,8 +24,9 @@ #include #include +#include -#include "libbpf.h" +#include "bpf_insn.h" #include "cgroup_helpers.h" #define FOO "/foo" diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c index e79594dd629b..b0811da5a00f 100644 --- a/samples/bpf/test_cgrp2_sock.c +++ b/samples/bpf/test_cgrp2_sock.c @@ -21,8 +21,9 @@ #include #include #include +#include -#include "libbpf.h" +#include "bpf_insn.h" char bpf_log_buf[BPF_LOG_BUF_SIZE]; diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c index e53f1f6f0867..3b5be2364975 100644 --- a/samples/bpf/test_cgrp2_sock2.c +++ b/samples/bpf/test_cgrp2_sock2.c @@ -19,8 +19,9 @@ #include #include #include +#include -#include "libbpf.h" +#include "bpf_insn.h" #include "bpf_load.h" static int usage(const char *argv0) -- cgit v1.2.3 From 787360f8c2b87d4ae4858bb8736a19c289904885 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 May 2018 22:35:04 -0700 Subject: samples: bpf: fix build after move to compiling full libbpf.a MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are many ways users may compile samples, some of them got broken by commit 5f9380572b4b ("samples: bpf: compile and link against full libbpf"). Improve path resolution and make libbpf building a dependency of source files to force its build. 
Samples should now again build with any of: cd samples/bpf; make make samples/bpf/ make -C samples/bpf cd samples/bpf; make O=builddir make samples/bpf/ O=builddir make -C samples/bpf O=builddir export KBUILD_OUTPUT=builddir make samples/bpf/ make -C samples/bpf Fixes: 5f9380572b4b ("samples: bpf: compile and link against full libbpf") Reported-by: Björn Töpel Signed-off-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- samples/bpf/Makefile | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 9e255ca4059a..0dae77c88d2e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -1,4 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 + +BPF_SAMPLES_PATH ?= $(abspath $(srctree)/$(src)) +TOOLS_PATH := $(BPF_SAMPLES_PATH)/../../tools + # List of programs to build hostprogs-y := test_lru_dist hostprogs-y += sock_example @@ -49,7 +53,8 @@ hostprogs-y += xdpsock hostprogs-y += xdp_fwd # Libbpf dependencies -LIBBPF := ../../tools/lib/bpf/libbpf.a +LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a + CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o @@ -233,15 +238,16 @@ CLANG_ARCH_ARGS = -target $(ARCH) endif # Trick to allow make to be run from this directory -all: $(LIBBPF) - $(MAKE) -C ../../ $(CURDIR)/ +all: + $(MAKE) -C ../../ $(CURDIR)/ BPF_SAMPLES_PATH=$(CURDIR) clean: $(MAKE) -C ../../ M=$(CURDIR) clean @rm -f *~ $(LIBBPF): FORCE - $(MAKE) -C $(dir $@) +# Fix up variables inherited from Kbuild that tools/ build system won't like + $(MAKE) -C $(dir $@) RM='rm -rf' LDFLAGS= srctree=$(BPF_SAMPLES_PATH)/../../ O= $(obj)/syscall_nrs.s: $(src)/syscall_nrs.c $(call if_changed_dep,cc_s_c) @@ -272,7 +278,8 @@ verify_target_bpf: verify_cmds exit 2; \ else true; fi -$(src)/*.c: verify_target_bpf +$(BPF_SAMPLES_PATH)/*.c: verify_target_bpf $(LIBBPF) +$(src)/*.c: verify_target_bpf $(LIBBPF) 
$(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h -- cgit v1.2.3 From 0cc54db1818ad38f400be9f24871f3b7bf09e911 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 May 2018 22:35:05 -0700 Subject: samples: bpf: move libbpf from object dependencies to libs Make complains that it doesn't know how to make libbpf.a: scripts/Makefile.host:106: target 'samples/bpf/../../tools/lib/bpf/libbpf.a' doesn't match the target pattern Now that we have it as a dependency of the sources simply add libbpf.a to libraries not objects. Signed-off-by: Jakub Kicinski Acked-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- samples/bpf/Makefile | 145 ++++++++++++++++++--------------------------------- 1 file changed, 51 insertions(+), 94 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 0dae77c88d2e..0036a77c2d97 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -58,55 +58,53 @@ LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o -test_lru_dist-objs := test_lru_dist.o $(LIBBPF) -sock_example-objs := sock_example.o $(LIBBPF) -fds_example-objs := bpf_load.o $(LIBBPF) fds_example.o -sockex1-objs := bpf_load.o $(LIBBPF) sockex1_user.o -sockex2-objs := bpf_load.o $(LIBBPF) sockex2_user.o -sockex3-objs := bpf_load.o $(LIBBPF) sockex3_user.o -tracex1-objs := bpf_load.o $(LIBBPF) tracex1_user.o -tracex2-objs := bpf_load.o $(LIBBPF) tracex2_user.o -tracex3-objs := bpf_load.o $(LIBBPF) tracex3_user.o -tracex4-objs := bpf_load.o $(LIBBPF) tracex4_user.o -tracex5-objs := bpf_load.o $(LIBBPF) tracex5_user.o -tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o -tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o -load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o -test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o -trace_output-objs := bpf_load.o $(LIBBPF) 
trace_output_user.o $(TRACE_HELPERS) -lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o -offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o $(TRACE_HELPERS) -spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o $(TRACE_HELPERS) -map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o -test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o -test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o $(LIBBPF) -test_cgrp2_attach-objs := test_cgrp2_attach.o $(LIBBPF) -test_cgrp2_attach2-objs := test_cgrp2_attach2.o $(LIBBPF) $(CGROUP_HELPERS) -test_cgrp2_sock-objs := test_cgrp2_sock.o $(LIBBPF) -test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o -xdp1-objs := xdp1_user.o $(LIBBPF) +fds_example-objs := bpf_load.o fds_example.o +sockex1-objs := bpf_load.o sockex1_user.o +sockex2-objs := bpf_load.o sockex2_user.o +sockex3-objs := bpf_load.o sockex3_user.o +tracex1-objs := bpf_load.o tracex1_user.o +tracex2-objs := bpf_load.o tracex2_user.o +tracex3-objs := bpf_load.o tracex3_user.o +tracex4-objs := bpf_load.o tracex4_user.o +tracex5-objs := bpf_load.o tracex5_user.o +tracex6-objs := bpf_load.o tracex6_user.o +tracex7-objs := bpf_load.o tracex7_user.o +load_sock_ops-objs := bpf_load.o load_sock_ops.o +test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o +trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS) +lathist-objs := bpf_load.o lathist_user.o +offwaketime-objs := bpf_load.o offwaketime_user.o $(TRACE_HELPERS) +spintest-objs := bpf_load.o spintest_user.o $(TRACE_HELPERS) +map_perf_test-objs := bpf_load.o map_perf_test_user.o +test_overhead-objs := bpf_load.o test_overhead_user.o +test_cgrp2_array_pin-objs := test_cgrp2_array_pin.o +test_cgrp2_attach-objs := test_cgrp2_attach.o +test_cgrp2_attach2-objs := test_cgrp2_attach2.o $(CGROUP_HELPERS) +test_cgrp2_sock-objs := test_cgrp2_sock.o +test_cgrp2_sock2-objs := bpf_load.o test_cgrp2_sock2.o +xdp1-objs := xdp1_user.o # reuse xdp1 source intentionally 
-xdp2-objs := xdp1_user.o $(LIBBPF) -xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o -test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \ +xdp2-objs := xdp1_user.o +xdp_router_ipv4-objs := bpf_load.o xdp_router_ipv4_user.o +test_current_task_under_cgroup-objs := bpf_load.o $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o -trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o $(TRACE_HELPERS) -sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o $(TRACE_HELPERS) -tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o -lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o -xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o -test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o -per_socket_stats_example-objs := cookie_uid_helper_example.o $(LIBBPF) -xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o -xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o -xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o -xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o -xdp_rxq_info-objs := xdp_rxq_info_user.o $(LIBBPF) -syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o -cpustat-objs := bpf_load.o $(LIBBPF) cpustat_user.o -xdp_adjust_tail-objs := xdp_adjust_tail_user.o $(LIBBPF) -xdpsock-objs := bpf_load.o $(LIBBPF) xdpsock_user.o -xdp_fwd-objs := bpf_load.o $(LIBBPF) xdp_fwd_user.o +trace_event-objs := bpf_load.o trace_event_user.o $(TRACE_HELPERS) +sampleip-objs := bpf_load.o sampleip_user.o $(TRACE_HELPERS) +tc_l2_redirect-objs := bpf_load.o tc_l2_redirect_user.o +lwt_len_hist-objs := bpf_load.o lwt_len_hist_user.o +xdp_tx_iptunnel-objs := bpf_load.o xdp_tx_iptunnel_user.o +test_map_in_map-objs := bpf_load.o test_map_in_map_user.o +per_socket_stats_example-objs := cookie_uid_helper_example.o +xdp_redirect-objs := bpf_load.o xdp_redirect_user.o +xdp_redirect_map-objs := bpf_load.o xdp_redirect_map_user.o 
+xdp_redirect_cpu-objs := bpf_load.o xdp_redirect_cpu_user.o +xdp_monitor-objs := bpf_load.o xdp_monitor_user.o +xdp_rxq_info-objs := xdp_rxq_info_user.o +syscall_tp-objs := bpf_load.o syscall_tp_user.o +cpustat-objs := bpf_load.o cpustat_user.o +xdp_adjust_tail-objs := xdp_adjust_tail_user.o +xdpsock-objs := bpf_load.o xdpsock_user.o +xdp_fwd-objs := bpf_load.o xdp_fwd_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -178,53 +176,12 @@ HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/ -HOSTLOADLIBES_test_lru_dist += -lelf -HOSTLOADLIBES_sock_example += -lelf -HOSTLOADLIBES_fds_example += -lelf -HOSTLOADLIBES_sockex1 += -lelf -HOSTLOADLIBES_sockex2 += -lelf -HOSTLOADLIBES_sockex3 += -lelf -HOSTLOADLIBES_tracex1 += -lelf -HOSTLOADLIBES_tracex2 += -lelf -HOSTLOADLIBES_tracex3 += -lelf -HOSTLOADLIBES_tracex4 += -lelf -lrt -HOSTLOADLIBES_tracex5 += -lelf -HOSTLOADLIBES_tracex6 += -lelf -HOSTLOADLIBES_tracex7 += -lelf -HOSTLOADLIBES_test_cgrp2_array_pin += -lelf -HOSTLOADLIBES_test_cgrp2_attach += -lelf -HOSTLOADLIBES_test_cgrp2_attach2 += -lelf -HOSTLOADLIBES_test_cgrp2_sock += -lelf -HOSTLOADLIBES_test_cgrp2_sock2 += -lelf -HOSTLOADLIBES_load_sock_ops += -lelf -HOSTLOADLIBES_test_probe_write_user += -lelf -HOSTLOADLIBES_trace_output += -lelf -lrt -HOSTLOADLIBES_lathist += -lelf -HOSTLOADLIBES_offwaketime += -lelf -HOSTLOADLIBES_spintest += -lelf -HOSTLOADLIBES_map_perf_test += -lelf -lrt -HOSTLOADLIBES_test_overhead += -lelf -lrt -HOSTLOADLIBES_xdp1 += -lelf -HOSTLOADLIBES_xdp2 += -lelf -HOSTLOADLIBES_xdp_router_ipv4 += -lelf -HOSTLOADLIBES_test_current_task_under_cgroup += -lelf -HOSTLOADLIBES_trace_event += -lelf -HOSTLOADLIBES_sampleip += -lelf -HOSTLOADLIBES_tc_l2_redirect += -l elf -HOSTLOADLIBES_lwt_len_hist += -l elf -HOSTLOADLIBES_xdp_tx_iptunnel += -lelf -HOSTLOADLIBES_test_map_in_map += -lelf 
-HOSTLOADLIBES_per_socket_stats_example += -lelf -HOSTLOADLIBES_xdp_redirect += -lelf -HOSTLOADLIBES_xdp_redirect_map += -lelf -HOSTLOADLIBES_xdp_redirect_cpu += -lelf -HOSTLOADLIBES_xdp_monitor += -lelf -HOSTLOADLIBES_xdp_rxq_info += -lelf -HOSTLOADLIBES_syscall_tp += -lelf -HOSTLOADLIBES_cpustat += -lelf -HOSTLOADLIBES_xdp_adjust_tail += -lelf -HOSTLOADLIBES_xdpsock += -lelf -pthread -HOSTLOADLIBES_xdp_fwd += -lelf +HOST_LOADLIBES += $(LIBBPF) -lelf +HOSTLOADLIBES_tracex4 += -lrt +HOSTLOADLIBES_trace_output += -lrt +HOSTLOADLIBES_map_perf_test += -lrt +HOSTLOADLIBES_test_overhead += -lrt +HOSTLOADLIBES_xdpsock += -pthread # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang -- cgit v1.2.3 From 768759edb9a1bd1b3fc38313b6578e5c8b252aee Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 May 2018 22:35:06 -0700 Subject: samples: bpf: make the build less noisy Building samples with clang ignores the $(Q) setting, always printing full command to the output. Make it less verbose. Signed-off-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- samples/bpf/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 0036a77c2d97..62d1aa1a4cf3 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -244,7 +244,8 @@ $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h # But, there is no easy way to fix it, so just exclude it since it is # useless for BPF samples. 
$(obj)/%.o: $(src)/%.c - $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ + @echo " CLANG-bpf " $@ + $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ -I$(srctree)/tools/testing/selftests/bpf/ \ -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ -D__TARGET_ARCH_$(ARCH) -Wno-compare-distinct-pointer-types \ -- cgit v1.2.3 From 44edef77bd92730e1520b07f5ae2c9f4628738a8 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 15 May 2018 16:20:52 -0700 Subject: samples/bpf: Decrement ttl in fib forwarding example Only consider forwarding packets if ttl in received packet is > 1 and decrement ttl before handing off to bpf_redirect_map. Signed-off-by: David Ahern Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- samples/bpf/xdp_fwd_kern.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c index cdf4fc383cc9..4a6be0f87505 100644 --- a/samples/bpf/xdp_fwd_kern.c +++ b/samples/bpf/xdp_fwd_kern.c @@ -30,12 +30,24 @@ struct bpf_map_def SEC("maps") tx_port = { .max_entries = 64, }; +/* from include/net/ip.h */ +static __always_inline int ip_decrease_ttl(struct iphdr *iph) +{ + u32 check = (__force u32)iph->check; + + check += (__force u32)htons(0x0100); + iph->check = (__force __sum16)(check + (check >= 0xFFFF)); + return --iph->ttl; +} + static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) { void *data_end = (void *)(long)ctx->data_end; void *data = (void *)(long)ctx->data; struct bpf_fib_lookup fib_params; struct ethhdr *eth = data; + struct ipv6hdr *ip6h; + struct iphdr *iph; int out_index; u16 h_proto; u64 nh_off; @@ -48,11 +60,14 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) h_proto = eth->h_proto; if (h_proto == htons(ETH_P_IP)) { - struct iphdr *iph = data + nh_off; + iph = data + nh_off; if (iph + 1 > data_end) return XDP_DROP; + if 
(iph->ttl <= 1) + return XDP_PASS; + fib_params.family = AF_INET; fib_params.tos = iph->tos; fib_params.l4_protocol = iph->protocol; @@ -64,19 +79,22 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) } else if (h_proto == htons(ETH_P_IPV6)) { struct in6_addr *src = (struct in6_addr *) fib_params.ipv6_src; struct in6_addr *dst = (struct in6_addr *) fib_params.ipv6_dst; - struct ipv6hdr *iph = data + nh_off; - if (iph + 1 > data_end) + ip6h = data + nh_off; + if (ip6h + 1 > data_end) return XDP_DROP; + if (ip6h->hop_limit <= 1) + return XDP_PASS; + fib_params.family = AF_INET6; - fib_params.flowlabel = *(__be32 *)iph & IPV6_FLOWINFO_MASK; - fib_params.l4_protocol = iph->nexthdr; + fib_params.flowlabel = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK; + fib_params.l4_protocol = ip6h->nexthdr; fib_params.sport = 0; fib_params.dport = 0; - fib_params.tot_len = ntohs(iph->payload_len); - *src = iph->saddr; - *dst = iph->daddr; + fib_params.tot_len = ntohs(ip6h->payload_len); + *src = ip6h->saddr; + *dst = ip6h->daddr; } else { return XDP_PASS; } @@ -92,6 +110,11 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) * forwarding packets are dropped. */ if (out_index > 0) { + if (h_proto == htons(ETH_P_IP)) + ip_decrease_ttl(iph); + else if (h_proto == htons(ETH_P_IPV6)) + ip6h->hop_limit--; + memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); memcpy(eth->h_source, fib_params.smac, ETH_ALEN); return bpf_redirect_map(&tx_port, out_index, 0); -- cgit v1.2.3 From dac09149d992995adbef0f472093fbb6940a8653 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Fri, 18 May 2018 14:00:21 +0200 Subject: xsk: clean up SPDX headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clean up SPDX-License-Identifier and remove licensing leftovers. 
Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- include/net/xdp_sock.h | 13 ++----------- include/uapi/linux/if_xdp.h | 13 ++----------- kernel/bpf/xskmap.c | 9 --------- net/xdp/xdp_umem.c | 9 --------- net/xdp/xdp_umem.h | 13 ++----------- net/xdp/xdp_umem_props.h | 13 ++----------- net/xdp/xsk.c | 9 --------- net/xdp/xsk_queue.c | 9 --------- net/xdp/xsk_queue.h | 13 ++----------- samples/bpf/xdpsock_user.c | 12 +----------- 10 files changed, 11 insertions(+), 102 deletions(-) (limited to 'samples/bpf') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 185f4928fbda..7a647c56ec15 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -1,15 +1,6 @@ -/* SPDX-License-Identifier: GPL-2.0 - * AF_XDP internal functions +/* SPDX-License-Identifier: GPL-2.0 */ +/* AF_XDP internal functions * Copyright(c) 2018 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #ifndef _LINUX_XDP_SOCK_H diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 77b88c4efe98..56db977221d2 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -1,17 +1,8 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note - * +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* * if_xdp: XDP socket user-space interface * Copyright(c) 2018 Intel Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * * Author(s): Björn Töpel * Magnus Karlsson */ diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index cb3a12137404..b3c557476a8d 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -1,15 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* XSKMAP used for AF_XDP sockets * Copyright(c) 2018 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #include diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 2b47a1dd7c6c..df4ea97c433b 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -1,15 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* XDP user-space packet buffer * Copyright(c) 2018 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
*/ #include diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h index 7e0b2fab8522..70fe225baa51 100644 --- a/net/xdp/xdp_umem.h +++ b/net/xdp/xdp_umem.h @@ -1,15 +1,6 @@ -/* SPDX-License-Identifier: GPL-2.0 - * XDP user-space packet buffer +/* SPDX-License-Identifier: GPL-2.0 */ +/* XDP user-space packet buffer * Copyright(c) 2018 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #ifndef XDP_UMEM_H_ diff --git a/net/xdp/xdp_umem_props.h b/net/xdp/xdp_umem_props.h index 77fb5daf29f3..2cf8ec485fd2 100644 --- a/net/xdp/xdp_umem_props.h +++ b/net/xdp/xdp_umem_props.h @@ -1,15 +1,6 @@ -/* SPDX-License-Identifier: GPL-2.0 - * XDP user-space packet buffer +/* SPDX-License-Identifier: GPL-2.0 */ +/* XDP user-space packet buffer * Copyright(c) 2018 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #ifndef XDP_UMEM_PROPS_H_ diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 009c5af5bba5..b8d1cb4d78c0 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -5,15 +5,6 @@ * applications. * Copyright(c) 2018 Intel Corporation. 
* - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * * Author(s): Björn Töpel * Magnus Karlsson */ diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c index d012e5e23591..9f605d22dad4 100644 --- a/net/xdp/xsk_queue.c +++ b/net/xdp/xsk_queue.c @@ -1,15 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* XDP user-space ring structure * Copyright(c) 2018 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #include diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 7aa9a535db0e..928d464e57b9 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -1,15 +1,6 @@ -/* SPDX-License-Identifier: GPL-2.0 - * XDP user-space ring structure +/* SPDX-License-Identifier: GPL-2.0 */ +/* XDP user-space ring structure * Copyright(c) 2018 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for - * more details. */ #ifndef _LINUX_XSK_QUEUE_H diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 7fe60f6f7d53..60a882a2296c 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1,15 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 2017 - 2018 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - */ +/* Copyright(c) 2017 - 2018 Intel Corporation. */ #include #include -- cgit v1.2.3 From 1c4917da36ed76981cc3c2671b3a44765c02bbc3 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 22 May 2018 09:35:00 +0200 Subject: samples/bpf: adapt xdpsock to the new uapi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapt xdpsock to use the new getsockopt introduced in the previous commit. 
Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- samples/bpf/xdpsock_user.c | 123 ++++++++++++++++++++++++++++----------------- 1 file changed, 76 insertions(+), 47 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 60a882a2296c..e379eac034ac 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -79,7 +79,10 @@ struct xdp_umem_uqueue { u32 cached_cons; u32 mask; u32 size; - struct xdp_umem_ring *ring; + u32 *producer; + u32 *consumer; + u32 *ring; + void *map; }; struct xdp_umem { @@ -94,7 +97,10 @@ struct xdp_uqueue { u32 cached_cons; u32 mask; u32 size; - struct xdp_rxtx_ring *ring; + u32 *producer; + u32 *consumer; + struct xdp_desc *ring; + void *map; }; struct xdpsock { @@ -155,7 +161,7 @@ static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb) return free_entries; /* Refresh the local tail pointer */ - q->cached_cons = q->ring->ptrs.consumer; + q->cached_cons = *q->consumer; return q->size - (q->cached_prod - q->cached_cons); } @@ -168,7 +174,7 @@ static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs) return free_entries; /* Refresh the local tail pointer */ - q->cached_cons = q->ring->ptrs.consumer + q->size; + q->cached_cons = *q->consumer + q->size; return q->cached_cons - q->cached_prod; } @@ -177,7 +183,7 @@ static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb) u32 entries = q->cached_prod - q->cached_cons; if (entries == 0) { - q->cached_prod = q->ring->ptrs.producer; + q->cached_prod = *q->producer; entries = q->cached_prod - q->cached_cons; } @@ -189,7 +195,7 @@ static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs) u32 entries = q->cached_prod - q->cached_cons; if (entries == 0) { - q->cached_prod = q->ring->ptrs.producer; + q->cached_prod = *q->producer; entries = q->cached_prod - q->cached_cons; } @@ -208,12 +214,12 @@ static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq, for (i = 0; i < nb; i++) 
{ u32 idx = fq->cached_prod++ & fq->mask; - fq->ring->desc[idx] = d[i].idx; + fq->ring[idx] = d[i].idx; } u_smp_wmb(); - fq->ring->ptrs.producer = fq->cached_prod; + *fq->producer = fq->cached_prod; return 0; } @@ -229,12 +235,12 @@ static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d, for (i = 0; i < nb; i++) { u32 idx = fq->cached_prod++ & fq->mask; - fq->ring->desc[idx] = d[i]; + fq->ring[idx] = d[i]; } u_smp_wmb(); - fq->ring->ptrs.producer = fq->cached_prod; + *fq->producer = fq->cached_prod; return 0; } @@ -248,13 +254,13 @@ static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq, for (i = 0; i < entries; i++) { idx = cq->cached_cons++ & cq->mask; - d[i] = cq->ring->desc[idx]; + d[i] = cq->ring[idx]; } if (entries > 0) { u_smp_wmb(); - cq->ring->ptrs.consumer = cq->cached_cons; + *cq->consumer = cq->cached_cons; } return entries; @@ -270,7 +276,7 @@ static inline int xq_enq(struct xdp_uqueue *uq, const struct xdp_desc *descs, unsigned int ndescs) { - struct xdp_rxtx_ring *r = uq->ring; + struct xdp_desc *r = uq->ring; unsigned int i; if (xq_nb_free(uq, ndescs) < ndescs) @@ -279,21 +285,21 @@ static inline int xq_enq(struct xdp_uqueue *uq, for (i = 0; i < ndescs; i++) { u32 idx = uq->cached_prod++ & uq->mask; - r->desc[idx].idx = descs[i].idx; - r->desc[idx].len = descs[i].len; - r->desc[idx].offset = descs[i].offset; + r[idx].idx = descs[i].idx; + r[idx].len = descs[i].len; + r[idx].offset = descs[i].offset; } u_smp_wmb(); - r->ptrs.producer = uq->cached_prod; + *uq->producer = uq->cached_prod; return 0; } static inline int xq_enq_tx_only(struct xdp_uqueue *uq, __u32 idx, unsigned int ndescs) { - struct xdp_rxtx_ring *q = uq->ring; + struct xdp_desc *r = uq->ring; unsigned int i; if (xq_nb_free(uq, ndescs) < ndescs) @@ -302,14 +308,14 @@ static inline int xq_enq_tx_only(struct xdp_uqueue *uq, for (i = 0; i < ndescs; i++) { u32 idx = uq->cached_prod++ & uq->mask; - q->desc[idx].idx = idx + i; - q->desc[idx].len = 
sizeof(pkt_data) - 1; - q->desc[idx].offset = 0; + r[idx].idx = idx + i; + r[idx].len = sizeof(pkt_data) - 1; + r[idx].offset = 0; } u_smp_wmb(); - q->ptrs.producer = uq->cached_prod; + *uq->producer = uq->cached_prod; return 0; } @@ -317,7 +323,7 @@ static inline int xq_deq(struct xdp_uqueue *uq, struct xdp_desc *descs, int ndescs) { - struct xdp_rxtx_ring *r = uq->ring; + struct xdp_desc *r = uq->ring; unsigned int idx; int i, entries; @@ -327,13 +333,13 @@ static inline int xq_deq(struct xdp_uqueue *uq, for (i = 0; i < entries; i++) { idx = uq->cached_cons++ & uq->mask; - descs[i] = r->desc[idx]; + descs[i] = r[idx]; } if (entries > 0) { u_smp_wmb(); - r->ptrs.consumer = uq->cached_cons; + *uq->consumer = uq->cached_cons; } return entries; @@ -392,8 +398,10 @@ static size_t gen_eth_frame(char *frame) static struct xdp_umem *xdp_umem_configure(int sfd) { int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS; + struct xdp_mmap_offsets off; struct xdp_umem_reg mr; struct xdp_umem *umem; + socklen_t optlen; void *bufs; umem = calloc(1, sizeof(*umem)); @@ -413,25 +421,35 @@ static struct xdp_umem *xdp_umem_configure(int sfd) lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size, sizeof(int)) == 0); - umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) + - FQ_NUM_DESCS * sizeof(u32), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, sfd, - XDP_UMEM_PGOFF_FILL_RING); - lassert(umem->fq.ring != MAP_FAILED); + optlen = sizeof(off); + lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off, + &optlen) == 0); + + umem->fq.map = mmap(0, off.fr.desc + + FQ_NUM_DESCS * sizeof(u32), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, sfd, + XDP_UMEM_PGOFF_FILL_RING); + lassert(umem->fq.map != MAP_FAILED); umem->fq.mask = FQ_NUM_DESCS - 1; umem->fq.size = FQ_NUM_DESCS; + umem->fq.producer = umem->fq.map + off.fr.producer; + umem->fq.consumer = umem->fq.map + off.fr.consumer; + umem->fq.ring = umem->fq.map + off.fr.desc; - umem->cq.ring = mmap(0, 
sizeof(struct xdp_umem_ring) + + umem->cq.map = mmap(0, off.cr.desc + CQ_NUM_DESCS * sizeof(u32), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, sfd, XDP_UMEM_PGOFF_COMPLETION_RING); - lassert(umem->cq.ring != MAP_FAILED); + lassert(umem->cq.map != MAP_FAILED); umem->cq.mask = CQ_NUM_DESCS - 1; umem->cq.size = CQ_NUM_DESCS; + umem->cq.producer = umem->cq.map + off.cr.producer; + umem->cq.consumer = umem->cq.map + off.cr.consumer; + umem->cq.ring = umem->cq.map + off.cr.desc; umem->frames = (char (*)[FRAME_SIZE])bufs; umem->fd = sfd; @@ -449,9 +467,11 @@ static struct xdp_umem *xdp_umem_configure(int sfd) static struct xdpsock *xsk_configure(struct xdp_umem *umem) { struct sockaddr_xdp sxdp = {}; + struct xdp_mmap_offsets off; int sfd, ndescs = NUM_DESCS; struct xdpsock *xsk; bool shared = true; + socklen_t optlen; u32 i; sfd = socket(PF_XDP, SOCK_RAW, 0); @@ -474,15 +494,18 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem) &ndescs, sizeof(int)) == 0); lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING, &ndescs, sizeof(int)) == 0); + optlen = sizeof(off); + lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off, + &optlen) == 0); /* Rx */ - xsk->rx.ring = mmap(NULL, - sizeof(struct xdp_ring) + - NUM_DESCS * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, sfd, - XDP_PGOFF_RX_RING); - lassert(xsk->rx.ring != MAP_FAILED); + xsk->rx.map = mmap(NULL, + off.rx.desc + + NUM_DESCS * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, sfd, + XDP_PGOFF_RX_RING); + lassert(xsk->rx.map != MAP_FAILED); if (!shared) { for (i = 0; i < NUM_DESCS / 2; i++) @@ -491,19 +514,25 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem) } /* Tx */ - xsk->tx.ring = mmap(NULL, - sizeof(struct xdp_ring) + - NUM_DESCS * sizeof(struct xdp_desc), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, sfd, - XDP_PGOFF_TX_RING); - lassert(xsk->tx.ring != MAP_FAILED); + xsk->tx.map = mmap(NULL, + off.tx.desc + + 
NUM_DESCS * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, sfd, + XDP_PGOFF_TX_RING); + lassert(xsk->tx.map != MAP_FAILED); xsk->rx.mask = NUM_DESCS - 1; xsk->rx.size = NUM_DESCS; + xsk->rx.producer = xsk->rx.map + off.rx.producer; + xsk->rx.consumer = xsk->rx.map + off.rx.consumer; + xsk->rx.ring = xsk->rx.map + off.rx.desc; xsk->tx.mask = NUM_DESCS - 1; xsk->tx.size = NUM_DESCS; + xsk->tx.producer = xsk->tx.map + off.tx.producer; + xsk->tx.consumer = xsk->tx.map + off.tx.consumer; + xsk->tx.ring = xsk->tx.map + off.tx.desc; sxdp.sxdp_family = PF_XDP; sxdp.sxdp_ifindex = opt_ifindex; -- cgit v1.2.3 From ecb96f7fe153c7ff2fd31db64c52a53b7e6401ab Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 24 May 2018 11:21:56 -0700 Subject: samples/bpf: add a samples/bpf test for BPF_TASK_FD_QUERY This is mostly to test kprobe/uprobe which needs kernel headers. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- samples/bpf/Makefile | 4 + samples/bpf/task_fd_query_kern.c | 19 ++ samples/bpf/task_fd_query_user.c | 382 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 405 insertions(+) create mode 100644 samples/bpf/task_fd_query_kern.c create mode 100644 samples/bpf/task_fd_query_user.c (limited to 'samples/bpf') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 62d1aa1a4cf3..7dc85ed0ce4b 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -51,6 +51,7 @@ hostprogs-y += cpustat hostprogs-y += xdp_adjust_tail hostprogs-y += xdpsock hostprogs-y += xdp_fwd +hostprogs-y += task_fd_query # Libbpf dependencies LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a @@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o xdp_adjust_tail-objs := xdp_adjust_tail_user.o xdpsock-objs := bpf_load.o xdpsock_user.o xdp_fwd-objs := bpf_load.o xdp_fwd_user.o +task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -160,6 +162,7 @@ 
always += cpustat_kern.o always += xdp_adjust_tail_kern.o always += xdpsock_kern.o always += xdp_fwd_kern.o +always += task_fd_query_kern.o HOSTCFLAGS += -I$(objtree)/usr/include HOSTCFLAGS += -I$(srctree)/tools/lib/ @@ -175,6 +178,7 @@ HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/ HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/ +HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/ HOST_LOADLIBES += $(LIBBPF) -lelf HOSTLOADLIBES_tracex4 += -lrt diff --git a/samples/bpf/task_fd_query_kern.c b/samples/bpf/task_fd_query_kern.c new file mode 100644 index 000000000000..f4b0a9ea674d --- /dev/null +++ b/samples/bpf/task_fd_query_kern.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include "bpf_helpers.h" + +SEC("kprobe/blk_start_request") +int bpf_prog1(struct pt_regs *ctx) +{ + return 0; +} + +SEC("kretprobe/blk_account_io_completion") +int bpf_prog2(struct pt_regs *ctx) +{ + return 0; +} +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c new file mode 100644 index 000000000000..8381d792f138 --- /dev/null +++ b/samples/bpf/task_fd_query_user.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libbpf.h" +#include "bpf_load.h" +#include "bpf_util.h" +#include "perf-sys.h" +#include "trace_helpers.h" + +#define CHECK_PERROR_RET(condition) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + printf("FAIL: %s:\n", __func__); \ + perror(" "); \ + return -1; \ + } \ +}) + +#define CHECK_AND_RET(condition) ({ \ + int __ret = !!(condition); \ + if (__ret) \ + return -1; \ +}) + +static __u64 ptr_to_u64(void *ptr) +{ + 
return (__u64) (unsigned long) ptr; +} + +#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type" +static int bpf_find_probe_type(const char *event_type) +{ + char buf[256]; + int fd, ret; + + ret = snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type); + CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf)); + + fd = open(buf, O_RDONLY); + CHECK_PERROR_RET(fd < 0); + + ret = read(fd, buf, sizeof(buf)); + close(fd); + CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf)); + + errno = 0; + ret = (int)strtol(buf, NULL, 10); + CHECK_PERROR_RET(errno); + return ret; +} + +#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe" +static int bpf_get_retprobe_bit(const char *event_type) +{ + char buf[256]; + int fd, ret; + + ret = snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type); + CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf)); + + fd = open(buf, O_RDONLY); + CHECK_PERROR_RET(fd < 0); + + ret = read(fd, buf, sizeof(buf)); + close(fd); + CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf)); + CHECK_PERROR_RET(strlen(buf) < strlen("config:")); + + errno = 0; + ret = (int)strtol(buf + strlen("config:"), NULL, 10); + CHECK_PERROR_RET(errno); + return ret; +} + +static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name, + __u32 expected_fd_type) +{ + __u64 probe_offset, probe_addr; + __u32 len, prog_id, fd_type; + char buf[256]; + int err; + + len = sizeof(buf); + err = bpf_task_fd_query(getpid(), event_fd[prog_fd_idx], 0, buf, &len, + &prog_id, &fd_type, &probe_offset, + &probe_addr); + if (err < 0) { + printf("FAIL: %s, for event_fd idx %d, fn_name %s\n", + __func__, prog_fd_idx, fn_name); + perror(" :"); + return -1; + } + if (strcmp(buf, fn_name) != 0 || + fd_type != expected_fd_type || + probe_offset != 0x0 || probe_addr != 0x0) { + printf("FAIL: bpf_trace_event_query(event_fd[%d]):\n", + prog_fd_idx); + printf("buf: %s, fd_type: %u, probe_offset: 0x%llx," + " probe_addr: 0x%llx\n", + buf, fd_type, probe_offset, probe_addr); + return -1; + } 
+ return 0; +} + +static int test_nondebug_fs_kuprobe_common(const char *event_type, + const char *name, __u64 offset, __u64 addr, bool is_return, + char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, + __u64 *probe_offset, __u64 *probe_addr) +{ + int is_return_bit = bpf_get_retprobe_bit(event_type); + int type = bpf_find_probe_type(event_type); + struct perf_event_attr attr = {}; + int fd; + + if (type < 0 || is_return_bit < 0) { + printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n", + __func__, type, is_return_bit); + return -1; + } + + attr.sample_period = 1; + attr.wakeup_events = 1; + if (is_return) + attr.config |= 1 << is_return_bit; + + if (name) { + attr.config1 = ptr_to_u64((void *)name); + attr.config2 = offset; + } else { + attr.config1 = 0; + attr.config2 = addr; + } + attr.size = sizeof(attr); + attr.type = type; + + fd = sys_perf_event_open(&attr, -1, 0, -1, 0); + CHECK_PERROR_RET(fd < 0); + + CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0); + CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0); + CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len, + prog_id, fd_type, probe_offset, probe_addr) < 0); + + return 0; +} + +static int test_nondebug_fs_probe(const char *event_type, const char *name, + __u64 offset, __u64 addr, bool is_return, + __u32 expected_fd_type, + __u32 expected_ret_fd_type, + char *buf, __u32 buf_len) +{ + __u64 probe_offset, probe_addr; + __u32 prog_id, fd_type; + int err; + + err = test_nondebug_fs_kuprobe_common(event_type, name, + offset, addr, is_return, + buf, &buf_len, &prog_id, + &fd_type, &probe_offset, + &probe_addr); + if (err < 0) { + printf("FAIL: %s, " + "for name %s, offset 0x%llx, addr 0x%llx, is_return %d\n", + __func__, name ? 
name : "", offset, addr, is_return); + perror(" :"); + return -1; + } + if ((is_return && fd_type != expected_ret_fd_type) || + (!is_return && fd_type != expected_fd_type)) { + printf("FAIL: %s, incorrect fd_type %u\n", + __func__, fd_type); + return -1; + } + if (name) { + if (strcmp(name, buf) != 0) { + printf("FAIL: %s, incorrect buf %s\n", __func__, buf); + return -1; + } + if (probe_offset != offset) { + printf("FAIL: %s, incorrect probe_offset 0x%llx\n", + __func__, probe_offset); + return -1; + } + } else { + if (buf_len != 0) { + printf("FAIL: %s, incorrect buf %p\n", + __func__, buf); + return -1; + } + + if (probe_addr != addr) { + printf("FAIL: %s, incorrect probe_addr 0x%llx\n", + __func__, probe_addr); + return -1; + } + } + return 0; +} + +static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return) +{ + const char *event_type = "uprobe"; + struct perf_event_attr attr = {}; + char buf[256], event_alias[256]; + __u64 probe_offset, probe_addr; + __u32 len, prog_id, fd_type; + int err, res, kfd, efd; + ssize_t bytes; + + snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", + event_type); + kfd = open(buf, O_WRONLY | O_APPEND, 0); + CHECK_PERROR_RET(kfd < 0); + + res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid()); + CHECK_PERROR_RET(res < 0 || res >= sizeof(event_alias)); + + res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx", + is_return ? 
'r' : 'p', event_type, event_alias, + binary_path, offset); + CHECK_PERROR_RET(res < 0 || res >= sizeof(buf)); + CHECK_PERROR_RET(write(kfd, buf, strlen(buf)) < 0); + + close(kfd); + kfd = -1; + + snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s/id", + event_type, event_alias); + efd = open(buf, O_RDONLY, 0); + CHECK_PERROR_RET(efd < 0); + + bytes = read(efd, buf, sizeof(buf)); + CHECK_PERROR_RET(bytes <= 0 || bytes >= sizeof(buf)); + close(efd); + buf[bytes] = '\0'; + + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_period = 1; + attr.wakeup_events = 1; + kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC); + CHECK_PERROR_RET(kfd < 0); + CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0); + CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_ENABLE, 0) < 0); + + len = sizeof(buf); + err = bpf_task_fd_query(getpid(), kfd, 0, buf, &len, + &prog_id, &fd_type, &probe_offset, + &probe_addr); + if (err < 0) { + printf("FAIL: %s, binary_path %s\n", __func__, binary_path); + perror(" :"); + return -1; + } + if ((is_return && fd_type != BPF_FD_TYPE_URETPROBE) || + (!is_return && fd_type != BPF_FD_TYPE_UPROBE)) { + printf("FAIL: %s, incorrect fd_type %u\n", __func__, + fd_type); + return -1; + } + if (strcmp(binary_path, buf) != 0) { + printf("FAIL: %s, incorrect buf %s\n", __func__, buf); + return -1; + } + if (probe_offset != offset) { + printf("FAIL: %s, incorrect probe_offset 0x%llx\n", __func__, + probe_offset); + return -1; + } + + close(kfd); + return 0; +} + +int main(int argc, char **argv) +{ + struct rlimit r = {1024*1024, RLIM_INFINITY}; + extern char __executable_start; + char filename[256], buf[256]; + __u64 uprobe_file_offset; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + perror("setrlimit(RLIMIT_MEMLOCK)"); + return 1; + } + + if (load_kallsyms()) { + printf("failed to process /proc/kallsyms\n"); + return 1; + } + + if 
(load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + /* test two functions in the corresponding *_kern.c file */ + CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_start_request", + BPF_FD_TYPE_KPROBE)); + CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion", + BPF_FD_TYPE_KRETPROBE)); + + /* test nondebug fs kprobe */ + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0, + false, BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + buf, sizeof(buf))); +#ifdef __x86_64__ + /* set a kprobe on "bpf_check + 0x5", which is x64 specific */ + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x5, 0x0, + false, BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + buf, sizeof(buf))); +#endif + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0, + true, BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + buf, sizeof(buf))); + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0, + ksym_get_addr("bpf_check"), false, + BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + buf, sizeof(buf))); + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0, + ksym_get_addr("bpf_check"), false, + BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + NULL, 0)); + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0, + ksym_get_addr("bpf_check"), true, + BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + buf, sizeof(buf))); + CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0, + ksym_get_addr("bpf_check"), true, + BPF_FD_TYPE_KPROBE, + BPF_FD_TYPE_KRETPROBE, + 0, 0)); + + /* test nondebug fs uprobe */ + /* the calculation of uprobe file offset is based on gcc 7.3.1 on x64 + * and the default linker script, which defines __executable_start as + * the start of the .text section. The calculation could be different + * on different systems with different compilers. The right way is + * to parse the ELF file. We took a shortcut here. 
+ */ + uprobe_file_offset = (__u64)main - (__u64)&__executable_start; + CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0], + uprobe_file_offset, 0x0, false, + BPF_FD_TYPE_UPROBE, + BPF_FD_TYPE_URETPROBE, + buf, sizeof(buf))); + CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0], + uprobe_file_offset, 0x0, true, + BPF_FD_TYPE_UPROBE, + BPF_FD_TYPE_URETPROBE, + buf, sizeof(buf))); + + /* test debug fs uprobe */ + CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset, + false)); + CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset, + true)); + + return 0; +} -- cgit v1.2.3 From 9940fbf633e8714c7c885f8d3848f508b8612069 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 24 May 2018 16:46:02 +0200 Subject: samples/bpf: xdp_monitor use tracepoint xdp:xdp_devmap_xmit The xdp_monitor sample/tool is updated to use the new tracepoint xdp:xdp_devmap_xmit the previous patch just introduced. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- samples/bpf/xdp_monitor_kern.c | 39 +++++++++++++++++++++++++++++++++++++ samples/bpf/xdp_monitor_user.c | 44 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 82 insertions(+), 1 deletion(-) (limited to 'samples/bpf') diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c index 211db8ded0de..2854aa0665ea 100644 --- a/samples/bpf/xdp_monitor_kern.c +++ b/samples/bpf/xdp_monitor_kern.c @@ -208,3 +208,42 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx) return 0; } + +struct bpf_map_def SEC("maps") devmap_xmit_cnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(struct datarec), + .max_entries = 1, +}; + +/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format + * Code in: kernel/include/trace/events/xdp.h + */ +struct devmap_xmit_ctx { + u64 __pad; // First 8 bytes are not accessible by bpf code + int map_id; // offset:8; size:4; 
signed:1; + u32 act; // offset:12; size:4; signed:0; + u32 map_index; // offset:16; size:4; signed:0; + int drops; // offset:20; size:4; signed:1; + int sent; // offset:24; size:4; signed:1; + int from_ifindex; // offset:28; size:4; signed:1; + int to_ifindex; // offset:32; size:4; signed:1; +}; + +SEC("tracepoint/xdp/xdp_devmap_xmit") +int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx) +{ + struct datarec *rec; + u32 key = 0; + + rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &key); + if (!rec) + return 0; + rec->processed += ctx->sent; + rec->dropped += ctx->drops; + + /* Record bulk events, then userspace can calc average bulk size */ + rec->info += 1; + + return 1; +} diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index bf09b5188acd..7e18a454924c 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -141,6 +141,7 @@ struct stats_record { struct record_u64 xdp_exception[XDP_ACTION_MAX]; struct record xdp_cpumap_kthread; struct record xdp_cpumap_enqueue[MAX_CPUS]; + struct record xdp_devmap_xmit; }; static bool map_collect_record(int fd, __u32 key, struct record *rec) @@ -397,7 +398,7 @@ static void stats_print(struct stats_record *stats_rec, info = calc_info(r, p, t); if (info > 0) i_str = "sched"; - if (pps > 0) + if (pps > 0 || drop > 0) printf(fmt1, "cpumap-kthread", i, pps, drop, info, i_str); } @@ -409,6 +410,42 @@ static void stats_print(struct stats_record *stats_rec, printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str); } + /* devmap ndo_xdp_xmit stats */ + { + char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s\n"; + char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s\n"; + struct record *rec, *prev; + double drop, info; + char *i_str = ""; + + rec = &stats_rec->xdp_devmap_xmit; + prev = &stats_prev->xdp_devmap_xmit; + t = calc_period(rec, prev); + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + + pps = calc_pps(r, p, 
t); + drop = calc_drop(r, p, t); + info = calc_info(r, p, t); + if (info > 0) { + i_str = "bulk-average"; + info = (pps+drop) / info; /* calc avg bulk */ + } + if (pps > 0 || drop > 0) + printf(fmt1, "devmap-xmit", + i, pps, drop, info, i_str); + } + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop(&rec->total, &prev->total, t); + info = calc_info(&rec->total, &prev->total, t); + if (info > 0) { + i_str = "bulk-average"; + info = (pps+drop) / info; /* calc avg bulk */ + } + printf(fmt2, "devmap-xmit", "total", pps, drop, info, i_str); + } + printf("\n"); } @@ -437,6 +474,9 @@ static bool stats_collect(struct stats_record *rec) fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */ map_collect_record(fd, 0, &rec->xdp_cpumap_kthread); + fd = map_data[4].fd; /* map4: devmap_xmit_cnt */ + map_collect_record(fd, 0, &rec->xdp_devmap_xmit); + return true; } @@ -480,6 +520,7 @@ static struct stats_record *alloc_stats_record(void) rec_sz = sizeof(struct datarec); rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz); + rec->xdp_devmap_xmit.cpu = alloc_rec_per_cpu(rec_sz); for (i = 0; i < MAX_CPUS; i++) rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz); @@ -498,6 +539,7 @@ static void free_stats_record(struct stats_record *r) free(r->xdp_exception[i].cpu); free(r->xdp_cpumap_kthread.cpu); + free(r->xdp_devmap_xmit.cpu); for (i = 0; i < MAX_CPUS; i++) free(r->xdp_cpumap_enqueue[i].cpu); -- cgit v1.2.3 From a570e48fee1bc26f47aba2e1493f96a03bed3c8f Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 24 May 2018 16:46:22 +0200 Subject: samples/bpf: xdp_monitor use err code from tracepoint xdp:xdp_devmap_xmit Update xdp_monitor to use the recently added err code introduced in tracepoint xdp:xdp_devmap_xmit, to show if the drop count is caused by some driver general delivery problem. Other kinds of drops will likely just be more normal TX space issues. 
Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- samples/bpf/xdp_monitor_kern.c | 10 ++++++++++ samples/bpf/xdp_monitor_user.c | 35 ++++++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 5 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c index 2854aa0665ea..ad10fe700d7d 100644 --- a/samples/bpf/xdp_monitor_kern.c +++ b/samples/bpf/xdp_monitor_kern.c @@ -125,6 +125,7 @@ struct datarec { u64 processed; u64 dropped; u64 info; + u64 err; }; #define MAX_CPUS 64 @@ -228,6 +229,7 @@ struct devmap_xmit_ctx { int sent; // offset:24; size:4; signed:1; int from_ifindex; // offset:28; size:4; signed:1; int to_ifindex; // offset:32; size:4; signed:1; + int err; // offset:36; size:4; signed:1; }; SEC("tracepoint/xdp/xdp_devmap_xmit") @@ -245,5 +247,13 @@ int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx) /* Record bulk events, then userspace can calc average bulk size */ rec->info += 1; + /* Record error cases, where no frame were sent */ + if (ctx->err) + rec->err++; + + /* Catch API error of drv ndo_xdp_xmit sent more than count */ + if (ctx->drops < 0) + rec->err++; + return 1; } diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index 7e18a454924c..dd558cbb2309 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -117,6 +117,7 @@ struct datarec { __u64 processed; __u64 dropped; __u64 info; + __u64 err; }; #define MAX_CPUS 64 @@ -152,6 +153,7 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec) __u64 sum_processed = 0; __u64 sum_dropped = 0; __u64 sum_info = 0; + __u64 sum_err = 0; int i; if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { @@ -170,10 +172,13 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec) sum_dropped += values[i].dropped; rec->cpu[i].info = values[i].info; sum_info += values[i].info; + rec->cpu[i].err = values[i].err; + sum_err += values[i].err; 
} rec->total.processed = sum_processed; rec->total.dropped = sum_dropped; rec->total.info = sum_info; + rec->total.err = sum_err; return true; } @@ -274,6 +279,18 @@ static double calc_info(struct datarec *r, struct datarec *p, double period) return pps; } +static double calc_err(struct datarec *r, struct datarec *p, double period) +{ + __u64 packets = 0; + double pps = 0; + + if (period > 0) { + packets = r->err - p->err; + pps = packets / period; + } + return pps; +} + static void stats_print(struct stats_record *stats_rec, struct stats_record *stats_prev, bool err_only) @@ -412,11 +429,12 @@ static void stats_print(struct stats_record *stats_rec, /* devmap ndo_xdp_xmit stats */ { - char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s\n"; - char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s\n"; + char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s %s\n"; + char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s %s\n"; struct record *rec, *prev; - double drop, info; + double drop, info, err; char *i_str = ""; + char *err_str = ""; rec = &stats_rec->xdp_devmap_xmit; prev = &stats_prev->xdp_devmap_xmit; @@ -428,22 +446,29 @@ static void stats_print(struct stats_record *stats_rec, pps = calc_pps(r, p, t); drop = calc_drop(r, p, t); info = calc_info(r, p, t); + err = calc_err(r, p, t); if (info > 0) { i_str = "bulk-average"; info = (pps+drop) / info; /* calc avg bulk */ } + if (err > 0) + err_str = "drv-err"; if (pps > 0 || drop > 0) printf(fmt1, "devmap-xmit", - i, pps, drop, info, i_str); + i, pps, drop, info, i_str, err_str); } pps = calc_pps(&rec->total, &prev->total, t); drop = calc_drop(&rec->total, &prev->total, t); info = calc_info(&rec->total, &prev->total, t); + err = calc_err(&rec->total, &prev->total, t); if (info > 0) { i_str = "bulk-average"; info = (pps+drop) / info; /* calc avg bulk */ } - printf(fmt2, "devmap-xmit", "total", pps, drop, info, i_str); + if (err > 0) + err_str = "drv-err"; + printf(fmt2, "devmap-xmit", "total", pps, drop, + info, 
i_str, err_str); } printf("\n"); -- cgit v1.2.3 From bd3a08aaa9a383ffbbd5b788b797ae6e64eaa7a1 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 3 Jun 2018 08:15:19 -0700 Subject: bpf: flowlabel in bpf_fib_lookup should be flowinfo As Michal noted the flow struct takes both the flow label and priority. Update the bpf_fib_lookup API to note that it is flowinfo and not just the flow label. Cc: Michal Kubecek Signed-off-by: David Ahern Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- net/core/filter.c | 2 +- samples/bpf/xdp_fwd_kern.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'samples/bpf') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 18712b0dbfe7..eeb6237be5c2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2629,7 +2629,7 @@ struct bpf_fib_lookup { union { /* inputs to lookup */ __u8 tos; /* AF_INET */ - __be32 flowlabel; /* AF_INET6 */ + __be32 flowinfo; /* AF_INET6, flow_label + priority */ /* output: metric of fib result (IPv4/IPv6 only) */ __u32 rt_metric; diff --git a/net/core/filter.c b/net/core/filter.c index a72ea9f61010..3d9ba7e5965a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4221,7 +4221,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, fl6.flowi6_oif = 0; strict = RT6_LOOKUP_F_HAS_SADDR; } - fl6.flowlabel = params->flowlabel; + fl6.flowlabel = params->flowinfo; fl6.flowi6_scope = 0; fl6.flowi6_flags = 0; fl6.mp_hash = 0; diff --git a/samples/bpf/xdp_fwd_kern.c b/samples/bpf/xdp_fwd_kern.c index 4a6be0f87505..6673cdb9f55c 100644 --- a/samples/bpf/xdp_fwd_kern.c +++ b/samples/bpf/xdp_fwd_kern.c @@ -88,7 +88,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags) return XDP_PASS; fib_params.family = AF_INET6; - fib_params.flowlabel = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK; + fib_params.flowinfo = *(__be32 *)ip6h & IPV6_FLOWINFO_MASK; fib_params.l4_protocol = ip6h->nexthdr; fib_params.sport = 
0; fib_params.dport = 0; -- cgit v1.2.3 From a412ef54fc2eb81bb55428dcdcdaa2e38ae9bba5 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Mon, 4 Jun 2018 13:57:14 +0200 Subject: samples/bpf: adapted to new uapi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here, the xdpsock sample application is adjusted to the new descriptor format. Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- samples/bpf/xdpsock_user.c | 84 ++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 48 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index e379eac034ac..b71a342b9082 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -46,6 +46,7 @@ #define NUM_FRAMES 131072 #define FRAME_HEADROOM 0 +#define FRAME_SHIFT 11 #define FRAME_SIZE 2048 #define NUM_DESCS 1024 #define BATCH_SIZE 16 @@ -55,6 +56,7 @@ #define DEBUG_HEXDUMP 0 +typedef __u64 u64; typedef __u32 u32; static unsigned long prev_time; @@ -81,12 +83,12 @@ struct xdp_umem_uqueue { u32 size; u32 *producer; u32 *consumer; - u32 *ring; + u64 *ring; void *map; }; struct xdp_umem { - char (*frames)[FRAME_SIZE]; + char *frames; struct xdp_umem_uqueue fq; struct xdp_umem_uqueue cq; int fd; @@ -214,7 +216,7 @@ static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq, for (i = 0; i < nb; i++) { u32 idx = fq->cached_prod++ & fq->mask; - fq->ring[idx] = d[i].idx; + fq->ring[idx] = d[i].addr; } u_smp_wmb(); @@ -224,7 +226,7 @@ static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq, return 0; } -static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d, +static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u64 *d, size_t nb) { u32 i; @@ -246,7 +248,7 @@ static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d, } static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq, - u32 *d, size_t nb) + u64 *d, 
size_t nb) { u32 idx, i, entries = umem_nb_avail(cq, nb); @@ -266,10 +268,9 @@ static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq, return entries; } -static inline void *xq_get_data(struct xdpsock *xsk, __u32 idx, __u32 off) +static inline void *xq_get_data(struct xdpsock *xsk, u64 addr) { - lassert(idx < NUM_FRAMES); - return &xsk->umem->frames[idx][off]; + return &xsk->umem->frames[addr]; } static inline int xq_enq(struct xdp_uqueue *uq, @@ -285,9 +286,8 @@ static inline int xq_enq(struct xdp_uqueue *uq, for (i = 0; i < ndescs; i++) { u32 idx = uq->cached_prod++ & uq->mask; - r[idx].idx = descs[i].idx; + r[idx].addr = descs[i].addr; r[idx].len = descs[i].len; - r[idx].offset = descs[i].offset; } u_smp_wmb(); @@ -297,7 +297,7 @@ static inline int xq_enq(struct xdp_uqueue *uq, } static inline int xq_enq_tx_only(struct xdp_uqueue *uq, - __u32 idx, unsigned int ndescs) + unsigned int id, unsigned int ndescs) { struct xdp_desc *r = uq->ring; unsigned int i; @@ -308,9 +308,8 @@ static inline int xq_enq_tx_only(struct xdp_uqueue *uq, for (i = 0; i < ndescs; i++) { u32 idx = uq->cached_prod++ & uq->mask; - r[idx].idx = idx + i; + r[idx].addr = (id + i) << FRAME_SHIFT; r[idx].len = sizeof(pkt_data) - 1; - r[idx].offset = 0; } u_smp_wmb(); @@ -357,17 +356,21 @@ static void swap_mac_addresses(void *data) *dst_addr = tmp; } -#if DEBUG_HEXDUMP -static void hex_dump(void *pkt, size_t length, const char *prefix) +static void hex_dump(void *pkt, size_t length, u64 addr) { - int i = 0; const unsigned char *address = (unsigned char *)pkt; const unsigned char *line = address; size_t line_size = 32; unsigned char c; + char buf[32]; + int i = 0; + if (!DEBUG_HEXDUMP) + return; + + sprintf(buf, "addr=%llu", addr); printf("length = %zu\n", length); - printf("%s | ", prefix); + printf("%s | ", buf); while (length-- > 0) { printf("%02X ", *address++); if (!(++i % line_size) || (length == 0 && i % line_size)) { @@ -382,12 +385,11 @@ static void hex_dump(void *pkt, 
size_t length, const char *prefix) } printf("\n"); if (length > 0) - printf("%s | ", prefix); + printf("%s | ", buf); } } printf("\n"); } -#endif static size_t gen_eth_frame(char *frame) { @@ -412,8 +414,8 @@ static struct xdp_umem *xdp_umem_configure(int sfd) mr.addr = (__u64)bufs; mr.len = NUM_FRAMES * FRAME_SIZE; - mr.frame_size = FRAME_SIZE; - mr.frame_headroom = FRAME_HEADROOM; + mr.chunk_size = FRAME_SIZE; + mr.headroom = FRAME_HEADROOM; lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)) == 0); lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size, @@ -426,7 +428,7 @@ static struct xdp_umem *xdp_umem_configure(int sfd) &optlen) == 0); umem->fq.map = mmap(0, off.fr.desc + - FQ_NUM_DESCS * sizeof(u32), + FQ_NUM_DESCS * sizeof(u64), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, sfd, XDP_UMEM_PGOFF_FILL_RING); @@ -439,7 +441,7 @@ static struct xdp_umem *xdp_umem_configure(int sfd) umem->fq.ring = umem->fq.map + off.fr.desc; umem->cq.map = mmap(0, off.cr.desc + - CQ_NUM_DESCS * sizeof(u32), + CQ_NUM_DESCS * sizeof(u64), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, sfd, XDP_UMEM_PGOFF_COMPLETION_RING); @@ -451,14 +453,14 @@ static struct xdp_umem *xdp_umem_configure(int sfd) umem->cq.consumer = umem->cq.map + off.cr.consumer; umem->cq.ring = umem->cq.map + off.cr.desc; - umem->frames = (char (*)[FRAME_SIZE])bufs; + umem->frames = bufs; umem->fd = sfd; if (opt_bench == BENCH_TXONLY) { int i; - for (i = 0; i < NUM_FRAMES; i++) - (void)gen_eth_frame(&umem->frames[i][0]); + for (i = 0; i < NUM_FRAMES * FRAME_SIZE; i += FRAME_SIZE) + (void)gen_eth_frame(&umem->frames[i]); } return umem; @@ -472,7 +474,7 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem) struct xdpsock *xsk; bool shared = true; socklen_t optlen; - u32 i; + u64 i; sfd = socket(PF_XDP, SOCK_RAW, 0); lassert(sfd >= 0); @@ -508,7 +510,7 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem) lassert(xsk->rx.map != MAP_FAILED); if (!shared) { - for (i = 0; i 
< NUM_DESCS / 2; i++) + for (i = 0; i < NUM_DESCS * FRAME_SIZE; i += FRAME_SIZE) lassert(umem_fill_to_kernel(&xsk->umem->fq, &i, 1) == 0); } @@ -727,7 +729,7 @@ static void kick_tx(int fd) static inline void complete_tx_l2fwd(struct xdpsock *xsk) { - u32 descs[BATCH_SIZE]; + u64 descs[BATCH_SIZE]; unsigned int rcvd; size_t ndescs; @@ -749,7 +751,7 @@ static inline void complete_tx_l2fwd(struct xdpsock *xsk) static inline void complete_tx_only(struct xdpsock *xsk) { - u32 descs[BATCH_SIZE]; + u64 descs[BATCH_SIZE]; unsigned int rcvd; if (!xsk->outstanding_tx) @@ -774,17 +776,9 @@ static void rx_drop(struct xdpsock *xsk) return; for (i = 0; i < rcvd; i++) { - u32 idx = descs[i].idx; - - lassert(idx < NUM_FRAMES); -#if DEBUG_HEXDUMP - char *pkt; - char buf[32]; + char *pkt = xq_get_data(xsk, descs[i].addr); - pkt = xq_get_data(xsk, idx, descs[i].offset); - sprintf(buf, "idx=%d", idx); - hex_dump(pkt, descs[i].len, buf); -#endif + hex_dump(pkt, descs[i].len, descs[i].addr); } xsk->rx_npkts += rcvd; @@ -867,17 +861,11 @@ static void l2fwd(struct xdpsock *xsk) } for (i = 0; i < rcvd; i++) { - char *pkt = xq_get_data(xsk, descs[i].idx, - descs[i].offset); + char *pkt = xq_get_data(xsk, descs[i].addr); swap_mac_addresses(pkt); -#if DEBUG_HEXDUMP - char buf[32]; - u32 idx = descs[i].idx; - sprintf(buf, "idx=%d", idx); - hex_dump(pkt, descs[i].len, buf); -#endif + hex_dump(pkt, descs[i].len, descs[i].addr); } xsk->rx_npkts += rcvd; -- cgit v1.2.3 From a65ea68b8dd92fea86d2b8ca7e43caeaa5ddcdff Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 4 Jun 2018 13:57:15 +0200 Subject: samples/bpf: minor *_nb_free performance fix Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann --- samples/bpf/xdpsock_user.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'samples/bpf') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index b71a342b9082..7494f60fbff8 100644 --- a/samples/bpf/xdpsock_user.c +++ 
b/samples/bpf/xdpsock_user.c @@ -157,15 +157,15 @@ static const char pkt_data[] = static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb) { - u32 free_entries = q->size - (q->cached_prod - q->cached_cons); + u32 free_entries = q->cached_cons - q->cached_prod; if (free_entries >= nb) return free_entries; /* Refresh the local tail pointer */ - q->cached_cons = *q->consumer; + q->cached_cons = *q->consumer + q->size; - return q->size - (q->cached_prod - q->cached_cons); + return q->cached_cons - q->cached_prod; } static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs) @@ -439,6 +439,7 @@ static struct xdp_umem *xdp_umem_configure(int sfd) umem->fq.producer = umem->fq.map + off.fr.producer; umem->fq.consumer = umem->fq.map + off.fr.consumer; umem->fq.ring = umem->fq.map + off.fr.desc; + umem->fq.cached_cons = FQ_NUM_DESCS; umem->cq.map = mmap(0, off.cr.desc + CQ_NUM_DESCS * sizeof(u64), @@ -535,6 +536,7 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem) xsk->tx.producer = xsk->tx.map + off.tx.producer; xsk->tx.consumer = xsk->tx.map + off.tx.consumer; xsk->tx.ring = xsk->tx.map + off.tx.desc; + xsk->tx.cached_cons = NUM_DESCS; sxdp.sxdp_family = PF_XDP; sxdp.sxdp_ifindex = opt_ifindex; -- cgit v1.2.3 From 9f5232cc7f040f443f81069f553d31b27ab7eb79 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Mon, 4 Jun 2018 14:06:01 +0200 Subject: samples/bpf: xdpsock: use skb Tx path for XDP_SKB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that XDP_SKB also uses the skb Tx path. 
Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- samples/bpf/xdpsock_user.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'samples/bpf') diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 7494f60fbff8..d69c8d78d3fd 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -75,6 +75,7 @@ static int opt_queue; static int opt_poll; static int opt_shared_packet_buffer; static int opt_interval = 1; +static u32 opt_xdp_bind_flags; struct xdp_umem_uqueue { u32 cached_prod; @@ -541,9 +542,12 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem) sxdp.sxdp_family = PF_XDP; sxdp.sxdp_ifindex = opt_ifindex; sxdp.sxdp_queue_id = opt_queue; + if (shared) { sxdp.sxdp_flags = XDP_SHARED_UMEM; sxdp.sxdp_shared_umem_fd = umem->fd; + } else { + sxdp.sxdp_flags = opt_xdp_bind_flags; } lassert(bind(sfd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0); @@ -699,6 +703,7 @@ static void parse_command_line(int argc, char **argv) break; case 'S': opt_xdp_flags |= XDP_FLAGS_SKB_MODE; + opt_xdp_bind_flags |= XDP_COPY; break; case 'N': opt_xdp_flags |= XDP_FLAGS_DRV_MODE; -- cgit v1.2.3