diff options
Diffstat (limited to 'tools/testing/selftests/bpf/progs')
50 files changed, 6611 insertions, 0 deletions
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c new file mode 100644 index 000000000000..284660f5aa95 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <limits.h> +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/icmp.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if_packet.h> +#include <sys/socket.h> +#include <linux/if_tunnel.h> +#include <linux/mpls.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; +#define PROG(F) SEC(#F) int bpf_func_##F + +/* These are the identifiers of the BPF programs that will be used in tail + * calls. Name is limited to 16 characters, with the terminating character and + * bpf_func_ above, we have only 6 to work with, anything after will be cropped. + */ +enum { + IP, + IPV6, + IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */ + IPV6FR, /* Fragmentation IPv6 Extension Header */ + MPLS, + VLAN, +}; + +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF +#define IP6_MF 0x0001 +#define IP6_OFFSET 0xFFF8 + +struct vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +struct gre_hdr { + __be16 flags; + __be16 proto; +}; + +struct frag_hdr { + __u8 nexthdr; + __u8 reserved; + __be16 frag_off; + __be32 identification; +}; + +struct bpf_map_def SEC("maps") jmp_table = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 8 +}; + +static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, + __u16 hdr_size, + void *buffer) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + __u16 thoff = skb->flow_keys->thoff; + __u8 *hdr; + + /* Verifies this variable offset does not overflow */ + if (thoff > (USHRT_MAX - hdr_size)) + return NULL; + + hdr = data + thoff; + if (hdr + hdr_size <= data_end) + return hdr; + + if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size)) + return NULL; + + return buffer; +} + +/* Dispatches on ETHERTYPE */ +static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + + keys->n_proto = proto; + switch (proto) { + case bpf_htons(ETH_P_IP): + bpf_tail_call(skb, &jmp_table, IP); + break; + case bpf_htons(ETH_P_IPV6): + bpf_tail_call(skb, &jmp_table, IPV6); + break; + case bpf_htons(ETH_P_MPLS_MC): + case bpf_htons(ETH_P_MPLS_UC): + bpf_tail_call(skb, &jmp_table, MPLS); + break; + case bpf_htons(ETH_P_8021Q): + case bpf_htons(ETH_P_8021AD): + bpf_tail_call(skb, &jmp_table, VLAN); + break; + default: + /* Protocol not supported */ + return BPF_DROP; + } + + return BPF_DROP; +} + +SEC("flow_dissector") +int _dissect(struct __sk_buff *skb) +{ + if (!skb->vlan_present) + return parse_eth_proto(skb, skb->protocol); + else + return parse_eth_proto(skb, skb->vlan_proto); +} + +/* Parses on IPPROTO_* */ +static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + void *data_end = (void *)(long)skb->data_end; + struct icmphdr *icmp, _icmp; + struct gre_hdr *gre, _gre; + struct ethhdr *eth, _eth; + struct tcphdr *tcp, _tcp; + struct udphdr *udp, _udp; + + keys->ip_proto = proto; + switch (proto) { + case IPPROTO_ICMP: + icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp); + if (!icmp) + return BPF_DROP; + return BPF_OK; + case IPPROTO_IPIP: + keys->is_encap = true; + return parse_eth_proto(skb, bpf_htons(ETH_P_IP)); + case IPPROTO_IPV6: + keys->is_encap = true; + return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6)); + case IPPROTO_GRE: + gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre); + if (!gre) + return BPF_DROP; + + if (bpf_htons(gre->flags & GRE_VERSION)) + /* Only inspect standard GRE packets with version 0 */ + return BPF_OK; + + keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */ + if (GRE_IS_CSUM(gre->flags)) + keys->thoff += 4; /* Step over chksum and Padding */ + if (GRE_IS_KEY(gre->flags)) + keys->thoff += 4; /* Step over key */ + if (GRE_IS_SEQ(gre->flags)) + keys->thoff += 4; /* Step over sequence number */ + + keys->is_encap = true; + + if (gre->proto == bpf_htons(ETH_P_TEB)) { + eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), + &_eth); + if (!eth) + return BPF_DROP; + + keys->thoff += sizeof(*eth); + + return parse_eth_proto(skb, eth->h_proto); + } else { + return parse_eth_proto(skb, gre->proto); + } + case IPPROTO_TCP: + tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp); + if (!tcp) + return BPF_DROP; + + if (tcp->doff < 5) + return BPF_DROP; + + if ((__u8 *)tcp + (tcp->doff << 2) > data_end) + return BPF_DROP; + + keys->sport = tcp->source; + keys->dport = tcp->dest; + return BPF_OK; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp); + if (!udp) + return BPF_DROP; + + keys->sport = udp->source; + keys->dport = udp->dest; + return BPF_OK; + default: + return BPF_DROP; + } + + return BPF_DROP; +} + +static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + + keys->ip_proto = nexthdr; + switch (nexthdr) { + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: + bpf_tail_call(skb, &jmp_table, IPV6OP); + break; + case IPPROTO_FRAGMENT: + bpf_tail_call(skb, &jmp_table, IPV6FR); + break; + default: + return parse_ip_proto(skb, nexthdr); + } + + return BPF_DROP; +} + +PROG(IP)(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + struct bpf_flow_keys *keys = skb->flow_keys; + void *data = (void *)(long)skb->data; + struct iphdr *iph, _iph; + bool done = false; + + iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph); + if (!iph) + return BPF_DROP; + + /* IP header cannot be smaller than 20 bytes */ + if (iph->ihl < 5) + return BPF_DROP; + + keys->addr_proto = ETH_P_IP; + keys->ipv4_src = iph->saddr; + keys->ipv4_dst = iph->daddr; + + keys->thoff += iph->ihl << 2; + if (data + keys->thoff > data_end) + return BPF_DROP; + + if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { + keys->is_frag = true; + if (iph->frag_off & bpf_htons(IP_OFFSET)) + /* From second fragment on, packets do not have headers + * we can parse. + */ + done = true; + else + keys->is_first_frag = true; + } + + if (done) + return BPF_OK; + + return parse_ip_proto(skb, iph->protocol); +} + +PROG(IPV6)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + struct ipv6hdr *ip6h, _ip6h; + + ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); + if (!ip6h) + return BPF_DROP; + + keys->addr_proto = ETH_P_IPV6; + memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); + + keys->thoff += sizeof(struct ipv6hdr); + + return parse_ipv6_proto(skb, ip6h->nexthdr); +} + +PROG(IPV6OP)(struct __sk_buff *skb) +{ + struct ipv6_opt_hdr *ip6h, _ip6h; + + ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); + if (!ip6h) + return BPF_DROP; + + /* hlen is in 8-octets and does not include the first 8 bytes + * of the header + */ + skb->flow_keys->thoff += (1 + ip6h->hdrlen) << 3; + + return parse_ipv6_proto(skb, ip6h->nexthdr); +} + +PROG(IPV6FR)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + struct frag_hdr *fragh, _fragh; + + fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh); + if (!fragh) + return BPF_DROP; + + keys->thoff += sizeof(*fragh); + keys->is_frag = true; + if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) + keys->is_first_frag = true; + + return parse_ipv6_proto(skb, fragh->nexthdr); +} + +PROG(MPLS)(struct __sk_buff *skb) +{ + struct mpls_label *mpls, _mpls; + + mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls); + if (!mpls) + return BPF_DROP; + + return BPF_OK; +} + +PROG(VLAN)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + struct vlan_hdr *vlan, _vlan; + __be16 proto; + + /* Peek back to see if single or double-tagging */ + if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto, + sizeof(proto))) + return BPF_DROP; + + /* Account for double-tagging */ + if (proto == bpf_htons(ETH_P_8021AD)) { + vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); + if (!vlan) + return BPF_DROP; + + if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) + return BPF_DROP; + + keys->thoff += sizeof(*vlan); + } + + vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); + if (!vlan) + return BPF_DROP; + + keys->thoff += sizeof(*vlan); + /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ + if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || + vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) + return BPF_DROP; + + return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c new file mode 100644 index 000000000000..1fd244d35ba9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <string.h> + +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <sys/socket.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define SRC_REWRITE_IP4 0x7f000004U +#define DST_REWRITE_IP4 0x7f000001U +#define DST_REWRITE_PORT4 4444 + +int _version SEC("version") = 1; + +SEC("cgroup/connect4") +int connect_v4_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_tuple tuple = {}; + struct sockaddr_in sa; + struct bpf_sock *sk; + + /* Verify that new destination is available. */ + memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr)); + memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport)); + + tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4); + tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4); + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 0; + else if (ctx->type == SOCK_STREAM) + sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4), + BPF_F_CURRENT_NETNS, 0); + else + sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4), + BPF_F_CURRENT_NETNS, 0); + + if (!sk) + return 0; + + if (sk->src_ip4 != tuple.ipv4.daddr || + sk->src_port != DST_REWRITE_PORT4) { + bpf_sk_release(sk); + return 0; + } + + bpf_sk_release(sk); + + /* Rewrite destination. */ + ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4); + ctx->user_port = bpf_htons(DST_REWRITE_PORT4); + + /* Rewrite source. */ + memset(&sa, 0, sizeof(sa)); + + sa.sin_family = AF_INET; + sa.sin_port = bpf_htons(0); + sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); + + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c new file mode 100644 index 000000000000..26397ab7b3c7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/connect6_prog.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <string.h> + +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <sys/socket.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define SRC_REWRITE_IP6_0 0 +#define SRC_REWRITE_IP6_1 0 +#define SRC_REWRITE_IP6_2 0 +#define SRC_REWRITE_IP6_3 6 + +#define DST_REWRITE_IP6_0 0 +#define DST_REWRITE_IP6_1 0 +#define DST_REWRITE_IP6_2 0 +#define DST_REWRITE_IP6_3 1 + +#define DST_REWRITE_PORT6 6666 + +int _version SEC("version") = 1; + +SEC("cgroup/connect6") +int connect_v6_prog(struct bpf_sock_addr *ctx) +{ + struct bpf_sock_tuple tuple = {}; + struct sockaddr_in6 sa; + struct bpf_sock *sk; + + /* Verify that new destination is available. */ + memset(&tuple.ipv6.saddr, 0, sizeof(tuple.ipv6.saddr)); + memset(&tuple.ipv6.sport, 0, sizeof(tuple.ipv6.sport)); + + tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0); + tuple.ipv6.daddr[1] = bpf_htonl(DST_REWRITE_IP6_1); + tuple.ipv6.daddr[2] = bpf_htonl(DST_REWRITE_IP6_2); + tuple.ipv6.daddr[3] = bpf_htonl(DST_REWRITE_IP6_3); + + tuple.ipv6.dport = bpf_htons(DST_REWRITE_PORT6); + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 0; + else if (ctx->type == SOCK_STREAM) + sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv6), + BPF_F_CURRENT_NETNS, 0); + else + sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv6), + BPF_F_CURRENT_NETNS, 0); + + if (!sk) + return 0; + + if (sk->src_ip6[0] != tuple.ipv6.daddr[0] || + sk->src_ip6[1] != tuple.ipv6.daddr[1] || + sk->src_ip6[2] != tuple.ipv6.daddr[2] || + sk->src_ip6[3] != tuple.ipv6.daddr[3] || + sk->src_port != DST_REWRITE_PORT6) { + bpf_sk_release(sk); + return 0; + } + + bpf_sk_release(sk); + + /* Rewrite destination. */ + ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0); + ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1); + ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2); + ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3); + + ctx->user_port = bpf_htons(DST_REWRITE_PORT6); + + /* Rewrite source. */ + memset(&sa, 0, sizeof(sa)); + + sa.sin6_family = AF_INET6; + sa.sin6_port = bpf_htons(0); + + sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0); + sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1); + sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2); + sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3); + + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/dev_cgroup.c b/tools/testing/selftests/bpf/progs/dev_cgroup.c new file mode 100644 index 000000000000..ce41a3475f27 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/dev_cgroup.c @@ -0,0 +1,60 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include <linux/bpf.h> +#include <linux/version.h> +#include "bpf_helpers.h" + +SEC("cgroup/dev") +int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx) +{ + short type = ctx->access_type & 0xFFFF; +#ifdef DEBUG + short access = ctx->access_type >> 16; + char fmt[] = " %d:%d \n"; + + switch (type) { + case BPF_DEVCG_DEV_BLOCK: + fmt[0] = 'b'; + break; + case BPF_DEVCG_DEV_CHAR: + fmt[0] = 'c'; + break; + default: + fmt[0] = '?'; + break; + } + + if (access & BPF_DEVCG_ACC_READ) + fmt[8] = 'r'; + + if (access & BPF_DEVCG_ACC_WRITE) + fmt[9] = 'w'; + + if (access & BPF_DEVCG_ACC_MKNOD) + fmt[10] = 'm'; + + bpf_trace_printk(fmt, sizeof(fmt), ctx->major, ctx->minor); +#endif + + /* Allow access to /dev/zero and /dev/random. + * Forbid everything else. + */ + if (ctx->major != 1 || type != BPF_DEVCG_DEV_CHAR) + return 0; + + switch (ctx->minor) { + case 5: /* 1:5 /dev/zero */ + case 9: /* 1:9 /dev/urandom */ + return 1; + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c new file mode 100644 index 000000000000..014dba10b8a5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") cg_ids = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") pidmap = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +SEC("tracepoint/syscalls/sys_enter_nanosleep") +int trace(void *ctx) +{ + __u32 pid = bpf_get_current_pid_tgid(); + __u32 key = 0, *expected_pid; + __u64 *val; + + expected_pid = bpf_map_lookup_elem(&pidmap, &key); + if (!expected_pid || *expected_pid != pid) + return 0; + + val = bpf_map_lookup_elem(&cg_ids, &key); + if (val) + *val = bpf_get_current_cgroup_id(); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c new file mode 100644 index 000000000000..9f741e69cebe --- /dev/null +++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/version.h> + +#include "bpf_helpers.h" +#include "netcnt_common.h" + +#define MAX_BPS (3 * 1024 * 1024) + +#define REFRESH_TIME_NS 100000000 +#define NS_PER_SEC 1000000000 + +struct bpf_map_def SEC("maps") percpu_netcnt = { + .type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + .key_size = sizeof(struct bpf_cgroup_storage_key), + .value_size = sizeof(struct percpu_net_cnt), +}; + +BPF_ANNOTATE_KV_PAIR(percpu_netcnt, struct bpf_cgroup_storage_key, + struct percpu_net_cnt); + +struct bpf_map_def SEC("maps") netcnt = { + .type = BPF_MAP_TYPE_CGROUP_STORAGE, + .key_size = sizeof(struct bpf_cgroup_storage_key), + .value_size = sizeof(struct net_cnt), +}; + +BPF_ANNOTATE_KV_PAIR(netcnt, struct bpf_cgroup_storage_key, + struct net_cnt); + +SEC("cgroup/skb") +int bpf_nextcnt(struct __sk_buff *skb) +{ + struct percpu_net_cnt *percpu_cnt; + char fmt[] = "%d %llu %llu\n"; + struct net_cnt *cnt; + __u64 ts, dt; + int ret; + + cnt = bpf_get_local_storage(&netcnt, 0); + percpu_cnt = bpf_get_local_storage(&percpu_netcnt, 0); + + percpu_cnt->packets++; + percpu_cnt->bytes += skb->len; + + if (percpu_cnt->packets > MAX_PERCPU_PACKETS) { + __sync_fetch_and_add(&cnt->packets, + percpu_cnt->packets); + percpu_cnt->packets = 0; + + __sync_fetch_and_add(&cnt->bytes, + percpu_cnt->bytes); + percpu_cnt->bytes = 0; + } + + ts = bpf_ktime_get_ns(); + dt = ts - percpu_cnt->prev_ts; + + dt *= MAX_BPS; + dt /= NS_PER_SEC; + + if (cnt->bytes + percpu_cnt->bytes - percpu_cnt->prev_bytes < dt) + ret = 1; + else + ret = 0; + + if (dt > REFRESH_TIME_NS) { + percpu_cnt->prev_ts = ts; + percpu_cnt->prev_packets = cnt->packets; + percpu_cnt->prev_bytes = cnt->bytes; + } + + return !!ret; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/progs/sample_map_ret0.c b/tools/testing/selftests/bpf/progs/sample_map_ret0.c new file mode 100644 index 000000000000..0756303676ac --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sample_map_ret0.c @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") htab = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(long), + .max_entries = 2, +}; + +struct bpf_map_def SEC("maps") array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(long), + .max_entries = 2, +}; + +/* Sample program which should always load for testing control paths. */ +SEC(".text") int func() +{ + __u64 key64 = 0; + __u32 key = 0; + long *value; + + value = bpf_map_lookup_elem(&htab, &key); + if (!value) + return 1; + value = bpf_map_lookup_elem(&array, &key64); + if (!value) + return 1; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/sample_ret0.c b/tools/testing/selftests/bpf/progs/sample_ret0.c new file mode 100644 index 000000000000..fec99750d6ea --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sample_ret0.c @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ + +/* Sample program which should always load for testing control paths. */ +int func() +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c new file mode 100644 index 000000000000..a91536b1c47e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <sys/socket.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define SRC1_IP4 0xAC100001U /* 172.16.0.1 */ +#define SRC2_IP4 0x00000000U +#define SRC_REWRITE_IP4 0x7f000004U +#define DST_IP4 0xC0A801FEU /* 192.168.1.254 */ +#define DST_REWRITE_IP4 0x7f000001U +#define DST_PORT 4040 +#define DST_REWRITE_PORT4 4444 + +int _version SEC("version") = 1; + +SEC("cgroup/sendmsg4") +int sendmsg_v4_prog(struct bpf_sock_addr *ctx) +{ + if (ctx->type != SOCK_DGRAM) + return 0; + + /* Rewrite source. */ + if (ctx->msg_src_ip4 == bpf_htonl(SRC1_IP4) || + ctx->msg_src_ip4 == bpf_htonl(SRC2_IP4)) { + ctx->msg_src_ip4 = bpf_htonl(SRC_REWRITE_IP4); + } else { + /* Unexpected source. Reject sendmsg. */ + return 0; + } + + /* Rewrite destination. */ + if ((ctx->user_ip4 >> 24) == (bpf_htonl(DST_IP4) >> 24) && + ctx->user_port == bpf_htons(DST_PORT)) { + ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4); + ctx->user_port = bpf_htons(DST_REWRITE_PORT4); + } else { + /* Unexpected source. Reject sendmsg. */ + return 0; + } + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c new file mode 100644 index 000000000000..5aeaa284fc47 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <sys/socket.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define SRC_REWRITE_IP6_0 0 +#define SRC_REWRITE_IP6_1 0 +#define SRC_REWRITE_IP6_2 0 +#define SRC_REWRITE_IP6_3 6 + +#define DST_REWRITE_IP6_0 0 +#define DST_REWRITE_IP6_1 0 +#define DST_REWRITE_IP6_2 0 +#define DST_REWRITE_IP6_3 1 + +#define DST_REWRITE_PORT6 6666 + +int _version SEC("version") = 1; + +SEC("cgroup/sendmsg6") +int sendmsg_v6_prog(struct bpf_sock_addr *ctx) +{ + if (ctx->type != SOCK_DGRAM) + return 0; + + /* Rewrite source. */ + if (ctx->msg_src_ip6[3] == bpf_htonl(1) || + ctx->msg_src_ip6[3] == bpf_htonl(0)) { + ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0); + ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1); + ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2); + ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3); + } else { + /* Unexpected source. Reject sendmsg. */ + return 0; + } + + /* Rewrite destination. */ + if ((ctx->user_ip6[0] & 0xFFFF) == bpf_htons(0xFACE) && + ctx->user_ip6[0] >> 16 == bpf_htons(0xB00C)) { + ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0); + ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1); + ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2); + ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3); + + ctx->user_port = bpf_htons(DST_REWRITE_PORT6); + } else { + /* Unexpected destination. Reject sendmsg. */ + return 0; + } + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c new file mode 100644 index 000000000000..9ff8ac4b0bf6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/bpf.h> +#include <sys/socket.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +struct bpf_map_def SEC("maps") socket_cookies = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u64), + .value_size = sizeof(__u32), + .max_entries = 1 << 8, +}; + +SEC("cgroup/connect6") +int set_cookie(struct bpf_sock_addr *ctx) +{ + __u32 cookie_value = 0xFF; + __u64 cookie_key; + + if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6) + return 1; + + cookie_key = bpf_get_socket_cookie(ctx); + if (bpf_map_update_elem(&socket_cookies, &cookie_key, &cookie_value, 0)) + return 0; + + return 1; +} + +SEC("sockops") +int update_cookie(struct bpf_sock_ops *ctx) +{ + __u32 new_cookie_value; + __u32 *cookie_value; + __u64 cookie_key; + + if (ctx->family != AF_INET6) + return 1; + + if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB) + return 1; + + cookie_key = bpf_get_socket_cookie(ctx); + + cookie_value = bpf_map_lookup_elem(&socket_cookies, &cookie_key); + if (!cookie_value) + return 1; + + new_cookie_value = (ctx->local_port << 8) | *cookie_value; + bpf_map_update_elem(&socket_cookies, &cookie_key, &new_cookie_value, 0); + + return 1; +} + +int _version SEC("version") = 1; + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c new file mode 100644 index 000000000000..0f92858f6226 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c @@ -0,0 +1,46 @@ +#include <linux/bpf.h> +#include "bpf_helpers.h" +#include "bpf_util.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sk_skb1") +int bpf_prog1(struct __sk_buff *skb) +{ + void *data_end = (void *)(long) skb->data_end; + void *data = (void *)(long) skb->data; + __u32 lport = skb->local_port; + __u32 rport = skb->remote_port; + __u8 *d = data; + __u32 len = (__u32) data_end - (__u32) data; + int err; + + if (data + 10 > data_end) { + err = bpf_skb_pull_data(skb, 10); + if (err) + return SK_DROP; + + data_end = (void *)(long)skb->data_end; + data = (void *)(long)skb->data; + if (data + 10 > data_end) + return SK_DROP; + } + + /* This write/read is a bit pointless but tests the verifier and + * strparser handler for read/write pkt data and access into sk + * fields. + */ + d = data; + d[7] = 1; + return skb->len; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c new file mode 100644 index 000000000000..12a7b5c82ed6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c @@ -0,0 +1,33 @@ +#include <linux/bpf.h> +#include "bpf_helpers.h" +#include "bpf_util.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sk_msg1") +int bpf_prog1(struct sk_msg_md *msg) +{ + void *data_end = (void *)(long) msg->data_end; + void *data = (void *)(long) msg->data; + + char *d; + + if (data + 8 > data_end) + return SK_DROP; + + bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data); + d = (char *)data; + bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]); + + return SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c new file mode 100644 index 000000000000..2ce7634a4012 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c @@ -0,0 +1,73 @@ +#include <linux/bpf.h> +#include "bpf_helpers.h" +#include "bpf_util.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +struct bpf_map_def SEC("maps") sock_map_rx = { + .type = BPF_MAP_TYPE_SOCKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 20, +}; + +struct bpf_map_def SEC("maps") sock_map_tx = { + .type = BPF_MAP_TYPE_SOCKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 20, +}; + +struct bpf_map_def SEC("maps") sock_map_msg = { + .type = BPF_MAP_TYPE_SOCKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 20, +}; + +struct bpf_map_def SEC("maps") sock_map_break = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 20, +}; + +SEC("sk_skb2") +int bpf_prog2(struct __sk_buff *skb) +{ + void *data_end = (void *)(long) skb->data_end; + void *data = (void *)(long) skb->data; + __u32 lport = skb->local_port; + __u32 rport = skb->remote_port; + __u8 *d = data; + __u8 sk, map; + + if (data + 8 > data_end) + return SK_DROP; + + map = d[0]; + sk = d[1]; + + d[0] = 0xd; + d[1] = 0xe; + d[2] = 0xa; + d[3] = 0xd; + d[4] = 0xb; + d[5] = 0xe; + d[6] = 0xe; + d[7] = 0xf; + + if (!map) + return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0); + return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_adjust_tail.c b/tools/testing/selftests/bpf/progs/test_adjust_tail.c new file mode 100644 index 000000000000..4cd5e860c903 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_adjust_tail.c @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +SEC("xdp_adjust_tail") +int _xdp_adjust_tail(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + int offset = 0; + + if (data_end - data == 54) + offset = 256; + else + offset = 20; + if (bpf_xdp_adjust_tail(xdp, 0 - offset)) + return XDP_DROP; + return XDP_TX; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c new file mode 100644 index 000000000000..e5c79fe0ffdb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +struct ipv_counts { + unsigned int v4; + unsigned int v6; +}; + +struct bpf_map_def SEC("maps") btf_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(struct ipv_counts), + .max_entries = 4, +}; + +BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts); + +struct dummy_tracepoint_args { + unsigned long long pad; + struct sock *sock; +}; + +__attribute__((noinline)) +static int test_long_fname_2(struct dummy_tracepoint_args *arg) +{ + struct ipv_counts *counts; + int key = 0; + + if (!arg->sock) + return 0; + + counts = bpf_map_lookup_elem(&btf_map, &key); + if (!counts) + return 0; + + counts->v6++; + + return 0; +} + +__attribute__((noinline)) +static int test_long_fname_1(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_2(arg); +} + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_1(arg); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c new file mode 100644 index 000000000000..434188c37774 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018 Facebook */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +struct ipv_counts { + unsigned int v4; + unsigned int v6; +}; + +struct bpf_map_def SEC("maps") btf_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(struct ipv_counts), + .max_entries = 4, +}; + +struct dummy_tracepoint_args { + unsigned long long pad; + struct sock *sock; +}; + +__attribute__((noinline)) +static int test_long_fname_2(struct dummy_tracepoint_args *arg) +{ + struct ipv_counts *counts; + int key = 0; + + if (!arg->sock) + return 0; + + counts = bpf_map_lookup_elem(&btf_map, &key); + if (!counts) + return 0; + + counts->v6++; + + return 0; +} + +__attribute__((noinline)) +static int test_long_fname_1(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_2(arg); +} + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_1(arg); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c new file mode 100644 index 000000000000..f6d9f238e00a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +/* Permit pretty deep stack traces */ +#define MAX_STACK_RAWTP 100 +struct stack_trace_t { + int pid; + int kern_stack_size; + int user_stack_size; + int user_stack_buildid_size; + __u64 kern_stack[MAX_STACK_RAWTP]; + __u64 user_stack[MAX_STACK_RAWTP]; + struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP]; +}; + +struct bpf_map_def SEC("maps") perfmap = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(__u32), + .max_entries = 2, +}; + +struct bpf_map_def SEC("maps") stackdata_map = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct stack_trace_t), + .max_entries = 1, +}; + +/* Allocate per-cpu space twice the needed. For the code below + * usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); + * if (usize < 0) + * return 0; + * ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); + * + * If we have value_size = MAX_STACK_RAWTP * sizeof(__u64), + * verifier will complain that access "raw_data + usize" + * with size "max_len - usize" may be out of bound. + * The maximum "raw_data + usize" is "raw_data + max_len" + * and the maximum "max_len - usize" is "max_len", verifier + * concludes that the maximum buffer access range is + * "raw_data[0...max_len * 2 - 1]" and hence reject the program. + * + * Doubling the to-be-used max buffer size can fix this verifier + * issue and avoid complicated C programming massaging. + * This is an acceptable workaround since there is one entry here. + */ +struct bpf_map_def SEC("maps") rawdata_map = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2, + .max_entries = 1, +}; + +SEC("tracepoint/raw_syscalls/sys_enter") +int bpf_prog1(void *ctx) +{ + int max_len, max_buildid_len, usize, ksize, total_size; + struct stack_trace_t *data; + void *raw_data; + __u32 key = 0; + + data = bpf_map_lookup_elem(&stackdata_map, &key); + if (!data) + return 0; + + max_len = MAX_STACK_RAWTP * sizeof(__u64); + max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id); + data->pid = bpf_get_current_pid_tgid(); + data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack, + max_len, 0); + data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len, + BPF_F_USER_STACK); + data->user_stack_buildid_size = bpf_get_stack( + ctx, data->user_stack_buildid, max_buildid_len, + BPF_F_USER_STACK | BPF_F_USER_BUILD_ID); + bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data)); + + /* write both kernel and user stacks to the same buffer */ + raw_data = bpf_map_lookup_elem(&rawdata_map, &key); + if (!raw_data) + return 0; + + usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); + if (usize < 0) + return 0; + + ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); + if (ksize < 0) + return 0; + + total_size = usize + ksize; + if (total_size > 0 && total_size <= max_len) + bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c new file mode 100644 index 000000000000..1e10c9590991 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_l4lb.c @@ -0,0 +1,473 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include "bpf_helpers.h" +#include "test_iptunnel_common.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; + +static inline __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static inline u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static inline u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +#define PCKT_FRAGMENTED 65343 +#define IPV4_HDR_LEN_NO_OPT 20 +#define IPV4_PLUS_ICMP_HDR 28 +#define IPV6_PLUS_ICMP_HDR 48 +#define RING_SIZE 2 +#define MAX_VIPS 12 +#define MAX_REALS 5 +#define CTL_MAP_SIZE 16 +#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE) +#define F_IPV6 (1 << 0) +#define F_HASH_NO_SRC_PORT (1 << 0) +#define F_ICMP (1 << 0) +#define F_SYN_SET (1 << 1) + +struct packet_description { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct vip_stats { + __u64 bytes; + __u64 pkts; +}; + +struct eth_hdr { + unsigned char eth_dest[ETH_ALEN]; + unsigned char eth_source[ETH_ALEN]; + unsigned short eth_proto; +}; + +struct bpf_map_def SEC("maps") vip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip), + .value_size = sizeof(struct vip_meta), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ch_rings = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = CH_RINGS_SIZE, +}; + +struct bpf_map_def SEC("maps") reals = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct real_definition), + .max_entries = MAX_REALS, +}; + +struct bpf_map_def SEC("maps") stats = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct vip_stats), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = CTL_MAP_SIZE, +}; + +static __always_inline __u32 get_packet_hash(struct packet_description *pckt, + bool ipv6) +{ + if (ipv6) + return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS), + pckt->ports, CH_RINGS_SIZE); + else + return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE); +} + +static __always_inline bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6) +{ + __u32 hash = get_packet_hash(pckt, is_ipv6) % RING_SIZE; + __u32 key = RING_SIZE * vip_info->vip_num + hash; + __u32 *real_pos; + + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return false; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return false; + return true; +} + +static __always_inline int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG) + return TC_ACT_OK; + off += sizeof(struct icmp6hdr); + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + pckt->proto = ip6h->nexthdr; + pckt->flags |= F_ICMP; + memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16); + memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16); + return TC_ACT_UNSPEC; +} + +static __always_inline int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->type != ICMP_DEST_UNREACH || + icmp_hdr->code != ICMP_FRAG_NEEDED) + return TC_ACT_OK; + off += sizeof(struct icmphdr); + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + pckt->proto = iph->protocol; + pckt->flags |= F_ICMP; + pckt->src = iph->daddr; + pckt->dst = iph->saddr; + return TC_ACT_UNSPEC; +} + +static __always_inline bool parse_udp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct udphdr *udp; + udp = data + off; + + if (udp + 1 > data_end) + return false; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = udp->source; + pckt->port16[1] = udp->dest; + } else { + pckt->port16[0] = udp->dest; + pckt->port16[1] = udp->source; + } + return true; +} + +static __always_inline bool parse_tcp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct tcphdr *tcp; + + tcp = data + off; + if (tcp + 1 > data_end) + return false; + + if (tcp->syn) + pckt->flags |= F_SYN_SET; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = tcp->source; + pckt->port16[1] = tcp->dest; + } else { + pckt->port16[0] = tcp->dest; + pckt->port16[1] = tcp->source; + } + return true; +} + +static __always_inline int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct __sk_buff *skb) +{ + void *pkt_start = (void *)(long)skb->data; + struct packet_description pckt = {}; + struct eth_hdr *eth = pkt_start; + struct bpf_tunnel_key tkey = {}; + struct vip_stats *data_stats; + struct real_definition *dst; + struct vip_meta *vip_info; + struct ctl_value *cval; + __u32 v4_intf_pos = 1; + __u32 v6_intf_pos = 2; + struct ipv6hdr *ip6h; + struct vip vip = {}; + struct iphdr *iph; + int tun_flag = 0; + __u16 pkt_bytes; + __u64 iph_len; + __u32 ifindex; + __u8 protocol; + __u32 vip_num; + int action; + + tkey.tunnel_ttl = 64; + if (is_ipv6) { + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + + iph_len = sizeof(struct ipv6hdr); + protocol = ip6h->nexthdr; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(ip6h->payload_len); + off += iph_len; + if (protocol == IPPROTO_FRAGMENT) { + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_ICMPV6) { + action = parse_icmpv6(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV6_PLUS_ICMP_HDR; + } else { + memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16); + memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16); + } + } else { + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + + protocol = iph->protocol; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(iph->tot_len); + off += IPV4_HDR_LEN_NO_OPT; + + if (iph->frag_off & PCKT_FRAGMENTED) + return TC_ACT_SHOT; + if (protocol == IPPROTO_ICMP) { + action = parse_icmp(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV4_PLUS_ICMP_HDR; + } else { + pckt.src = iph->saddr; + pckt.dst = iph->daddr; + } + } + protocol = pckt.proto; + + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else { + return TC_ACT_SHOT; + } + + if (is_ipv6) + memcpy(vip.daddr.v6, pckt.dstv6, 16); + else + vip.daddr.v4 = pckt.dst; + + vip.dport = pckt.port16[1]; + vip.protocol = pckt.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.dport = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return TC_ACT_SHOT; + pckt.port16[1] = 0; + } + + if (vip_info->flags & F_HASH_NO_SRC_PORT) + pckt.port16[0] = 0; + + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6)) + return TC_ACT_SHOT; + + if (dst->flags & F_IPV6) { + cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + memcpy(tkey.remote_ipv6, dst->dstv6, 16); + tun_flag = BPF_F_TUNINFO_IPV6; + } else { + cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + tkey.remote_ipv4 = dst->dst; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return TC_ACT_SHOT; + data_stats->pkts++; + data_stats->bytes += pkt_bytes; + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag); + *(u32 *)eth->eth_dest = tkey.remote_ipv4; + return bpf_redirect(ifindex, 0); +} + +SEC("l4lb-demo") +int balancer_ingress(struct __sk_buff *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return TC_ACT_SHOT; + eth_proto = eth->eth_proto; + if (eth_proto == bpf_htons(ETH_P_IP)) + return process_packet(data, nh_off, data_end, false, ctx); + else if (eth_proto == bpf_htons(ETH_P_IPV6)) + return process_packet(data, nh_off, data_end, true, ctx); + else + return TC_ACT_SHOT; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c new file mode 100644 index 000000000000..ba44a14e6dc4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c @@ -0,0 +1,473 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include "bpf_helpers.h" +#include "test_iptunnel_common.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; + +static __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +#define PCKT_FRAGMENTED 65343 +#define IPV4_HDR_LEN_NO_OPT 20 +#define IPV4_PLUS_ICMP_HDR 28 +#define IPV6_PLUS_ICMP_HDR 48 +#define RING_SIZE 2 +#define MAX_VIPS 12 +#define MAX_REALS 5 +#define CTL_MAP_SIZE 16 +#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE) +#define F_IPV6 (1 << 0) +#define F_HASH_NO_SRC_PORT (1 << 0) +#define F_ICMP (1 << 0) +#define F_SYN_SET (1 << 1) + +struct packet_description { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct vip_stats { + __u64 bytes; + __u64 pkts; +}; + +struct eth_hdr { + unsigned char eth_dest[ETH_ALEN]; + unsigned char eth_source[ETH_ALEN]; + unsigned short eth_proto; +}; + +struct bpf_map_def SEC("maps") vip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip), + .value_size = sizeof(struct vip_meta), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ch_rings = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = CH_RINGS_SIZE, +}; + +struct bpf_map_def SEC("maps") reals = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct real_definition), + .max_entries = MAX_REALS, +}; + +struct bpf_map_def SEC("maps") stats = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct vip_stats), + .max_entries = MAX_VIPS, +}; + +struct bpf_map_def SEC("maps") ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = CTL_MAP_SIZE, +}; + +static __u32 get_packet_hash(struct packet_description *pckt, + bool ipv6) +{ + if (ipv6) + return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS), + pckt->ports, CH_RINGS_SIZE); + else + return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE); +} + +static bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6) +{ + __u32 hash = get_packet_hash(pckt, is_ipv6); + __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE; + __u32 *real_pos; + + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) + return 0; + + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return false; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return false; + return true; +} + +static int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG) + return TC_ACT_OK; + off += sizeof(struct icmp6hdr); + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + pckt->proto = ip6h->nexthdr; + pckt->flags |= F_ICMP; + memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16); + memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16); + return TC_ACT_UNSPEC; +} + +static int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return TC_ACT_SHOT; + if (icmp_hdr->type != ICMP_DEST_UNREACH || + icmp_hdr->code != ICMP_FRAG_NEEDED) + return TC_ACT_OK; + off += sizeof(struct icmphdr); + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + pckt->proto = iph->protocol; + pckt->flags |= F_ICMP; + pckt->src = iph->daddr; + pckt->dst = iph->saddr; + return TC_ACT_UNSPEC; +} + +static bool parse_udp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct udphdr *udp; + udp = data + off; + + if (udp + 1 > data_end) + return false; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = udp->source; + pckt->port16[1] = udp->dest; + } else { + pckt->port16[0] = udp->dest; + pckt->port16[1] = udp->source; + } + return true; +} + +static bool parse_tcp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) +{ + struct tcphdr *tcp; + + tcp = data + off; + if (tcp + 1 > data_end) + return false; + + if (tcp->syn) + pckt->flags |= F_SYN_SET; + + if (!(pckt->flags & F_ICMP)) { + pckt->port16[0] = tcp->source; + pckt->port16[1] = tcp->dest; + } else { + pckt->port16[0] = tcp->dest; + pckt->port16[1] = tcp->source; + } + return true; +} + +static int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct __sk_buff *skb) +{ + void *pkt_start = (void *)(long)skb->data; + struct packet_description pckt = {}; + struct eth_hdr *eth = pkt_start; + struct bpf_tunnel_key tkey = {}; + struct vip_stats *data_stats; + struct real_definition *dst; + struct vip_meta *vip_info; + struct ctl_value *cval; + __u32 v4_intf_pos = 1; + __u32 v6_intf_pos = 2; + struct ipv6hdr *ip6h; + struct vip vip = {}; + struct iphdr *iph; + int tun_flag = 0; + __u16 pkt_bytes; + __u64 iph_len; + __u32 ifindex; + __u8 protocol; + __u32 vip_num; + int action; + + tkey.tunnel_ttl = 64; + if (is_ipv6) { + ip6h = data + off; + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + + iph_len = sizeof(struct ipv6hdr); + protocol = ip6h->nexthdr; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(ip6h->payload_len); + off += iph_len; + if (protocol == IPPROTO_FRAGMENT) { + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_ICMPV6) { + action = parse_icmpv6(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV6_PLUS_ICMP_HDR; + } else { + memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16); + memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16); + } + } else { + iph = data + off; + if (iph + 1 > data_end) + return TC_ACT_SHOT; + if (iph->ihl != 5) + return TC_ACT_SHOT; + + protocol = iph->protocol; + pckt.proto = protocol; + pkt_bytes = bpf_ntohs(iph->tot_len); + off += IPV4_HDR_LEN_NO_OPT; + + if (iph->frag_off & PCKT_FRAGMENTED) + return TC_ACT_SHOT; + if (protocol == IPPROTO_ICMP) { + action = parse_icmp(data, data_end, off, &pckt); + if (action >= 0) + return action; + off += IPV4_PLUS_ICMP_HDR; + } else { + pckt.src = iph->saddr; + pckt.dst = iph->daddr; + } + } + protocol = pckt.proto; + + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(data, off, data_end, &pckt)) + return TC_ACT_SHOT; + } else { + return TC_ACT_SHOT; + } + + if (is_ipv6) + memcpy(vip.daddr.v6, pckt.dstv6, 16); + else + vip.daddr.v4 = pckt.dst; + + vip.dport = pckt.port16[1]; + vip.protocol = pckt.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.dport = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return TC_ACT_SHOT; + pckt.port16[1] = 0; + } + + if (vip_info->flags & F_HASH_NO_SRC_PORT) + pckt.port16[0] = 0; + + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6)) + return TC_ACT_SHOT; + + if (dst->flags & F_IPV6) { + cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + memcpy(tkey.remote_ipv6, dst->dstv6, 16); + tun_flag = BPF_F_TUNINFO_IPV6; + } else { + cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos); + if (!cval) + return TC_ACT_SHOT; + ifindex = cval->ifindex; + tkey.remote_ipv4 = dst->dst; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return TC_ACT_SHOT; + data_stats->pkts++; + data_stats->bytes += pkt_bytes; + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag); + *(u32 *)eth->eth_dest = tkey.remote_ipv4; + return bpf_redirect(ifindex, 0); +} + +SEC("l4lb-demo") +int balancer_ingress(struct __sk_buff *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return TC_ACT_SHOT; + eth_proto = eth->eth_proto; + if (eth_proto == bpf_htons(ETH_P_IP)) + return process_packet(data, nh_off, data_end, false, ctx); + else if (eth_proto == bpf_htons(ETH_P_IPV6)) + return process_packet(data, nh_off, data_end, true, ctx); + else + return TC_ACT_SHOT; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c new file mode 100644 index 000000000000..4147130cc3b7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// test ir decoder +// +// Copyright (C) 2018 Sean Young <sean@mess.org> + +#include <linux/bpf.h> +#include <linux/lirc.h> +#include "bpf_helpers.h" + +SEC("lirc_mode2") +int bpf_decoder(unsigned int *sample) +{ + if (LIRC_IS_PULSE(*sample)) { + unsigned int duration = LIRC_VALUE(*sample); + + if (duration & 0x10000) + bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0); + if (duration & 0x20000) + bpf_rc_pointer_rel(sample, (duration >> 8) & 0xff, + duration & 0xff); + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c new file mode 100644 index 000000000000..c957d6dfe6d7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +struct grehdr { + __be16 flags; + __be16 protocol; +}; + +SEC("encap_gre") +int bpf_lwt_encap_gre(struct __sk_buff *skb) +{ + struct encap_hdr { + struct iphdr iph; + struct grehdr greh; + } hdr; + int err; + + memset(&hdr, 0, sizeof(struct encap_hdr)); + + hdr.iph.ihl = 5; + hdr.iph.version = 4; + hdr.iph.ttl = 0x40; + hdr.iph.protocol = 47; /* IPPROTO_GRE */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + hdr.iph.saddr = 0x640110ac; /* 172.16.1.100 */ + hdr.iph.daddr = 0x641010ac; /* 172.16.16.100 */ +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + hdr.iph.saddr = 0xac100164; /* 172.16.1.100 */ + hdr.iph.daddr = 0xac101064; /* 172.16.16.100 */ +#else +#error "Fix your compiler's __BYTE_ORDER__?!" +#endif + hdr.iph.tot_len = bpf_htons(skb->len + sizeof(struct encap_hdr)); + + hdr.greh.protocol = skb->protocol; + + err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, + sizeof(struct encap_hdr)); + if (err) + return BPF_DROP; + + return BPF_LWT_REROUTE; +} + +SEC("encap_gre6") +int bpf_lwt_encap_gre6(struct __sk_buff *skb) +{ + struct encap_hdr { + struct ipv6hdr ip6hdr; + struct grehdr greh; + } hdr; + int err; + + memset(&hdr, 0, sizeof(struct encap_hdr)); + + hdr.ip6hdr.version = 6; + hdr.ip6hdr.payload_len = bpf_htons(skb->len + sizeof(struct grehdr)); + hdr.ip6hdr.nexthdr = 47; /* IPPROTO_GRE */ + hdr.ip6hdr.hop_limit = 0x40; + /* fb01::1 */ + hdr.ip6hdr.saddr.s6_addr[0] = 0xfb; + hdr.ip6hdr.saddr.s6_addr[1] = 1; + hdr.ip6hdr.saddr.s6_addr[15] = 1; + /* fb10::1 */ + hdr.ip6hdr.daddr.s6_addr[0] = 0xfb; + hdr.ip6hdr.daddr.s6_addr[1] = 0x10; + hdr.ip6hdr.daddr.s6_addr[15] = 1; + + hdr.greh.protocol = skb->protocol; + + err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr, + sizeof(struct encap_hdr)); + if (err) + return BPF_DROP; + + return BPF_LWT_REROUTE; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c new file mode 100644 index 000000000000..0575751bc1bc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c @@ -0,0 +1,437 @@ +#include <stddef.h> +#include <inttypes.h> +#include <errno.h> +#include <linux/seg6_local.h> +#include <linux/bpf.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +/* Packet parsing state machine helpers. */ +#define cursor_advance(_cursor, _len) \ + ({ void *_tmp = _cursor; _cursor += _len; _tmp; }) + +#define SR6_FLAG_ALERT (1 << 4) + +#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \ + 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32)) +#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \ + 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32)) +#define BPF_PACKET_HEADER __attribute__((packed)) + +struct ip6_t { + unsigned int ver:4; + unsigned int priority:8; + unsigned int flow_label:20; + unsigned short payload_len; + unsigned char next_header; + unsigned char hop_limit; + unsigned long long src_hi; + unsigned long long src_lo; + unsigned long long dst_hi; + unsigned long long dst_lo; +} BPF_PACKET_HEADER; + +struct ip6_addr_t { + unsigned long long hi; + unsigned long long lo; +} BPF_PACKET_HEADER; + +struct ip6_srh_t { + unsigned char nexthdr; + unsigned char hdrlen; + unsigned char type; + unsigned char segments_left; + unsigned char first_segment; + unsigned char flags; + unsigned short tag; + + struct ip6_addr_t segments[0]; +} BPF_PACKET_HEADER; + +struct sr6_tlv_t { + unsigned char type; + unsigned char len; + unsigned char value[0]; +} BPF_PACKET_HEADER; + +__attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb) +{ + void *cursor, *data_end; + struct ip6_srh_t *srh; + struct ip6_t *ip; + uint8_t *ipver; + + data_end = (void *)(long)skb->data_end; + cursor = (void *)(long)skb->data; + ipver = (uint8_t *)cursor; + + if ((void *)ipver + sizeof(*ipver) > data_end) + return NULL; + + if ((*ipver >> 4) != 6) + return NULL; + + ip = cursor_advance(cursor, sizeof(*ip)); + if ((void *)ip + sizeof(*ip) > data_end) + return NULL; + + if (ip->next_header != 43) + return NULL; + + srh = cursor_advance(cursor, sizeof(*srh)); + if ((void *)srh + sizeof(*srh) > data_end) + return NULL; + + if (srh->type != 4) + return NULL; + + return srh; +} + +__attribute__((always_inline)) +int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad, + uint32_t old_pad, uint32_t pad_off) +{ + int err; + + if (new_pad != old_pad) { + err = bpf_lwt_seg6_adjust_srh(skb, pad_off, + (int) new_pad - (int) old_pad); + if (err) + return err; + } + + if (new_pad > 0) { + char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0}; + struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf; + + pad_tlv->type = SR6_TLV_PADDING; + pad_tlv->len = new_pad - 2; + + err = bpf_lwt_seg6_store_bytes(skb, pad_off, + (void *)pad_tlv_buf, new_pad); + if (err) + return err; + } + + return 0; +} + +__attribute__((always_inline)) +int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh, + uint32_t *tlv_off, uint32_t *pad_size, + uint32_t *pad_off) +{ + uint32_t srh_off, cur_off; + int offset_valid = 0; + int err; + + srh_off = (char *)srh - (char *)(long)skb->data; + // cur_off = end of segments, start of possible TLVs + cur_off = srh_off + sizeof(*srh) + + sizeof(struct ip6_addr_t) * (srh->first_segment + 1); + + *pad_off = 0; + + // we can only go as far as ~10 TLVs due to the BPF max stack size + #pragma clang loop unroll(full) + for (int i = 0; i < 10; i++) { + struct sr6_tlv_t tlv; + + if (cur_off == *tlv_off) + offset_valid = 1; + + if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3)) + break; + + err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv)); + if (err) + return err; + + if (tlv.type == SR6_TLV_PADDING) { + *pad_size = tlv.len + sizeof(tlv); + *pad_off = cur_off; + + if (*tlv_off == srh_off) { + *tlv_off = cur_off; + offset_valid = 1; + } + break; + + } else if (tlv.type == SR6_TLV_HMAC) { + break; + } + + cur_off += sizeof(tlv) + tlv.len; + } // we reached the padding or HMAC TLVs, or the end of the SRH + + if (*pad_off == 0) + *pad_off = cur_off; + + if (*tlv_off == -1) + *tlv_off = cur_off; + else if (!offset_valid) + return -EINVAL; + + return 0; +} + +__attribute__((always_inline)) +int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off, + struct sr6_tlv_t *itlv, uint8_t tlv_size) +{ + uint32_t srh_off = (char *)srh - (char *)(long)skb->data; + uint8_t len_remaining, new_pad; + uint32_t pad_off = 0; + uint32_t pad_size = 0; + uint32_t partial_srh_len; + int err; + + if (tlv_off != -1) + tlv_off += srh_off; + + if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC) + return -EINVAL; + + err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off); + if (err) + return err; + + err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len); + if (err) + return err; + + err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size); + if (err) + return err; + + // the following can't be moved inside update_tlv_pad because the + // bpf verifier has some issues with it + pad_off += sizeof(*itlv) + itlv->len; + partial_srh_len = pad_off - srh_off; + len_remaining = partial_srh_len % 8; + new_pad = 8 - len_remaining; + + if (new_pad == 1) // cannot pad for 1 byte only + new_pad = 9; + else if (new_pad == 8) + new_pad = 0; + + return update_tlv_pad(skb, new_pad, pad_size, pad_off); +} + +__attribute__((always_inline)) +int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, + uint32_t tlv_off) +{ + uint32_t srh_off = (char *)srh - (char *)(long)skb->data; + uint8_t len_remaining, new_pad; + uint32_t partial_srh_len; + uint32_t pad_off = 0; + uint32_t pad_size = 0; + struct sr6_tlv_t tlv; + int err; + + tlv_off += srh_off; + + err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off); + if (err) + return err; + + err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv)); + if (err) + return err; + + err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len)); + if (err) + return err; + + pad_off -= sizeof(tlv) + tlv.len; + partial_srh_len = pad_off - srh_off; + len_remaining = partial_srh_len % 8; + new_pad = 8 - len_remaining; + if (new_pad == 1) // cannot pad for 1 byte only + new_pad = 9; + else if (new_pad == 8) + new_pad = 0; + + return update_tlv_pad(skb, new_pad, pad_size, pad_off); +} + +__attribute__((always_inline)) +int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh) +{ + int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) + + ((srh->first_segment + 1) << 4); + struct sr6_tlv_t tlv; + + if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t))) + return 0; + + if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) { + struct ip6_addr_t egr_addr; + + if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16)) + return 0; + + // check if egress TLV value is correct + if (ntohll(egr_addr.hi) == 0xfd00000000000000 && + ntohll(egr_addr.lo) == 0x4) + return 1; + } + + return 0; +} + +// This function will push a SRH with segments fd00::1, fd00::2, fd00::3, +// fd00::4 +SEC("encap_srh") +int __encap_srh(struct __sk_buff *skb) +{ + unsigned long long hi = 0xfd00000000000000; + struct ip6_addr_t *seg; + struct ip6_srh_t *srh; + char srh_buf[72]; // room for 4 segments + int err; + + srh = (struct ip6_srh_t *)srh_buf; + srh->nexthdr = 0; + srh->hdrlen = 8; + srh->type = 4; + srh->segments_left = 3; + srh->first_segment = 3; + srh->flags = 0; + srh->tag = 0; + + seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh)); + + #pragma clang loop unroll(full) + for (unsigned long long lo = 0; lo < 4; lo++) { + seg->lo = htonll(4 - lo); + seg->hi = htonll(hi); + seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg)); + } + + err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf)); + if (err) + return BPF_DROP; + + return BPF_REDIRECT; +} + +// Add an Egress TLV fc00::4, add the flag A, +// and apply End.X action to fc42::1 +SEC("add_egr_x") +int __add_egr_x(struct __sk_buff *skb) +{ + unsigned long long hi = 0xfc42000000000000; + unsigned long long lo = 0x1; + struct ip6_srh_t *srh = get_srh(skb); + uint8_t new_flags = SR6_FLAG_ALERT; + struct ip6_addr_t addr; + int err, offset; + + if (srh == NULL) + return BPF_DROP; + + uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4}; + + err = add_tlv(skb, srh, (srh->hdrlen+1) << 3, + (struct sr6_tlv_t *)&tlv, 20); + if (err) + return BPF_DROP; + + offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags); + err = bpf_lwt_seg6_store_bytes(skb, offset, + (void *)&new_flags, sizeof(new_flags)); + if (err) + return BPF_DROP; + + addr.lo = htonll(lo); + addr.hi = htonll(hi); + err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X, + (void *)&addr, sizeof(addr)); + if (err) + return BPF_DROP; + return BPF_REDIRECT; +} + +// Pop the Egress TLV, reset the flags, change the tag 2442 and finally do a +// simple End action +SEC("pop_egr") +int __pop_egr(struct __sk_buff *skb) +{ + struct ip6_srh_t *srh = get_srh(skb); + uint16_t new_tag = bpf_htons(2442); + uint8_t new_flags = 0; + int err, offset; + + if (srh == NULL) + return BPF_DROP; + + if (srh->flags != SR6_FLAG_ALERT) + return BPF_DROP; + + if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV + return BPF_DROP; + + if (!has_egr_tlv(skb, srh)) + return BPF_DROP; + + err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16); + if (err) + return BPF_DROP; + + offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags); + if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags, + sizeof(new_flags))) + return BPF_DROP; + + offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag); + if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag, + sizeof(new_tag))) + return BPF_DROP; + + return BPF_OK; +} + +// Inspect if the Egress TLV and flag have been removed, if the tag is correct, +// then apply a End.T action to reach the last segment +SEC("inspect_t") +int __inspect_t(struct __sk_buff *skb) +{ + struct ip6_srh_t *srh = get_srh(skb); + int table = 117; + int err; + + if (srh == NULL) + return BPF_DROP; + + if (srh->flags != 0) + return BPF_DROP; + + if (srh->tag != bpf_htons(2442)) + return BPF_DROP; + + if (srh->hdrlen != 8) // 4 segments + return BPF_DROP; + + err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T, + (void *)&table, sizeof(table)); + + if (err) + return BPF_DROP; + + return BPF_REDIRECT; +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c new file mode 100644 index 000000000000..ce923e67e08e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/types.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") mim_array = { + .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, + .key_size = sizeof(int), + /* must be sizeof(__u32) for map in map */ + .value_size = sizeof(__u32), + .max_entries = 1, + .map_flags = 0, +}; + +struct bpf_map_def SEC("maps") mim_hash = { + .type = BPF_MAP_TYPE_HASH_OF_MAPS, + .key_size = sizeof(int), + /* must be sizeof(__u32) for map in map */ + .value_size = sizeof(__u32), + .max_entries = 1, + .map_flags = 0, +}; + +SEC("xdp_mimtest") +int xdp_mimtest0(struct xdp_md *ctx) +{ + int value = 123; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&mim_array, &key); + if (!map) + return XDP_DROP; + + bpf_map_update_elem(map, &key, &value, 0); + + map = bpf_map_lookup_elem(&mim_hash, &key); + if (!map) + return XDP_DROP; + + bpf_map_update_elem(map, &key, &value, 0); + + return XDP_PASS; +} + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c new file mode 100644 index 000000000000..af8cc68ed2f9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_map_lock.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/bpf.h> +#include <linux/version.h> +#include "bpf_helpers.h" + +#define VAR_NUM 16 + +struct hmap_elem { + struct bpf_spin_lock lock; + int var[VAR_NUM]; +}; + +struct bpf_map_def SEC("maps") hash_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(struct hmap_elem), + .max_entries = 1, +}; + +BPF_ANNOTATE_KV_PAIR(hash_map, int, struct hmap_elem); + +struct array_elem { + struct bpf_spin_lock lock; + int var[VAR_NUM]; +}; + +struct bpf_map_def SEC("maps") array_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(struct array_elem), + .max_entries = 1, +}; + +BPF_ANNOTATE_KV_PAIR(array_map, int, struct array_elem); + +SEC("map_lock_demo") +int bpf_map_lock_test(struct __sk_buff *skb) +{ + struct hmap_elem zero = {}, *val; + int rnd = bpf_get_prandom_u32(); + int key = 0, err = 1, i; + struct array_elem *q; + + val = bpf_map_lookup_elem(&hash_map, &key); + if (!val) + goto err; + /* spin_lock in hash map */ + bpf_spin_lock(&val->lock); + for (i = 0; i < VAR_NUM; i++) + val->var[i] = rnd; + bpf_spin_unlock(&val->lock); + + /* spin_lock in array */ + q = bpf_map_lookup_elem(&array_map, &key); + if (!q) + goto err; + bpf_spin_lock(&q->lock); + for (i = 0; i < VAR_NUM; i++) + q->var[i] = rnd; + bpf_spin_unlock(&q->lock); + err = 0; +err: + return err; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c new file mode 100644 index 000000000000..880d2963b472 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_obj_id.c @@ -0,0 +1,35 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/pkt_cls.h> +#include "bpf_helpers.h" + +/* It is a dumb bpf program such that it must have no + * issue to be loaded since testing the verifier is + * not the focus here. + */ + +int _version SEC("version") = 1; + +struct bpf_map_def SEC("maps") test_map_id = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 1, +}; + +SEC("test_obj_id_dummy") +int test_obj_id(struct __sk_buff *skb) +{ + __u32 key = 0; + __u64 *value; + + value = bpf_map_lookup_elem(&test_map_id, &key); + + return TC_ACT_OK; +} diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c new file mode 100644 index 000000000000..6e11ba11709e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ -0,0 +1,65 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/pkt_cls.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define barrier() __asm__ __volatile__("": : :"memory") +int _version SEC("version") = 1; + +SEC("test1") +int process(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth = (struct ethhdr *)(data); + struct tcphdr *tcp = NULL; + __u8 proto = 255; + __u64 ihl_len; + + if (eth + 1 > data_end) + return TC_ACT_SHOT; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)(eth + 1); + + if (iph + 1 > data_end) + return TC_ACT_SHOT; + ihl_len = iph->ihl * 4; + proto = iph->protocol; + tcp = (struct tcphdr *)((void *)(iph) + ihl_len); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = (struct ipv6hdr *)(eth + 1); + + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + ihl_len = sizeof(*ip6h); + proto = ip6h->nexthdr; + tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len); + } + + if (tcp) { + if (((void *)(tcp) + 20) > data_end || proto != 6) + return TC_ACT_SHOT; + barrier(); /* to force ordering of checks */ + if (((void *)(tcp) + 18) > data_end) + return TC_ACT_SHOT; + if (tcp->urg_ptr == 123) + return TC_ACT_OK; + } + + return TC_ACT_UNSPEC; +} diff --git a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c new file mode 100644 index 000000000000..7956302ecdf2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c @@ -0,0 +1,46 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/pkt_cls.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define TEST_FIELD(TYPE, FIELD, MASK) \ + { \ + TYPE tmp = *(volatile TYPE *)&skb->FIELD; \ + if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK)) \ + return TC_ACT_SHOT; \ + } +#else +#define TEST_FIELD_OFFSET(a, b) ((sizeof(a) - sizeof(b)) / sizeof(b)) +#define TEST_FIELD(TYPE, FIELD, MASK) \ + { \ + TYPE tmp = *((volatile TYPE *)&skb->FIELD + \ + TEST_FIELD_OFFSET(skb->FIELD, TYPE)); \ + if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK)) \ + return TC_ACT_SHOT; \ + } +#endif + +SEC("test1") +int process(struct __sk_buff *skb) +{ + TEST_FIELD(__u8, len, 0xFF); + TEST_FIELD(__u16, len, 0xFFFF); + TEST_FIELD(__u32, len, 0xFFFFFFFF); + TEST_FIELD(__u16, protocol, 0xFFFF); + TEST_FIELD(__u32, protocol, 0xFFFFFFFF); + TEST_FIELD(__u8, hash, 0xFF); + TEST_FIELD(__u16, hash, 0xFFFF); + TEST_FIELD(__u32, hash, 0xFFFFFFFF); + + return TC_ACT_OK; +} diff --git a/tools/testing/selftests/bpf/progs/test_queue_map.c b/tools/testing/selftests/bpf/progs/test_queue_map.c new file mode 100644 index 000000000000..87db1f9da33d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_queue_map.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Politecnico di Torino +#define MAP_TYPE BPF_MAP_TYPE_QUEUE +#include "test_queue_stack_map.h" diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c new file mode 100644 index 000000000000..5b54ec637ada --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Facebook */ + +#include <stdlib.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/bpf.h> +#include <linux/types.h> +#include <linux/if_ether.h> + +#include "bpf_endian.h" +#include "bpf_helpers.h" +#include "test_select_reuseport_common.h" + +int _version SEC("version") = 1; + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#endif + +struct bpf_map_def SEC("maps") outer_map = { + .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") result_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = NR_RESULTS, +}; + +struct bpf_map_def SEC("maps") tmp_index_ovr_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(int), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") linum_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") data_check_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct data_check), + .max_entries = 1, +}; + +#define GOTO_DONE(_result) ({ \ + result = (_result); \ + linum = __LINE__; \ + goto done; \ +}) + +SEC("select_by_skb_data") +int _select_by_skb_data(struct sk_reuseport_md *reuse_md) +{ + __u32 linum, index = 0, flags = 0, index_zero = 0; + __u32 *result_cnt, *linum_value; + struct data_check data_check = {}; + struct cmd *cmd, cmd_copy; + void *data, *data_end; + void *reuseport_array; + enum result result; + int *index_ovr; + int err; + + data = reuse_md->data; + data_end = reuse_md->data_end; + data_check.len = reuse_md->len; + data_check.eth_protocol = reuse_md->eth_protocol; + data_check.ip_protocol = reuse_md->ip_protocol; + data_check.hash = reuse_md->hash; + data_check.bind_inany = reuse_md->bind_inany; + if (data_check.eth_protocol == bpf_htons(ETH_P_IP)) { + if (bpf_skb_load_bytes_relative(reuse_md, + offsetof(struct iphdr, saddr), + data_check.skb_addrs, 8, + BPF_HDR_START_NET)) + GOTO_DONE(DROP_MISC); + } else { + if (bpf_skb_load_bytes_relative(reuse_md, + offsetof(struct ipv6hdr, saddr), + data_check.skb_addrs, 32, + BPF_HDR_START_NET)) + GOTO_DONE(DROP_MISC); + } + + /* + * The ip_protocol could be a compile time decision + * if the bpf_prog.o is dedicated to either TCP or + * UDP. + * + * Otherwise, reuse_md->ip_protocol or + * the protocol field in the iphdr can be used. + */ + if (data_check.ip_protocol == IPPROTO_TCP) { + struct tcphdr *th = data; + + if (th + 1 > data_end) + GOTO_DONE(DROP_MISC); + + data_check.skb_ports[0] = th->source; + data_check.skb_ports[1] = th->dest; + + if ((th->doff << 2) + sizeof(*cmd) > data_check.len) + GOTO_DONE(DROP_ERR_SKB_DATA); + if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy, + sizeof(cmd_copy))) + GOTO_DONE(DROP_MISC); + cmd = &cmd_copy; + } else if (data_check.ip_protocol == IPPROTO_UDP) { + struct udphdr *uh = data; + + if (uh + 1 > data_end) + GOTO_DONE(DROP_MISC); + + data_check.skb_ports[0] = uh->source; + data_check.skb_ports[1] = uh->dest; + + if (sizeof(struct udphdr) + sizeof(*cmd) > data_check.len) + GOTO_DONE(DROP_ERR_SKB_DATA); + if (data + sizeof(struct udphdr) + sizeof(*cmd) > data_end) { + if (bpf_skb_load_bytes(reuse_md, sizeof(struct udphdr), + &cmd_copy, sizeof(cmd_copy))) + GOTO_DONE(DROP_MISC); + cmd = &cmd_copy; + } else { + cmd = data + sizeof(struct udphdr); + } + } else { + GOTO_DONE(DROP_MISC); + } + + reuseport_array = bpf_map_lookup_elem(&outer_map, &index_zero); + if (!reuseport_array) + GOTO_DONE(DROP_ERR_INNER_MAP); + + index = cmd->reuseport_index; + index_ovr = bpf_map_lookup_elem(&tmp_index_ovr_map, &index_zero); + if (!index_ovr) + GOTO_DONE(DROP_MISC); + + if (*index_ovr != -1) { + index = *index_ovr; + *index_ovr = -1; + } + err = bpf_sk_select_reuseport(reuse_md, reuseport_array, &index, + flags); + if (!err) + GOTO_DONE(PASS); + + if (cmd->pass_on_failure) + GOTO_DONE(PASS_ERR_SK_SELECT_REUSEPORT); + else + GOTO_DONE(DROP_ERR_SK_SELECT_REUSEPORT); + +done: + result_cnt = bpf_map_lookup_elem(&result_map, &result); + if (!result_cnt) + return SK_DROP; + + bpf_map_update_elem(&linum_map, &index_zero, &linum, BPF_ANY); + bpf_map_update_elem(&data_check_map, &index_zero, &data_check, BPF_ANY); + + (*result_cnt)++; + return result < PASS ? SK_DROP : SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c new file mode 100644 index 000000000000..e21cd736c196 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io + +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/pkt_cls.h> +#include <linux/tcp.h> +#include <sys/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; + +/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */ +static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off, + void *data_end, __u16 eth_proto, + bool *ipv4) +{ + struct bpf_sock_tuple *result; + __u8 proto = 0; + __u64 ihl_len; + + if (eth_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)(data + nh_off); + + if (iph + 1 > data_end) + return NULL; + ihl_len = iph->ihl * 4; + proto = iph->protocol; + *ipv4 = true; + result = (struct bpf_sock_tuple *)&iph->saddr; + } else if (eth_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + nh_off); + + if (ip6h + 1 > data_end) + return NULL; + ihl_len = sizeof(*ip6h); + proto = ip6h->nexthdr; + *ipv4 = true; + result = (struct bpf_sock_tuple *)&ip6h->saddr; + } + + if (data + nh_off + ihl_len > data_end || proto != IPPROTO_TCP) + return NULL; + + return result; +} + +SEC("sk_lookup_success") +int bpf_sk_lookup_test0(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth = (struct ethhdr *)(data); + struct bpf_sock_tuple *tuple; + struct bpf_sock *sk; + size_t tuple_len; + bool ipv4; + + if (eth + 1 > data_end) + return TC_ACT_SHOT; + + tuple = get_tuple(data, sizeof(*eth), data_end, eth->h_proto, &ipv4); + if (!tuple || tuple + sizeof *tuple > data_end) + return TC_ACT_SHOT; + + tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); + sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); + if (sk) + bpf_sk_release(sk); + return sk ? TC_ACT_OK : TC_ACT_UNSPEC; +} + +SEC("sk_lookup_success_simple") +int bpf_sk_lookup_test1(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); + if (sk) + bpf_sk_release(sk); + return 0; +} + +SEC("fail_use_after_free") +int bpf_sk_lookup_uaf(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + __u32 family = 0; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); + if (sk) { + bpf_sk_release(sk); + family = sk->family; + } + return family; +} + +SEC("fail_modify_sk_pointer") +int bpf_sk_lookup_modptr(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + __u32 family; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); + if (sk) { + sk += 1; + bpf_sk_release(sk); + } + return 0; +} + +SEC("fail_modify_sk_or_null_pointer") +int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + __u32 family; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); + sk += 1; + if (sk) + bpf_sk_release(sk); + return 0; +} + +SEC("fail_no_release") +int bpf_sk_lookup_test2(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); + return 0; +} + +SEC("fail_release_twice") +int bpf_sk_lookup_test3(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); + bpf_sk_release(sk); + bpf_sk_release(sk); + return 0; +} + +SEC("fail_release_unchecked") +int bpf_sk_lookup_test4(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); + bpf_sk_release(sk); + return 0; +} + +void lookup_no_release(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); +} + +SEC("fail_no_release_subcall") +int bpf_sk_lookup_test5(struct __sk_buff *skb) +{ + lookup_no_release(skb); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c new file mode 100644 index 000000000000..68cf9829f5a7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/bpf.h> +#include <linux/pkt_cls.h> + +#include <string.h> + +#include "bpf_helpers.h" + +#define NUM_CGROUP_LEVELS 4 + +struct bpf_map_def SEC("maps") cgroup_ids = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = NUM_CGROUP_LEVELS, +}; + +static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level) +{ + __u64 id; + + /* [1] &level passed to external function that may change it, it's + * incompatible with loop unroll. + */ + id = bpf_skb_ancestor_cgroup_id(skb, level); + bpf_map_update_elem(&cgroup_ids, &level, &id, 0); +} + +SEC("cgroup_id_logger") +int log_cgroup_id(struct __sk_buff *skb) +{ + /* Loop unroll can't be used here due to [1]. Unrolling manually. + * Number of calls should be in sync with NUM_CGROUP_LEVELS. + */ + log_nth_level(skb, 0); + log_nth_level(skb, 1); + log_nth_level(skb, 2); + log_nth_level(skb, 3); + + return TC_ACT_OK; +} + +int _version SEC("version") = 1; + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c new file mode 100644 index 000000000000..de1a43e8f610 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <linux/bpf.h> +#include <netinet/in.h> +#include <stdbool.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +enum bpf_array_idx { + SRV_IDX, + CLI_IDX, + __NR_BPF_ARRAY_IDX, +}; + +struct bpf_map_def SEC("maps") addr_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct sockaddr_in6), + .max_entries = __NR_BPF_ARRAY_IDX, +}; + +struct bpf_map_def SEC("maps") sock_result_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct bpf_sock), + .max_entries = __NR_BPF_ARRAY_IDX, +}; + +struct bpf_map_def SEC("maps") tcp_sock_result_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct bpf_tcp_sock), + .max_entries = __NR_BPF_ARRAY_IDX, +}; + +struct bpf_map_def SEC("maps") linum_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +static bool is_loopback6(__u32 *a6) +{ + return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); +} + +static void skcpy(struct bpf_sock *dst, + const struct bpf_sock *src) +{ + dst->bound_dev_if = src->bound_dev_if; + dst->family = src->family; + dst->type = src->type; + dst->protocol = src->protocol; + dst->mark = src->mark; + dst->priority = src->priority; + dst->src_ip4 = src->src_ip4; + dst->src_ip6[0] = src->src_ip6[0]; + dst->src_ip6[1] = src->src_ip6[1]; + dst->src_ip6[2] = src->src_ip6[2]; + dst->src_ip6[3] = src->src_ip6[3]; + dst->src_port = src->src_port; + dst->dst_ip4 = src->dst_ip4; + dst->dst_ip6[0] = src->dst_ip6[0]; + dst->dst_ip6[1] = src->dst_ip6[1]; + dst->dst_ip6[2] = src->dst_ip6[2]; + dst->dst_ip6[3] = src->dst_ip6[3]; + dst->dst_port = src->dst_port; + dst->state = src->state; +} + +static void tpcpy(struct bpf_tcp_sock *dst, + const struct bpf_tcp_sock *src) +{ + dst->snd_cwnd = src->snd_cwnd; + dst->srtt_us = src->srtt_us; + dst->rtt_min = src->rtt_min; + dst->snd_ssthresh = src->snd_ssthresh; + dst->rcv_nxt = src->rcv_nxt; + dst->snd_nxt = src->snd_nxt; + dst->snd_una = src->snd_una; + dst->mss_cache = src->mss_cache; + dst->ecn_flags = src->ecn_flags; + dst->rate_delivered = src->rate_delivered; + dst->rate_interval_us = src->rate_interval_us; + dst->packets_out = src->packets_out; + dst->retrans_out = src->retrans_out; + dst->total_retrans = src->total_retrans; + dst->segs_in = src->segs_in; + dst->data_segs_in = src->data_segs_in; + dst->segs_out = src->segs_out; + dst->data_segs_out = src->data_segs_out; + dst->lost_out = src->lost_out; + dst->sacked_out = src->sacked_out; + dst->bytes_received = src->bytes_received; + dst->bytes_acked = src->bytes_acked; +} + +#define RETURN { \ + linum = __LINE__; \ + bpf_map_update_elem(&linum_map, &idx0, &linum, 0); \ + return 1; \ +} + +SEC("cgroup_skb/egress") +int read_sock_fields(struct __sk_buff *skb) +{ + __u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx; + struct sockaddr_in6 *srv_sa6, *cli_sa6; + struct bpf_tcp_sock *tp, *tp_ret; + struct bpf_sock *sk, *sk_ret; + __u32 linum, idx0 = 0; + + sk = skb->sk; + if (!sk || sk->state == 10) + RETURN; + + sk = bpf_sk_fullsock(sk); + if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP || + !is_loopback6(sk->src_ip6)) + RETURN; + + tp = bpf_tcp_sock(sk); + if (!tp) + RETURN; + + srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); + cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx); + if (!srv_sa6 || !cli_sa6) + RETURN; + + if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port)) + idx = srv_idx; + else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port)) + idx = cli_idx; + else + RETURN; + + sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx); + tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx); + if (!sk_ret || !tp_ret) + RETURN; + + skcpy(sk_ret, sk); + tpcpy(tp_ret, tp); + + RETURN; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sockhash_kern.c b/tools/testing/selftests/bpf/progs/test_sockhash_kern.c new file mode 100644 index 000000000000..e6755916442a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockhash_kern.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io +#undef SOCKMAP +#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKHASH +#include "./test_sockmap_kern.h" diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.c b/tools/testing/selftests/bpf/progs/test_sockmap_kern.c new file mode 100644 index 000000000000..677b2ed1cc1e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io +#define SOCKMAP +#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKMAP +#include "./test_sockmap_kern.h" diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c new file mode 100644 index 000000000000..40f904312090 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/bpf.h> +#include <linux/version.h> +#include "bpf_helpers.h" + +struct hmap_elem { + volatile int cnt; + struct bpf_spin_lock lock; + int test_padding; +}; + +struct bpf_map_def SEC("maps") hmap = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(struct hmap_elem), + .max_entries = 1, +}; + +BPF_ANNOTATE_KV_PAIR(hmap, int, struct hmap_elem); + + +struct cls_elem { + struct bpf_spin_lock lock; + volatile int cnt; +}; + +struct bpf_map_def SEC("maps") cls_map = { + .type = BPF_MAP_TYPE_CGROUP_STORAGE, + .key_size = sizeof(struct bpf_cgroup_storage_key), + .value_size = sizeof(struct cls_elem), +}; + +BPF_ANNOTATE_KV_PAIR(cls_map, struct bpf_cgroup_storage_key, + struct cls_elem); + +struct bpf_vqueue { + struct bpf_spin_lock lock; + /* 4 byte hole */ + unsigned long long lasttime; + int credit; + unsigned int rate; +}; + +struct bpf_map_def SEC("maps") vqueue = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(struct bpf_vqueue), + .max_entries = 1, +}; + +BPF_ANNOTATE_KV_PAIR(vqueue, int, struct bpf_vqueue); +#define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20) + +SEC("spin_lock_demo") +int bpf_sping_lock_test(struct __sk_buff *skb) +{ + volatile int credit = 0, max_credit = 100, pkt_len = 64; + struct hmap_elem zero = {}, *val; + unsigned long long curtime; + struct bpf_vqueue *q; + struct cls_elem *cls; + int key = 0; + int err = 0; + + val = bpf_map_lookup_elem(&hmap, &key); + if (!val) { + bpf_map_update_elem(&hmap, &key, &zero, 0); + val = bpf_map_lookup_elem(&hmap, &key); + if (!val) { + err = 1; + goto err; + } + } + /* spin_lock in hash map run time test */ + bpf_spin_lock(&val->lock); + if (val->cnt) + val->cnt--; + else + val->cnt++; + if (val->cnt != 0 && val->cnt != 1) + err = 1; + bpf_spin_unlock(&val->lock); + + /* spin_lock in array. virtual queue demo */ + q = bpf_map_lookup_elem(&vqueue, &key); + if (!q) + goto err; + curtime = bpf_ktime_get_ns(); + bpf_spin_lock(&q->lock); + q->credit += CREDIT_PER_NS(curtime - q->lasttime, q->rate); + q->lasttime = curtime; + if (q->credit > max_credit) + q->credit = max_credit; + q->credit -= pkt_len; + credit = q->credit; + bpf_spin_unlock(&q->lock); + + /* spin_lock in cgroup local storage */ + cls = bpf_get_local_storage(&cls_map, 0); + bpf_spin_lock(&cls->lock); + cls->cnt++; + bpf_spin_unlock(&cls->lock); + +err: + return err; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_stack_map.c b/tools/testing/selftests/bpf/progs/test_stack_map.c new file mode 100644 index 000000000000..31c3880e6da0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_stack_map.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Politecnico di Torino +#define MAP_TYPE BPF_MAP_TYPE_STACK +#include "test_queue_stack_map.h" diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c new file mode 100644 index 000000000000..d86c281e957f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +#ifndef PERF_MAX_STACK_DEPTH +#define PERF_MAX_STACK_DEPTH 127 +#endif + +struct bpf_map_def SEC("maps") control_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") stackid_hmap = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 16384, +}; + +struct bpf_map_def SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .key_size = sizeof(__u32), + .value_size = sizeof(struct bpf_stack_build_id) + * PERF_MAX_STACK_DEPTH, + .max_entries = 128, + .map_flags = BPF_F_STACK_BUILD_ID, +}; + +struct bpf_map_def SEC("maps") stack_amap = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct bpf_stack_build_id) + * PERF_MAX_STACK_DEPTH, + .max_entries = 128, +}; + +/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */ +struct random_urandom_args { + unsigned long long pad; + int got_bits; + int pool_left; + int input_left; +}; + +SEC("tracepoint/random/urandom_read") +int oncpu(struct random_urandom_args *args) +{ + __u32 max_len = sizeof(struct bpf_stack_build_id) + * PERF_MAX_STACK_DEPTH; + __u32 key = 0, val = 0, *value_p; + void *stack_p; + + value_p = bpf_map_lookup_elem(&control_map, &key); + if (value_p && *value_p) + return 0; /* skip if non-zero *value_p */ + + /* The size of stackmap and stackid_hmap should be the same */ + key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK); + if ((int)key >= 0) { + bpf_map_update_elem(&stackid_hmap, &key, &val, 0); + stack_p = bpf_map_lookup_elem(&stack_amap, &key); + if (stack_p) + bpf_get_stack(args, stack_p, max_len, + BPF_F_USER_STACK | BPF_F_USER_BUILD_ID); + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c new file mode 100644 index 000000000000..af111af7ca1a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +#ifndef PERF_MAX_STACK_DEPTH +#define PERF_MAX_STACK_DEPTH 127 +#endif + +struct bpf_map_def SEC("maps") control_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") stackid_hmap = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 16384, +}; + +struct bpf_map_def SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, + .max_entries = 16384, +}; + +struct bpf_map_def SEC("maps") stack_amap = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, + .max_entries = 16384, +}; + +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +struct sched_switch_args { + unsigned long long pad; + char prev_comm[16]; + int prev_pid; + int prev_prio; + long long prev_state; + char next_comm[16]; + int next_pid; + int next_prio; +}; + +SEC("tracepoint/sched/sched_switch") +int oncpu(struct sched_switch_args *ctx) +{ + __u32 max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64); + __u32 key = 0, val = 0, *value_p; + void *stack_p; + + value_p = bpf_map_lookup_elem(&control_map, &key); + if (value_p && *value_p) + return 0; /* skip if non-zero *value_p */ + + /* The size of stackmap and stackid_hmap should be the same */ + key = bpf_get_stackid(ctx, &stackmap, 0); + if ((int)key >= 0) { + bpf_map_update_elem(&stackid_hmap, &key, &val, 0); + stack_p = bpf_map_lookup_elem(&stack_amap, &key); + if (stack_p) + bpf_get_stack(ctx, stack_p, max_len, 0); + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c new file mode 100644 index 000000000000..bee3bbecc0c4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c @@ -0,0 +1,258 @@ +/* Copyright (c) 2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +/* This program shows clang/llvm is able to generate code pattern + * like: + * _tcp_send_active_reset: + * 0: bf 16 00 00 00 00 00 00 r6 = r1 + * ...... + * 335: b7 01 00 00 0f 00 00 00 r1 = 15 + * 336: 05 00 48 00 00 00 00 00 goto 72 + * + * LBB0_3: + * 337: b7 01 00 00 01 00 00 00 r1 = 1 + * 338: 63 1a d0 ff 00 00 00 00 *(u32 *)(r10 - 48) = r1 + * 408: b7 01 00 00 03 00 00 00 r1 = 3 + * + * LBB0_4: + * 409: 71 a2 fe ff 00 00 00 00 r2 = *(u8 *)(r10 - 2) + * 410: bf a7 00 00 00 00 00 00 r7 = r10 + * 411: 07 07 00 00 b8 ff ff ff r7 += -72 + * 412: bf 73 00 00 00 00 00 00 r3 = r7 + * 413: 0f 13 00 00 00 00 00 00 r3 += r1 + * 414: 73 23 2d 00 00 00 00 00 *(u8 *)(r3 + 45) = r2 + * + * From the above code snippet, the code generated by the compiler + * is reasonable. The "r1" is assigned to different values in basic + * blocks "_tcp_send_active_reset" and "LBB0_3", and used in "LBB0_4". + * The verifier should be able to handle such code patterns. + */ +#include <string.h> +#include <linux/bpf.h> +#include <linux/ipv6.h> +#include <linux/version.h> +#include <sys/socket.h> +#include "bpf_helpers.h" + +#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) +#define TCP_ESTATS_MAGIC 0xBAADBEEF + +/* This test case needs "sock" and "pt_regs" data structure. + * Recursively, "sock" needs "sock_common" and "inet_sock". + * However, this is a unit test case only for + * verifier purpose without bpf program execution. + * We can safely mock much simpler data structures, basically + * only taking the necessary fields from kernel headers. + */ +typedef __u32 __bitwise __portpair; +typedef __u64 __bitwise __addrpair; + +struct sock_common { + unsigned short skc_family; + union { + __addrpair skc_addrpair; + struct { + __be32 skc_daddr; + __be32 skc_rcv_saddr; + }; + }; + union { + __portpair skc_portpair; + struct { + __be16 skc_dport; + __u16 skc_num; + }; + }; + struct in6_addr skc_v6_daddr; + struct in6_addr skc_v6_rcv_saddr; +}; + +struct sock { + struct sock_common __sk_common; +#define sk_family __sk_common.skc_family +#define sk_v6_daddr __sk_common.skc_v6_daddr +#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr +}; + +struct inet_sock { + struct sock sk; +#define inet_daddr sk.__sk_common.skc_daddr +#define inet_dport sk.__sk_common.skc_dport + __be32 inet_saddr; + __be16 inet_sport; +}; + +struct pt_regs { + long di; +}; + +static inline struct inet_sock *inet_sk(const struct sock *sk) +{ + return (struct inet_sock *)sk; +} + +/* Define various data structures for state recording. + * Some fields are not used due to test simplification. + */ +enum tcp_estats_addrtype { + TCP_ESTATS_ADDRTYPE_IPV4 = 1, + TCP_ESTATS_ADDRTYPE_IPV6 = 2 +}; + +enum tcp_estats_event_type { + TCP_ESTATS_ESTABLISH, + TCP_ESTATS_PERIODIC, + TCP_ESTATS_TIMEOUT, + TCP_ESTATS_RETRANSMIT_TIMEOUT, + TCP_ESTATS_RETRANSMIT_OTHER, + TCP_ESTATS_SYN_RETRANSMIT, + TCP_ESTATS_SYNACK_RETRANSMIT, + TCP_ESTATS_TERM, + TCP_ESTATS_TX_RESET, + TCP_ESTATS_RX_RESET, + TCP_ESTATS_WRITE_TIMEOUT, + TCP_ESTATS_CONN_TIMEOUT, + TCP_ESTATS_ACK_LATENCY, + TCP_ESTATS_NEVENTS, +}; + +struct tcp_estats_event { + int pid; + int cpu; + unsigned long ts; + unsigned int magic; + enum tcp_estats_event_type event_type; +}; + +/* The below data structure is packed in order for + * llvm compiler to generate expected code. + */ +struct tcp_estats_conn_id { + unsigned int localaddressType; + struct { + unsigned char data[16]; + } localaddress; + struct { + unsigned char data[16]; + } remaddress; + unsigned short localport; + unsigned short remport; +} __attribute__((__packed__)); + +struct tcp_estats_basic_event { + struct tcp_estats_event event; + struct tcp_estats_conn_id conn_id; +}; + +struct bpf_map_def SEC("maps") ev_record_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(struct tcp_estats_basic_event), + .max_entries = 1024, +}; + +struct dummy_tracepoint_args { + unsigned long long pad; + struct sock *sock; +}; + +static __always_inline void tcp_estats_ev_init(struct tcp_estats_event *event, + enum tcp_estats_event_type type) +{ + event->magic = TCP_ESTATS_MAGIC; + event->ts = bpf_ktime_get_ns(); + event->event_type = type; +} + +static __always_inline void unaligned_u32_set(unsigned char *to, __u8 *from) +{ + to[0] = _(from[0]); + to[1] = _(from[1]); + to[2] = _(from[2]); + to[3] = _(from[3]); +} + +static __always_inline void conn_id_ipv4_init(struct tcp_estats_conn_id *conn_id, + __be32 *saddr, __be32 *daddr) +{ + conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV4; + + unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr); + unaligned_u32_set(conn_id->remaddress.data, (__u8 *)daddr); +} + +static __always_inline void conn_id_ipv6_init(struct tcp_estats_conn_id *conn_id, + __be32 *saddr, __be32 *daddr) +{ + conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV6; + + unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr); + unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32), + (__u8 *)(saddr + 1)); + unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 2, + (__u8 *)(saddr + 2)); + unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 3, + (__u8 *)(saddr + 3)); + + unaligned_u32_set(conn_id->remaddress.data, + (__u8 *)(daddr)); + unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32), + (__u8 *)(daddr + 1)); + unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 2, + (__u8 *)(daddr + 2)); + unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 3, + (__u8 *)(daddr + 3)); +} + +static __always_inline void tcp_estats_conn_id_init(struct tcp_estats_conn_id *conn_id, + struct sock *sk) +{ + conn_id->localport = _(inet_sk(sk)->inet_sport); + conn_id->remport = _(inet_sk(sk)->inet_dport); + + if (_(sk->sk_family) == AF_INET6) + conn_id_ipv6_init(conn_id, + sk->sk_v6_rcv_saddr.s6_addr32, + sk->sk_v6_daddr.s6_addr32); + else + conn_id_ipv4_init(conn_id, + &inet_sk(sk)->inet_saddr, + &inet_sk(sk)->inet_daddr); +} + +static __always_inline void tcp_estats_init(struct sock *sk, + struct tcp_estats_event *event, + struct tcp_estats_conn_id *conn_id, + enum tcp_estats_event_type type) +{ + tcp_estats_ev_init(event, type); + tcp_estats_conn_id_init(conn_id, sk); +} + +static __always_inline void send_basic_event(struct sock *sk, + enum tcp_estats_event_type type) +{ + struct tcp_estats_basic_event ev; + __u32 key = bpf_get_prandom_u32(); + + memset(&ev, 0, sizeof(ev)); + tcp_estats_init(sk, &ev.event, &ev.conn_id, type); + bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY); +} + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + if (!arg->sock) + return 0; + + send_basic_event(arg->sock, TCP_ESTATS_TX_RESET); + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c new file mode 100644 index 000000000000..74f73b33a7b0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/tcp.h> +#include <netinet/in.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "test_tcpbpf.h" + +struct bpf_map_def SEC("maps") global_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct tcpbpf_globals), + .max_entries = 4, +}; + +struct bpf_map_def SEC("maps") sockopt_results = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(int), + .max_entries = 2, +}; + +static inline void update_event_map(int event) +{ + __u32 key = 0; + struct tcpbpf_globals g, *gp; + + gp = bpf_map_lookup_elem(&global_map, &key); + if (gp == NULL) { + struct tcpbpf_globals g = {0}; + + g.event_map |= (1 << event); + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } else { + g = *gp; + g.event_map |= (1 << event); + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } +} + +int _version SEC("version") = 1; + +SEC("sockops") +int bpf_testcb(struct bpf_sock_ops *skops) +{ + char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)]; + struct tcphdr *thdr; + int good_call_rv = 0; + int bad_call_rv = 0; + int save_syn = 1; + int rv = -1; + int v = 0; + int op; + + op = (int) skops->op; + + update_event_map(op); + + switch (op) { + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + /* Test failure to set largest cb flag (assumes not defined) */ + bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80); + /* Set callback */ + good_call_rv = bpf_sock_ops_cb_flags_set(skops, + BPF_SOCK_OPS_STATE_CB_FLAG); + /* Update results */ + { + __u32 key = 0; + struct tcpbpf_globals g, *gp; + + gp = bpf_map_lookup_elem(&global_map, &key); + if (!gp) + break; + g = *gp; + g.bad_cb_test_rv = bad_call_rv; + g.good_cb_test_rv = good_call_rv; + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } + break; + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + skops->sk_txhash = 0x12345f; + v = 0xff; + rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v, + sizeof(v)); + if (skops->family == AF_INET6) { + v = bpf_getsockopt(skops, IPPROTO_TCP, TCP_SAVED_SYN, + header, (sizeof(struct ipv6hdr) + + sizeof(struct tcphdr))); + if (!v) { + int offset = sizeof(struct ipv6hdr); + + thdr = (struct tcphdr *)(header + offset); + v = thdr->syn; + __u32 key = 1; + + bpf_map_update_elem(&sockopt_results, &key, &v, + BPF_ANY); + } + } + break; + case BPF_SOCK_OPS_RTO_CB: + break; + case BPF_SOCK_OPS_RETRANS_CB: + break; + case BPF_SOCK_OPS_STATE_CB: + if (skops->args[1] == BPF_TCP_CLOSE) { + __u32 key = 0; + struct tcpbpf_globals g, *gp; + + gp = bpf_map_lookup_elem(&global_map, &key); + if (!gp) + break; + g = *gp; + if (skops->args[0] == BPF_TCP_LISTEN) { + g.num_listen++; + } else { + g.total_retrans = skops->total_retrans; + g.data_segs_in = skops->data_segs_in; + g.data_segs_out = skops->data_segs_out; + g.bytes_received = skops->bytes_received; + g.bytes_acked = skops->bytes_acked; + } + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + } + break; + case BPF_SOCK_OPS_TCP_LISTEN_CB: + bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); + v = bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN, + &save_syn, sizeof(save_syn)); + /* Update global map w/ result of setsock opt */ + __u32 key = 0; + + bpf_map_update_elem(&sockopt_results, &key, &v, BPF_ANY); + break; + default: + rv = -1; + } + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c new file mode 100644 index 000000000000..edbca203ce2d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/tcp.h> +#include <netinet/in.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "test_tcpnotify.h" + +struct bpf_map_def SEC("maps") global_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct tcpnotify_globals), + .max_entries = 4, +}; + +struct bpf_map_def SEC("maps") perf_event_map = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(__u32), + .max_entries = 2, +}; + +int _version SEC("version") = 1; + +SEC("sockops") +int bpf_testcb(struct bpf_sock_ops *skops) +{ + int rv = -1; + int op; + + op = (int) skops->op; + + if (bpf_ntohl(skops->remote_port) != TESTPORT) { + skops->reply = -1; + return 0; + } + + switch (op) { + case BPF_SOCK_OPS_TIMEOUT_INIT: + case BPF_SOCK_OPS_RWND_INIT: + case BPF_SOCK_OPS_NEEDS_ECN: + case BPF_SOCK_OPS_BASE_RTT: + case BPF_SOCK_OPS_RTO_CB: + rv = 1; + break; + + case BPF_SOCK_OPS_TCP_CONNECT_CB: + case BPF_SOCK_OPS_TCP_LISTEN_CB: + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + bpf_sock_ops_cb_flags_set(skops, (BPF_SOCK_OPS_RETRANS_CB_FLAG| + BPF_SOCK_OPS_RTO_CB_FLAG)); + rv = 1; + break; + case BPF_SOCK_OPS_RETRANS_CB: { + __u32 key = 0; + struct tcpnotify_globals g, *gp; + struct tcp_notifier msg = { + .type = 0xde, + .subtype = 0xad, + .source = 0xbe, + .hash = 0xef, + }; + + rv = 1; + + /* Update results */ + gp = bpf_map_lookup_elem(&global_map, &key); + if (!gp) + break; + g = *gp; + g.total_retrans = skops->total_retrans; + g.ncalls++; + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + bpf_perf_event_output(skops, &perf_event_map, + BPF_F_CURRENT_CPU, + &msg, sizeof(msg)); + } + break; + default: + rv = -1; + } + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c new file mode 100644 index 000000000000..04bf084517e0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include <linux/bpf.h> +#include "bpf_helpers.h" + +/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ +struct sched_switch_args { + unsigned long long pad; + char prev_comm[16]; + int prev_pid; + int prev_prio; + long long prev_state; + char next_comm[16]; + int next_pid; + int next_prio; +}; + +SEC("tracepoint/sched/sched_switch") +int oncpu(struct sched_switch_args *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c new file mode 100644 index 000000000000..504df69c83df --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -0,0 +1,713 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2016 VMware + * Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stddef.h> +#include <string.h> +#include <arpa/inet.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/types.h> +#include <linux/tcp.h> +#include <linux/socket.h> +#include <linux/pkt_cls.h> +#include <linux/erspan.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +#define ERROR(ret) do {\ + char fmt[] = "ERROR line:%d ret:%d\n";\ + bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \ + } while (0) + +int _version SEC("version") = 1; + +struct geneve_opt { + __be16 opt_class; + __u8 type; + __u8 length:5; + __u8 r3:1; + __u8 r2:1; + __u8 r1:1; + __u8 opt_data[8]; /* hard-coded to 8 byte */ +}; + +struct vxlan_metadata { + __u32 gbp; +}; + +SEC("gre_set_tunnel") +int _gre_set_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("gre_get_tunnel") +int _gre_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "key %d remote ip 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4); + return TC_ACT_OK; +} + +SEC("ip6gretap_set_tunnel") +int _ip6gretap_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key; + int ret; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + key.tunnel_label = 0xabcde; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX | + BPF_F_SEQ_NUMBER); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip6gretap_get_tunnel") +int _ip6gretap_get_tunnel(struct __sk_buff *skb) +{ + char fmt[] = "key %d remote ip6 ::%x label %x\n"; + struct bpf_tunnel_key key; + int ret; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv6[3], key.tunnel_label); + + return TC_ACT_OK; +} + +SEC("erspan_set_tunnel") +int _erspan_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key; + struct erspan_metadata md; + int ret; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + __builtin_memset(&md, 0, sizeof(md)); +#ifdef ERSPAN_V1 + md.version = 1; + md.u.index = bpf_htonl(123); +#else + __u8 direction = 1; + __u8 hwid = 7; + + md.version = 2; + md.u.md2.dir = direction; + md.u.md2.hwid = hwid & 0xf; + md.u.md2.hwid_upper = (hwid >> 4) & 0x3; +#endif + + ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("erspan_get_tunnel") +int _erspan_get_tunnel(struct __sk_buff *skb) +{ + char fmt[] = "key %d remote ip 0x%x erspan version %d\n"; + struct bpf_tunnel_key key; + struct erspan_metadata md; + __u32 index; + int ret; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, md.version); + +#ifdef ERSPAN_V1 + char fmt2[] = "\tindex %x\n"; + + index = bpf_ntohl(md.u.index); + bpf_trace_printk(fmt2, sizeof(fmt2), index); +#else + char fmt2[] = "\tdirection %d hwid %x timestamp %u\n"; + + bpf_trace_printk(fmt2, sizeof(fmt2), + md.u.md2.dir, + (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + bpf_ntohl(md.u.md2.timestamp)); +#endif + + return TC_ACT_OK; +} + +SEC("ip4ip6erspan_set_tunnel") +int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key; + struct erspan_metadata md; + int ret; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv6[3] = bpf_htonl(0x11); + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + __builtin_memset(&md, 0, sizeof(md)); + +#ifdef ERSPAN_V1 + md.u.index = bpf_htonl(123); + md.version = 1; +#else + __u8 direction = 0; + __u8 hwid = 17; + + md.version = 2; + md.u.md2.dir = direction; + md.u.md2.hwid = hwid & 0xf; + md.u.md2.hwid_upper = (hwid >> 4) & 0x3; +#endif + + ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip4ip6erspan_get_tunnel") +int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb) +{ + char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n"; + struct bpf_tunnel_key key; + struct erspan_metadata md; + __u32 index; + int ret; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, md.version); + +#ifdef ERSPAN_V1 + char fmt2[] = "\tindex %x\n"; + + index = bpf_ntohl(md.u.index); + bpf_trace_printk(fmt2, sizeof(fmt2), index); +#else + char fmt2[] = "\tdirection %d hwid %x timestamp %u\n"; + + bpf_trace_printk(fmt2, sizeof(fmt2), + md.u.md2.dir, + (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + bpf_ntohl(md.u.md2.timestamp)); +#endif + + return TC_ACT_OK; +} + +SEC("vxlan_set_tunnel") +int _vxlan_set_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + struct vxlan_metadata md; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */ + ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("vxlan_get_tunnel") +int _vxlan_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + struct vxlan_metadata md; + char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, md.gbp); + + return TC_ACT_OK; +} + +SEC("ip6vxlan_set_tunnel") +int _ip6vxlan_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key; + int ret; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ + key.tunnel_id = 22; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip6vxlan_get_tunnel") +int _ip6vxlan_get_tunnel(struct __sk_buff *skb) +{ + char fmt[] = "key %d remote ip6 ::%x label %x\n"; + struct bpf_tunnel_key key; + int ret; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv6[3], key.tunnel_label); + + return TC_ACT_OK; +} + +SEC("geneve_set_tunnel") +int _geneve_set_tunnel(struct __sk_buff *skb) +{ + int ret, ret2; + struct bpf_tunnel_key key; + struct geneve_opt gopt; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + __builtin_memset(&gopt, 0x0, sizeof(gopt)); + gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */ + gopt.type = 0x08; + gopt.r1 = 0; + gopt.r2 = 0; + gopt.r3 = 0; + gopt.length = 2; /* 4-byte multiple */ + *(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef); + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("geneve_get_tunnel") +int _geneve_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + struct geneve_opt gopt; + char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, gopt.opt_class); + return TC_ACT_OK; +} + +SEC("ip6geneve_set_tunnel") +int _ip6geneve_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key; + struct geneve_opt gopt; + int ret; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ + key.tunnel_id = 22; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + __builtin_memset(&gopt, 0x0, sizeof(gopt)); + gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */ + gopt.type = 0x08; + gopt.r1 = 0; + gopt.r2 = 0; + gopt.r3 = 0; + gopt.length = 2; /* 4-byte multiple */ + *(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef); + + ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip6geneve_get_tunnel") +int _ip6geneve_get_tunnel(struct __sk_buff *skb) +{ + char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n"; + struct bpf_tunnel_key key; + struct geneve_opt gopt; + int ret; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, gopt.opt_class); + + return TC_ACT_OK; +} + +SEC("ipip_set_tunnel") +int _ipip_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + void *data = (void *)(long)skb->data; + struct iphdr *iph = data; + struct tcphdr *tcp = data + sizeof(*iph); + void *data_end = (void *)(long)skb->data_end; + int ret; + + /* single length check */ + if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + ERROR(1); + return TC_ACT_SHOT; + } + + key.tunnel_ttl = 64; + if (iph->protocol == IPPROTO_ICMP) { + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + } else { + if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) + return TC_ACT_SHOT; + + if (tcp->dest == bpf_htons(5200)) + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + else if (tcp->dest == bpf_htons(5201)) + key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */ + else + return TC_ACT_SHOT; + } + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ipip_get_tunnel") +int _ipip_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "remote ip 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4); + return TC_ACT_OK; +} + +SEC("ipip6_set_tunnel") +int _ipip6_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + void *data = (void *)(long)skb->data; + struct iphdr *iph = data; + struct tcphdr *tcp = data + sizeof(*iph); + void *data_end = (void *)(long)skb->data_end; + int ret; + + /* single length check */ + if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + ERROR(1); + return TC_ACT_SHOT; + } + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */ + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ipip6_get_tunnel") +int _ipip6_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "remote ip6 %x::%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]), + bpf_htonl(key.remote_ipv6[3])); + return TC_ACT_OK; +} + +SEC("ip6ip6_set_tunnel") +int _ip6ip6_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + void *data = (void *)(long)skb->data; + struct ipv6hdr *iph = data; + struct tcphdr *tcp = data + sizeof(*iph); + void *data_end = (void *)(long)skb->data_end; + int ret; + + /* single length check */ + if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + ERROR(1); + return TC_ACT_SHOT; + } + + key.remote_ipv6[0] = bpf_htonl(0x2401db00); + key.tunnel_ttl = 64; + + if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) { + key.remote_ipv6[3] = bpf_htonl(1); + } else { + if (iph->nexthdr != 6 /* NEXTHDR_TCP */) { + ERROR(iph->nexthdr); + return TC_ACT_SHOT; + } + + if (tcp->dest == bpf_htons(5200)) { + key.remote_ipv6[3] = bpf_htonl(1); + } else if (tcp->dest == bpf_htons(5201)) { + key.remote_ipv6[3] = bpf_htonl(2); + } else { + ERROR(tcp->dest); + return TC_ACT_SHOT; + } + } + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip6ip6_get_tunnel") +int _ip6ip6_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "remote ip6 %x::%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), + BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]), + bpf_htonl(key.remote_ipv6[3])); + return TC_ACT_OK; +} + +SEC("xfrm_get_state") +int _xfrm_get_state(struct __sk_buff *skb) +{ + struct bpf_xfrm_state x; + char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n"; + int ret; + + ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0); + if (ret < 0) + return TC_ACT_OK; + + bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi), + bpf_ntohl(x.remote_ipv4)); + return TC_ACT_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c new file mode 100644 index 000000000000..5e7df8bb5b5d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp.c @@ -0,0 +1,235 @@ +/* Copyright (c) 2016,2017 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/udp.h> +#include <linux/tcp.h> +#include <linux/pkt_cls.h> +#include <sys/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "test_iptunnel_common.h" + +int _version SEC("version") = 1; + +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64), + .max_entries = 256, +}; + +struct bpf_map_def SEC("maps") vip2tnl = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip), + .value_size = sizeof(struct iptnl_info), + .max_entries = MAX_IPTNL_ENTRIES, +}; + +static __always_inline void count_tx(__u32 protocol) +{ + __u64 *rxcnt_count; + + rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol); + if (rxcnt_count) + *rxcnt_count += 1; +} + +static __always_inline int get_dport(void *trans_data, void *data_end, + __u8 protocol) +{ + struct tcphdr *th; + struct udphdr *uh; + + switch (protocol) { + case IPPROTO_TCP: + th = (struct tcphdr *)trans_data; + if (th + 1 > data_end) + return -1; + return th->dest; + case IPPROTO_UDP: + uh = (struct udphdr *)trans_data; + if (uh + 1 > data_end) + return -1; + return uh->dest; + default: + return 0; + } +} + +static __always_inline void set_ethhdr(struct ethhdr *new_eth, + const struct ethhdr *old_eth, + const struct iptnl_info *tnl, + __be16 h_proto) +{ + memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source)); + memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest)); + new_eth->h_proto = h_proto; +} + +static __always_inline int handle_ipv4(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct iptnl_info *tnl; + struct ethhdr *new_eth; + struct ethhdr *old_eth; + struct iphdr *iph = data + sizeof(struct ethhdr); + __u16 *next_iph; + __u16 payload_len; + struct vip vip = {}; + int dport; + __u32 csum = 0; + int i; + + if (iph + 1 > data_end) + return XDP_DROP; + + dport = get_dport(iph + 1, data_end, iph->protocol); + if (dport == -1) + return XDP_DROP; + + vip.protocol = iph->protocol; + vip.family = AF_INET; + vip.daddr.v4 = iph->daddr; + vip.dport = dport; + payload_len = bpf_ntohs(iph->tot_len); + + tnl = bpf_map_lookup_elem(&vip2tnl, &vip); + /* It only does v4-in-v4 */ + if (!tnl || tnl->family != AF_INET) + return XDP_PASS; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) + return XDP_DROP; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + + new_eth = data; + iph = data + sizeof(*new_eth); + old_eth = data + sizeof(*iph); + + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || + iph + 1 > data_end) + return XDP_DROP; + + set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP)); + + iph->version = 4; + iph->ihl = sizeof(*iph) >> 2; + iph->frag_off = 0; + iph->protocol = IPPROTO_IPIP; + iph->check = 0; + iph->tos = 0; + iph->tot_len = bpf_htons(payload_len + sizeof(*iph)); + iph->daddr = tnl->daddr.v4; + iph->saddr = tnl->saddr.v4; + iph->ttl = 8; + + next_iph = (__u16 *)iph; +#pragma clang loop unroll(full) + for (i = 0; i < sizeof(*iph) >> 1; i++) + csum += *next_iph++; + + iph->check = ~((csum & 0xffff) + (csum >> 16)); + + count_tx(vip.protocol); + + return XDP_TX; +} + +static __always_inline int handle_ipv6(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct iptnl_info *tnl; + struct ethhdr *new_eth; + struct ethhdr *old_eth; + struct ipv6hdr *ip6h = data + sizeof(struct ethhdr); + __u16 payload_len; + struct vip vip = {}; + int dport; + + if (ip6h + 1 > data_end) + return XDP_DROP; + + dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr); + if (dport == -1) + return XDP_DROP; + + vip.protocol = ip6h->nexthdr; + vip.family = AF_INET6; + memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr)); + vip.dport = dport; + payload_len = ip6h->payload_len; + + tnl = bpf_map_lookup_elem(&vip2tnl, &vip); + /* It only does v6-in-v6 */ + if (!tnl || tnl->family != AF_INET6) + return XDP_PASS; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) + return XDP_DROP; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + + new_eth = data; + ip6h = data + sizeof(*new_eth); + old_eth = data + sizeof(*ip6h); + + if (new_eth + 1 > data_end || old_eth + 1 > data_end || + ip6h + 1 > data_end) + return XDP_DROP; + + set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6)); + + ip6h->version = 6; + ip6h->priority = 0; + memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); + ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + sizeof(*ip6h)); + ip6h->nexthdr = IPPROTO_IPV6; + ip6h->hop_limit = 8; + memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6)); + memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6)); + + count_tx(vip.protocol); + + return XDP_TX; +} + +SEC("xdp_tx_iptunnel") +int _xdp_tx_iptunnel(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + struct ethhdr *eth = data; + __u16 h_proto; + + if (eth + 1 > data_end) + return XDP_DROP; + + h_proto = eth->h_proto; + + if (h_proto == bpf_htons(ETH_P_IP)) + return handle_ipv4(xdp); + else if (h_proto == bpf_htons(ETH_P_IPV6)) + + return handle_ipv6(xdp); + else + return XDP_DROP; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c new file mode 100644 index 000000000000..8d0182650653 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -0,0 +1,53 @@ +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/pkt_cls.h> + +#include "bpf_helpers.h" + +#define __round_mask(x, y) ((__typeof__(x))((y) - 1)) +#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) +#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem + +SEC("t") +int ing_cls(struct __sk_buff *ctx) +{ + __u8 *data, *data_meta, *data_end; + __u32 diff = 0; + + data_meta = ctx_ptr(ctx, data_meta); + data_end = ctx_ptr(ctx, data_end); + data = ctx_ptr(ctx, data); + + if (data + ETH_ALEN > data_end || + data_meta + round_up(ETH_ALEN, 4) > data) + return TC_ACT_SHOT; + + diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0]; + diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2]; + + return diff ? TC_ACT_SHOT : TC_ACT_OK; +} + +SEC("x") +int ing_xdp(struct xdp_md *ctx) +{ + __u8 *data, *data_meta, *data_end; + int ret; + + ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4)); + if (ret < 0) + return XDP_DROP; + + data_meta = ctx_ptr(ctx, data_meta); + data_end = ctx_ptr(ctx, data_end); + data = ctx_ptr(ctx, data); + + if (data + ETH_ALEN > data_end || + data_meta + round_up(ETH_ALEN, 4) > data) + return XDP_DROP; + + __builtin_memcpy(data_meta, data, ETH_ALEN); + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c new file mode 100644 index 000000000000..5e4aac74f9d0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -0,0 +1,833 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include "bpf_helpers.h" + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +static __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/* copy paste of jhash from kernel sources to make sure llvm + * can compile it into valid sequence of bpf instructions + */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +#define JHASH_INITVAL 0xdeadbeef + +typedef unsigned int u32; + +static __attribute__ ((noinline)) +u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const unsigned char *k = key; + + a = b = c = JHASH_INITVAL + length + initval; + + while (length > 12) { + a += *(u32 *)(k); + b += *(u32 *)(k + 4); + c += *(u32 *)(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +static __attribute__ ((noinline)) +u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + __jhash_final(a, b, c); + return c; +} + +static __attribute__ ((noinline)) +u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +struct flow_key { + union { + __be32 src; + __be32 srcv6[4]; + }; + union { + __be32 dst; + __be32 dstv6[4]; + }; + union { + __u32 ports; + __u16 port16[2]; + }; + __u8 proto; +}; + +struct packet_description { + struct flow_key flow; + __u8 flags; +}; + +struct ctl_value { + union { + __u64 value; + __u32 ifindex; + __u8 mac[6]; + }; +}; + +struct vip_definition { + union { + __be32 vip; + __be32 vipv6[4]; + }; + __u16 port; + __u16 family; + __u8 proto; +}; + +struct vip_meta { + __u32 flags; + __u32 vip_num; +}; + +struct real_pos_lru { + __u32 pos; + __u64 atime; +}; + +struct real_definition { + union { + __be32 dst; + __be32 dstv6[4]; + }; + __u8 flags; +}; + +struct lb_stats { + __u64 v2; + __u64 v1; +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct vip_definition), + .value_size = sizeof(struct vip_meta), + .max_entries = 512, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = { + .type = BPF_MAP_TYPE_LRU_HASH, + .key_size = sizeof(struct flow_key), + .value_size = sizeof(struct real_pos_lru), + .max_entries = 300, + .map_flags = 1U << 1, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 12 * 655, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) reals = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct real_definition), + .max_entries = 40, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) stats = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct lb_stats), + .max_entries = 515, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct ctl_value), + .max_entries = 16, + .map_flags = 0, +}; + +struct eth_hdr { + unsigned char eth_dest[6]; + unsigned char eth_source[6]; + unsigned short eth_proto; +}; + +static inline __u64 calc_offset(bool is_ipv6, bool is_icmp) +{ + __u64 off = sizeof(struct eth_hdr); + if (is_ipv6) { + off += sizeof(struct ipv6hdr); + if (is_icmp) + off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr); + } else { + off += sizeof(struct iphdr); + if (is_icmp) + off += sizeof(struct icmphdr) + sizeof(struct iphdr); + } + return off; +} + +static __attribute__ ((noinline)) +bool parse_udp(void *data, void *data_end, + bool is_ipv6, struct packet_description *pckt) +{ + + bool is_icmp = !((pckt->flags & (1 << 0)) == 0); + __u64 off = calc_offset(is_ipv6, is_icmp); + struct udphdr *udp; + udp = data + off; + + if (udp + 1 > data_end) + return 0; + if (!is_icmp) { + pckt->flow.port16[0] = udp->source; + pckt->flow.port16[1] = udp->dest; + } else { + pckt->flow.port16[0] = udp->dest; + pckt->flow.port16[1] = udp->source; + } + return 1; +} + +static __attribute__ ((noinline)) +bool parse_tcp(void *data, void *data_end, + bool is_ipv6, struct packet_description *pckt) +{ + + bool is_icmp = !((pckt->flags & (1 << 0)) == 0); + __u64 off = calc_offset(is_ipv6, is_icmp); + struct tcphdr *tcp; + + tcp = data + off; + if (tcp + 1 > data_end) + return 0; + if (tcp->syn) + pckt->flags |= (1 << 1); + if (!is_icmp) { + pckt->flow.port16[0] = tcp->source; + pckt->flow.port16[1] = tcp->dest; + } else { + pckt->flow.port16[0] = tcp->dest; + pckt->flow.port16[1] = tcp->source; + } + return 1; +} + +static __attribute__ ((noinline)) +bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, + struct packet_description *pckt, + struct real_definition *dst, __u32 pkt_bytes) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + struct ipv6hdr *ip6h; + __u32 ip_suffix; + void *data_end; + void *data; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) + return 0; + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + new_eth = data; + ip6h = data + sizeof(struct eth_hdr); + old_eth = data + sizeof(struct ipv6hdr); + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || ip6h + 1 > data_end) + return 0; + memcpy(new_eth->eth_dest, cval->mac, 6); + memcpy(new_eth->eth_source, old_eth->eth_dest, 6); + new_eth->eth_proto = 56710; + ip6h->version = 6; + ip6h->priority = 0; + memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); + + ip6h->nexthdr = IPPROTO_IPV6; + ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0]; + ip6h->payload_len = + __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr)); + ip6h->hop_limit = 4; + + ip6h->saddr.in6_u.u6_addr32[0] = 1; + ip6h->saddr.in6_u.u6_addr32[1] = 2; + ip6h->saddr.in6_u.u6_addr32[2] = 3; + ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix; + memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16); + return 1; +} + +static __attribute__ ((noinline)) +bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, + struct packet_description *pckt, + struct real_definition *dst, __u32 pkt_bytes) +{ + + __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]); + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + __u16 *next_iph_u16; + struct iphdr *iph; + __u32 csum = 0; + void *data_end; + void *data; + + ip_suffix <<= 15; + ip_suffix ^= pckt->flow.src; + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) + return 0; + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + new_eth = data; + iph = data + sizeof(struct eth_hdr); + old_eth = data + sizeof(struct iphdr); + if (new_eth + 1 > data_end || + old_eth + 1 > data_end || iph + 1 > data_end) + return 0; + memcpy(new_eth->eth_dest, cval->mac, 6); + memcpy(new_eth->eth_source, old_eth->eth_dest, 6); + new_eth->eth_proto = 8; + iph->version = 4; + iph->ihl = 5; + iph->frag_off = 0; + iph->protocol = IPPROTO_IPIP; + iph->check = 0; + iph->tos = 1; + iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr)); + /* don't update iph->daddr, since it will overwrite old eth_proto + * and multiple iterations of bpf_prog_run() will fail + */ + + iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst; + iph->ttl = 4; + + next_iph_u16 = (__u16 *) iph; +#pragma clang loop unroll(full) + for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_iph_u16++; + iph->check = ~((csum & 0xffff) + (csum >> 16)); + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) + return 0; + return 1; +} + +static __attribute__ ((noinline)) +bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + + old_eth = *data; + new_eth = *data + sizeof(struct ipv6hdr); + memcpy(new_eth->eth_source, old_eth->eth_source, 6); + memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); + if (inner_v4) + new_eth->eth_proto = 8; + else + new_eth->eth_proto = 56710; + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) + return 0; + *data = (void *)(long)xdp->data; + *data_end = (void *)(long)xdp->data_end; + return 1; +} + +static __attribute__ ((noinline)) +bool decap_v4(struct xdp_md *xdp, void **data, void **data_end) +{ + struct eth_hdr *new_eth; + struct eth_hdr *old_eth; + + old_eth = *data; + new_eth = *data + sizeof(struct iphdr); + memcpy(new_eth->eth_source, old_eth->eth_source, 6); + memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); + new_eth->eth_proto = 8; + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) + return 0; + *data = (void *)(long)xdp->data; + *data_end = (void *)(long)xdp->data_end; + return 1; +} + +static __attribute__ ((noinline)) +int swap_mac_and_send(void *data, void *data_end) +{ + unsigned char tmp_mac[6]; + struct eth_hdr *eth; + + eth = data; + memcpy(tmp_mac, eth->eth_source, 6); + memcpy(eth->eth_source, eth->eth_dest, 6); + memcpy(eth->eth_dest, tmp_mac, 6); + return XDP_TX; +} + +static __attribute__ ((noinline)) +int send_icmp_reply(void *data, void *data_end) +{ + struct icmphdr *icmp_hdr; + __u16 *next_iph_u16; + __u32 tmp_addr = 0; + struct iphdr *iph; + __u32 csum1 = 0; + __u32 csum = 0; + __u64 off = 0; + + if (data + sizeof(struct eth_hdr) + + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end) + return XDP_DROP; + off += sizeof(struct eth_hdr); + iph = data + off; + off += sizeof(struct iphdr); + icmp_hdr = data + off; + icmp_hdr->type = 0; + icmp_hdr->checksum += 0x0007; + iph->ttl = 4; + tmp_addr = iph->daddr; + iph->daddr = iph->saddr; + iph->saddr = tmp_addr; + iph->check = 0; + next_iph_u16 = (__u16 *) iph; +#pragma clang loop unroll(full) + for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_iph_u16++; + iph->check = ~((csum & 0xffff) + (csum >> 16)); + return swap_mac_and_send(data, data_end); +} + +static __attribute__ ((noinline)) +int send_icmp6_reply(void *data, void *data_end) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + __be32 tmp_addr[4]; + __u64 off = 0; + + if (data + sizeof(struct eth_hdr) + + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end) + return XDP_DROP; + off += sizeof(struct eth_hdr); + ip6h = data + off; + off += sizeof(struct ipv6hdr); + icmp_hdr = data + off; + icmp_hdr->icmp6_type = 129; + icmp_hdr->icmp6_cksum -= 0x0001; + ip6h->hop_limit = 4; + memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16); + memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16); + memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16); + return swap_mac_and_send(data, data_end); +} + +static __attribute__ ((noinline)) +int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmp6hdr *icmp_hdr; + struct ipv6hdr *ip6h; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return XDP_DROP; + if (icmp_hdr->icmp6_type == 128) + return send_icmp6_reply(data, data_end); + if (icmp_hdr->icmp6_type != 3) + return XDP_PASS; + off += sizeof(struct icmp6hdr); + ip6h = data + off; + if (ip6h + 1 > data_end) + return XDP_DROP; + pckt->flow.proto = ip6h->nexthdr; + pckt->flags |= (1 << 0); + memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16); + memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16); + return -1; +} + +static __attribute__ ((noinline)) +int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) +{ + struct icmphdr *icmp_hdr; + struct iphdr *iph; + + icmp_hdr = data + off; + if (icmp_hdr + 1 > data_end) + return XDP_DROP; + if (icmp_hdr->type == 8) + return send_icmp_reply(data, data_end); + if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4)) + return XDP_PASS; + off += sizeof(struct icmphdr); + iph = data + off; + if (iph + 1 > data_end) + return XDP_DROP; + if (iph->ihl != 5) + return XDP_DROP; + pckt->flow.proto = iph->protocol; + pckt->flags |= (1 << 0); + pckt->flow.src = iph->daddr; + pckt->flow.dst = iph->saddr; + return -1; +} + +static __attribute__ ((noinline)) +__u32 get_packet_hash(struct packet_description *pckt, + bool hash_16bytes) +{ + if (hash_16bytes) + return jhash_2words(jhash(pckt->flow.srcv6, 16, 12), + pckt->flow.ports, 24); + else + return jhash_2words(pckt->flow.src, pckt->flow.ports, + 24); +} + +__attribute__ ((noinline)) +static bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6, void *lru_map) +{ + struct real_pos_lru new_dst_lru = { }; + bool hash_16bytes = is_ipv6; + __u32 *real_pos, hash, key; + __u64 cur_time; + + if (vip_info->flags & (1 << 2)) + hash_16bytes = 1; + if (vip_info->flags & (1 << 3)) { + pckt->flow.port16[0] = pckt->flow.port16[1]; + memset(pckt->flow.srcv6, 0, 16); + } + hash = get_packet_hash(pckt, hash_16bytes); + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) + return 0; + key = 2 * vip_info->vip_num + hash % 2; + real_pos = bpf_map_lookup_elem(&ch_rings, &key); + if (!real_pos) + return 0; + key = *real_pos; + *real = bpf_map_lookup_elem(&reals, &key); + if (!(*real)) + return 0; + if (!(vip_info->flags & (1 << 1))) { + __u32 conn_rate_key = 512 + 2; + struct lb_stats *conn_rate_stats = + bpf_map_lookup_elem(&stats, &conn_rate_key); + + if (!conn_rate_stats) + return 1; + cur_time = bpf_ktime_get_ns(); + if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) { + conn_rate_stats->v1 = 1; + conn_rate_stats->v2 = cur_time; + } else { + conn_rate_stats->v1 += 1; + if (conn_rate_stats->v1 >= 1) + return 1; + } + if (pckt->flow.proto == IPPROTO_UDP) + new_dst_lru.atime = cur_time; + new_dst_lru.pos = key; + bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0); + } + return 1; +} + +__attribute__ ((noinline)) +static void connection_table_lookup(struct real_definition **real, + struct packet_description *pckt, + void *lru_map) +{ + + struct real_pos_lru *dst_lru; + __u64 cur_time; + __u32 key; + + dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow); + if (!dst_lru) + return; + if (pckt->flow.proto == IPPROTO_UDP) { + cur_time = bpf_ktime_get_ns(); + if (cur_time - dst_lru->atime > 300000) + return; + dst_lru->atime = cur_time; + } + key = dst_lru->pos; + *real = bpf_map_lookup_elem(&reals, &key); +} + +/* don't believe your eyes! + * below function has 6 arguments whereas bpf and llvm allow maximum of 5 + * but since it's _static_ llvm can optimize one argument away + */ +__attribute__ ((noinline)) +static int process_l3_headers_v6(struct packet_description *pckt, + __u8 *protocol, __u64 off, + __u16 *pkt_bytes, void *data, + void *data_end) +{ + struct ipv6hdr *ip6h; + __u64 iph_len; + int action; + + ip6h = data + off; + if (ip6h + 1 > data_end) + return XDP_DROP; + iph_len = sizeof(struct ipv6hdr); + *protocol = ip6h->nexthdr; + pckt->flow.proto = *protocol; + *pkt_bytes = __builtin_bswap16(ip6h->payload_len); + off += iph_len; + if (*protocol == 45) { + return XDP_DROP; + } else if (*protocol == 59) { + action = parse_icmpv6(data, data_end, off, pckt); + if (action >= 0) + return action; + } else { + memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16); + memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16); + } + return -1; +} + +__attribute__ ((noinline)) +static int process_l3_headers_v4(struct packet_description *pckt, + __u8 *protocol, __u64 off, + __u16 *pkt_bytes, void *data, + void *data_end) +{ + struct iphdr *iph; + __u64 iph_len; + int action; + + iph = data + off; + if (iph + 1 > data_end) + return XDP_DROP; + if (iph->ihl != 5) + return XDP_DROP; + *protocol = iph->protocol; + pckt->flow.proto = *protocol; + *pkt_bytes = __builtin_bswap16(iph->tot_len); + off += 20; + if (iph->frag_off & 65343) + return XDP_DROP; + if (*protocol == IPPROTO_ICMP) { + action = parse_icmp(data, data_end, off, pckt); + if (action >= 0) + return action; + } else { + pckt->flow.src = iph->saddr; + pckt->flow.dst = iph->daddr; + } + return -1; +} + +__attribute__ ((noinline)) +static int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct xdp_md *xdp) +{ + + struct real_definition *dst = NULL; + struct packet_description pckt = { }; + struct vip_definition vip = { }; + struct lb_stats *data_stats; + struct eth_hdr *eth = data; + void *lru_map = &lru_cache; + struct vip_meta *vip_info; + __u32 lru_stats_key = 513; + __u32 mac_addr_pos = 0; + __u32 stats_key = 512; + struct ctl_value *cval; + __u16 pkt_bytes; + __u64 iph_len; + __u8 protocol; + __u32 vip_num; + int action; + + if (is_ipv6) + action = process_l3_headers_v6(&pckt, &protocol, off, + &pkt_bytes, data, data_end); + else + action = process_l3_headers_v4(&pckt, &protocol, off, + &pkt_bytes, data, data_end); + if (action >= 0) + return action; + protocol = pckt.flow.proto; + if (protocol == IPPROTO_TCP) { + if (!parse_tcp(data, data_end, is_ipv6, &pckt)) + return XDP_DROP; + } else if (protocol == IPPROTO_UDP) { + if (!parse_udp(data, data_end, is_ipv6, &pckt)) + return XDP_DROP; + } else { + return XDP_TX; + } + + if (is_ipv6) + memcpy(vip.vipv6, pckt.flow.dstv6, 16); + else + vip.vip = pckt.flow.dst; + vip.port = pckt.flow.port16[1]; + vip.proto = pckt.flow.proto; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) { + vip.port = 0; + vip_info = bpf_map_lookup_elem(&vip_map, &vip); + if (!vip_info) + return XDP_PASS; + if (!(vip_info->flags & (1 << 4))) + pckt.flow.port16[1] = 0; + } + if (data_end - data > 1400) + return XDP_DROP; + data_stats = bpf_map_lookup_elem(&stats, &stats_key); + if (!data_stats) + return XDP_DROP; + data_stats->v1 += 1; + if (!dst) { + if (vip_info->flags & (1 << 0)) + pckt.flow.port16[0] = 0; + if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1))) + connection_table_lookup(&dst, &pckt, lru_map); + if (dst) + goto out; + if (pckt.flow.proto == IPPROTO_TCP) { + struct lb_stats *lru_stats = + bpf_map_lookup_elem(&stats, &lru_stats_key); + + if (!lru_stats) + return XDP_DROP; + if (pckt.flags & (1 << 1)) + lru_stats->v1 += 1; + else + lru_stats->v2 += 1; + } + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map)) + return XDP_DROP; + data_stats->v2 += 1; + } +out: + cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos); + if (!cval) + return XDP_DROP; + if (dst->flags & (1 << 0)) { + if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes)) + return XDP_DROP; + } else { + if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes)) + return XDP_DROP; + } + vip_num = vip_info->vip_num; + data_stats = bpf_map_lookup_elem(&stats, &vip_num); + if (!data_stats) + return XDP_DROP; + data_stats->v1 += 1; + data_stats->v2 += pkt_bytes; + + data = (void *)(long)xdp->data; + data_end = (void *)(long)xdp->data_end; + if (data + 4 > data_end) + return XDP_DROP; + *(u32 *)data = dst->dst; + return XDP_DROP; +} + +__attribute__ ((section("xdp-test"), used)) +int balancer_ingress(struct xdp_md *ctx) +{ + void *data = (void *)(long)ctx->data; + void *data_end = (void *)(long)ctx->data_end; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return XDP_DROP; + eth_proto = eth->eth_proto; + if (eth_proto == 8) + return process_packet(data, nh_off, data_end, 0, ctx); + else if (eth_proto == 56710) + return process_packet(data, nh_off, data_end, 1, ctx); + else + return XDP_DROP; +} + +char _license[] __attribute__ ((section("license"), used)) = "GPL"; +int _version __attribute__ ((section("version"), used)) = 1; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c new file mode 100644 index 000000000000..ef9e704be140 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c @@ -0,0 +1,28 @@ +/* Copyright (c) 2017 VMware + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +SEC("redirect_to_111") +int xdp_redirect_to_111(struct xdp_md *xdp) +{ + return bpf_redirect(111, 0); +} +SEC("redirect_to_222") +int xdp_redirect_to_222(struct xdp_md *xdp) +{ + return bpf_redirect(222, 0); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c new file mode 100644 index 000000000000..365a7d2d9f5c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c @@ -0,0 +1,292 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright(c) 2018 Jesper Dangaard Brouer. + * + * XDP/TC VLAN manipulation example + * + * GOTCHA: Remember to disable NIC hardware offloading of VLANs, + * else the VLAN tags are NOT inlined in the packet payload: + * + * # ethtool -K ixgbe2 rxvlan off + * + * Verify setting: + * # ethtool -k ixgbe2 | grep rx-vlan-offload + * rx-vlan-offload: off + * + */ +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/in.h> +#include <linux/pkt_cls.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +/* linux/if_vlan.h have not exposed this as UAPI, thus mirror some here + * + * struct vlan_hdr - vlan header + * @h_vlan_TCI: priority and VLAN ID + * @h_vlan_encapsulated_proto: packet type ID or len + */ +struct _vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; +#define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ +#define VLAN_PRIO_SHIFT 13 +#define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */ +#define VLAN_TAG_PRESENT VLAN_CFI_MASK +#define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ +#define VLAN_N_VID 4096 + +struct parse_pkt { + __u16 l3_proto; + __u16 l3_offset; + __u16 vlan_outer; + __u16 vlan_inner; + __u8 vlan_outer_offset; + __u8 vlan_inner_offset; +}; + +char _license[] SEC("license") = "GPL"; + +static __always_inline +bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt) +{ + __u16 eth_type; + __u8 offset; + + offset = sizeof(*eth); + /* Make sure packet is large enough for parsing eth + 2 VLAN headers */ + if ((void *)eth + offset + (2*sizeof(struct _vlan_hdr)) > data_end) + return false; + + eth_type = eth->h_proto; + + /* Handle outer VLAN tag */ + if (eth_type == bpf_htons(ETH_P_8021Q) + || eth_type == bpf_htons(ETH_P_8021AD)) { + struct _vlan_hdr *vlan_hdr; + + vlan_hdr = (void *)eth + offset; + pkt->vlan_outer_offset = offset; + pkt->vlan_outer = bpf_ntohs(vlan_hdr->h_vlan_TCI) + & VLAN_VID_MASK; + eth_type = vlan_hdr->h_vlan_encapsulated_proto; + offset += sizeof(*vlan_hdr); + } + + /* Handle inner (double) VLAN tag */ + if (eth_type == bpf_htons(ETH_P_8021Q) + || eth_type == bpf_htons(ETH_P_8021AD)) { + struct _vlan_hdr *vlan_hdr; + + vlan_hdr = (void *)eth + offset; + pkt->vlan_inner_offset = offset; + pkt->vlan_inner = bpf_ntohs(vlan_hdr->h_vlan_TCI) + & VLAN_VID_MASK; + eth_type = vlan_hdr->h_vlan_encapsulated_proto; + offset += sizeof(*vlan_hdr); + } + + pkt->l3_proto = bpf_ntohs(eth_type); /* Convert to host-byte-order */ + pkt->l3_offset = offset; + + return true; +} + +/* Hint, VLANs are choosen to hit network-byte-order issues */ +#define TESTVLAN 4011 /* 0xFAB */ +// #define TO_VLAN 4000 /* 0xFA0 (hint 0xOA0 = 160) */ + +SEC("xdp_drop_vlan_4011") +int xdp_prognum0(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct parse_pkt pkt = { 0 }; + + if (!parse_eth_frame(data, data_end, &pkt)) + return XDP_ABORTED; + + /* Drop specific VLAN ID example */ + if (pkt.vlan_outer == TESTVLAN) + return XDP_ABORTED; + /* + * Using XDP_ABORTED makes it possible to record this event, + * via tracepoint xdp:xdp_exception like: + * # perf record -a -e xdp:xdp_exception + * # perf script + */ + return XDP_PASS; +} +/* +Commands to setup VLAN on Linux to test packets gets dropped: + + export ROOTDEV=ixgbe2 + export VLANID=4011 + ip link add link $ROOTDEV name $ROOTDEV.$VLANID type vlan id $VLANID + ip link set dev $ROOTDEV.$VLANID up + + ip link set dev $ROOTDEV mtu 1508 + ip addr add 100.64.40.11/24 dev $ROOTDEV.$VLANID + +Load prog with ip tool: + + ip link set $ROOTDEV xdp off + ip link set $ROOTDEV xdp object xdp_vlan01_kern.o section xdp_drop_vlan_4011 + +*/ + +/* Changing VLAN to zero, have same practical effect as removing the VLAN. */ +#define TO_VLAN 0 + +SEC("xdp_vlan_change") +int xdp_prognum1(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct parse_pkt pkt = { 0 }; + + if (!parse_eth_frame(data, data_end, &pkt)) + return XDP_ABORTED; + + /* Change specific VLAN ID */ + if (pkt.vlan_outer == TESTVLAN) { + struct _vlan_hdr *vlan_hdr = data + pkt.vlan_outer_offset; + + /* Modifying VLAN, preserve top 4 bits */ + vlan_hdr->h_vlan_TCI = + bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000) + | TO_VLAN); + } + + return XDP_PASS; +} + +/* + * Show XDP+TC can cooperate, on creating a VLAN rewriter. + * 1. Create a XDP prog that can "pop"/remove a VLAN header. + * 2. Create a TC-bpf prog that egress can add a VLAN header. + */ + +#ifndef ETH_ALEN /* Ethernet MAC address length */ +#define ETH_ALEN 6 /* bytes */ +#endif +#define VLAN_HDR_SZ 4 /* bytes */ + +SEC("xdp_vlan_remove_outer") +int xdp_prognum2(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct parse_pkt pkt = { 0 }; + char *dest; + + if (!parse_eth_frame(data, data_end, &pkt)) + return XDP_ABORTED; + + /* Skip packet if no outer VLAN was detected */ + if (pkt.vlan_outer_offset == 0) + return XDP_PASS; + + /* Moving Ethernet header, dest overlap with src, memmove handle this */ + dest = data; + dest+= VLAN_HDR_SZ; + /* + * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by + * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes + */ + __builtin_memmove(dest, data, ETH_ALEN * 2); + /* Note: LLVM built-in memmove inlining require size to be constant */ + + /* Move start of packet header seen by Linux kernel stack */ + bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ); + + return XDP_PASS; +} + +static __always_inline +void shift_mac_4bytes_16bit(void *data) +{ + __u16 *p = data; + + p[7] = p[5]; /* delete p[7] was vlan_hdr->h_vlan_TCI */ + p[6] = p[4]; /* delete p[6] was ethhdr->h_proto */ + p[5] = p[3]; + p[4] = p[2]; + p[3] = p[1]; + p[2] = p[0]; +} + +static __always_inline +void shift_mac_4bytes_32bit(void *data) +{ + __u32 *p = data; + + /* Assuming VLAN hdr present. The 4 bytes in p[3] that gets + * overwritten, is ethhdr->h_proto and vlan_hdr->h_vlan_TCI. + * The vlan_hdr->h_vlan_encapsulated_proto take over role as + * ethhdr->h_proto. + */ + p[3] = p[2]; + p[2] = p[1]; + p[1] = p[0]; +} + +SEC("xdp_vlan_remove_outer2") +int xdp_prognum3(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *orig_eth = data; + struct parse_pkt pkt = { 0 }; + + if (!parse_eth_frame(orig_eth, data_end, &pkt)) + return XDP_ABORTED; + + /* Skip packet if no outer VLAN was detected */ + if (pkt.vlan_outer_offset == 0) + return XDP_PASS; + + /* Simply shift down MAC addrs 4 bytes, overwrite h_proto + TCI */ + shift_mac_4bytes_32bit(data); + + /* Move start of packet header seen by Linux kernel stack */ + bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ); + + return XDP_PASS; +} + +/*===================================== + * BELOW: TC-hook based ebpf programs + * ==================================== + * The TC-clsact eBPF programs (currently) need to be attach via TC commands + */ + +SEC("tc_vlan_push") +int _tc_progA(struct __sk_buff *ctx) +{ + bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN); + + return TC_ACT_OK; +} +/* +Commands to setup TC to use above bpf prog: + +export ROOTDEV=ixgbe2 +export FILE=xdp_vlan01_kern.o + +# Re-attach clsact to clear/flush existing role +tc qdisc del dev $ROOTDEV clsact 2> /dev/null ;\ +tc qdisc add dev $ROOTDEV clsact + +# Attach BPF prog EGRESS +tc filter add dev $ROOTDEV egress \ + prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push + +tc filter show dev $ROOTDEV egress +*/ diff --git a/tools/testing/selftests/bpf/progs/xdp_dummy.c b/tools/testing/selftests/bpf/progs/xdp_dummy.c new file mode 100644 index 000000000000..43b0ef1001ed --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_dummy.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define KBUILD_MODNAME "xdp_dummy" +#include <linux/bpf.h> +#include "bpf_helpers.h" + +SEC("xdp_dummy") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; |