From e710bcc6d92c47bb7d8e803b41ef529c09ad6a9e Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 19 Oct 2020 12:42:19 -0700
Subject: bpf: selftest: Ensure the return value of bpf_skc_to helpers must be
 checked

This patch tests:

int bpf_cls(struct __sk_buff *skb)
{
	/* REG_6: sk
	 * REG_7: tp
	 * REG_8: req_sk
	 */

	sk = skb->sk;
	if (!sk)
		return 0;

	tp = bpf_skc_to_tcp_sock(sk);
	req_sk = bpf_skc_to_tcp_request_sock(sk);
	if (!req_sk)
		return 0;

	/* !tp has not been tested, so verifier should reject. */
	return *(__u8 *)tp;
}

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201019194219.1051314-1-kafai@fb.com
---
 tools/testing/selftests/bpf/verifier/sock.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index b1aac2641498..ce13ece08d51 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -631,3 +631,28 @@
 	.prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
 	.result = ACCEPT,
 },
+{
+	"mark null check on return value of bpf_skc_to helpers",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_request_sock),
+	BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_8, 0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = REJECT,
+	.errstr = "invalid mem access",
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "unknown func",
+},
-- 
cgit v1.2.3


From 8568c3cefd5143fa0dc09f90e1bc9dc8905292f4 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 19 Oct 2020 12:42:25 -0700
Subject: bpf: selftest: Ensure the return value of the bpf_per_cpu_ptr() must
 be checked

This patch tests all pointers returned by bpf_per_cpu_ptr() must be
tested for NULL first before it can be accessed.

This patch adds a subtest "null_check", so it moves the ".data..percpu"
existence check to the very beginning and before doing any subtest.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201019194225.1051596-1-kafai@fb.com
---
 tools/testing/selftests/bpf/prog_tests/ksyms_btf.c | 57 +++++++++++++++-------
 .../bpf/progs/test_ksyms_btf_null_check.c          | 31 ++++++++++++
 2 files changed, 70 insertions(+), 18 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index 28e26bd3e0ca..b58b775d19f3 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -5,18 +5,17 @@
 #include <bpf/libbpf.h>
 #include <bpf/btf.h>
 #include "test_ksyms_btf.skel.h"
+#include "test_ksyms_btf_null_check.skel.h"
 
 static int duration;
 
-void test_ksyms_btf(void)
+static void test_basic(void)
 {
 	__u64 runqueues_addr, bpf_prog_active_addr;
 	__u32 this_rq_cpu;
 	int this_bpf_prog_active;
 	struct test_ksyms_btf *skel = NULL;
 	struct test_ksyms_btf__data *data;
-	struct btf *btf;
-	int percpu_datasec;
 	int err;
 
 	err = kallsyms_find("runqueues", &runqueues_addr);
@@ -31,20 +30,6 @@ void test_ksyms_btf(void)
 	if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_prog_active' not found\n"))
 		return;
 
-	btf = libbpf_find_kernel_btf();
-	if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
-		  PTR_ERR(btf)))
-		return;
-
-	percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
-						BTF_KIND_DATASEC);
-	if (percpu_datasec < 0) {
-		printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n",
-		       __func__);
-		test__skip();
-		goto cleanup;
-	}
-
 	skel = test_ksyms_btf__open_and_load();
 	if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
 		goto cleanup;
@@ -83,6 +68,42 @@ void test_ksyms_btf(void)
 	      data->out__bpf_prog_active);
 
 cleanup:
-	btf__free(btf);
 	test_ksyms_btf__destroy(skel);
 }
+
+static void test_null_check(void)
+{
+	struct test_ksyms_btf_null_check *skel;
+
+	skel = test_ksyms_btf_null_check__open_and_load();
+	CHECK(skel, "skel_open", "unexpected load of a prog missing null check\n");
+
+	test_ksyms_btf_null_check__destroy(skel);
+}
+
+void test_ksyms_btf(void)
+{
+	int percpu_datasec;
+	struct btf *btf;
+
+	btf = libbpf_find_kernel_btf();
+	if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
+		  PTR_ERR(btf)))
+		return;
+
+	percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
+						BTF_KIND_DATASEC);
+	btf__free(btf);
+	if (percpu_datasec < 0) {
+		printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n",
+		       __func__);
+		test__skip();
+		return;
+	}
+
+	if (test__start_subtest("basic"))
+		test_basic();
+
+	if (test__start_subtest("null_check"))
+		test_null_check();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c
new file mode 100644
index 000000000000..8bc8f7c637bc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+extern const struct rq runqueues __ksym; /* struct type global var. */
+extern const int bpf_prog_active __ksym; /* int type global var. */
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+	struct rq *rq;
+	int *active;
+	__u32 cpu;
+
+	cpu = bpf_get_smp_processor_id();
+	rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu);
+	active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+	if (active) {
+		/* READ_ONCE */
+		*(volatile int *)active;
+		/* !rq has not been tested, so verifier should reject. */
+		*(volatile int *)(&rq->cpu);
+	}
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From ba452c9e996d8a4c347b32805f91abb70de5de7e Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Tue, 20 Oct 2020 23:25:56 +0200
Subject: bpf: Fix bpf_redirect_neigh helper api to support supplying nexthop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Based on the discussion in [0], update the bpf_redirect_neigh() helper to
accept an optional parameter specifying the nexthop information. This makes
it possible to combine bpf_fib_lookup() and bpf_redirect_neigh() without
incurring a duplicate FIB lookup - since the FIB lookup helper will return
the nexthop information even if no neighbour is present, this can simply
be passed on to bpf_redirect_neigh() if bpf_fib_lookup() returns
BPF_FIB_LKUP_RET_NO_NEIGH. Thus fix & extend it before helper API is frozen.

  [0] https://lore.kernel.org/bpf/393e17fc-d187-3a8d-2f0d-a627c7c63fca@iogearbox.net/

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/bpf/160322915615.32199.1187570224032024535.stgit@toke.dk
---
 include/linux/filter.h         |   9 +++
 include/uapi/linux/bpf.h       |  22 ++++--
 net/core/filter.c              | 158 ++++++++++++++++++++++++++---------------
 scripts/bpf_helpers_doc.py     |   1 +
 tools/include/uapi/linux/bpf.h |  22 ++++--
 5 files changed, 145 insertions(+), 67 deletions(-)

(limited to 'tools')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 20fc24c9779a..72d62cbc1578 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -607,12 +607,21 @@ struct bpf_skb_data_end {
 	void *data_end;
 };
 
+struct bpf_nh_params {
+	u32 nh_family;
+	union {
+		u32 ipv4_nh;
+		struct in6_addr ipv6_nh;
+	};
+};
+
 struct bpf_redirect_info {
 	u32 flags;
 	u32 tgt_index;
 	void *tgt_value;
 	struct bpf_map *map;
 	u32 kern_flags;
+	struct bpf_nh_params nh;
 };
 
 DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bf5a99d803e4..e6ceac3f7d62 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3677,15 +3677,19 @@ union bpf_attr {
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
  *
- * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)
  * 	Description
  * 		Redirect the packet to another net device of index *ifindex*
  * 		and fill in L2 addresses from neighboring subsystem. This helper
  * 		is somewhat similar to **bpf_redirect**\ (), except that it
  * 		populates L2 addresses as well, meaning, internally, the helper
- * 		performs a FIB lookup based on the skb's networking header to
- * 		get the address of the next hop and then relies on the neighbor
- * 		lookup for the L2 address of the nexthop.
+ * 		relies on the neighbor lookup for the L2 address of the nexthop.
+ *
+ * 		The helper will perform a FIB lookup based on the skb's
+ * 		networking header to get the address of the next hop, unless
+ * 		this is supplied by the caller in the *params* argument. The
+ * 		*plen* argument indicates the len of *params* and should be set
+ * 		to 0 if *params* is NULL.
  *
  * 		The *flags* argument is reserved and must be 0. The helper is
  * 		currently only supported for tc BPF program types, and enabled
@@ -4906,6 +4910,16 @@ struct bpf_fib_lookup {
 	__u8	dmac[6];     /* ETH_ALEN */
 };
 
+struct bpf_redir_neigh {
+	/* network family for lookup (AF_INET, AF_INET6) */
+	__u32 nh_family;
+	/* network address of nexthop; skips fib lookup to find gateway */
+	union {
+		__be32		ipv4_nh;
+		__u32		ipv6_nh[4];  /* in6_addr; network order */
+	};
+};
+
 enum bpf_task_fd_type {
 	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
 	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
diff --git a/net/core/filter.c b/net/core/filter.c
index c5e2a1c5fd8d..6d0fa65a4a46 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2165,12 +2165,12 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
+static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
+			    struct net_device *dev, struct bpf_nh_params *nh)
 {
-	struct dst_entry *dst = skb_dst(skb);
-	struct net_device *dev = dst->dev;
 	u32 hh_len = LL_RESERVED_SPACE(dev);
 	const struct in6_addr *nexthop;
+	struct dst_entry *dst = NULL;
 	struct neighbour *neigh;
 
 	if (dev_xmit_recursion()) {
@@ -2196,8 +2196,13 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
 	}
 
 	rcu_read_lock_bh();
-	nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
-			      &ipv6_hdr(skb)->daddr);
+	if (!nh) {
+		dst = skb_dst(skb);
+		nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
+				      &ipv6_hdr(skb)->daddr);
+	} else {
+		nexthop = &nh->ipv6_nh;
+	}
 	neigh = ip_neigh_gw6(dev, nexthop);
 	if (likely(!IS_ERR(neigh))) {
 		int ret;
@@ -2210,36 +2215,43 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
 		return ret;
 	}
 	rcu_read_unlock_bh();
-	IP6_INC_STATS(dev_net(dst->dev),
-		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+	if (dst)
+		IP6_INC_STATS(dev_net(dst->dev),
+			      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 out_drop:
 	kfree_skb(skb);
 	return -ENETDOWN;
 }
 
-static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
+				   struct bpf_nh_params *nh)
 {
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	struct net *net = dev_net(dev);
 	int err, ret = NET_XMIT_DROP;
-	struct dst_entry *dst;
-	struct flowi6 fl6 = {
-		.flowi6_flags	= FLOWI_FLAG_ANYSRC,
-		.flowi6_mark	= skb->mark,
-		.flowlabel	= ip6_flowinfo(ip6h),
-		.flowi6_oif	= dev->ifindex,
-		.flowi6_proto	= ip6h->nexthdr,
-		.daddr		= ip6h->daddr,
-		.saddr		= ip6h->saddr,
-	};
 
-	dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
-	if (IS_ERR(dst))
-		goto out_drop;
+	if (!nh) {
+		struct dst_entry *dst;
+		struct flowi6 fl6 = {
+			.flowi6_flags = FLOWI_FLAG_ANYSRC,
+			.flowi6_mark  = skb->mark,
+			.flowlabel    = ip6_flowinfo(ip6h),
+			.flowi6_oif   = dev->ifindex,
+			.flowi6_proto = ip6h->nexthdr,
+			.daddr	      = ip6h->daddr,
+			.saddr	      = ip6h->saddr,
+		};
+
+		dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
+		if (IS_ERR(dst))
+			goto out_drop;
 
-	skb_dst_set(skb, dst);
+		skb_dst_set(skb, dst);
+	} else if (nh->nh_family != AF_INET6) {
+		goto out_drop;
+	}
 
-	err = bpf_out_neigh_v6(net, skb);
+	err = bpf_out_neigh_v6(net, skb, dev, nh);
 	if (unlikely(net_xmit_eval(err)))
 		dev->stats.tx_errors++;
 	else
@@ -2252,7 +2264,8 @@ out_xmit:
 	return ret;
 }
 #else
-static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
+				   struct bpf_nh_params *nh)
 {
 	kfree_skb(skb);
 	return NET_XMIT_DROP;
@@ -2260,11 +2273,9 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
 #endif /* CONFIG_IPV6 */
 
 #if IS_ENABLED(CONFIG_INET)
-static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
+static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
+			    struct net_device *dev, struct bpf_nh_params *nh)
 {
-	struct dst_entry *dst = skb_dst(skb);
-	struct rtable *rt = container_of(dst, struct rtable, dst);
-	struct net_device *dev = dst->dev;
 	u32 hh_len = LL_RESERVED_SPACE(dev);
 	struct neighbour *neigh;
 	bool is_v6gw = false;
@@ -2292,7 +2303,21 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
 	}
 
 	rcu_read_lock_bh();
-	neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
+	if (!nh) {
+		struct dst_entry *dst = skb_dst(skb);
+		struct rtable *rt = container_of(dst, struct rtable, dst);
+
+		neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
+	} else if (nh->nh_family == AF_INET6) {
+		neigh = ip_neigh_gw6(dev, &nh->ipv6_nh);
+		is_v6gw = true;
+	} else if (nh->nh_family == AF_INET) {
+		neigh = ip_neigh_gw4(dev, nh->ipv4_nh);
+	} else {
+		rcu_read_unlock_bh();
+		goto out_drop;
+	}
+
 	if (likely(!IS_ERR(neigh))) {
 		int ret;
 
@@ -2309,33 +2334,37 @@ out_drop:
 	return -ENETDOWN;
 }
 
-static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
+				   struct bpf_nh_params *nh)
 {
 	const struct iphdr *ip4h = ip_hdr(skb);
 	struct net *net = dev_net(dev);
 	int err, ret = NET_XMIT_DROP;
-	struct rtable *rt;
-	struct flowi4 fl4 = {
-		.flowi4_flags	= FLOWI_FLAG_ANYSRC,
-		.flowi4_mark	= skb->mark,
-		.flowi4_tos	= RT_TOS(ip4h->tos),
-		.flowi4_oif	= dev->ifindex,
-		.flowi4_proto	= ip4h->protocol,
-		.daddr		= ip4h->daddr,
-		.saddr		= ip4h->saddr,
-	};
 
-	rt = ip_route_output_flow(net, &fl4, NULL);
-	if (IS_ERR(rt))
-		goto out_drop;
-	if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
-		ip_rt_put(rt);
-		goto out_drop;
-	}
+	if (!nh) {
+		struct flowi4 fl4 = {
+			.flowi4_flags = FLOWI_FLAG_ANYSRC,
+			.flowi4_mark  = skb->mark,
+			.flowi4_tos   = RT_TOS(ip4h->tos),
+			.flowi4_oif   = dev->ifindex,
+			.flowi4_proto = ip4h->protocol,
+			.daddr	      = ip4h->daddr,
+			.saddr	      = ip4h->saddr,
+		};
+		struct rtable *rt;
+
+		rt = ip_route_output_flow(net, &fl4, NULL);
+		if (IS_ERR(rt))
+			goto out_drop;
+		if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
+			ip_rt_put(rt);
+			goto out_drop;
+		}
 
-	skb_dst_set(skb, &rt->dst);
+		skb_dst_set(skb, &rt->dst);
+	}
 
-	err = bpf_out_neigh_v4(net, skb);
+	err = bpf_out_neigh_v4(net, skb, dev, nh);
 	if (unlikely(net_xmit_eval(err)))
 		dev->stats.tx_errors++;
 	else
@@ -2348,14 +2377,16 @@ out_xmit:
 	return ret;
 }
 #else
-static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
+				   struct bpf_nh_params *nh)
 {
 	kfree_skb(skb);
 	return NET_XMIT_DROP;
 }
 #endif /* CONFIG_INET */
 
-static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev,
+				struct bpf_nh_params *nh)
 {
 	struct ethhdr *ethh = eth_hdr(skb);
 
@@ -2370,9 +2401,9 @@ static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
 	skb_reset_network_header(skb);
 
 	if (skb->protocol == htons(ETH_P_IP))
-		return __bpf_redirect_neigh_v4(skb, dev);
+		return __bpf_redirect_neigh_v4(skb, dev, nh);
 	else if (skb->protocol == htons(ETH_P_IPV6))
-		return __bpf_redirect_neigh_v6(skb, dev);
+		return __bpf_redirect_neigh_v6(skb, dev, nh);
 out:
 	kfree_skb(skb);
 	return -ENOTSUPP;
@@ -2382,7 +2413,8 @@ out:
 enum {
 	BPF_F_NEIGH	= (1ULL << 1),
 	BPF_F_PEER	= (1ULL << 2),
-#define BPF_F_REDIRECT_INTERNAL	(BPF_F_NEIGH | BPF_F_PEER)
+	BPF_F_NEXTHOP	= (1ULL << 3),
+#define BPF_F_REDIRECT_INTERNAL	(BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP)
 };
 
 BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
@@ -2455,7 +2487,8 @@ int skb_do_redirect(struct sk_buff *skb)
 		return -EAGAIN;
 	}
 	return flags & BPF_F_NEIGH ?
-	       __bpf_redirect_neigh(skb, dev) :
+	       __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ?
+				    &ri->nh : NULL) :
 	       __bpf_redirect(skb, dev, flags);
 out_drop:
 	kfree_skb(skb);
@@ -2504,16 +2537,21 @@ static const struct bpf_func_proto bpf_redirect_peer_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
+BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
+	   int, plen, u64, flags)
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 
-	if (unlikely(flags))
+	if (unlikely((plen && plen < sizeof(*params)) || flags))
 		return TC_ACT_SHOT;
 
-	ri->flags = BPF_F_NEIGH;
+	ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0);
 	ri->tgt_index = ifindex;
 
+	BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params));
+	if (plen)
+		memcpy(&ri->nh, params, sizeof(ri->nh));
+
 	return TC_ACT_REDIRECT;
 }
 
@@ -2522,7 +2560,9 @@ static const struct bpf_func_proto bpf_redirect_neigh_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_ANYTHING,
-	.arg2_type	= ARG_ANYTHING,
+	.arg2_type      = ARG_PTR_TO_MEM_OR_NULL,
+	.arg3_type      = ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
 };
 
 BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index 7d86fdd190be..6769caae142f 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -453,6 +453,7 @@ class PrinterHelpers(Printer):
             'struct bpf_perf_event_data',
             'struct bpf_perf_event_value',
             'struct bpf_pidns_info',
+            'struct bpf_redir_neigh',
             'struct bpf_sk_lookup',
             'struct bpf_sock',
             'struct bpf_sock_addr',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index bf5a99d803e4..e6ceac3f7d62 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3677,15 +3677,19 @@ union bpf_attr {
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
  *
- * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)
  * 	Description
  * 		Redirect the packet to another net device of index *ifindex*
  * 		and fill in L2 addresses from neighboring subsystem. This helper
  * 		is somewhat similar to **bpf_redirect**\ (), except that it
  * 		populates L2 addresses as well, meaning, internally, the helper
- * 		performs a FIB lookup based on the skb's networking header to
- * 		get the address of the next hop and then relies on the neighbor
- * 		lookup for the L2 address of the nexthop.
+ * 		relies on the neighbor lookup for the L2 address of the nexthop.
+ *
+ * 		The helper will perform a FIB lookup based on the skb's
+ * 		networking header to get the address of the next hop, unless
+ * 		this is supplied by the caller in the *params* argument. The
+ * 		*plen* argument indicates the len of *params* and should be set
+ * 		to 0 if *params* is NULL.
  *
  * 		The *flags* argument is reserved and must be 0. The helper is
  * 		currently only supported for tc BPF program types, and enabled
@@ -4906,6 +4910,16 @@ struct bpf_fib_lookup {
 	__u8	dmac[6];     /* ETH_ALEN */
 };
 
+struct bpf_redir_neigh {
+	/* network family for lookup (AF_INET, AF_INET6) */
+	__u32 nh_family;
+	/* network address of nexthop; skips fib lookup to find gateway */
+	union {
+		__be32		ipv4_nh;
+		__u32		ipv6_nh[4];  /* in6_addr; network order */
+	};
+};
+
 enum bpf_task_fd_type {
 	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
 	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
-- 
cgit v1.2.3


From adfd272c4ccbe43d9761bb17dd8a4387d7815382 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Tue, 20 Oct 2020 23:25:57 +0200
Subject: bpf, selftests: Extend test_tc_redirect to use modified
 bpf_redirect_neigh()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This updates the test_tc_neigh prog in selftests to use the new syntax of
bpf_redirect_neigh(). To exercise the helper both with and without the
optional parameter, add an additional test_tc_neigh_fib test program, which
does a bpf_fib_lookup() followed by a call to bpf_redirect_neigh() instead
of looking up the ifindex in a map.

Update the test_tc_redirect.sh script to run both versions of the test,
and while we're add it, fix it to work on systems that have a consolidated
dual-stack 'ping' binary instead of separate ping/ping6 versions.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/160322915724.32199.17530068594636950447.stgit@toke.dk
---
 tools/testing/selftests/bpf/progs/test_tc_neigh.c  |   5 +-
 .../selftests/bpf/progs/test_tc_neigh_fib.c        | 155 +++++++++++++++++++++
 tools/testing/selftests/bpf/test_tc_redirect.sh    |  18 ++-
 3 files changed, 173 insertions(+), 5 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
index fe182616b112..b985ac4e7a81 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
 #include <stdint.h>
 #include <stdbool.h>
 
@@ -118,7 +119,7 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
 	if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
 		return TC_ACT_SHOT;
 
-	return bpf_redirect_neigh(get_dev_ifindex(dev_src), 0);
+	return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0);
 }
 
 SEC("src_ingress") int tc_src(struct __sk_buff *skb)
@@ -142,7 +143,7 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb)
 	if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
 		return TC_ACT_SHOT;
 
-	return bpf_redirect_neigh(get_dev_ifindex(dev_dst), 0);
+	return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0);
 }
 
 char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
new file mode 100644
index 000000000000..d82ed3457030
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#ifndef ctx_ptr
+# define ctx_ptr(field)		(void *)(long)(field)
+#endif
+
+#define AF_INET 2
+#define AF_INET6 10
+
+static __always_inline int fill_fib_params_v4(struct __sk_buff *skb,
+					      struct bpf_fib_lookup *fib_params)
+{
+	void *data_end = ctx_ptr(skb->data_end);
+	void *data = ctx_ptr(skb->data);
+	struct iphdr *ip4h;
+
+	if (data + sizeof(struct ethhdr) > data_end)
+		return -1;
+
+	ip4h = (struct iphdr *)(data + sizeof(struct ethhdr));
+	if ((void *)(ip4h + 1) > data_end)
+		return -1;
+
+	fib_params->family = AF_INET;
+	fib_params->tos = ip4h->tos;
+	fib_params->l4_protocol = ip4h->protocol;
+	fib_params->sport = 0;
+	fib_params->dport = 0;
+	fib_params->tot_len = bpf_ntohs(ip4h->tot_len);
+	fib_params->ipv4_src = ip4h->saddr;
+	fib_params->ipv4_dst = ip4h->daddr;
+
+	return 0;
+}
+
+static __always_inline int fill_fib_params_v6(struct __sk_buff *skb,
+					      struct bpf_fib_lookup *fib_params)
+{
+	struct in6_addr *src = (struct in6_addr *)fib_params->ipv6_src;
+	struct in6_addr *dst = (struct in6_addr *)fib_params->ipv6_dst;
+	void *data_end = ctx_ptr(skb->data_end);
+	void *data = ctx_ptr(skb->data);
+	struct ipv6hdr *ip6h;
+
+	if (data + sizeof(struct ethhdr) > data_end)
+		return -1;
+
+	ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr));
+	if ((void *)(ip6h + 1) > data_end)
+		return -1;
+
+	fib_params->family = AF_INET6;
+	fib_params->flowinfo = 0;
+	fib_params->l4_protocol = ip6h->nexthdr;
+	fib_params->sport = 0;
+	fib_params->dport = 0;
+	fib_params->tot_len = bpf_ntohs(ip6h->payload_len);
+	*src = ip6h->saddr;
+	*dst = ip6h->daddr;
+
+	return 0;
+}
+
+SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+{
+	void *data_end = ctx_ptr(skb->data_end);
+	void *data = ctx_ptr(skb->data);
+	__u32 *raw = data;
+
+	if (data + sizeof(struct ethhdr) > data_end)
+		return TC_ACT_SHOT;
+
+	return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+static __always_inline int tc_redir(struct __sk_buff *skb)
+{
+	struct bpf_fib_lookup fib_params = { .ifindex = skb->ingress_ifindex };
+	__u8 zero[ETH_ALEN * 2];
+	int ret = -1;
+
+	switch (skb->protocol) {
+	case __bpf_constant_htons(ETH_P_IP):
+		ret = fill_fib_params_v4(skb, &fib_params);
+		break;
+	case __bpf_constant_htons(ETH_P_IPV6):
+		ret = fill_fib_params_v6(skb, &fib_params);
+		break;
+	}
+
+	if (ret)
+		return TC_ACT_OK;
+
+	ret = bpf_fib_lookup(skb, &fib_params, sizeof(fib_params), 0);
+	if (ret == BPF_FIB_LKUP_RET_NOT_FWDED || ret < 0)
+		return TC_ACT_OK;
+
+	__builtin_memset(&zero, 0, sizeof(zero));
+	if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+		return TC_ACT_SHOT;
+
+	if (ret == BPF_FIB_LKUP_RET_NO_NEIGH) {
+		struct bpf_redir_neigh nh_params = {};
+
+		nh_params.nh_family = fib_params.family;
+		__builtin_memcpy(&nh_params.ipv6_nh, &fib_params.ipv6_dst,
+				 sizeof(nh_params.ipv6_nh));
+
+		return bpf_redirect_neigh(fib_params.ifindex, &nh_params,
+					  sizeof(nh_params), 0);
+
+	} else if (ret == BPF_FIB_LKUP_RET_SUCCESS) {
+		void *data_end = ctx_ptr(skb->data_end);
+		struct ethhdr *eth = ctx_ptr(skb->data);
+
+		if (eth + 1 > data_end)
+			return TC_ACT_SHOT;
+
+		__builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
+		__builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
+
+		return bpf_redirect(fib_params.ifindex, 0);
+	}
+
+	return TC_ACT_SHOT;
+}
+
+/* these are identical, but keep them separate for compatibility with the
+ * section names expected by test_tc_redirect.sh
+ */
+SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+{
+	return tc_redir(skb);
+}
+
+SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+{
+	return tc_redir(skb);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh
index 6d7482562140..8868aa1ca902 100755
--- a/tools/testing/selftests/bpf/test_tc_redirect.sh
+++ b/tools/testing/selftests/bpf/test_tc_redirect.sh
@@ -24,8 +24,7 @@ command -v timeout >/dev/null 2>&1 || \
 	{ echo >&2 "timeout is not available"; exit 1; }
 command -v ping >/dev/null 2>&1 || \
 	{ echo >&2 "ping is not available"; exit 1; }
-command -v ping6 >/dev/null 2>&1 || \
-	{ echo >&2 "ping6 is not available"; exit 1; }
+if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi
 command -v perl >/dev/null 2>&1 || \
 	{ echo >&2 "perl is not available"; exit 1; }
 command -v jq >/dev/null 2>&1 || \
@@ -152,7 +151,7 @@ netns_test_connectivity()
 	echo -e "${TEST}: ${GREEN}PASS${NC}"
 
 	TEST="ICMPv6 connectivity test"
-	ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST}
+	ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST}
 	if [ $? -ne 0 ]; then
 		echo -e "${TEST}: ${RED}FAIL${NC}"
 		exit 1
@@ -170,6 +169,7 @@ hex_mem_str()
 netns_setup_bpf()
 {
 	local obj=$1
+	local use_forwarding=${2:-0}
 
 	ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
 	ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress
@@ -179,6 +179,14 @@ netns_setup_bpf()
 	ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress
 	ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress  bpf da obj $obj sec chk_egress
 
+	if [ "$use_forwarding" -eq "1" ]; then
+		# bpf_fib_lookup() checks if forwarding is enabled
+		ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1
+		ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1
+		ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1
+		return 0
+	fi
+
 	veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex)
 	veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex)
 
@@ -200,5 +208,9 @@ netns_setup_bpf test_tc_neigh.o
 netns_test_connectivity
 netns_cleanup
 netns_setup
+netns_setup_bpf test_tc_neigh_fib.o 1
+netns_test_connectivity
+netns_cleanup
+netns_setup
 netns_setup_bpf test_tc_peer.o
 netns_test_connectivity
-- 
cgit v1.2.3


From 3652c9a1b1fe6cbdd4510eb220db548bff8704ae Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 21 Oct 2020 22:32:57 +0200
Subject: bpf, libbpf: Guard bpf inline asm from bpf_tail_call_static

Yaniv reported a compilation error after pulling latest libbpf:

  [...]
  ../libbpf/src/root/usr/include/bpf/bpf_helpers.h:99:10: error:
  unknown register name 'r0' in asm
                     : "r0", "r1", "r2", "r3", "r4", "r5");
  [...]

The issue got triggered given Yaniv was compiling tracing programs with native
target (e.g. x86) instead of BPF target, hence no BTF generated vmlinux.h nor
CO-RE used, and later llc with -march=bpf was invoked to compile from LLVM IR
to BPF object file. Given that clang was expecting x86 inline asm and not BPF
one the error complained that these regs don't exist on the former.

Guard bpf_tail_call_static() with defined(__bpf__) where BPF inline asm is valid
to use. BPF tracing programs on more modern kernels use BPF target anyway and
thus the bpf_tail_call_static() function will be available for them. BPF inline
asm is supported since clang 7 (clang <= 6 otherwise throws same above error),
and __bpf_unreachable() since clang 8, therefore include the latter condition
in order to prevent compilation errors for older clang versions. Given even an
old Ubuntu 18.04 LTS has official LLVM packages all the way up to llvm-10, I did
not bother to special case the __bpf_unreachable() inside bpf_tail_call_static()
further.

Also, undo the sockex3_kern's use of bpf_tail_call_static() sample given they
still have the old hacky way to even compile networking progs with native instead
of BPF target so bpf_tail_call_static() won't be defined there anymore.

Fixes: 0e9f6841f664 ("bpf, libbpf: Add bpf_tail_call_static helper for bpf programs")
Reported-by: Yaniv Agman <yanivagman@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Tested-by: Yaniv Agman <yanivagman@gmail.com>
Link: https://lore.kernel.org/bpf/CAMy7=ZUk08w5Gc2Z-EKi4JFtuUCaZYmE4yzhJjrExXpYKR4L8w@mail.gmail.com
Link: https://lore.kernel.org/bpf/20201021203257.26223-1-daniel@iogearbox.net
---
 samples/bpf/sockex3_kern.c  | 8 ++++----
 tools/lib/bpf/bpf_helpers.h | 2 ++
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
index 8142d02b33e6..b363503357e5 100644
--- a/samples/bpf/sockex3_kern.c
+++ b/samples/bpf/sockex3_kern.c
@@ -44,17 +44,17 @@ static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
 	switch (proto) {
 	case ETH_P_8021Q:
 	case ETH_P_8021AD:
-		bpf_tail_call_static(skb, &jmp_table, PARSE_VLAN);
+		bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
 		break;
 	case ETH_P_MPLS_UC:
 	case ETH_P_MPLS_MC:
-		bpf_tail_call_static(skb, &jmp_table, PARSE_MPLS);
+		bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
 		break;
 	case ETH_P_IP:
-		bpf_tail_call_static(skb, &jmp_table, PARSE_IP);
+		bpf_tail_call(skb, &jmp_table, PARSE_IP);
 		break;
 	case ETH_P_IPV6:
-		bpf_tail_call_static(skb, &jmp_table, PARSE_IPV6);
+		bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
 		break;
 	}
 }
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 2bdb7d6dbad2..72b251110c4d 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -72,6 +72,7 @@
 /*
  * Helper function to perform a tail call with a constant/immediate map slot.
  */
+#if __clang_major__ >= 8 && defined(__bpf__)
 static __always_inline void
 bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
 {
@@ -98,6 +99,7 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
 		     :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)
 		     : "r0", "r1", "r2", "r3", "r4", "r5");
 }
+#endif
 
 /*
  * Helper structure used by eBPF C program
-- 
cgit v1.2.3