summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2018-05-11 01:10:58 +0300
committerDaniel Borkmann <daniel@iogearbox.net>2018-05-11 01:10:59 +0300
commitff1f56d9877f82d1fdac64e32827a570a54143bd (patch)
tree2bab4dd9bff8b85508549c97886d92d82d7c22e1 /include
parent68625b7631e040707b24197451a475a3e9197e2a (diff)
parentfe616055f78457a0b78e0d3693d1ae26f2d7dab3 (diff)
downloadlinux-ff1f56d9877f82d1fdac64e32827a570a54143bd.tar.xz
Merge branch 'bpf-fib-lookup-helper'
David Ahern says: ==================== Provide a helper for doing a FIB and neighbor lookup in the kernel tables from an XDP program. The helper provides a fastpath for forwarding packets. If the packet is a local delivery or for any reason is not a simple lookup and forward, the packet is expected to continue up the stack for full processing. The response from a FIB and neighbor lookup is either the egress index with the bpf_fib_lookup struct filled in with dmac and gateway or 0 meaning the packet should continue up the stack. In time we can revisit this to return the FIB lookup result errno if it is one of the special RTN_'s such as RTN_BLACKHOLE (-EINVAL) so that the XDP programs can do an early drop if desired. Patches 1-6 do some more refactoring to IPv6 with the end goal of extracting a FIB lookup function that aligns with fib_lookup for IPv4, basically returning a fib6_info without creating a dst based entry. Patch 7 adds lookup functions to the ipv6 stub. These are needed since bpf is built into the kernel and ipv6 may not be built or loaded. Patch 8 adds the bpf helper and 9 adds a sample program. v3 - remove ETH_ALEN and in6_addr from uapi header v2 - removed pkt_access from bpf_func_proto as noticed by Daniel - added check in that IPv6 forwarding is enabled - added DaveM's ack on patches 1-7 and 9 based on v1 response and fact that no changes were made to them in v2 v1 - updated commit messages and cover letter - added comment to sample program noting lack of verification on egress device supporting XDP RFC v2 - fixed use of foward helper from cls_act as noted by Daniel - in patch 1 rename fib6_lookup_1 as well for consistency ==================== Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'include')
-rw-r--r--include/net/addrconf.h14
-rw-r--r--include/net/ip6_fib.h21
-rw-r--r--include/trace/events/fib6.h14
-rw-r--r--include/uapi/linux/bpf.h81
4 files changed, 119 insertions, 11 deletions
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 8312cc25a3af..ff766ab207e0 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -223,6 +223,20 @@ struct ipv6_stub {
const struct in6_addr *addr);
int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
struct dst_entry **dst, struct flowi6 *fl6);
+
+ struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
+ struct fib6_info *(*fib6_lookup)(struct net *net, int oif,
+ struct flowi6 *fl6, int flags);
+ struct fib6_info *(*fib6_table_lookup)(struct net *net,
+ struct fib6_table *table,
+ int oif, struct flowi6 *fl6,
+ int flags);
+ struct fib6_info *(*fib6_multipath_select)(const struct net *net,
+ struct fib6_info *f6i,
+ struct flowi6 *fl6, int oif,
+ const struct sk_buff *skb,
+ int strict);
+
void (*udpv6_encap_enable)(void);
void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index a3ec08d05756..cc70f6da8462 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -376,9 +376,24 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
const struct sk_buff *skb,
int flags, pol_lookup_t lookup);
-struct fib6_node *fib6_lookup(struct fib6_node *root,
- const struct in6_addr *daddr,
- const struct in6_addr *saddr);
+/* called with rcu lock held; can return error pointer
+ * caller needs to select path
+ */
+struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
+ int flags);
+
+/* called with rcu lock held; caller needs to select path */
+struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
+ int oif, struct flowi6 *fl6, int strict);
+
+struct fib6_info *fib6_multipath_select(const struct net *net,
+ struct fib6_info *match,
+ struct flowi6 *fl6, int oif,
+ const struct sk_buff *skb, int strict);
+
+struct fib6_node *fib6_node_lookup(struct fib6_node *root,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr);
struct fib6_node *fib6_locate(struct fib6_node *root,
const struct in6_addr *daddr, int dst_len,
diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h
index 7e8d48a81b91..1b8d951e3c12 100644
--- a/include/trace/events/fib6.h
+++ b/include/trace/events/fib6.h
@@ -12,10 +12,10 @@
TRACE_EVENT(fib6_table_lookup,
- TP_PROTO(const struct net *net, const struct rt6_info *rt,
+ TP_PROTO(const struct net *net, const struct fib6_info *f6i,
struct fib6_table *table, const struct flowi6 *flp),
- TP_ARGS(net, rt, table, flp),
+ TP_ARGS(net, f6i, table, flp),
TP_STRUCT__entry(
__field( u32, tb_id )
@@ -48,20 +48,20 @@ TRACE_EVENT(fib6_table_lookup,
in6 = (struct in6_addr *)__entry->dst;
*in6 = flp->daddr;
- if (rt->rt6i_idev) {
- __assign_str(name, rt->rt6i_idev->dev->name);
+ if (f6i->fib6_nh.nh_dev) {
+ __assign_str(name, f6i->fib6_nh.nh_dev);
} else {
__assign_str(name, "");
}
- if (rt == net->ipv6.ip6_null_entry) {
+ if (f6i == net->ipv6.fib6_null_entry) {
struct in6_addr in6_zero = {};
in6 = (struct in6_addr *)__entry->gw;
*in6 = in6_zero;
- } else if (rt) {
+ } else if (f6i) {
in6 = (struct in6_addr *)__entry->gw;
- *in6 = rt->rt6i_gateway;
+ *in6 = f6i->fib6_nh.nh_gw;
}
),
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d615c777b573..02e4112510f8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1828,6 +1828,33 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
+ *
+ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ * Description
+ * Do FIB lookup in kernel tables using parameters in *params*.
+ * If lookup is successful and result shows packet is to be
+ * forwarded, the neighbor tables are searched for the nexthop.
+ * If successful (ie., FIB lookup shows forwarding and nexthop
+ * is resolved), the nexthop address is returned in ipv4_dst,
+ * ipv6_dst or mpls_out based on family, smac is set to mac
+ * address of egress device, dmac is set to nexthop mac address,
+ * rt_metric is set to metric from route.
+ *
+ * *plen* argument is the size of the passed in struct.
+ * *flags* argument can be one or more BPF_FIB_LOOKUP_ flags:
+ *
+ * **BPF_FIB_LOOKUP_DIRECT** means do a direct table lookup vs
+ * full lookup using FIB rules
+ * **BPF_FIB_LOOKUP_OUTPUT** means do lookup from an egress
+ * perspective (default is ingress)
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ * **struct sk_buff** tc cls_act programs.
+ *
+ * Return
+ * Egress device index on success, 0 if packet needs to continue
+ * up the stack for further processing or a negative error in case
+ * of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -1898,7 +1925,8 @@ union bpf_attr {
FN(xdp_adjust_tail), \
FN(skb_get_xfrm_state), \
FN(get_stack), \
- FN(skb_load_bytes_relative),
+ FN(skb_load_bytes_relative), \
+ FN(fib_lookup),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2321,4 +2349,55 @@ struct bpf_raw_tracepoint_args {
__u64 args[0];
};
+/* DIRECT: Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT: Do lookup from egress perspective; default is ingress
+ */
+#define BPF_FIB_LOOKUP_DIRECT BIT(0)
+#define BPF_FIB_LOOKUP_OUTPUT BIT(1)
+
+struct bpf_fib_lookup {
+ /* input */
+ __u8 family; /* network family, AF_INET, AF_INET6, AF_MPLS */
+
+ /* set if lookup is to consider L4 data - e.g., FIB rules */
+ __u8 l4_protocol;
+ __be16 sport;
+ __be16 dport;
+
+ /* total length of packet from network header - used for MTU check */
+ __u16 tot_len;
+ __u32 ifindex; /* L3 device index for lookup */
+
+ union {
+ /* inputs to lookup */
+ __u8 tos; /* AF_INET */
+ __be32 flowlabel; /* AF_INET6 */
+
+ /* output: metric of fib result */
+ __u32 rt_metric;
+ };
+
+ union {
+ __be32 mpls_in;
+ __be32 ipv4_src;
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ };
+
+ /* input to bpf_fib_lookup, *dst is destination address.
+ * output: bpf_fib_lookup sets to gateway address
+ */
+ union {
+ /* return for MPLS lookups */
+ __be32 mpls_out[4]; /* support up to 4 labels */
+ __be32 ipv4_dst;
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+
+ /* output */
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ __u8 smac[6]; /* ETH_ALEN */
+ __u8 dmac[6]; /* ETH_ALEN */
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */