diff options
| author | Alexei Starovoitov <ast@kernel.org> | 2019-02-14 05:27:56 +0300 |
|---|---|---|
| committer | Alexei Starovoitov <ast@kernel.org> | 2019-02-14 05:27:56 +0300 |
| commit | 87486b23f8aa7cf55c2df4a927691c5e8540c813 (patch) | |
| tree | b00d597241213e1ddaab1185700262bb7c6abcef /include | |
| parent | dd27c2e3d0a05c01ff14bb672d1a3f0fdd8f98fc (diff) | |
| parent | 0fde56e4385b09a67dd25321f607d4c942282de2 (diff) | |
| download | linux-87486b23f8aa7cf55c2df4a927691c5e8540c813.tar.xz | |
Merge branch 'lwt_encap_ip'
Peter Oskolkov says:
====================
This patchset implements BPF_LWT_ENCAP_IP mode in bpf_lwt_push_encap
BPF helper. It enables BPF programs (specifically, BPF_PROG_TYPE_LWT_IN
and BPF_PROG_TYPE_LWT_XMIT prog types) to add IP encapsulation headers
to packets (e.g. IP/GRE, GUE, IPIP).
This is useful when thousands of different short-lived flows should be
encapped, each with a different, dynamically determined destination.
Although lwtunnels can be used in some of these scenarios, the ability
to dynamically generate encap headers adds more flexibility, e.g.
when routing depends on the state of the host (reflected in global bpf
maps).
V2 changes: added flowi-based route lookup, IPv6 encapping, and
encapping on ingress.
V3 changes: incorporated David Ahern's suggestions:
- added l3mdev check/oif (patch 2)
- sync bpf.h from include/uapi into tools/include/uapi
- selftest tweaks
V4 changes: moved route lookup/dst change from bpf_push_ip_encap
to when BPF_LWT_REROUTE is handled, as suggested by David Ahern.
V5 changes: added a check in lwt_xmit that skb->protocol stays the
same if the skb is to be passed back to the stack (ret == BPF_OK).
Again, suggested by David Ahern.
V6 changes: abandoned.
V7 changes: added handling of GSO packets (patch 3 in the patchset added),
as suggested by BPF maintainers.
V8 changes:
- fixed build errors when LWT or IPV6 are not enabled;
- whitelisted TCP GSO instead of blacklisting SCTP and UDP GSO, as
suggested by Willem de Bruijn;
- added validation that the pushed length covers the needed headers when
GRE/UDP encap is detected, as suggested by Willem de Bruijn;
- a couple of minor/stylistic tweaks/fixed typos.
V9 changes:
- fixed a kbuild test robot compiler warning;
- added ipv6_route_input to ipv6_stub (patch 4 in the patchset
added), and IPv6 routing functions are now invoked via ipv6_stub,
as suggested by David Ahern.
V10 changes:
- removed unnecessary IS_ENABLED and pr_warn_once from patch 5.
V11 changes: fixed a potential dst leak in patch 5, as suggested by
David Ahern.
====================
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/net/addrconf.h | 1 | ||||
| -rw-r--r-- | include/net/lwtunnel.h | 2 | ||||
| -rw-r--r-- | include/uapi/linux/bpf.h | 26 |
3 files changed, 27 insertions, 2 deletions
diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 20d523ee2fec..269ec27385e9 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -248,6 +248,7 @@ struct ipv6_stub { const struct in6_addr *addr); int (*ipv6_dst_lookup)(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); + int (*ipv6_route_input)(struct sk_buff *skb); struct fib6_table *(*fib6_get_table)(struct net *net, u32 id); struct fib6_info *(*fib6_lookup)(struct net *net, int oif, diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index 33fd9ba7e0e5..671113bcb2cc 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -126,6 +126,8 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb); int lwtunnel_input(struct sk_buff *skb); int lwtunnel_xmit(struct sk_buff *skb); +int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, + bool ingress); static inline void lwtunnel_set_redirect(struct dst_entry *dst) { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 25c8c0e62ecf..bcdd2474eee7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2016,6 +2016,19 @@ union bpf_attr { * Only works if *skb* contains an IPv6 packet. Insert a * Segment Routing Header (**struct ipv6_sr_hdr**) inside * the IPv6 header. + * **BPF_LWT_ENCAP_IP** + * IP encapsulation (GRE/GUE/IPIP/etc). The outer header + * must be IPv4 or IPv6, followed by zero or more + * additional headers, up to LWT_BPF_MAX_HEADROOM total + * bytes in all prepended headers. Please note that + * if skb_is_gso(skb) is true, no more than two headers + * can be prepended, and the inner header, if present, + * should be either GRE or UDP/GUE. 
+ * + * BPF_LWT_ENCAP_SEG6*** types can be called by bpf programs of + * type BPF_PROG_TYPE_LWT_IN; BPF_LWT_ENCAP_IP type can be called + * by bpf programs of types BPF_PROG_TYPE_LWT_IN and + * BPF_PROG_TYPE_LWT_XMIT. * * A call to this helper is susceptible to change the underlaying * packet buffer. Therefore, at load time, all checks on pointers @@ -2517,7 +2530,8 @@ enum bpf_hdr_start_off { /* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */ enum bpf_lwt_encap_mode { BPF_LWT_ENCAP_SEG6, - BPF_LWT_ENCAP_SEG6_INLINE + BPF_LWT_ENCAP_SEG6_INLINE, + BPF_LWT_ENCAP_IP, }; #define __bpf_md_ptr(type, name) \ @@ -2606,7 +2620,15 @@ enum bpf_ret_code { BPF_DROP = 2, /* 3-6 reserved */ BPF_REDIRECT = 7, - /* >127 are reserved for prog type specific return codes */ + /* >127 are reserved for prog type specific return codes. + * + * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and + * BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been + * changed and should be routed based on its new L3 header. + * (This is an L3 redirect, as opposed to L2 redirect + * represented by BPF_REDIRECT above). + */ + BPF_LWT_REROUTE = 128, }; struct bpf_sock { |
