| author | Daniel Borkmann <daniel@iogearbox.net> | 2021-05-26 10:46:17 +0300 |
|---|---|---|
| committer | Daniel Borkmann <daniel@iogearbox.net> | 2021-05-26 10:47:31 +0300 |
| commit | aa7f1f03bd998c1ebf412805a40c918537af3d90 | |
| tree | bcc1e898d165b4f168fd9c402805258234746a38 | /include/linux |
| parent | 21703cf790c7aa0fd09e1d38df9a5d945244abf8 | |
| parent | d232924762971fe2698011bc244e05949e544541 | |
| download | linux-aa7f1f03bd998c1ebf412805a40c918537af3d90.tar.xz | |
Merge branch 'bpf-xdp-bcast'
Hangbin Liu says:
====================
This patchset is a new implementation of XDP multicast support, based
on my previous two-map implementation [1]. The reason is that Daniel
thinks the exclude-map implementation is missing proper bond support in
the XDP context, and there is a plan to add native XDP bonding support.
Adding an exclude map to the helper also increases verifier complexity
and hurts performance.
The new implementation just adds two new flags, BPF_F_BROADCAST and
BPF_F_EXCLUDE_INGRESS, to extend xdp_redirect_map for broadcast support.
With BPF_F_BROADCAST the packet is broadcast to all the interfaces in
the map; with BPF_F_EXCLUDE_INGRESS the ingress interface is excluded
from the broadcast.
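For illustration, a minimal XDP program using the new flags could look
like the sketch below. This is not code from the patchset itself; the
map name forward_map, its sizing, and the section names are made up for
the example:

```c
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Hypothetical devmap holding the egress interfaces to broadcast to. */
struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
	__uint(max_entries, 32);
} forward_map SEC(".maps");

SEC("xdp")
int xdp_broadcast(struct xdp_md *ctx)
{
	/* With BPF_F_BROADCAST the key (second argument) is ignored and
	 * the packet is cloned to every interface in the map; adding
	 * BPF_F_EXCLUDE_INGRESS skips the interface it arrived on.
	 */
	return bpf_redirect_map(&forward_map, 0,
				BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
}

char _license[] SEC("license") = "GPL";
```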
The v11 patchset link is here [2].
[1] https://lore.kernel.org/bpf/20210223125809.1376577-1-liuhangbin@gmail.com
[2] https://lore.kernel.org/bpf/20210513070447.1878448-1-liuhangbin@gmail.com
v12: As Daniel pointed out:
a) define flag_mask and action_mask in __bpf_xdp_redirect_map() as
const u64
b) remove BPF_F_ACTION_MASK from the uapi header
c) remove EXPORT_SYMBOL_GPL for xdpf_clone()
v11:
a) Use unlikely() when checking whether this is a broadcast redirect.
b) Fix a tracepoint NULL-pointer issue Jesper found.
c) Remove BPF_F_REDIR_MASK and just OR the flags together so the reader
can see more clearly which flags are in use.
d) Add performance numbers with multiple veth interfaces to the patch 01
description.
e) Remove some sleeps to reduce the testing time in patch 04.
Restructure the test and make clear which flags are being tested.
v10: use READ_ONCE()/WRITE_ONCE() when reading/writing the map instead of xchg()
v9: Update patch 01 commit description
v8: use hlist_for_each_entry_rcu() when iterating over the devmap hash objects
v7: No need to free xdpf in dev_map_enqueue_clone() if xdpf_clone() fails.
v6: Fix an skb leak in the error path for generic XDP.
v5: Just walk the map directly to get the interfaces, as get_next_key()
on a devmap hash may restart from the first key if a device gets removed.
After this update, performance improved 10% compared with v4.
v4: Fix an issue in patch 02 where the flags were never cleared, and
update the selftest to cover this.
v3: Rebase the code on the latest bpf-next.
v2: Fix a flag-renaming issue in patch 02.
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/bpf.h | 20 |
|---|---|---|
| -rw-r--r-- | include/linux/filter.h | 19 |
2 files changed, 35 insertions, 4 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1e9a0ff3217b..86dec5001ae2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1501,8 +1501,13 @@ int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
+int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+			  struct bpf_map *map, bool exclude_ingress);
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog);
+int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
+			   struct bpf_prog *xdp_prog, struct bpf_map *map,
+			   bool exclude_ingress);
 bool dev_map_can_have_prog(struct bpf_map *map);
 
 void __cpu_map_flush(void);
@@ -1670,6 +1675,13 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 	return 0;
 }
 
+static inline
+int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+			  struct bpf_map *map, bool exclude_ingress)
+{
+	return 0;
+}
+
 struct sk_buff;
 
 static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
@@ -1679,6 +1691,14 @@ static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
 	return 0;
 }
 
+static inline
+int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
+			   struct bpf_prog *xdp_prog, struct bpf_map *map,
+			   bool exclude_ingress)
+{
+	return 0;
+}
+
 static inline void __cpu_map_flush(void)
 {
 }
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9a09547bc7ba..c5ad7df029ed 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -646,6 +646,7 @@ struct bpf_redirect_info {
 	u32 flags;
 	u32 tgt_index;
 	void *tgt_value;
+	struct bpf_map *map;
 	u32 map_id;
 	enum bpf_map_type map_type;
 	u32 kern_flags;
@@ -1464,17 +1465,19 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
 }
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 
-static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags,
+static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex,
+						  u64 flags, const u64 flag_mask,
 						  void *lookup_elem(struct bpf_map *map, u32 key))
 {
 	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX;
 
 	/* Lower bits of the flags are used as return code on lookup failure */
-	if (unlikely(flags > XDP_TX))
+	if (unlikely(flags & ~(action_mask | flag_mask)))
 		return XDP_ABORTED;
 
 	ri->tgt_value = lookup_elem(map, ifindex);
-	if (unlikely(!ri->tgt_value)) {
+	if (unlikely(!ri->tgt_value) && !(flags & BPF_F_BROADCAST)) {
 		/* If the lookup fails we want to clear out the state in the
 		 * redirect_info struct completely, so that if an eBPF program
 		 * performs multiple lookups, the last one always takes
@@ -1482,13 +1485,21 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
 		 */
 		ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */
 		ri->map_type = BPF_MAP_TYPE_UNSPEC;
-		return flags;
+		return flags & action_mask;
 	}
 
 	ri->tgt_index = ifindex;
 	ri->map_id = map->id;
 	ri->map_type = map->map_type;
 
+	if (flags & BPF_F_BROADCAST) {
+		WRITE_ONCE(ri->map, map);
+		ri->flags = flags;
+	} else {
+		WRITE_ONCE(ri->map, NULL);
+		ri->flags = 0;
+	}
+
 	return XDP_REDIRECT;
 }
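One detail worth noting in the filter.h hunk above: the lower bits of
the flags argument still encode the fallback action returned when a
(non-broadcast) lookup fails, now masked with action_mask rather than
compared against XDP_TX. A minimal sketch of that usage from the BPF
program side, again with a made-up map name:

```c
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Hypothetical devmap with a few egress slots. */
struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
	__uint(max_entries, 8);
} tx_map SEC(".maps");

SEC("xdp")
int xdp_redirect_or_pass(struct xdp_md *ctx)
{
	/* If key 0 is not populated, __bpf_xdp_redirect_map() returns
	 * flags & action_mask, i.e. XDP_PASS here, so the packet falls
	 * back to the normal stack instead of the default XDP_ABORTED.
	 */
	return bpf_redirect_map(&tx_map, 0, XDP_PASS);
}

char _license[] SEC("license") = "GPL";
```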
