author     Jakub Kicinski <kuba@kernel.org>    2023-08-04 01:34:36 +0300
committer  Jakub Kicinski <kuba@kernel.org>    2023-08-04 01:34:36 +0300
commit     d07b7b32da6f678d42d96a8b9824cf0a181ce140
tree       606829d4b33a57dbe0f0e825ca8505e0b5fcb759   /net/ipv6
parent     35b1b1fd96388d5e3cf179bf36bd8a4153baf4a3
parent     648880e9331c68b2008430fd90f3648d1795399d
Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Martin KaFai Lau says:
====================
pull-request: bpf-next 2023-08-03
We've added 54 non-merge commits during the last 10 day(s) which contain
a total of 84 files changed, 4026 insertions(+), 562 deletions(-).
The main changes are:
1) Add SO_REUSEPORT support for TC bpf_sk_assign, from Lorenz Bauer and
   Daniel Borkmann (see the sketch after this list)
2) Support the new cpu v4 instructions, from Yonghong Song
3) Non-atomically allocate freelist during prefill from YiFei Zhu
4) Support defragmenting IPv(4|6) packets in BPF from Daniel Xu
5) Add a tracepoint for XDP attach failures, from Leon Hwang
6) struct netdev_rx_queue and xdp.h reshuffling to reduce
rebuild time, from Jakub Kicinski
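
A minimal sketch of what 1) looks like from the TC side (referenced above).
The program below is hypothetical and not taken from this series: the
section/program name, the hard-coded destination port and the skipped
header parsing are illustrative assumptions only.

// SPDX-License-Identifier: GPL-2.0
/* Hypothetical sketch: steer an IPv6 TCP packet to a local listener from
 * TC ingress. A real program would parse the packet and fill the whole
 * bpf_sock_tuple instead of hard-coding a port.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("tc")
int steer_to_listener(struct __sk_buff *skb)
{
	struct bpf_sock_tuple tuple = {};
	struct bpf_sock *sk;
	long err;

	/* Illustrative destination port only; addresses left zeroed. */
	tuple.ipv6.dport = bpf_htons(4321);

	sk = bpf_skc_lookup_tcp(skb, &tuple, sizeof(tuple.ipv6),
				BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		return TC_ACT_OK;

	/* With this series, sk may be a listener in a SO_REUSEPORT group:
	 * the receive path then re-runs the group's selection (see the
	 * inet6_steal_sock()/inet6_lookup_reuseport() changes in the diff
	 * below) instead of always delivering to this exact socket.
	 */
	err = bpf_sk_assign(skb, sk, 0);
	bpf_sk_release(sk);

	return err ? TC_ACT_SHOT : TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";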
* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (54 commits)
net: invert the netdevice.h vs xdp.h dependency
net: move struct netdev_rx_queue out of netdevice.h
eth: add missing xdp.h includes in drivers
selftests/bpf: Add testcase for xdp attaching failure tracepoint
bpf, xdp: Add tracepoint to xdp attaching failure
selftests/bpf: fix static assert compilation issue for test_cls_*.c
bpf: fix bpf_probe_read_kernel prototype mismatch
riscv, bpf: Adapt bpf trampoline to optimized riscv ftrace framework
libbpf: fix typos in Makefile
tracing: bpf: use struct trace_entry in struct syscall_tp_t
bpf, devmap: Remove unused dtab field from bpf_dtab_netdev
bpf, cpumap: Remove unused cmap field from bpf_cpu_map_entry
netfilter: bpf: Only define get_proto_defrag_hook() if necessary
bpf: Fix an array-index-out-of-bounds issue in disasm.c
net: remove duplicate INDIRECT_CALLABLE_DECLARE of udp[6]_ehashfn
docs/bpf: Fix malformed documentation
bpf: selftests: Add defrag selftests
bpf: selftests: Support custom type and proto for client sockets
bpf: selftests: Support not connecting client socket
netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link
...
====================
Link: https://lore.kernel.org/r/20230803174845.825419-1-martin.lau@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv6')
-rw-r--r--   net/ipv6/inet6_hashtables.c                  69
-rw-r--r--   net/ipv6/netfilter/nf_defrag_ipv6_hooks.c    11
-rw-r--r--   net/ipv6/udp.c                               96

3 files changed, 94 insertions(+), 82 deletions(-)
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b64b49012655..b0e8d278e8a9 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -39,6 +39,7 @@ u32 inet6_ehashfn(const struct net *net,
 	return __inet6_ehashfn(lhash, lport, fhash, fport,
 			       inet6_ehash_secret + net_hash_mix(net));
 }
+EXPORT_SYMBOL_GPL(inet6_ehashfn);
 
 /*
  * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
@@ -111,22 +112,40 @@ static inline int compute_score(struct sock *sk, struct net *net,
 	return score;
 }
 
-static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
-					    struct sk_buff *skb, int doff,
-					    const struct in6_addr *saddr,
-					    __be16 sport,
-					    const struct in6_addr *daddr,
-					    unsigned short hnum)
+/**
+ * inet6_lookup_reuseport() - execute reuseport logic on AF_INET6 socket if necessary.
+ * @net: network namespace.
+ * @sk: AF_INET6 socket, must be in TCP_LISTEN state for TCP or TCP_CLOSE for UDP.
+ * @skb: context for a potential SK_REUSEPORT program.
+ * @doff: header offset.
+ * @saddr: source address.
+ * @sport: source port.
+ * @daddr: destination address.
+ * @hnum: destination port in host byte order.
+ * @ehashfn: hash function used to generate the fallback hash.
+ *
+ * Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
+ *         the selected sock or an error.
+ */
+struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk,
+				    struct sk_buff *skb, int doff,
+				    const struct in6_addr *saddr,
+				    __be16 sport,
+				    const struct in6_addr *daddr,
+				    unsigned short hnum,
+				    inet6_ehashfn_t *ehashfn)
 {
 	struct sock *reuse_sk = NULL;
 	u32 phash;
 
 	if (sk->sk_reuseport) {
-		phash = inet6_ehashfn(net, daddr, hnum, saddr, sport);
+		phash = INDIRECT_CALL_INET(ehashfn, udp6_ehashfn, inet6_ehashfn,
+					   net, daddr, hnum, saddr, sport);
 		reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
 	}
 	return reuse_sk;
 }
+EXPORT_SYMBOL_GPL(inet6_lookup_reuseport);
 
 /* called with rcu_read_lock() */
 static struct sock *inet6_lhash2_lookup(struct net *net,
@@ -143,8 +162,8 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
 	sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
 		score = compute_score(sk, net, hnum, daddr, dif, sdif);
 		if (score > hiscore) {
-			result = lookup_reuseport(net, sk, skb, doff,
-						  saddr, sport, daddr, hnum);
+			result = inet6_lookup_reuseport(net, sk, skb, doff,
+							saddr, sport, daddr, hnum, inet6_ehashfn);
 			if (result)
 				return result;
 
@@ -156,30 +175,30 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
 	return result;
 }
 
-static inline struct sock *inet6_lookup_run_bpf(struct net *net,
-						struct inet_hashinfo *hashinfo,
-						struct sk_buff *skb, int doff,
-						const struct in6_addr *saddr,
-						const __be16 sport,
-						const struct in6_addr *daddr,
-						const u16 hnum, const int dif)
+struct sock *inet6_lookup_run_sk_lookup(struct net *net,
+					int protocol,
+					struct sk_buff *skb, int doff,
+					const struct in6_addr *saddr,
+					const __be16 sport,
+					const struct in6_addr *daddr,
+					const u16 hnum, const int dif,
+					inet6_ehashfn_t *ehashfn)
 {
 	struct sock *sk, *reuse_sk;
 	bool no_reuseport;
 
-	if (hashinfo != net->ipv4.tcp_death_row.hashinfo)
-		return NULL; /* only TCP is supported */
-
-	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, saddr, sport,
+	no_reuseport = bpf_sk_lookup_run_v6(net, protocol, saddr, sport,
 					    daddr, hnum, dif, &sk);
 	if (no_reuseport || IS_ERR_OR_NULL(sk))
 		return sk;
 
-	reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
+	reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff,
+					  saddr, sport, daddr, hnum, ehashfn);
 	if (reuse_sk)
 		sk = reuse_sk;
 	return sk;
 }
+EXPORT_SYMBOL_GPL(inet6_lookup_run_sk_lookup);
 
 struct sock *inet6_lookup_listener(struct net *net,
 		struct inet_hashinfo *hashinfo,
@@ -193,9 +212,11 @@ struct sock *inet6_lookup_listener(struct net *net,
 	unsigned int hash2;
 
 	/* Lookup redirect from BPF */
-	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
-		result = inet6_lookup_run_bpf(net, hashinfo, skb, doff,
-					      saddr, sport, daddr, hnum, dif);
+	if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+	    hashinfo == net->ipv4.tcp_death_row.hashinfo) {
+		result = inet6_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff,
+						    saddr, sport, daddr, hnum, dif,
+						    inet6_ehashfn);
 		if (result)
 			goto done;
 	}
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index cb4eb1d2c620..d59b296b4f51 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/icmp.h>
+#include <linux/rcupdate.h>
 #include <linux/sysctl.h>
 #include <net/ipv6_frag.h>
 
@@ -96,6 +97,12 @@ static void __net_exit defrag6_net_exit(struct net *net)
 	}
 }
 
+static const struct nf_defrag_hook defrag_hook = {
+	.owner = THIS_MODULE,
+	.enable = nf_defrag_ipv6_enable,
+	.disable = nf_defrag_ipv6_disable,
+};
+
 static struct pernet_operations defrag6_net_ops = {
 	.exit = defrag6_net_exit,
 };
@@ -114,6 +121,9 @@ static int __init nf_defrag_init(void)
 		pr_err("nf_defrag_ipv6: can't register pernet ops\n");
 		goto cleanup_frag6;
 	}
+
+	rcu_assign_pointer(nf_defrag_v6_hook, &defrag_hook);
+
 	return ret;
 
 cleanup_frag6:
@@ -124,6 +134,7 @@ cleanup_frag6:
 
 static void __exit nf_defrag_fini(void)
 {
+	rcu_assign_pointer(nf_defrag_v6_hook, NULL);
 	unregister_pernet_subsys(&defrag6_net_ops);
 	nf_ct_frag6_cleanup();
 }
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 486d893b8e3c..1ea01b0d9be3 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -72,11 +72,12 @@ int udpv6_init_sock(struct sock *sk)
 	return 0;
 }
 
-static u32 udp6_ehashfn(const struct net *net,
-			const struct in6_addr *laddr,
-			const u16 lport,
-			const struct in6_addr *faddr,
-			const __be16 fport)
+INDIRECT_CALLABLE_SCOPE
+u32 udp6_ehashfn(const struct net *net,
+		 const struct in6_addr *laddr,
+		 const u16 lport,
+		 const struct in6_addr *faddr,
+		 const __be16 fport)
 {
 	static u32 udp6_ehash_secret __read_mostly;
 	static u32 udp_ipv6_hash_secret __read_mostly;
@@ -161,24 +162,6 @@ static int compute_score(struct sock *sk, struct net *net,
 	return score;
 }
 
-static struct sock *lookup_reuseport(struct net *net, struct sock *sk,
-				     struct sk_buff *skb,
-				     const struct in6_addr *saddr,
-				     __be16 sport,
-				     const struct in6_addr *daddr,
-				     unsigned int hnum)
-{
-	struct sock *reuse_sk = NULL;
-	u32 hash;
-
-	if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
-		hash = udp6_ehashfn(net, daddr, hnum, saddr, sport);
-		reuse_sk = reuseport_select_sock(sk, hash, skb,
-						 sizeof(struct udphdr));
-	}
-	return reuse_sk;
-}
-
 /* called with rcu_read_lock() */
 static struct sock *udp6_lib_lookup2(struct net *net,
 		const struct in6_addr *saddr, __be16 sport,
@@ -195,44 +178,35 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif);
 		if (score > badness) {
-			result = lookup_reuseport(net, sk, skb,
-						  saddr, sport, daddr, hnum);
+			badness = score;
+
+			if (sk->sk_state == TCP_ESTABLISHED) {
+				result = sk;
+				continue;
+			}
+
+			result = inet6_lookup_reuseport(net, sk, skb, sizeof(struct udphdr),
+							saddr, sport, daddr, hnum, udp6_ehashfn);
+			if (!result) {
+				result = sk;
+				continue;
+			}
+
 			/* Fall back to scoring if group has connections */
-			if (result && !reuseport_has_conns(sk))
+			if (!reuseport_has_conns(sk))
 				return result;
 
-			result = result ? : sk;
-			badness = score;
+			/* Reuseport logic returned an error, keep original score. */
+			if (IS_ERR(result))
+				continue;
+
+			badness = compute_score(result, net, saddr, sport,
+						daddr, hnum, dif, sdif);
 		}
 	}
 	return result;
 }
 
-static inline struct sock *udp6_lookup_run_bpf(struct net *net,
-					       struct udp_table *udptable,
-					       struct sk_buff *skb,
-					       const struct in6_addr *saddr,
-					       __be16 sport,
-					       const struct in6_addr *daddr,
-					       u16 hnum, const int dif)
-{
-	struct sock *sk, *reuse_sk;
-	bool no_reuseport;
-
-	if (udptable != net->ipv4.udp_table)
-		return NULL; /* only UDP is supported */
-
-	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport,
-					    daddr, hnum, dif, &sk);
-	if (no_reuseport || IS_ERR_OR_NULL(sk))
-		return sk;
-
-	reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
-	if (reuse_sk)
-		sk = reuse_sk;
-	return sk;
-}
-
 /* rcu_read_lock() must be held */
 struct sock *__udp6_lib_lookup(struct net *net,
 			       const struct in6_addr *saddr, __be16 sport,
@@ -257,9 +231,11 @@ struct sock *__udp6_lib_lookup(struct net *net,
 		goto done;
 
 	/* Lookup redirect from BPF */
-	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
-		sk = udp6_lookup_run_bpf(net, udptable, skb,
-					 saddr, sport, daddr, hnum, dif);
+	if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
+	    udptable == net->ipv4.udp_table) {
+		sk = inet6_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr),
+						saddr, sport, daddr, hnum, dif,
+						udp6_ehashfn);
 		if (sk) {
 			result = sk;
 			goto done;
@@ -992,7 +968,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		goto csum_error;
 
 	/* Check if the socket is already available, e.g. due to early demux */
-	sk = skb_steal_sock(skb, &refcounted);
+	sk = inet6_steal_sock(net, skb, sizeof(struct udphdr), saddr, uh->source, daddr, uh->dest,
+			      &refcounted, udp6_ehashfn);
+	if (IS_ERR(sk))
+		goto no_sk;
+
 	if (sk) {
 		struct dst_entry *dst = skb_dst(skb);
 		int ret;
@@ -1026,7 +1006,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 			goto report_csum_error;
 		return udp6_unicast_rcv_skb(sk, skb, uh);
 	}
-
+no_sk:
 	reason = SKB_DROP_REASON_NO_SOCKET;
 
 	if (!uh->check)
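
As a companion to the nf_defrag_v6_hook registration above, here is a hedged
user-space sketch of attaching a netfilter BPF program at the IPv6 prerouting
hook with BPF_F_NETFILTER_IP_DEFRAG, the flag this series adds. The program
name "nf_prog" and the priority value are illustrative assumptions;
bpf_link_create() and its netfilter options are existing libbpf API.

// SPDX-License-Identifier: GPL-2.0
/* Hypothetical sketch: request IPv6 defragmentation for a netfilter
 * BPF link. "nf_prog" is an illustrative program name.
 */
#include <errno.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <linux/netfilter.h>

static int attach_defrag_link(struct bpf_object *obj)
{
	LIBBPF_OPTS(bpf_link_create_opts, opts);
	struct bpf_program *prog;

	prog = bpf_object__find_program_by_name(obj, "nf_prog");
	if (!prog)
		return -ENOENT;

	opts.netfilter.pf = NFPROTO_IPV6;
	opts.netfilter.hooknum = NF_INET_PRE_ROUTING;
	/* Illustrative priority; it should sort after the defrag hooks so
	 * the program sees reassembled packets.
	 */
	opts.netfilter.priority = -100;
	/* New in this series: reassemble fragments before the program runs.
	 * This pulls in nf_defrag_ipv6 through the nf_defrag_v6_hook
	 * registered in nf_defrag_ipv6_hooks.c above.
	 */
	opts.netfilter.flags = BPF_F_NETFILTER_IP_DEFRAG;

	return bpf_link_create(bpf_program__fd(prog), 0, BPF_NETFILTER, &opts);
}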