diff options
author | Jakub Sitnicki <jakub@cloudflare.com> | 2020-04-29 21:11:52 +0300 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2020-04-30 00:30:59 +0300 |
commit | 64d85290d79c0677edb5a8ee2295b36c022fa5df (patch) | |
tree | b8bc403085811e0d4358be3d4d57ae24a6ff1e2f /net | |
parent | 0b3b9ca3d154486baa08a41cbc62fde67ba8c6c3 (diff) | |
download | linux-64d85290d79c0677edb5a8ee2295b36c022fa5df.tar.xz |
bpf: Allow bpf_map_lookup_elem for SOCKMAP and SOCKHASH
White-list map lookup for SOCKMAP/SOCKHASH from BPF. Lookup returns a
pointer to a full socket and acquires a reference if necessary.
To support it we need to extend the verifier to know that:
(1) register storing the lookup result holds a pointer to socket, if
lookup was done on SOCKMAP/SOCKHASH, and that
(2) map lookup on SOCKMAP/SOCKHASH is a reference acquiring operation,
which needs a corresponding reference release with bpf_sk_release.
On sock_map side, lookup handlers exposed via bpf_map_ops now bump
sk_refcnt if socket is reference counted. In turn, bpf_sk_select_reuseport,
the only in-kernel user of SOCKMAP/SOCKHASH ops->map_lookup_elem, was
updated to release the reference.
Sockets fetched from a map can be used in the same way as ones returned by
BPF socket lookup helpers, such as bpf_sk_lookup_tcp. In particular, they
can be used with bpf_sk_assign to direct packets toward a socket on TC
ingress path.
Suggested-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200429181154.479310-2-jakub@cloudflare.com
Diffstat (limited to 'net')
-rw-r--r-- | net/core/filter.c | 4 | ||||
-rw-r--r-- | net/core/sock_map.c | 18 |
2 files changed, 20 insertions, 2 deletions
diff --git a/net/core/filter.c b/net/core/filter.c index da3b7a72c37c..70b32723e6be 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8712,6 +8712,10 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern, reuse = rcu_dereference(selected_sk->sk_reuseport_cb); if (!reuse) { + /* Lookup in sock_map can return TCP ESTABLISHED sockets. */ + if (sk_is_refcounted(selected_sk)) + sock_put(selected_sk); + /* reuseport_array has only sk with non NULL sk_reuseport_cb. * The only (!reuse) case here is - the sk has already been * unhashed (e.g. by close()), so treat it as -ENOENT. diff --git a/net/core/sock_map.c b/net/core/sock_map.c index b08dfae10f88..00a26cf2cfe9 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -343,7 +343,14 @@ static struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) static void *sock_map_lookup(struct bpf_map *map, void *key) { - return __sock_map_lookup_elem(map, *(u32 *)key); + struct sock *sk; + + sk = __sock_map_lookup_elem(map, *(u32 *)key); + if (!sk || !sk_fullsock(sk)) + return NULL; + if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt)) + return NULL; + return sk; } static void *sock_map_lookup_sys(struct bpf_map *map, void *key) @@ -1051,7 +1058,14 @@ static void *sock_hash_lookup_sys(struct bpf_map *map, void *key) static void *sock_hash_lookup(struct bpf_map *map, void *key) { - return __sock_hash_lookup_elem(map, key); + struct sock *sk; + + sk = __sock_hash_lookup_elem(map, key); + if (!sk || !sk_fullsock(sk)) + return NULL; + if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt)) + return NULL; + return sk; } static void sock_hash_release_progs(struct bpf_map *map) |