diff options
author | Jakub Kicinski <kuba@kernel.org> | 2022-02-10 05:17:54 +0300 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2022-02-10 05:40:56 +0300 |
commit | 1127170d457eb9bcc839ef7f2064634f92fe83e2 (patch) | |
tree | 228996f3ae0b734cadc7118a4d10efc1635acf23 /net | |
parent | 5cad527d5ffa9a1c4731bb9c97d2ee93f8960d50 (diff) | |
parent | e5313968c41ba890a91344773a0474d0246d20a3 (diff) | |
download | linux-1127170d457eb9bcc839ef7f2064634f92fe83e2.tar.xz |
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:
====================
pull-request: bpf-next 2022-02-09
We've added 126 non-merge commits during the last 16 day(s) which contain
a total of 201 files changed, 4049 insertions(+), 2215 deletions(-).
The main changes are:
1) Add custom BPF allocator for JITs that pack multiple programs into a huge
page to reduce iTLB pressure, from Song Liu.
2) Add __user tagging support in vmlinux BTF and utilize it from BPF
verifier when generating loads, from Yonghong Song.
3) Add per-socket fast path check guarding from cgroup/BPF overhead when
used by only some sockets, from Pavel Begunkov.
4) Continued libbpf deprecation work of APIs/features and removal of their
usage from samples, selftests, libbpf & bpftool, from Andrii Nakryiko
and various others.
5) Improve BPF instruction set documentation by adding byte swap
instructions and cleaning up load/store section, from Christoph Hellwig.
6) Switch BPF preload infra to light skeleton and remove libbpf dependency
from it, from Alexei Starovoitov.
7) Fix architecture-agnostic macros in libbpf for accessing syscall
arguments from BPF progs for non-x86 architectures,
from Ilya Leoshkevich.
8) Rework port members in struct bpf_sk_lookup and struct bpf_sock to be
of 16-bit field with anonymous zero padding, from Jakub Sitnicki.
9) Add new bpf_copy_from_user_task() helper to read memory from a different
task than current. Add ability to create sleepable BPF iterator progs,
from Kenny Yu.
10) Implement XSK batching for ice's zero-copy driver used by AF_XDP and
utilize TX batching API from XSK buffer pool, from Maciej Fijalkowski.
11) Generate temporary netns names for BPF selftests to avoid naming
collisions, from Hangbin Liu.
12) Implement bpf_core_types_are_compat() with limited recursion for
in-kernel usage, from Matteo Croce.
13) Simplify pahole version detection and finally enable CONFIG_DEBUG_INFO_DWARF5
to be selected with CONFIG_DEBUG_INFO_BTF, from Nathan Chancellor.
14) Misc minor fixes to libbpf and selftests from various folks.
* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (126 commits)
selftests/bpf: Cover 4-byte load from remote_port in bpf_sk_lookup
bpf: Make remote_port field in struct bpf_sk_lookup 16-bit wide
libbpf: Fix compilation warning due to mismatched printf format
selftests/bpf: Test BPF_KPROBE_SYSCALL macro
libbpf: Add BPF_KPROBE_SYSCALL macro
libbpf: Fix accessing the first syscall argument on s390
libbpf: Fix accessing the first syscall argument on arm64
libbpf: Allow overriding PT_REGS_PARM1{_CORE}_SYSCALL
selftests/bpf: Skip test_bpf_syscall_macro's syscall_arg1 on arm64 and s390
libbpf: Fix accessing syscall arguments on riscv
libbpf: Fix riscv register names
libbpf: Fix accessing syscall arguments on powerpc
selftests/bpf: Use PT_REGS_SYSCALL_REGS in bpf_syscall_macro
libbpf: Add PT_REGS_SYSCALL_REGS macro
selftests/bpf: Fix an endianness issue in bpf_syscall_macro test
bpf: Fix bpf_prog_pack build HPAGE_PMD_SIZE
bpf: Fix leftover header->pages in sparc and powerpc code.
libbpf: Fix signedness bug in btf_dump_array_data()
selftests/bpf: Do not export subtest as standalone test
bpf, x86_64: Fail gracefully on bpf_jit_binary_pack_finalize failures
...
====================
Link: https://lore.kernel.org/r/20220209210050.8425-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r-- | net/bpf/bpf_dummy_struct_ops.c | 6 | ||||
-rw-r--r-- | net/bpf/test_run.c | 18 | ||||
-rw-r--r-- | net/core/filter.c | 13 | ||||
-rw-r--r-- | net/ipv4/bpf_tcp_ca.c | 6 | ||||
-rw-r--r-- | net/xdp/xsk.c | 13 | ||||
-rw-r--r-- | net/xdp/xsk_buff_pool.c | 7 | ||||
-rw-r--r-- | net/xdp/xsk_queue.h | 19 |
7 files changed, 50 insertions, 32 deletions
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index fbc896323bec..d0e54e30658a 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -145,7 +145,8 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id) + u32 *next_btf_id, + enum bpf_type_flag *flag) { const struct btf_type *state; s32 type_id; @@ -162,7 +163,8 @@ static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log, return -EACCES; } - err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id); + err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id, + flag); if (err < 0) return err; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 65b52b4bd6e1..f08034500813 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -154,7 +154,8 @@ static int bpf_test_finish(const union bpf_attr *kattr, goto out; if (sinfo) { - int i, offset = len, data_len; + int i, offset = len; + u32 data_len; for (i = 0; i < sinfo->nr_frags; i++) { skb_frag_t *frag = &sinfo->frags[i]; @@ -164,7 +165,7 @@ static int bpf_test_finish(const union bpf_attr *kattr, break; } - data_len = min_t(int, copy_size - offset, + data_len = min_t(u32, copy_size - offset, skb_frag_size(frag)); if (copy_to_user(data_out + offset, @@ -960,7 +961,12 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, while (size < kattr->test.data_size_in) { struct page *page; skb_frag_t *frag; - int data_len; + u32 data_len; + + if (sinfo->nr_frags == MAX_SKB_FRAGS) { + ret = -ENOMEM; + goto out; + } page = alloc_page(GFP_KERNEL); if (!page) { @@ -971,7 +977,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, frag = &sinfo->frags[sinfo->nr_frags++]; __skb_frag_set_page(frag, page); - data_len = min_t(int, kattr->test.data_size_in - size, + data_len = min_t(u32, kattr->test.data_size_in - size, PAGE_SIZE); skb_frag_size_set(frag, data_len); @@ -1141,7 +1147,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx))) goto out; - if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) { + if (user_ctx->local_port > U16_MAX) { ret = -ERANGE; goto out; } @@ -1149,7 +1155,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat ctx.family = (u16)user_ctx->family; ctx.protocol = (u16)user_ctx->protocol; ctx.dport = (u16)user_ctx->local_port; - ctx.sport = (__force __be16)user_ctx->remote_port; + ctx.sport = user_ctx->remote_port; switch (ctx.family) { case AF_INET: diff --git a/net/core/filter.c b/net/core/filter.c index f497ca7a16d2..818244068c2d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8275,6 +8275,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) { const int size_default = sizeof(__u32); + int field_size; if (off < 0 || off >= sizeof(struct bpf_sock)) return false; @@ -8286,7 +8287,6 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case offsetof(struct bpf_sock, family): case offsetof(struct bpf_sock, type): case offsetof(struct bpf_sock, protocol): - case offsetof(struct bpf_sock, dst_port): case offsetof(struct bpf_sock, src_port): case offsetof(struct bpf_sock, rx_queue_mapping): case bpf_ctx_range(struct bpf_sock, src_ip4): @@ -8295,6 +8295,14 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type, case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]): bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); + case bpf_ctx_range(struct bpf_sock, dst_port): + field_size = size == size_default ? + size_default : sizeof_field(struct bpf_sock, dst_port); + bpf_ctx_record_field_size(info, field_size); + return bpf_ctx_narrow_access_ok(off, size, field_size); + case offsetofend(struct bpf_sock, dst_port) ... + offsetof(struct bpf_sock, dst_ip4) - 1: + return false; } return size == size_default; @@ -10845,7 +10853,8 @@ static bool sk_lookup_is_valid_access(int off, int size, case bpf_ctx_range(struct bpf_sk_lookup, local_ip4): case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]): case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]): - case bpf_ctx_range(struct bpf_sk_lookup, remote_port): + case offsetof(struct bpf_sk_lookup, remote_port) ... + offsetof(struct bpf_sk_lookup, local_ip4) - 1: case bpf_ctx_range(struct bpf_sk_lookup, local_port): case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex): bpf_ctx_record_field_size(info, sizeof(__u32)); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index b60c9fd7147e..f79ab942f03b 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -96,12 +96,14 @@ static int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, int off, int size, enum bpf_access_type atype, - u32 *next_btf_id) + u32 *next_btf_id, + enum bpf_type_flag *flag) { size_t end; if (atype == BPF_READ) - return btf_struct_access(log, btf, t, off, size, atype, next_btf_id); + return btf_struct_access(log, btf, t, off, size, atype, next_btf_id, + flag); if (t != tcp_sock_type) { bpf_log(log, "only read is supported\n"); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 28ef3f4465ae..2abd64e4d589 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -343,9 +343,9 @@ out: } EXPORT_SYMBOL(xsk_tx_peek_desc); -static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_desc *descs, - u32 max_entries) +static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, u32 max_entries) { + struct xdp_desc *descs = pool->tx_descs; u32 nb_pkts = 0; while (nb_pkts < max_entries && xsk_tx_peek_desc(pool, &descs[nb_pkts])) @@ -355,8 +355,7 @@ static u32 xsk_tx_peek_release_fallback(struct xsk_buff_pool *pool, struct xdp_d return nb_pkts; } -u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc *descs, - u32 max_entries) +u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max_entries) { struct xdp_sock *xs; u32 nb_pkts; @@ -365,7 +364,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc * if (!list_is_singular(&pool->xsk_tx_list)) { /* Fallback to the non-batched version */ rcu_read_unlock(); - return xsk_tx_peek_release_fallback(pool, descs, max_entries); + return xsk_tx_peek_release_fallback(pool, max_entries); } xs = list_first_or_null_rcu(&pool->xsk_tx_list, struct xdp_sock, tx_list); @@ -374,7 +373,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc * goto out; } - nb_pkts = xskq_cons_peek_desc_batch(xs->tx, descs, pool, max_entries); + nb_pkts = xskq_cons_peek_desc_batch(xs->tx, pool, max_entries); if (!nb_pkts) { xs->tx->queue_empty_descs++; goto out; @@ -386,7 +385,7 @@ u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, struct xdp_desc * * packets. This avoids having to implement any buffering in * the Tx path. */ - nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, descs, nb_pkts); + nb_pkts = xskq_prod_reserve_addr_batch(pool->cq, pool->tx_descs, nb_pkts); if (!nb_pkts) goto out; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index fd39bb660ebc..b34fca6ada86 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -37,6 +37,7 @@ void xp_destroy(struct xsk_buff_pool *pool) if (!pool) return; + kvfree(pool->tx_descs); kvfree(pool->heads); kvfree(pool); } @@ -58,6 +59,12 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, if (!pool->heads) goto out; + if (xs->tx) { + pool->tx_descs = kcalloc(xs->tx->nentries, sizeof(*pool->tx_descs), GFP_KERNEL); + if (!pool->tx_descs) + goto out; + } + pool->chunk_mask = ~((u64)umem->chunk_size - 1); pool->addrs_cnt = umem->size; pool->heads_cnt = umem->chunks; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index e9aa2c236356..801cda5d1938 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -205,11 +205,11 @@ static inline bool xskq_cons_read_desc(struct xsk_queue *q, return false; } -static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, - struct xdp_desc *descs, - struct xsk_buff_pool *pool, u32 max) +static inline u32 xskq_cons_read_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool, + u32 max) { u32 cached_cons = q->cached_cons, nb_entries = 0; + struct xdp_desc *descs = pool->tx_descs; while (cached_cons != q->cached_prod && nb_entries < max) { struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; @@ -282,12 +282,12 @@ static inline bool xskq_cons_peek_desc(struct xsk_queue *q, return xskq_cons_read_desc(q, desc, pool); } -static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xdp_desc *descs, - struct xsk_buff_pool *pool, u32 max) +static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool, + u32 max) { u32 entries = xskq_cons_nb_entries(q, max); - return xskq_cons_read_desc_batch(q, descs, pool, entries); + return xskq_cons_read_desc_batch(q, pool, entries); } /* To improve performance in the xskq_cons_release functions, only update local state here. @@ -304,13 +304,6 @@ static inline void xskq_cons_release_n(struct xsk_queue *q, u32 cnt) q->cached_cons += cnt; } -static inline bool xskq_cons_is_full(struct xsk_queue *q) -{ - /* No barriers needed since data is not accessed */ - return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) == - q->nentries; -} - static inline u32 xskq_cons_present_entries(struct xsk_queue *q) { /* No barriers needed since data is not accessed */ |