diff options
author | Alexei Starovoitov <ast@kernel.org> | 2022-05-12 04:16:55 +0300 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2022-05-12 04:16:55 +0300 |
commit | 0bed8f374a9981d94c6b03376925419ef588f44a (patch) | |
tree | 838c1e08c3b57feea8ff26e4c648e36bce8d9cc2 | |
parent | 571b8739dd6df6d41bdcc83ed50b481a27af912c (diff) | |
parent | ed7c13776e20c74486b0939a3c1de984c5efb6aa (diff) | |
download | linux-0bed8f374a9981d94c6b03376925419ef588f44a.tar.xz |
Merge branch 'Introduce access remote cpu elem support in BPF percpu map'
Feng zhou says:
====================
From: Feng Zhou <zhoufeng.zf@bytedance.com>
Trace some functions, such as enqueue_task_fair, need to access the
corresponding cpu, not the current cpu, and bpf_map_lookup_elem percpu map
cannot do it. So add bpf_map_lookup_percpu_elem to accomplish it for
percpu_array_map, percpu_hash_map, lru_percpu_hash_map.
v1->v2: Addressed comments from Alexei Starovoitov.
- add a selftest for bpf_map_lookup_percpu_elem.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r-- | include/linux/bpf.h | 2 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 9 | ||||
-rw-r--r-- | kernel/bpf/arraymap.c | 15 | ||||
-rw-r--r-- | kernel/bpf/core.c | 1 | ||||
-rw-r--r-- | kernel/bpf/hashtab.c | 32 | ||||
-rw-r--r-- | kernel/bpf/helpers.c | 18 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 17 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 2 | ||||
-rw-r--r-- | tools/include/uapi/linux/bpf.h | 9 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c | 46 | ||||
-rw-r--r-- | tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c | 54 |
11 files changed, 203 insertions, 2 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3ded8711457f..5061ccd8b2dc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -89,6 +89,7 @@ struct bpf_map_ops { int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); int (*map_pop_elem)(struct bpf_map *map, void *value); int (*map_peek_elem)(struct bpf_map *map, void *value); + void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, @@ -2184,6 +2185,7 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto; extern const struct bpf_func_proto bpf_map_push_elem_proto; extern const struct bpf_func_proto bpf_map_pop_elem_proto; extern const struct bpf_func_proto bpf_map_peek_elem_proto; +extern const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bc7f89948f54..0210f85131b3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5164,6 +5164,14 @@ union bpf_attr { * if not NULL, is a reference which must be released using its * corresponding release function, or moved into a BPF map before * program exit. + * + * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) + * Description + * Perform a lookup in *percpu map* for an entry associated to + * *key* on *cpu*. + * Return + * Map value associated to *key* on *cpu*, or **NULL** if no entry + * was found or *cpu* is invalid. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5361,6 +5369,7 @@ union bpf_attr { FN(skb_set_tstamp), \ FN(ima_file_hash), \ FN(kptr_xchg), \ + FN(map_lookup_percpu_elem), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 724613da6576..fe40d3b9458f 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -243,6 +243,20 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) return this_cpu_ptr(array->pptrs[index & array->index_mask]); } +static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + u32 index = *(u32 *)key; + + if (cpu >= nr_cpu_ids) + return NULL; + + if (unlikely(index >= array->map.max_entries)) + return NULL; + + return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu); +} + int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) { struct bpf_array *array = container_of(map, struct bpf_array, map); @@ -725,6 +739,7 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_lookup_elem = percpu_array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, + .map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem, .map_seq_show_elem = percpu_array_map_seq_show_elem, .map_check_btf = array_map_check_btf, .map_lookup_batch = generic_map_lookup_batch, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 13e9dbeeedf3..76f68d0a7ae8 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2619,6 +2619,7 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak; const struct bpf_func_proto bpf_map_push_elem_proto __weak; const struct bpf_func_proto bpf_map_pop_elem_proto __weak; const struct bpf_func_proto bpf_map_peek_elem_proto __weak; +const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto __weak; const struct bpf_func_proto bpf_spin_lock_proto __weak; const struct bpf_func_proto bpf_spin_unlock_proto __weak; const struct bpf_func_proto bpf_jiffies64_proto __weak; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 705841279d16..17fb69c0e0dc 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -2199,6 +2199,20 @@ static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) return NULL; } +static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) +{ + struct htab_elem *l; + + if (cpu >= nr_cpu_ids) + return NULL; + + l = __htab_map_lookup_elem(map, key); + if (l) + return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); + else + return NULL; +} + static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) { struct htab_elem *l = __htab_map_lookup_elem(map, key); @@ -2211,6 +2225,22 @@ static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) return NULL; } +static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) +{ + struct htab_elem *l; + + if (cpu >= nr_cpu_ids) + return NULL; + + l = __htab_map_lookup_elem(map, key); + if (l) { + bpf_lru_node_set_ref(&l->lru_node); + return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); + } + + return NULL; +} + int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) { struct htab_elem *l; @@ -2300,6 +2330,7 @@ const struct bpf_map_ops htab_percpu_map_ops = { .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem, .map_update_elem = htab_percpu_map_update_elem, .map_delete_elem = htab_map_delete_elem, + .map_lookup_percpu_elem = htab_percpu_map_lookup_percpu_elem, .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, @@ -2318,6 +2349,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem, .map_update_elem = htab_lru_percpu_map_update_elem, .map_delete_elem = htab_lru_map_delete_elem, + .map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem, .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 3e709fed5306..d5f104a39092 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -119,6 +119,22 @@ const struct bpf_func_proto bpf_map_peek_elem_proto = { .arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE, }; +BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu) +{ + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + return (unsigned long) map->ops->map_lookup_percpu_elem(map, key, cpu); +} + +const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto = { + .func = bpf_map_lookup_percpu_elem, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_MAP_KEY, + .arg3_type = ARG_ANYTHING, +}; + const struct bpf_func_proto bpf_get_prandom_u32_proto = { .func = bpf_user_rnd_u32, .gpl_only = false, @@ -1420,6 +1436,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_map_pop_elem_proto; case BPF_FUNC_map_peek_elem: return &bpf_map_peek_elem_proto; + case BPF_FUNC_map_lookup_percpu_elem: + return &bpf_map_lookup_percpu_elem_proto; case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; case BPF_FUNC_get_smp_processor_id: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c27fee73a2cb..05c1b6656824 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6137,6 +6137,12 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, map->map_type != BPF_MAP_TYPE_BLOOM_FILTER) goto error; break; + case BPF_FUNC_map_lookup_percpu_elem: + if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY && + map->map_type != BPF_MAP_TYPE_PERCPU_HASH && + map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH) + goto error; + break; case BPF_FUNC_sk_storage_get: case BPF_FUNC_sk_storage_delete: if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) @@ -6750,7 +6756,8 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, func_id != BPF_FUNC_map_pop_elem && func_id != BPF_FUNC_map_peek_elem && func_id != BPF_FUNC_for_each_map_elem && - func_id != BPF_FUNC_redirect_map) + func_id != BPF_FUNC_redirect_map && + func_id != BPF_FUNC_map_lookup_percpu_elem) return 0; if (map == NULL) { @@ -13810,7 +13817,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env) insn->imm == BPF_FUNC_map_pop_elem || insn->imm == BPF_FUNC_map_peek_elem || insn->imm == BPF_FUNC_redirect_map || - insn->imm == BPF_FUNC_for_each_map_elem)) { + insn->imm == BPF_FUNC_for_each_map_elem || + insn->imm == BPF_FUNC_map_lookup_percpu_elem)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; @@ -13859,6 +13867,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env) bpf_callback_t callback_fn, void *callback_ctx, u64 flags))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem, + (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL)); patch_map_ops_generic: switch (insn->imm) { @@ -13886,6 +13896,9 @@ patch_map_ops_generic: case BPF_FUNC_for_each_map_elem: insn->imm = BPF_CALL_IMM(ops->map_for_each_callback); continue; + case BPF_FUNC_map_lookup_percpu_elem: + insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem); + continue; } goto patch_call_imm; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 2eaac094caf8..7141ca8a1c2d 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1197,6 +1197,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_map_pop_elem_proto; case BPF_FUNC_map_peek_elem: return &bpf_map_peek_elem_proto; + case BPF_FUNC_map_lookup_percpu_elem: + return &bpf_map_lookup_percpu_elem_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; case BPF_FUNC_ktime_get_boot_ns: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index bc7f89948f54..0210f85131b3 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5164,6 +5164,14 @@ union bpf_attr { * if not NULL, is a reference which must be released using its * corresponding release function, or moved into a BPF map before * program exit. + * + * void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu) + * Description + * Perform a lookup in *percpu map* for an entry associated to + * *key* on *cpu*. + * Return + * Map value associated to *key* on *cpu*, or **NULL** if no entry + * was found or *cpu* is invalid. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5361,6 +5369,7 @@ union bpf_attr { FN(skb_set_tstamp), \ FN(ima_file_hash), \ FN(kptr_xchg), \ + FN(map_lookup_percpu_elem), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c new file mode 100644 index 000000000000..58b24c2112b0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2022 Bytedance + +#include <test_progs.h> + +#include "test_map_lookup_percpu_elem.skel.h" + +#define TEST_VALUE 1 + +void test_map_lookup_percpu_elem(void) +{ + struct test_map_lookup_percpu_elem *skel; + int key = 0, ret; + int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + int *buf; + + buf = (int *)malloc(nr_cpus*sizeof(int)); + if (!ASSERT_OK_PTR(buf, "malloc")) + return; + memset(buf, 0, nr_cpus*sizeof(int)); + buf[0] = TEST_VALUE; + + skel = test_map_lookup_percpu_elem__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_map_lookup_percpu_elem__open_and_load")) + return; + ret = test_map_lookup_percpu_elem__attach(skel); + ASSERT_OK(ret, "test_map_lookup_percpu_elem__attach"); + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_array_map), &key, buf, 0); + ASSERT_OK(ret, "percpu_array_map update"); + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_hash_map), &key, buf, 0); + ASSERT_OK(ret, "percpu_hash_map update"); + + ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_lru_hash_map), &key, buf, 0); + ASSERT_OK(ret, "percpu_lru_hash_map update"); + + syscall(__NR_getuid); + + ret = skel->bss->percpu_array_elem_val == TEST_VALUE && + skel->bss->percpu_hash_elem_val == TEST_VALUE && + skel->bss->percpu_lru_hash_elem_val == TEST_VALUE; + ASSERT_OK(!ret, "bpf_map_lookup_percpu_elem success"); + + test_map_lookup_percpu_elem__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c new file mode 100644 index 000000000000..5d4ef86cbf48 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2022 Bytedance + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +int percpu_array_elem_val = 0; +int percpu_hash_elem_val = 0; +int percpu_lru_hash_elem_val = 0; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} percpu_array_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} percpu_hash_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} percpu_lru_hash_map SEC(".maps"); + +SEC("tp/syscalls/sys_enter_getuid") +int sysenter_getuid(const void *ctx) +{ + __u32 key = 0; + __u32 cpu = 0; + __u32 *value; + + value = bpf_map_lookup_percpu_elem(&percpu_array_map, &key, cpu); + if (value) + percpu_array_elem_val = *value; + + value = bpf_map_lookup_percpu_elem(&percpu_hash_map, &key, cpu); + if (value) + percpu_hash_elem_val = *value; + + value = bpf_map_lookup_percpu_elem(&percpu_lru_hash_map, &key, cpu); + if (value) + percpu_lru_hash_elem_val = *value; + + return 0; +} + +char _license[] SEC("license") = "GPL"; |