author     Andrii Nakryiko <andrii@kernel.org>       2023-11-02 19:47:41 +0300
committer  Alexei Starovoitov <ast@kernel.org>       2023-11-10 06:06:23 +0300
commit     e3499962d836af085a621f005978fee20fc87276 (patch)
tree       989e0be5c06e56cf68317fbd3614ed9e2423d5b2 /tools/testing
parent     cd9c127069c040d6b022f1ff32fed4b52b9a4017 (diff)
parent     2f553b032cad4993969cab356b3b0e306fcd1cd1 (diff)
Merge branch 'selftests/bpf: Fixes for map_percpu_stats test'
Hou Tao says:
====================
From: Hou Tao <houtao1@huawei.com>
Hi,
BPF CI fails in map_percpu_stats_percpu_hash from time to time [1].
The failure appears to be caused by the per-cpu BPF memory allocator:
it may fail to allocate a per-cpu pointer and may not refill its free
llist in time, in which case bpf_map_update_elem() returns -ENOMEM.
Patch #1 fixes the size of the value passed to the per-cpu map update
API. The problem was found while fixing the ENOMEM issue, so it is
posted in this patchset as well. Patches #2 and #3 mitigate the ENOMEM
problem by retrying the update operation for non-preallocated per-cpu
maps.
Please see individual patches for more details. And comments are always
welcome.
Regards,
Tao
[1]: https://github.com/kernel-patches/bpf/actions/runs/6713177520/job/18244865326?pr=5909
====================
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
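
[Editor's note: as context for patch #1, when user space updates a per-cpu map,
the kernel copies round_up(value_size, 8) bytes per *possible* CPU from the
supplied buffer, so passing a pointer to a single value reads out of bounds.
Below is a minimal sketch of a correctly sized update using libbpf's
libbpf_num_possible_cpus(); the helper update_percpu_elem() is illustrative and
not part of the patchset.]

	#include <errno.h>
	#include <stdlib.h>
	#include <bpf/bpf.h>
	#include <bpf/libbpf.h>

	/* Illustrative helper (not from the patchset): update one element of a
	 * BPF_MAP_TYPE_PERCPU_HASH map. The kernel reads round_up(value_size, 8)
	 * bytes for every possible CPU, so the buffer must be sized for all
	 * possible CPUs, not just one.
	 */
	static int update_percpu_elem(int map_fd, __u32 key, __u32 value_size)
	{
		int ncpus = libbpf_num_possible_cpus();
		size_t slot = (value_size + 7) & ~(size_t)7; /* 8-byte aligned slot */
		void *buf;
		int err;

		if (ncpus < 0)
			return ncpus;
		buf = calloc(ncpus, slot);	/* zero-filled value for every CPU */
		if (!buf)
			return -ENOMEM;
		err = bpf_map_update_elem(map_fd, &key, buf, BPF_ANY);
		free(buf);
		return err;
	}

[This is the same reason patch #1 switches patch_map_thread() from passing &val
to passing an 8 KB shared blob for per-cpu map types, as visible in the diff
below.]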
Diffstat (limited to 'tools/testing')
-rw-r--r--  tools/testing/selftests/bpf/map_tests/map_percpu_stats.c | 39
-rw-r--r--  tools/testing/selftests/bpf/test_maps.c                  | 17
-rw-r--r--  tools/testing/selftests/bpf/test_maps.h                  |  5
3 files changed, 53 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
index 8bf497a9843e..2ea36408816b 100644
--- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
+++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
@@ -131,10 +131,17 @@ static bool is_lru(__u32 map_type)
 	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
 }
 
+static bool is_percpu(__u32 map_type)
+{
+	return map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+}
+
 struct upsert_opts {
 	__u32 map_type;
 	int map_fd;
 	__u32 n;
+	bool retry_for_nomem;
 };
 
 static int create_small_hash(void)
@@ -148,19 +155,38 @@ static int create_small_hash(void)
 	return map_fd;
 }
 
+static bool retry_for_nomem_fn(int err)
+{
+	return err == ENOMEM;
+}
+
 static void *patch_map_thread(void *arg)
 {
+	/* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */
+	static __u8 blob[8 << 10];
 	struct upsert_opts *opts = arg;
+	void *val_ptr;
 	int val;
 	int ret;
 	int i;
 
 	for (i = 0; i < opts->n; i++) {
-		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
 			val = create_small_hash();
-		else
+			val_ptr = &val;
+		} else if (is_percpu(opts->map_type)) {
+			val_ptr = blob;
+		} else {
 			val = rand();
-		ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0);
+			val_ptr = &val;
+		}
+
+		/* 2 seconds may be enough ? */
+		if (opts->retry_for_nomem)
+			ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0,
+						   40, retry_for_nomem_fn);
+		else
+			ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0);
 		CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));
 
 		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
@@ -281,6 +307,13 @@ static void __test(int map_fd)
 	else
 		opts.n /= 2;
 
+	/* per-cpu bpf memory allocator may not be able to allocate per-cpu
+	 * pointer successfully and it can not refill free llist timely, and
+	 * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate
+	 * the problem temporarily.
+	 */
+	opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC);
+
 	/*
 	 * Upsert keys [0, n) under some competition: with random values from
 	 * N_THREADS threads. Check values, then delete all elements and check
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 7fc00e423e4d..767e0693df10 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1396,13 +1396,18 @@ static void test_map_stress(void)
 #define MAX_DELAY_US 50000
 #define MIN_DELAY_RANGE_US 5000
 
-static int map_update_retriable(int map_fd, const void *key, const void *value,
-				int flags, int attempts)
+static bool retry_for_again_or_busy(int err)
+{
+	return (err == EAGAIN || err == EBUSY);
+}
+
+int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
+			 retry_for_error_fn need_retry)
 {
 	int delay = rand() % MIN_DELAY_RANGE_US;
 
 	while (bpf_map_update_elem(map_fd, key, value, flags)) {
-		if (!attempts || (errno != EAGAIN && errno != EBUSY))
+		if (!attempts || !need_retry(errno))
 			return -errno;
 
 		if (delay <= MAX_DELAY_US / 2)
@@ -1445,11 +1450,13 @@ static void test_update_delete(unsigned int fn, void *data)
 		key = value = i;
 
 		if (do_update) {
-			err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES);
+			err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES,
+						   retry_for_again_or_busy);
 			if (err)
 				printf("error %d %d\n", err, errno);
 			assert(err == 0);
-			err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES);
+			err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES,
+						   retry_for_again_or_busy);
 			if (err)
 				printf("error %d %d\n", err, errno);
 			assert(err == 0);
diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h
index f6fbca761732..e4ac704a536c 100644
--- a/tools/testing/selftests/bpf/test_maps.h
+++ b/tools/testing/selftests/bpf/test_maps.h
@@ -4,6 +4,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdbool.h>
 
 #define CHECK(condition, tag, format...) ({	\
 	int __ret = !!(condition);		\
@@ -16,4 +17,8 @@
 
 extern int skips;
 
+typedef bool (*retry_for_error_fn)(int err);
+int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
+			 retry_for_error_fn need_retry);
+
 #endif
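
[Editor's note: the refactoring keeps a single retry loop and lets each caller
classify transient errno values via the retry_for_error_fn callback, rather
than hard-coding EAGAIN/EBUSY. A rough sketch of how another caller might plug
in its own predicate; the E2BIG predicate and the demo map are hypothetical,
and the program is assumed to link against test_maps.c.]

	#include <bpf/bpf.h>
	#include <stdbool.h>

	#include "test_maps.h"

	/* Hypothetical predicate: treat E2BIG (hash map full) as transient,
	 * e.g. when a concurrent thread may free an entry at any moment.
	 */
	static bool retry_for_e2big(int err)
	{
		return err == E2BIG;
	}

	int main(void)
	{
		__u32 key = 1, value = 42;
		int map_fd;

		map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "retry_demo",
					sizeof(key), sizeof(value), 1, NULL);
		if (map_fd < 0)
			return 1;

		/* Retry up to 10 times while the update fails with E2BIG. */
		return map_update_retriable(map_fd, &key, &value, BPF_NOEXIST,
					    10, retry_for_e2big) ? 1 : 0;
	}

[Note on the "2 seconds may be enough ?" comment in the diff: the retry loop's
backoff is capped by MAX_DELAY_US (50 ms), so the 40 attempts chosen for the
ENOMEM case work out to roughly 2 seconds in the worst case.]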