-rw-r--r--  Documentation/bpf/map_hash.rst              |  8
-rw-r--r--  Documentation/bpf/map_lru_hash_update.dot   |  6
-rw-r--r--  kernel/bpf/bpf_lru_list.c                   |  9
-rw-r--r--  kernel/bpf/bpf_lru_list.h                   |  1
-rw-r--r--  tools/testing/selftests/bpf/test_lru_map.c  | 72
 5 files changed, 52 insertions(+), 44 deletions(-)
diff --git a/Documentation/bpf/map_hash.rst b/Documentation/bpf/map_hash.rst
index d2343952f2cb..8606bf958a8c 100644
--- a/Documentation/bpf/map_hash.rst
+++ b/Documentation/bpf/map_hash.rst
@@ -233,10 +233,16 @@ attempts in order to enforce the LRU property which have increasing impacts on
other CPUs involved in the following operation attempts:
- Attempt to use CPU-local state to batch operations
-- Attempt to fetch free nodes from global lists
+- Attempt to fetch ``target_free`` free nodes from global lists
- Attempt to pull any node from a global list and remove it from the hashmap
- Attempt to pull any node from any CPU's list and remove it from the hashmap
+The number of nodes to borrow from the global list in a batch, ``target_free``,
+depends on the size of the map. A larger batch size reduces lock contention,
+but may also exhaust the global structure. The value is computed at map init
+to avoid exhaustion, by limiting the aggregate reservation across all CPUs to
+half the map size, with a minimum of one element and a maximum of 128 at a time.
+
This algorithm is described visually in the following diagram. See the
description in commit 3a08c2fd7634 ("bpf: LRU List") for a full explanation of
the corresponding operations:
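
For reference, the sizing rule described in the new paragraph above can be
exercised with a small standalone sketch (plain userspace C, not kernel code;
nr_elems and nr_cpus stand in for the map size and num_possible_cpus(), and
128 mirrors LOCAL_FREE_TARGET):

#include <assert.h>
#include <stdio.h>

#define LOCAL_FREE_TARGET 128	/* same ceiling as the kernel constant */

/* Same rule as clamp((nr_elems / num_possible_cpus()) / 2, 1, 128). */
static unsigned int target_free(unsigned int nr_elems, unsigned int nr_cpus)
{
	unsigned int t = (nr_elems / nr_cpus) / 2;

	if (t < 1)
		return 1;
	if (t > LOCAL_FREE_TARGET)
		return LOCAL_FREE_TARGET;
	return t;
}

int main(void)
{
	assert(target_free(4096, 8) == 128);	/* 256 capped at the 128 budget */
	assert(target_free(512, 8) == 32);	/* aggregate 8 * 32 = 256, half the map */
	assert(target_free(8, 8) == 1);		/* rounds down to 0, raised to 1 */
	printf("target_free sizing ok\n");
	return 0;
}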
diff --git a/Documentation/bpf/map_lru_hash_update.dot b/Documentation/bpf/map_lru_hash_update.dot
index a0fee349d29c..ab10058f5b79 100644
--- a/Documentation/bpf/map_lru_hash_update.dot
+++ b/Documentation/bpf/map_lru_hash_update.dot
@@ -35,18 +35,18 @@ digraph {
fn_bpf_lru_list_pop_free_to_local [shape=rectangle,fillcolor=2,
label="Flush local pending,
Rotate Global list, move
- LOCAL_FREE_TARGET
+ target_free
from global -> local"]
// Also corresponds to:
// fn__local_list_flush()
// fn_bpf_lru_list_rotate()
fn___bpf_lru_node_move_to_free[shape=diamond,fillcolor=2,
- label="Able to free\nLOCAL_FREE_TARGET\nnodes?"]
+ label="Able to free\ntarget_free\nnodes?"]
fn___bpf_lru_list_shrink_inactive [shape=rectangle,fillcolor=3,
label="Shrink inactive list
up to remaining
- LOCAL_FREE_TARGET
+ target_free
(global LRU -> local)"]
fn___bpf_lru_list_shrink [shape=diamond,fillcolor=2,
label="> 0 entries in\nlocal free list?"]
diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index 3dabdd137d10..2d6e1c98d8ad 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -337,12 +337,12 @@ static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru,
list) {
__bpf_lru_node_move_to_free(l, node, local_free_list(loc_l),
BPF_LRU_LOCAL_LIST_T_FREE);
- if (++nfree == LOCAL_FREE_TARGET)
+ if (++nfree == lru->target_free)
break;
}
- if (nfree < LOCAL_FREE_TARGET)
- __bpf_lru_list_shrink(lru, l, LOCAL_FREE_TARGET - nfree,
+ if (nfree < lru->target_free)
+ __bpf_lru_list_shrink(lru, l, lru->target_free - nfree,
local_free_list(loc_l),
BPF_LRU_LOCAL_LIST_T_FREE);
@@ -577,6 +577,9 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
buf += elem_size;
}
+
+ lru->target_free = clamp((nr_elems / num_possible_cpus()) / 2,
+ 1, LOCAL_FREE_TARGET);
}
static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf,
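
As a rough illustration of the hunks above, a standalone toy model of how
bpf_lru_list_pop_free_to_local() batches up to target_free nodes and shrinks
the inactive list for any shortfall (hypothetical toy_lru type with plain
counters in place of the real list_head machinery):

#include <stdio.h>

struct toy_lru {
	unsigned int global_free;	/* nodes on the global free list */
	unsigned int inactive;		/* nodes on the inactive list */
	unsigned int local_free;	/* nodes moved to the local free list */
	unsigned int target_free;
};

static void pop_free_to_local(struct toy_lru *lru)
{
	unsigned int nfree = 0;

	/* First take ready-to-use free nodes, up to target_free. */
	while (nfree < lru->target_free && lru->global_free) {
		lru->global_free--;
		lru->local_free++;
		nfree++;
	}

	/* Cover any shortfall by shrinking the inactive list. */
	while (nfree < lru->target_free && lru->inactive) {
		lru->inactive--;
		lru->local_free++;
		nfree++;
	}
}

int main(void)
{
	struct toy_lru lru = { .global_free = 3, .inactive = 100,
			       .target_free = 8 };

	pop_free_to_local(&lru);
	printf("local_free=%u inactive=%u\n", lru.local_free, lru.inactive);
	/* prints: local_free=8 inactive=95 */
	return 0;
}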
diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
index cbd8d3720c2b..fe2661a58ea9 100644
--- a/kernel/bpf/bpf_lru_list.h
+++ b/kernel/bpf/bpf_lru_list.h
@@ -58,6 +58,7 @@ struct bpf_lru {
del_from_htab_func del_from_htab;
void *del_arg;
unsigned int hash_offset;
+ unsigned int target_free;
unsigned int nr_scans;
bool percpu;
};
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index fda7589c5023..4ae83f4b7fc7 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -138,6 +138,12 @@ static int sched_next_online(int pid, int *next_to_try)
return ret;
}
+/* Inverse of how bpf_common_lru_populate derives target_free from map_size. */
+static unsigned int __map_size(unsigned int tgt_free)
+{
+ return tgt_free * nr_cpus * 2;
+}
+
/* Size of the LRU map is 2
* Add key=1 (+1 key)
* Add key=2 (+1 key)
@@ -231,11 +237,11 @@ static void test_lru_sanity0(int map_type, int map_flags)
printf("Pass\n");
}
-/* Size of the LRU map is 1.5*tgt_free
- * Insert 1 to tgt_free (+tgt_free keys)
- * Lookup 1 to tgt_free/2
- * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys)
- * => 1+tgt_free/2 to LOCALFREE_TARGET will be removed by LRU
+/* Verify that unreferenced elements are recycled before referenced ones.
+ * Insert elements.
+ * Reference a subset of these.
+ * Insert more, enough to trigger recycling.
+ * Verify that unreferenced are recycled.
*/
static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -257,7 +263,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
batch_size = tgt_free / 2;
assert(batch_size * 2 == tgt_free);
- map_size = tgt_free + batch_size;
+ map_size = __map_size(tgt_free) + batch_size;
lru_map_fd = create_map(map_type, map_flags, map_size);
assert(lru_map_fd != -1);
@@ -266,13 +272,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to tgt_free (+tgt_free keys) */
- end_key = 1 + tgt_free;
+ /* Insert map_size - batch_size keys */
+ end_key = 1 + __map_size(tgt_free);
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Lookup 1 to tgt_free/2 */
+ /* Lookup 1 to batch_size */
end_key = 1 + batch_size;
for (key = 1; key < end_key; key++) {
assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
@@ -280,12 +286,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
BPF_NOEXIST));
}
- /* Insert 1+tgt_free to 2*tgt_free
- * => 1+tgt_free/2 to LOCALFREE_TARGET will be
+ /* Insert another map_size - batch_size keys
+ * Map will contain 1 to batch_size plus these latest, i.e.,
+ * => previous 1+batch_size to map_size - batch_size will have been
* removed by LRU
*/
- key = 1 + tgt_free;
- end_key = key + tgt_free;
+ key = 1 + __map_size(tgt_free);
+ end_key = key + __map_size(tgt_free);
for (; key < end_key; key++) {
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -301,17 +308,8 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
printf("Pass\n");
}
-/* Size of the LRU map 1.5 * tgt_free
- * Insert 1 to tgt_free (+tgt_free keys)
- * Update 1 to tgt_free/2
- * => The original 1 to tgt_free/2 will be removed due to
- * the LRU shrink process
- * Re-insert 1 to tgt_free/2 again and do a lookup immeidately
- * Insert 1+tgt_free to tgt_free*3/2
- * Insert 1+tgt_free*3/2 to tgt_free*5/2
- * => Key 1+tgt_free to tgt_free*3/2
- * will be removed from LRU because it has never
- * been lookup and ref bit is not set
+/* Verify that insertions exceeding map size will recycle the oldest.
+ * Verify that unreferenced elements are recycled before referenced.
*/
static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -334,7 +332,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
batch_size = tgt_free / 2;
assert(batch_size * 2 == tgt_free);
- map_size = tgt_free + batch_size;
+ map_size = __map_size(tgt_free) + batch_size;
lru_map_fd = create_map(map_type, map_flags, map_size);
assert(lru_map_fd != -1);
@@ -343,8 +341,8 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to tgt_free (+tgt_free keys) */
- end_key = 1 + tgt_free;
+ /* Insert map_size - batch_size keys */
+ end_key = 1 + __map_size(tgt_free);
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -357,8 +355,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
* shrink the inactive list to get tgt_free
* number of free nodes.
*
- * Hence, the oldest key 1 to tgt_free/2
- * are removed from the LRU list.
+ * Hence, the oldest key is removed from the LRU list.
*/
key = 1;
if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
@@ -370,8 +367,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
BPF_EXIST));
}
- /* Re-insert 1 to tgt_free/2 again and do a lookup
- * immeidately.
+ /* Re-insert 1 to batch_size again and do a lookup immediately.
*/
end_key = 1 + batch_size;
value[0] = 4321;
@@ -387,17 +383,18 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1+tgt_free to tgt_free*3/2 */
- end_key = 1 + tgt_free + batch_size;
- for (key = 1 + tgt_free; key < end_key; key++)
+ /* Insert batch_size new elements */
+ key = 1 + __map_size(tgt_free);
+ end_key = key + batch_size;
+ for (; key < end_key; key++)
/* These newly added but not referenced keys will be
* gone during the next LRU shrink.
*/
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Insert 1+tgt_free*3/2 to tgt_free*5/2 */
- end_key = key + tgt_free;
+ /* Insert map_size - batch_size elements */
+ end_key += __map_size(tgt_free);
for (; key < end_key; key++) {
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -500,7 +497,8 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
lru_map_fd = create_map(map_type, map_flags,
3 * tgt_free * nr_cpus);
else
- lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free);
+ lru_map_fd = create_map(map_type, map_flags,
+ 3 * __map_size(tgt_free));
assert(lru_map_fd != -1);
expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0,
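
A closing worked check of the __map_size() helper introduced in the selftest
above (nr_cpus = 4 is an illustrative value only): sizing a map as
__map_size(tgt_free) makes the kernel's computed target_free come out to
exactly tgt_free, which is what keeps the batch-size arithmetic in these
tests valid.

#include <assert.h>

#define LOCAL_FREE_TARGET 128

int main(void)
{
	unsigned int nr_cpus = 4;			/* illustrative only */
	unsigned int tgt_free = 128;
	unsigned int map_size = tgt_free * nr_cpus * 2;	/* __map_size(): 1024 */
	unsigned int t = (map_size / nr_cpus) / 2;	/* kernel sizing rule: 128 */

	if (t > LOCAL_FREE_TARGET)
		t = LOCAL_FREE_TARGET;
	if (t < 1)
		t = 1;
	assert(t == tgt_free);	/* inverse holds: each batch frees tgt_free nodes */
	return 0;
}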