From 6fad274f06f038c29660aa53fbad14241c9fd976 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 21 Oct 2024 17:28:05 +0200 Subject: bpf: Add MEM_WRITE attribute Add a MEM_WRITE attribute for BPF helper functions which can be used in bpf_func_proto to annotate an argument type in order to let the verifier know that the helper writes into the memory passed as an argument. In the past MEM_UNINIT has been (ab)used for this function, but the latter merely tells the verifier that the passed memory can be uninitialized. There have been bugs with overloading the latter but aside from that there are also cases where the passed memory is read + written which currently cannot be expressed, see also 4b3786a6c539 ("bpf: Zero former ARG_PTR_TO_{LONG,INT} args in case of error"). Signed-off-by: Daniel Borkmann Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241021152809.33343-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1a43d06eab28..ca3f0a2e5ed5 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -111,7 +111,7 @@ const struct bpf_func_proto bpf_map_pop_elem_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, + .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE, }; BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value) @@ -124,7 +124,7 @@ const struct bpf_func_proto bpf_map_peek_elem_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT, + .arg2_type = ARG_PTR_TO_MAP_VALUE | MEM_UNINIT | MEM_WRITE, }; BPF_CALL_3(bpf_map_lookup_percpu_elem, struct bpf_map *, map, void *, key, u32, cpu) @@ -538,7 +538,7 @@ const struct bpf_func_proto bpf_strtol_proto = { .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, .arg4_size = sizeof(s64), }; @@ -566,7 +566,7 @@ const struct bpf_func_proto bpf_strtoul_proto = { .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, .arg2_type = ARG_CONST_SIZE, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_ALIGNED, + .arg4_type = ARG_PTR_TO_FIXED_SIZE_MEM | MEM_UNINIT | MEM_WRITE | MEM_ALIGNED, .arg4_size = sizeof(u64), }; @@ -1742,7 +1742,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = { .arg1_type = ARG_PTR_TO_UNINIT_MEM, .arg2_type = ARG_CONST_SIZE_OR_ZERO, .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT, + .arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT | MEM_WRITE, }; BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src, -- cgit v1.2.3 From 101ccfbabf4738041273ce64e2b116cf440dea13 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 30 Oct 2024 18:05:12 +0800 Subject: bpf: Free dynamically allocated bits in bpf_iter_bits_destroy() bpf_iter_bits_destroy() uses "kit->nr_bits <= 64" to check whether the bits are dynamically allocated. However, the check is incorrect and may cause a kmemleak as shown below: unreferenced object 0xffff88812628c8c0 (size 32): comm "swapper/0", pid 1, jiffies 4294727320 hex dump (first 32 bytes): b0 c1 55 f5 81 88 ff ff f0 f0 f0 f0 f0 f0 f0 f0 ..U........... f0 f0 f0 f0 f0 f0 f0 f0 00 00 00 00 00 00 00 00 .............. backtrace (crc 781e32cc): [<00000000c452b4ab>] kmemleak_alloc+0x4b/0x80 [<0000000004e09f80>] __kmalloc_node_noprof+0x480/0x5c0 [<00000000597124d6>] __alloc.isra.0+0x89/0xb0 [<000000004ebfffcd>] alloc_bulk+0x2af/0x720 [<00000000d9c10145>] prefill_mem_cache+0x7f/0xb0 [<00000000ff9738ff>] bpf_mem_alloc_init+0x3e2/0x610 [<000000008b616eac>] bpf_global_ma_init+0x19/0x30 [<00000000fc473efc>] do_one_initcall+0xd3/0x3c0 [<00000000ec81498c>] kernel_init_freeable+0x66a/0x940 [<00000000b119f72f>] kernel_init+0x20/0x160 [<00000000f11ac9a7>] ret_from_fork+0x3c/0x70 [<0000000004671da4>] ret_from_fork_asm+0x1a/0x30 That is because nr_bits will be set as zero in bpf_iter_bits_next() after all bits have been iterated. Fix the issue by setting kit->bit to kit->nr_bits instead of setting kit->nr_bits to zero when the iteration completes in bpf_iter_bits_next(). In addition, use "!nr_bits || bits >= nr_bits" to check whether the iteration is complete and still use "nr_bits > 64" to indicate whether bits are dynamically allocated. The "!nr_bits" check is necessary because bpf_iter_bits_new() may fail before setting kit->nr_bits, and this condition will stop the iteration early instead of accessing the zeroed or freed kit->bits. Considering the initial value of kit->bits is -1 and the type of kit->nr_bits is unsigned int, change the type of kit->nr_bits to int. The potential overflow problem will be handled in the following patch. Fixes: 4665415975b0 ("bpf: Add bits iterator") Acked-by: Yafang Shao Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20241030100516.3633640-2-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index ca3f0a2e5ed5..d913a8f1fbd9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2856,7 +2856,7 @@ struct bpf_iter_bits_kern { unsigned long *bits; unsigned long bits_copy; }; - u32 nr_bits; + int nr_bits; int bit; } __aligned(8); @@ -2930,17 +2930,16 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w __bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it) { struct bpf_iter_bits_kern *kit = (void *)it; - u32 nr_bits = kit->nr_bits; + int bit = kit->bit, nr_bits = kit->nr_bits; const unsigned long *bits; - int bit; - if (nr_bits == 0) + if (!nr_bits || bit >= nr_bits) return NULL; bits = nr_bits == 64 ? &kit->bits_copy : kit->bits; - bit = find_next_bit(bits, nr_bits, kit->bit + 1); + bit = find_next_bit(bits, nr_bits, bit + 1); if (bit >= nr_bits) { - kit->nr_bits = 0; + kit->bit = bit; return NULL; } -- cgit v1.2.3 From 393397fbdcad7396639d7077c33f86169184ba99 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 30 Oct 2024 18:05:14 +0800 Subject: bpf: Check the validity of nr_words in bpf_iter_bits_new() Check the validity of nr_words in bpf_iter_bits_new(). Without this check, when multiplication overflow occurs for nr_bits (e.g., when nr_words = 0x0400-0001, nr_bits becomes 64), stack corruption may occur due to bpf_probe_read_kernel_common(..., nr_bytes = 0x2000-0008). Fix it by limiting the maximum value of nr_words to 511. The value is derived from the current implementation of BPF memory allocator. To ensure compatibility if the BPF memory allocator's size limitation changes in the future, use the helper bpf_mem_alloc_check_size() to check whether nr_bytes is too larger. And return -E2BIG instead of -ENOMEM for oversized nr_bytes. Fixes: 4665415975b0 ("bpf: Add bits iterator") Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20241030100516.3633640-4-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index d913a8f1fbd9..018985ebc5ce 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2851,6 +2851,8 @@ struct bpf_iter_bits { __u64 __opaque[2]; } __aligned(8); +#define BITS_ITER_NR_WORDS_MAX 511 + struct bpf_iter_bits_kern { union { unsigned long *bits; @@ -2865,7 +2867,8 @@ struct bpf_iter_bits_kern { * @it: The new bpf_iter_bits to be created * @unsafe_ptr__ign: A pointer pointing to a memory area to be iterated over * @nr_words: The size of the specified memory area, measured in 8-byte units. - * Due to the limitation of memalloc, it can't be greater than 512. + * The maximum value of @nr_words is @BITS_ITER_NR_WORDS_MAX. This limit may be + * further reduced by the BPF memory allocator implementation. * * This function initializes a new bpf_iter_bits structure for iterating over * a memory area which is specified by the @unsafe_ptr__ign and @nr_words. It @@ -2892,6 +2895,8 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w if (!unsafe_ptr__ign || !nr_words) return -EINVAL; + if (nr_words > BITS_ITER_NR_WORDS_MAX) + return -E2BIG; /* Optimization for u64 mask */ if (nr_bits == 64) { @@ -2903,6 +2908,9 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w return 0; } + if (bpf_mem_alloc_check_size(false, nr_bytes)) + return -E2BIG; + /* Fallback to memalloc */ kit->bits = bpf_mem_alloc(&bpf_global_ma, nr_bytes); if (!kit->bits) -- cgit v1.2.3 From e1339383675063ae4760d81ffe13a79981841b8d Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Wed, 30 Oct 2024 18:05:15 +0800 Subject: bpf: Use __u64 to save the bits in bits iterator On 32-bit hosts (e.g., arm32), when a bpf program passes a u64 to bpf_iter_bits_new(), bpf_iter_bits_new() will use bits_copy to store the content of the u64. However, bits_copy is only 4 bytes, leading to stack corruption. The straightforward solution would be to replace u64 with unsigned long in bpf_iter_bits_new(). However, this introduces confusion and problems for 32-bit hosts because the size of ulong in bpf program is 8 bytes, but it is treated as 4-bytes after passed to bpf_iter_bits_new(). Fix it by changing the type of both bits and bit_count from unsigned long to u64. However, the change is not enough. The main reason is that bpf_iter_bits_next() uses find_next_bit() to find the next bit and the pointer passed to find_next_bit() is an unsigned long pointer instead of a u64 pointer. For 32-bit little-endian host, it is fine but it is not the case for 32-bit big-endian host. Because under 32-bit big-endian host, the first iterated unsigned long will be the bits 32-63 of the u64 instead of the expected bits 0-31. Therefore, in addition to changing the type, swap the two unsigned longs within the u64 for 32-bit big-endian host. Signed-off-by: Hou Tao Link: https://lore.kernel.org/r/20241030100516.3633640-5-houtao@huaweicloud.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) (limited to 'kernel/bpf/helpers.c') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 018985ebc5ce..3d45ebe8afb4 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2855,13 +2855,36 @@ struct bpf_iter_bits { struct bpf_iter_bits_kern { union { - unsigned long *bits; - unsigned long bits_copy; + __u64 *bits; + __u64 bits_copy; }; int nr_bits; int bit; } __aligned(8); +/* On 64-bit hosts, unsigned long and u64 have the same size, so passing + * a u64 pointer and an unsigned long pointer to find_next_bit() will + * return the same result, as both point to the same 8-byte area. + * + * For 32-bit little-endian hosts, using a u64 pointer or unsigned long + * pointer also makes no difference. This is because the first iterated + * unsigned long is composed of bits 0-31 of the u64 and the second unsigned + * long is composed of bits 32-63 of the u64. + * + * However, for 32-bit big-endian hosts, this is not the case. The first + * iterated unsigned long will be bits 32-63 of the u64, so swap these two + * ulong values within the u64. + */ +static void swap_ulong_in_u64(u64 *bits, unsigned int nr) +{ +#if (BITS_PER_LONG == 32) && defined(__BIG_ENDIAN) + unsigned int i; + + for (i = 0; i < nr; i++) + bits[i] = (bits[i] >> 32) | ((u64)(u32)bits[i] << 32); +#endif +} + /** * bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area * @it: The new bpf_iter_bits to be created @@ -2904,6 +2927,8 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w if (err) return -EFAULT; + swap_ulong_in_u64(&kit->bits_copy, nr_words); + kit->nr_bits = nr_bits; return 0; } @@ -2922,6 +2947,8 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_w return err; } + swap_ulong_in_u64(kit->bits, nr_words); + kit->nr_bits = nr_bits; return 0; } @@ -2939,7 +2966,7 @@ __bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it) { struct bpf_iter_bits_kern *kit = (void *)it; int bit = kit->bit, nr_bits = kit->nr_bits; - const unsigned long *bits; + const void *bits; if (!nr_bits || bit >= nr_bits) return NULL; -- cgit v1.2.3