From 5e7b30205cef80f6bb922e61834437ca7bff5837 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 4 Aug 2020 22:50:56 -0700 Subject: bpf: Change uapi for bpf iterator map elements Commit a5cbe05a6673 ("bpf: Implement bpf iterator for map elements") added bpf iterator support for map elements. The map element bpf iterator requires info to identify a particular map. In the above commit, the attr->link_create.target_fd is used to carry map_fd and an enum bpf_iter_link_info is added to uapi to specify the target_fd actually representing a map_fd: enum bpf_iter_link_info { BPF_ITER_LINK_UNSPEC = 0, BPF_ITER_LINK_MAP_FD = 1, MAX_BPF_ITER_LINK_INFO, }; This is an extensible approach as we can grow enumerator for pid, cgroup_id, etc. and we can unionize target_fd for pid, cgroup_id, etc. But in the future, there are chances that more complex customization may happen, e.g., for tasks, it could be filtered based on both cgroup_id and user_id. This patch changed the uapi to have fields __aligned_u64 iter_info; __u32 iter_info_len; for additional iter_info for link_create. The iter_info is defined as union bpf_iter_link_info { struct { __u32 map_fd; } map; }; So future extension for additional customization will be easier. The bpf_iter_link_info will be passed to target callback to validate and generic bpf_iter framework does not need to deal it any more. Note that map_fd = 0 will be considered invalid and -EBADF will be returned to user space. Fixes: a5cbe05a6673 ("bpf: Implement bpf iterator for map elements") Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200805055056.1457463-1-yhs@fb.com --- include/linux/bpf.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cef4ef0d2b4e..55f694b63164 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1214,15 +1214,17 @@ struct bpf_iter_aux_info { struct bpf_map *map; }; -typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog, - struct bpf_iter_aux_info *aux); +typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, + union bpf_iter_link_info *linfo, + struct bpf_iter_aux_info *aux); +typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux); #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; - bpf_iter_check_target_t check_target; + bpf_iter_attach_target_t attach_target; + bpf_iter_detach_target_t detach_target; u32 ctx_arg_info_size; - enum bpf_iter_link_info req_linfo; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; const struct bpf_iter_seq_info *seq_info; }; -- cgit v1.2.3 From 519a8a6cf91dda095be2d36216fc4ebc525270a1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Aug 2020 18:42:14 +0200 Subject: net: Revert "net: optimize the sockptr_t for unified kernel/user address spaces" This reverts commits 6d04fe15f78acdf8e32329e208552e226f7a8ae6 and a31edb2059ed4e498f9aa8230c734b59d0ad797a. It turns out the idea to share a single pointer for both kernel and user space address causes various kinds of problems. So use the slightly less optimal version that uses an extra bit, but which is guaranteed to be safe everywhere. Fixes: 6d04fe15f78a ("net: optimize the sockptr_t for unified kernel/user address spaces") Reported-by: Eric Dumazet Reported-by: John Stultz Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/linux/sockptr.h | 26 ++------------------------ net/ipv4/bpfilter/sockopt.c | 14 ++++++-------- net/socket.c | 6 +----- 3 files changed, 9 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index 96840def9d69..ea193414298b 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -8,26 +8,9 @@ #ifndef _LINUX_SOCKPTR_H #define _LINUX_SOCKPTR_H -#include #include #include -#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE -typedef union { - void *kernel; - void __user *user; -} sockptr_t; - -static inline bool sockptr_is_kernel(sockptr_t sockptr) -{ - return (unsigned long)sockptr.kernel >= TASK_SIZE; -} - -static inline sockptr_t KERNEL_SOCKPTR(void *p) -{ - return (sockptr_t) { .kernel = p }; -} -#else /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ typedef struct { union { void *kernel; @@ -45,15 +28,10 @@ static inline sockptr_t KERNEL_SOCKPTR(void *p) { return (sockptr_t) { .kernel = p, .is_kernel = true }; } -#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */ -static inline int __must_check init_user_sockptr(sockptr_t *sp, void __user *p, - size_t size) +static inline sockptr_t USER_SOCKPTR(void __user *p) { - if (!access_ok(p, size)) - return -EFAULT; - *sp = (sockptr_t) { .user = p }; - return 0; + return (sockptr_t) { .user = p }; } static inline bool sockptr_is_null(sockptr_t sockptr) diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c index 545b2640f019..1b34cb9a7708 100644 --- a/net/ipv4/bpfilter/sockopt.c +++ b/net/ipv4/bpfilter/sockopt.c @@ -57,18 +57,16 @@ int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval, return bpfilter_mbox_request(sk, optname, optval, optlen, true); } -int bpfilter_ip_get_sockopt(struct sock *sk, int optname, - char __user *user_optval, int __user *optlen) +int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval, + int __user *optlen) { - sockptr_t optval; - int err, len; + int len; if (get_user(len, optlen)) return -EFAULT; - err = init_user_sockptr(&optval, user_optval, len); - if (err) - return err; - return bpfilter_mbox_request(sk, optname, optval, len, false); + + return bpfilter_mbox_request(sk, optname, USER_SOCKPTR(optval), len, + false); } static int __init bpfilter_sockopt_init(void) diff --git a/net/socket.c b/net/socket.c index f4d5998bdcba..dbbe8ea7d395 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2095,7 +2095,7 @@ static bool sock_use_custom_sol_socket(const struct socket *sock) int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, int optlen) { - sockptr_t optval; + sockptr_t optval = USER_SOCKPTR(user_optval); char *kernel_optval = NULL; int err, fput_needed; struct socket *sock; @@ -2103,10 +2103,6 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, if (optlen < 0) return -EINVAL; - err = init_user_sockptr(&optval, user_optval, optlen); - if (err) - return err; - sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) return err; -- cgit v1.2.3 From 444da3f52407d74c9aa12187ac6b01f76ee47d62 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 10 Aug 2020 11:21:11 -0700 Subject: bitfield.h: don't compile-time validate _val in FIELD_FIT When ur_load_imm_any() is inlined into jeq_imm(), it's possible for the compiler to deduce a case where _val can only have the value of -1 at compile time. Specifically, /* struct bpf_insn: _s32 imm */ u64 imm = insn->imm; /* sign extend */ if (imm >> 32) { /* non-zero only if insn->imm is negative */ /* inlined from ur_load_imm_any */ u32 __imm = imm >> 32; /* therefore, always 0xffffffff */ if (__builtin_constant_p(__imm) && __imm > 255) compiletime_assert_XXX() This can result in tripping a BUILD_BUG_ON() in __BF_FIELD_CHECK() that checks that a given value is representable in one byte (interpreted as unsigned). FIELD_FIT() should return true or false at runtime for whether a value can fit for not. Don't break the build over a value that's too large for the mask. We'd prefer to keep the inlining and compiler optimizations though we know this case will always return false. Cc: stable@vger.kernel.org Fixes: 1697599ee301a ("bitfield.h: add FIELD_FIT() helper") Link: https://lore.kernel.org/kernel-hardening/CAK7LNASvb0UDJ0U5wkYYRzTAdnEs64HjXpEUL7d=V0CXiAXcNw@mail.gmail.com/ Reported-by: Masahiro Yamada Debugged-by: Sami Tolvanen Signed-off-by: Jakub Kicinski Signed-off-by: Nick Desaulniers Signed-off-by: David S. Miller --- include/linux/bitfield.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 48ea093ff04c..4e035aca6f7e 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -77,7 +77,7 @@ */ #define FIELD_FIT(_mask, _val) \ ({ \ - __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_FIT: "); \ + __BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: "); \ !((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \ }) -- cgit v1.2.3