From 26662d7347a058ca497792c4b22ac91cc415cbf6 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Thu, 20 Apr 2023 00:14:12 -0700 Subject: bpf: Add bpf_dynptr_size bpf_dynptr_size returns the number of usable bytes in a dynptr. Signed-off-by: Joanne Koong Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20230420071414.570108-4-joannelkoong@gmail.com --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/bpf.h') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e53ceee1df37..456f33b9d205 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1197,7 +1197,7 @@ enum bpf_dynptr_type { }; int bpf_dynptr_check_size(u32 size); -u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr); +u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr); #ifdef CONFIG_BPF_JIT int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr); -- cgit v1.2.3 From 47e79cbeea4b3891ad476047f4c68543eb51c8e0 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Mon, 15 May 2023 13:08:48 +0000 Subject: bpf: Remove bpf trampoline selector After commit e21aa341785c ("bpf: Fix fexit trampoline."), the selector is only used to indicate how many times the bpf trampoline image are updated and been displayed in the trampoline ksym name. After the trampoline is freed, the selector will start from 0 again. So the selector is a useless value to the user. We can remove it. If the user want to check whether the bpf trampoline image has been updated or not, the user can compare the address. Each time the trampoline image is updated, the address will change consequently. Jiri also pointed out another issue that perf is still using the old name "bpf_trampoline_%lu", so this change can fix the issue in perf. Fixes: e21aa341785c ("bpf: Fix fexit trampoline.") Signed-off-by: Yafang Shao Signed-off-by: Daniel Borkmann Acked-by: Song Liu Cc: Jiri Olsa Link: https://lore.kernel.org/bpf/ZFvOOlrmHiY9AgXE@krava Link: https://lore.kernel.org/bpf/20230515130849.57502-3-laoar.shao@gmail.com --- include/linux/bpf.h | 1 - kernel/bpf/trampoline.c | 11 ++++------- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux/bpf.h') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 456f33b9d205..36e4b2d8cca2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1125,7 +1125,6 @@ struct bpf_trampoline { int progs_cnt[BPF_TRAMP_MAX]; /* Executable image of trampoline */ struct bpf_tramp_image *cur_image; - u64 selector; struct module *mod; }; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index ac021bc43a66..84850e66ce3d 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -344,7 +344,7 @@ static void bpf_tramp_image_put(struct bpf_tramp_image *im) call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks); } -static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx) +static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key) { struct bpf_tramp_image *im; struct bpf_ksym *ksym; @@ -371,7 +371,7 @@ static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx) ksym = &im->ksym; INIT_LIST_HEAD_RCU(&ksym->lnode); - snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu_%u", key, idx); + snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", key); bpf_image_ksym_add(image, ksym); return im; @@ -401,11 +401,10 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut err = unregister_fentry(tr, tr->cur_image->image); bpf_tramp_image_put(tr->cur_image); tr->cur_image = NULL; - tr->selector = 0; goto out; } - im = bpf_tramp_image_alloc(tr->key, tr->selector); + im = bpf_tramp_image_alloc(tr->key); if (IS_ERR(im)) { err = PTR_ERR(im); goto out; @@ -442,8 +441,7 @@ again: set_memory_rox((long)im->image, 1); - WARN_ON(tr->cur_image && tr->selector == 0); - WARN_ON(!tr->cur_image && tr->selector); + WARN_ON(tr->cur_image && total == 0); if (tr->cur_image) /* progs already running at this address */ err = modify_fentry(tr, tr->cur_image->image, im->image, lock_direct_mutex); @@ -473,7 +471,6 @@ again: if (tr->cur_image) bpf_tramp_image_put(tr->cur_image); tr->cur_image = im; - tr->selector++; out: /* If any error happens, restore previous flags */ if (err) -- cgit v1.2.3 From cb8edce28073a906401c9e421eca7c99f3396da1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 15 May 2023 16:48:06 -0700 Subject: bpf: Support O_PATH FDs in BPF_OBJ_PIN and BPF_OBJ_GET commands Current UAPI of BPF_OBJ_PIN and BPF_OBJ_GET commands of bpf() syscall forces users to specify pinning location as a string-based absolute or relative (to current working directory) path. This has various implications related to security (e.g., symlink-based attacks), forces BPF FS to be exposed in the file system, which can cause races with other applications. One of the feedbacks we got from folks working with containers heavily was that inability to use purely FD-based location specification was an unfortunate limitation and hindrance for BPF_OBJ_PIN and BPF_OBJ_GET commands. This patch closes this oversight, adding path_fd field to BPF_OBJ_PIN and BPF_OBJ_GET UAPI, following conventions established by *at() syscalls for dirfd + pathname combinations. This now allows interesting possibilities like working with detached BPF FS mount (e.g., to perform multiple pinnings without running a risk of someone interfering with them), and generally making pinning/getting more secure and not prone to any races and/or security attacks. This is demonstrated by a selftest added in subsequent patch that takes advantage of new mount APIs (fsopen, fsconfig, fsmount) to demonstrate creating detached BPF FS mount, pinning, and then getting BPF map out of it, all while never exposing this private instance of BPF FS to outside worlds. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Reviewed-by: Christian Brauner Link: https://lore.kernel.org/bpf/20230523170013.728457-4-andrii@kernel.org --- include/linux/bpf.h | 4 ++-- include/uapi/linux/bpf.h | 10 ++++++++++ kernel/bpf/inode.c | 16 ++++++++-------- kernel/bpf/syscall.c | 25 ++++++++++++++++++++----- tools/include/uapi/linux/bpf.h | 10 ++++++++++ 5 files changed, 50 insertions(+), 15 deletions(-) (limited to 'include/linux/bpf.h') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 36e4b2d8cca2..f58895830ada 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2077,8 +2077,8 @@ struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd); struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_curr_or_next(u32 *id); -int bpf_obj_pin_user(u32 ufd, const char __user *pathname); -int bpf_obj_get_user(const char __user *pathname, int flags); +int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); +int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); #define BPF_ITER_FUNC_PREFIX "bpf_iter_" #define DEFINE_BPF_ITER_FUNC(target, args...) \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 1bb11a6ee667..9273c654743c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1272,6 +1272,9 @@ enum { /* Create a map that will be registered/unregesitered by the backed bpf_link */ BPF_F_LINK = (1U << 13), + +/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ + BPF_F_PATH_FD = (1U << 14), }; /* Flags for BPF_PROG_QUERY. */ @@ -1420,6 +1423,13 @@ union bpf_attr { __aligned_u64 pathname; __u32 bpf_fd; __u32 file_flags; + /* Same as dirfd in openat() syscall; see openat(2) + * manpage for details of path FD and pathname semantics; + * path_fd should accompanied by BPF_F_PATH_FD flag set in + * file_flags field, otherwise it should be set to zero; + * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed. + */ + __s32 path_fd; }; struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 329f27d5cacf..4174f76133df 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -435,7 +435,7 @@ static int bpf_iter_link_pin_kernel(struct dentry *parent, return ret; } -static int bpf_obj_do_pin(const char __user *pathname, void *raw, +static int bpf_obj_do_pin(int path_fd, const char __user *pathname, void *raw, enum bpf_type type) { struct dentry *dentry; @@ -444,7 +444,7 @@ static int bpf_obj_do_pin(const char __user *pathname, void *raw, umode_t mode; int ret; - dentry = user_path_create(AT_FDCWD, pathname, &path, 0); + dentry = user_path_create(path_fd, pathname, &path, 0); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -477,7 +477,7 @@ out: return ret; } -int bpf_obj_pin_user(u32 ufd, const char __user *pathname) +int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname) { enum bpf_type type; void *raw; @@ -487,14 +487,14 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname) if (IS_ERR(raw)) return PTR_ERR(raw); - ret = bpf_obj_do_pin(pathname, raw, type); + ret = bpf_obj_do_pin(path_fd, pathname, raw, type); if (ret != 0) bpf_any_put(raw, type); return ret; } -static void *bpf_obj_do_get(const char __user *pathname, +static void *bpf_obj_do_get(int path_fd, const char __user *pathname, enum bpf_type *type, int flags) { struct inode *inode; @@ -502,7 +502,7 @@ static void *bpf_obj_do_get(const char __user *pathname, void *raw; int ret; - ret = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW, &path); + ret = user_path_at(path_fd, pathname, LOOKUP_FOLLOW, &path); if (ret) return ERR_PTR(ret); @@ -526,7 +526,7 @@ out: return ERR_PTR(ret); } -int bpf_obj_get_user(const char __user *pathname, int flags) +int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags) { enum bpf_type type = BPF_TYPE_UNSPEC; int f_flags; @@ -537,7 +537,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags) if (f_flags < 0) return f_flags; - raw = bpf_obj_do_get(pathname, &type, f_flags); + raw = bpf_obj_do_get(path_fd, pathname, &type, f_flags); if (IS_ERR(raw)) return PTR_ERR(raw); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b2621089904b..c7f6807215e6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2697,23 +2697,38 @@ free_prog: return err; } -#define BPF_OBJ_LAST_FIELD file_flags +#define BPF_OBJ_LAST_FIELD path_fd static int bpf_obj_pin(const union bpf_attr *attr) { - if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) + int path_fd; + + if (CHECK_ATTR(BPF_OBJ) || attr->file_flags & ~BPF_F_PATH_FD) + return -EINVAL; + + /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ + if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) return -EINVAL; - return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); + path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; + return bpf_obj_pin_user(attr->bpf_fd, path_fd, + u64_to_user_ptr(attr->pathname)); } static int bpf_obj_get(const union bpf_attr *attr) { + int path_fd; + if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || - attr->file_flags & ~BPF_OBJ_FLAG_MASK) + attr->file_flags & ~(BPF_OBJ_FLAG_MASK | BPF_F_PATH_FD)) + return -EINVAL; + + /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ + if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) return -EINVAL; - return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), + path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; + return bpf_obj_get_user(path_fd, u64_to_user_ptr(attr->pathname), attr->file_flags); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1bb11a6ee667..9273c654743c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1272,6 +1272,9 @@ enum { /* Create a map that will be registered/unregesitered by the backed bpf_link */ BPF_F_LINK = (1U << 13), + +/* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ + BPF_F_PATH_FD = (1U << 14), }; /* Flags for BPF_PROG_QUERY. */ @@ -1420,6 +1423,13 @@ union bpf_attr { __aligned_u64 pathname; __u32 bpf_fd; __u32 file_flags; + /* Same as dirfd in openat() syscall; see openat(2) + * manpage for details of path FD and pathname semantics; + * path_fd should accompanied by BPF_F_PATH_FD flag set in + * file_flags field, otherwise it should be set to zero; + * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed. + */ + __s32 path_fd; }; struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */ -- cgit v1.2.3