diff options
Diffstat (limited to 'kernel/bpf')
-rw-r--r-- | kernel/bpf/bpf_lru_list.h | 2 | ||||
-rw-r--r-- | kernel/bpf/cgroup.c | 24 | ||||
-rw-r--r-- | kernel/bpf/core.c | 6 | ||||
-rw-r--r-- | kernel/bpf/cpumap.c | 2 | ||||
-rw-r--r-- | kernel/bpf/devmap.c | 1 | ||||
-rw-r--r-- | kernel/bpf/helpers.c | 89 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 29 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 74 |
8 files changed, 171 insertions, 56 deletions
diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h index f02504640e18..6b12f06ee18c 100644 --- a/kernel/bpf/bpf_lru_list.h +++ b/kernel/bpf/bpf_lru_list.h @@ -30,7 +30,7 @@ struct bpf_lru_node { struct bpf_lru_list { struct list_head lists[NR_BPF_LRU_LIST_T]; unsigned int counts[NR_BPF_LRU_LIST_COUNT]; - /* The next inacitve list rotation starts from here */ + /* The next inactive list rotation starts from here */ struct list_head *next_inactive_rotation; raw_spinlock_t lock ____cacheline_aligned_in_smp; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 977bc69bb1c5..bf634959885c 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1054,36 +1054,21 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, return !allow; } -EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission); static const struct bpf_func_proto * cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { - case BPF_FUNC_map_lookup_elem: - return &bpf_map_lookup_elem_proto; - case BPF_FUNC_map_update_elem: - return &bpf_map_update_elem_proto; - case BPF_FUNC_map_delete_elem: - return &bpf_map_delete_elem_proto; - case BPF_FUNC_map_push_elem: - return &bpf_map_push_elem_proto; - case BPF_FUNC_map_pop_elem: - return &bpf_map_pop_elem_proto; - case BPF_FUNC_map_peek_elem: - return &bpf_map_peek_elem_proto; case BPF_FUNC_get_current_uid_gid: return &bpf_get_current_uid_gid_proto; case BPF_FUNC_get_local_storage: return &bpf_get_local_storage_proto; case BPF_FUNC_get_current_cgroup_id: return &bpf_get_current_cgroup_id_proto; - case BPF_FUNC_trace_printk: - if (capable(CAP_SYS_ADMIN)) - return bpf_get_trace_printk_proto(); - /* fall through */ + case BPF_FUNC_perf_event_output: + return &bpf_event_output_data_proto; default: - return NULL; + return bpf_base_func_proto(func_id); } } @@ -1210,7 +1195,6 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, return ret == 1 ? 0 : -EPERM; } -EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl); #ifdef CONFIG_NET static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp, @@ -1315,7 +1299,6 @@ out: sockopt_free_buf(&ctx); return ret; } -EXPORT_SYMBOL(__cgroup_bpf_run_filter_setsockopt); int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, int optname, char __user *optval, @@ -1402,7 +1385,6 @@ out: sockopt_free_buf(&ctx); return ret; } -EXPORT_SYMBOL(__cgroup_bpf_run_filter_getsockopt); #endif static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 916f5132a984..6aa11de67315 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2136,6 +2136,11 @@ BPF_CALL_0(bpf_user_rnd_u32) return res; } +BPF_CALL_0(bpf_get_raw_cpu_id) +{ + return raw_smp_processor_id(); +} + /* Weak definitions of helper functions in case we don't have bpf syscall. */ const struct bpf_func_proto bpf_map_lookup_elem_proto __weak; const struct bpf_func_proto bpf_map_update_elem_proto __weak; @@ -2151,6 +2156,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; const struct bpf_func_proto bpf_get_numa_node_id_proto __weak; const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; +const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak; const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 70f71b154fa5..3fe0b006d2d2 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -469,7 +469,7 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value, return -EOVERFLOW; /* Make sure CPU is a valid possible cpu */ - if (!cpu_possible(key_cpu)) + if (key_cpu >= nr_cpumask_bits || !cpu_possible(key_cpu)) return -ENODEV; if (qsize == 0) { diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 58bdca5d978a..a51d9fb7a359 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -52,7 +52,6 @@ #define DEV_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) -#define DEV_MAP_BULK_SIZE 16 struct xdp_dev_bulk_queue { struct xdp_frame *q[DEV_MAP_BULK_SIZE]; struct list_head flush_node; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index bafc53ddd350..5c0290e0696e 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -151,7 +151,19 @@ BPF_CALL_0(bpf_ktime_get_ns) const struct bpf_func_proto bpf_ktime_get_ns_proto = { .func = bpf_ktime_get_ns, - .gpl_only = true, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +BPF_CALL_0(bpf_ktime_get_boot_ns) +{ + /* NMI safe access to clock boottime */ + return ktime_get_boot_fast_ns(); +} + +const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = { + .func = bpf_ktime_get_boot_ns, + .gpl_only = false, .ret_type = RET_INTEGER, }; @@ -562,3 +574,78 @@ const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = { .arg3_type = ARG_PTR_TO_UNINIT_MEM, .arg4_type = ARG_CONST_SIZE, }; + +static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { + .func = bpf_get_raw_cpu_id, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, + u64, flags, void *, data, u64, size) +{ + if (unlikely(flags & ~(BPF_F_INDEX_MASK))) + return -EINVAL; + + return bpf_event_output(map, flags, data, size, NULL, 0, NULL); +} + +const struct bpf_func_proto bpf_event_output_data_proto = { + .func = bpf_event_output_data, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, +}; + +const struct bpf_func_proto * +bpf_base_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_map_lookup_elem: + return &bpf_map_lookup_elem_proto; + case BPF_FUNC_map_update_elem: + return &bpf_map_update_elem_proto; + case BPF_FUNC_map_delete_elem: + return &bpf_map_delete_elem_proto; + case BPF_FUNC_map_push_elem: + return &bpf_map_push_elem_proto; + case BPF_FUNC_map_pop_elem: + return &bpf_map_pop_elem_proto; + case BPF_FUNC_map_peek_elem: + return &bpf_map_peek_elem_proto; + case BPF_FUNC_get_prandom_u32: + return &bpf_get_prandom_u32_proto; + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_raw_smp_processor_id_proto; + case BPF_FUNC_get_numa_node_id: + return &bpf_get_numa_node_id_proto; + case BPF_FUNC_tail_call: + return &bpf_tail_call_proto; + case BPF_FUNC_ktime_get_ns: + return &bpf_ktime_get_ns_proto; + case BPF_FUNC_ktime_get_boot_ns: + return &bpf_ktime_get_boot_ns_proto; + default: + break; + } + + if (!capable(CAP_SYS_ADMIN)) + return NULL; + + switch (func_id) { + case BPF_FUNC_spin_lock: + return &bpf_spin_lock_proto; + case BPF_FUNC_spin_unlock: + return &bpf_spin_unlock_proto; + case BPF_FUNC_trace_printk: + return bpf_get_trace_printk_proto(); + case BPF_FUNC_jiffies64: + return &bpf_jiffies64_proto; + default: + return NULL; + } +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 64783da34202..7626b8024471 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -586,9 +586,7 @@ static void bpf_map_mmap_open(struct vm_area_struct *vma) { struct bpf_map *map = vma->vm_file->private_data; - bpf_map_inc_with_uref(map); - - if (vma->vm_flags & VM_WRITE) { + if (vma->vm_flags & VM_MAYWRITE) { mutex_lock(&map->freeze_mutex); map->writecnt++; mutex_unlock(&map->freeze_mutex); @@ -600,13 +598,11 @@ static void bpf_map_mmap_close(struct vm_area_struct *vma) { struct bpf_map *map = vma->vm_file->private_data; - if (vma->vm_flags & VM_WRITE) { + if (vma->vm_flags & VM_MAYWRITE) { mutex_lock(&map->freeze_mutex); map->writecnt--; mutex_unlock(&map->freeze_mutex); } - - bpf_map_put_with_uref(map); } static const struct vm_operations_struct bpf_map_default_vmops = { @@ -635,14 +631,16 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) /* set default open/close callbacks */ vma->vm_ops = &bpf_map_default_vmops; vma->vm_private_data = map; + vma->vm_flags &= ~VM_MAYEXEC; + if (!(vma->vm_flags & VM_WRITE)) + /* disallow re-mapping with PROT_WRITE */ + vma->vm_flags &= ~VM_MAYWRITE; err = map->ops->map_mmap(map, vma); if (err) goto out; - bpf_map_inc_with_uref(map); - - if (vma->vm_flags & VM_WRITE) + if (vma->vm_flags & VM_MAYWRITE) map->writecnt++; out: mutex_unlock(&map->freeze_mutex); @@ -2285,7 +2283,7 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) } #endif -const struct file_operations bpf_link_fops = { +static const struct file_operations bpf_link_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = bpf_link_show_fdinfo, #endif @@ -3630,8 +3628,10 @@ static int link_update(union bpf_attr *attr) return PTR_ERR(link); new_prog = bpf_prog_get(attr->link_update.new_prog_fd); - if (IS_ERR(new_prog)) - return PTR_ERR(new_prog); + if (IS_ERR(new_prog)) { + ret = PTR_ERR(new_prog); + goto out_put_link; + } if (flags & BPF_F_REPLACE) { old_prog = bpf_prog_get(attr->link_update.old_prog_fd); @@ -3640,6 +3640,9 @@ static int link_update(union bpf_attr *attr) old_prog = NULL; goto out_put_progs; } + } else if (attr->link_update.old_prog_fd) { + ret = -EINVAL; + goto out_put_progs; } #ifdef CONFIG_CGROUP_BPF @@ -3655,6 +3658,8 @@ out_put_progs: bpf_prog_put(old_prog); if (ret) bpf_prog_put(new_prog); +out_put_link: + bpf_link_put(link); return ret; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 04c6630cc18f..91728e0f27eb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -168,6 +168,8 @@ struct bpf_verifier_stack_elem { int insn_idx; int prev_insn_idx; struct bpf_verifier_stack_elem *next; + /* length of verifier log at the time this state was pushed on stack */ + u32 log_pos; }; #define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192 @@ -283,6 +285,18 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, log->ubuf = NULL; } +static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos) +{ + char zero = 0; + + if (!bpf_verifier_log_needed(log)) + return; + + log->len_used = new_pos; + if (put_user(zero, log->ubuf + new_pos)) + log->ubuf = NULL; +} + /* log_level controls verbosity level of eBPF verifier. * bpf_verifier_log_write() is used to dump the verification trace to the log, * so the user can figure out what's wrong with the program @@ -846,7 +860,7 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi } static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, - int *insn_idx) + int *insn_idx, bool pop_log) { struct bpf_verifier_state *cur = env->cur_state; struct bpf_verifier_stack_elem *elem, *head = env->head; @@ -860,6 +874,8 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, if (err) return err; } + if (pop_log) + bpf_vlog_reset(&env->log, head->log_pos); if (insn_idx) *insn_idx = head->insn_idx; if (prev_insn_idx) @@ -887,6 +903,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, elem->insn_idx = insn_idx; elem->prev_insn_idx = prev_insn_idx; elem->next = env->head; + elem->log_pos = env->log.len_used; env->head = elem; env->stack_size++; err = copy_verifier_state(&elem->st, cur); @@ -915,7 +932,7 @@ err: free_verifier_state(env->cur_state, true); env->cur_state = NULL; /* pop all elements and return */ - while (!pop_stack(env, NULL, NULL)); + while (!pop_stack(env, NULL, NULL, false)); return NULL; } @@ -1255,8 +1272,7 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env, reg->type = SCALAR_VALUE; reg->var_off = tnum_unknown; reg->frameno = 0; - reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ? - true : false; + reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks; __mark_reg_unbounded(reg); } @@ -2119,6 +2135,15 @@ static bool register_is_const(struct bpf_reg_state *reg) return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off); } +static bool __is_pointer_value(bool allow_ptr_leaks, + const struct bpf_reg_state *reg) +{ + if (allow_ptr_leaks) + return false; + + return reg->type != SCALAR_VALUE; +} + static void save_register_state(struct bpf_func_state *state, int spi, struct bpf_reg_state *reg) { @@ -2309,6 +2334,16 @@ static int check_stack_read(struct bpf_verifier_env *env, * which resets stack/reg liveness for state transitions */ state->regs[value_regno].live |= REG_LIVE_WRITTEN; + } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) { + /* If value_regno==-1, the caller is asking us whether + * it is acceptable to use this value as a SCALAR_VALUE + * (e.g. for XADD). + * We must not allow unprivileged callers to do that + * with spilled pointers. + */ + verbose(env, "leaking pointer from stack off %d\n", + off); + return -EACCES; } mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); } else { @@ -2674,15 +2709,6 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, return -EACCES; } -static bool __is_pointer_value(bool allow_ptr_leaks, - const struct bpf_reg_state *reg) -{ - if (allow_ptr_leaks) - return false; - - return reg->type != SCALAR_VALUE; -} - static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno) { return cur_regs(env) + regno; @@ -3090,7 +3116,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, if (ret < 0) return ret; - if (atype == BPF_READ) { + if (atype == BPF_READ && value_regno >= 0) { if (ret == SCALAR_VALUE) { mark_reg_unknown(env, regs, value_regno); return 0; @@ -5600,7 +5626,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, { struct bpf_reg_state *regs = cur_regs(env); u8 opcode = BPF_OP(insn->code); - bool src_known, dst_known; + bool src_known; s64 smin_val, smax_val; u64 umin_val, umax_val; s32 s32_min_val, s32_max_val; @@ -5622,7 +5648,6 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, if (alu32) { src_known = tnum_subreg_is_const(src_reg.var_off); - dst_known = tnum_subreg_is_const(dst_reg->var_off); if ((src_known && (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) || s32_min_val > s32_max_val || u32_min_val > u32_max_val) { @@ -5634,7 +5659,6 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, } } else { src_known = tnum_is_const(src_reg.var_off); - dst_known = tnum_is_const(dst_reg->var_off); if ((src_known && (smin_val != smax_val || umin_val != umax_val)) || smin_val > smax_val || umin_val > umax_val) { @@ -8400,6 +8424,7 @@ static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev) static int do_check(struct bpf_verifier_env *env) { + bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); struct bpf_verifier_state *state = env->cur_state; struct bpf_insn *insns = env->prog->insnsi; struct bpf_reg_state *regs; @@ -8676,7 +8701,7 @@ static int do_check(struct bpf_verifier_env *env) process_bpf_exit: update_branch_counts(env, env->cur_state); err = pop_stack(env, &prev_insn_idx, - &env->insn_idx); + &env->insn_idx, pop_log); if (err < 0) { if (err != -ENOENT) return err; @@ -10199,6 +10224,7 @@ static void sanitize_insn_aux_data(struct bpf_verifier_env *env) static int do_check_common(struct bpf_verifier_env *env, int subprog) { + bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); struct bpf_verifier_state *state; struct bpf_reg_state *regs; int ret, i; @@ -10261,7 +10287,9 @@ out: free_verifier_state(env->cur_state, true); env->cur_state = NULL; } - while (!pop_stack(env, NULL, NULL)); + while (!pop_stack(env, NULL, NULL, false)); + if (!ret && pop_log) + bpf_vlog_reset(&env->log, 0); free_states(env); if (ret) /* clean aux data in case subprog was rejected */ @@ -10488,6 +10516,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) return -EINVAL; } env->ops = bpf_verifier_ops[tgt_prog->type]; + prog->expected_attach_type = tgt_prog->expected_attach_type; } if (!tgt_prog->jited) { verbose(env, "Can attach to only JITed progs\n"); @@ -10832,6 +10861,13 @@ err_release_maps: * them now. Otherwise free_used_maps() will release them. */ release_maps(env); + + /* extension progs temporarily inherit the attach_type of their targets + for verification purposes, so set it back to zero before returning + */ + if (env->prog->type == BPF_PROG_TYPE_EXT) + env->prog->expected_attach_type = 0; + *prog = env->prog; err_unlock: if (!is_priv) |