diff options
Diffstat (limited to 'kernel/bpf')
-rw-r--r-- | kernel/bpf/bpf_lsm.c | 3 | ||||
-rw-r--r-- | kernel/bpf/btf.c | 4 | ||||
-rw-r--r-- | kernel/bpf/hashtab.c | 4 | ||||
-rw-r--r-- | kernel/bpf/memalloc.c | 2 | ||||
-rw-r--r-- | kernel/bpf/offload.c | 3 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 24 | ||||
-rw-r--r-- | kernel/bpf/task_iter.c | 39 | ||||
-rw-r--r-- | kernel/bpf/trampoline.c | 4 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 56 |
9 files changed, 87 insertions, 52 deletions
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 9ea42a45da47..e14c822f8911 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -51,7 +51,6 @@ BTF_SET_END(bpf_lsm_current_hooks) */ BTF_SET_START(bpf_lsm_locked_sockopt_hooks) #ifdef CONFIG_SECURITY_NETWORK -BTF_ID(func, bpf_lsm_socket_sock_rcv_skb) BTF_ID(func, bpf_lsm_sock_graft) BTF_ID(func, bpf_lsm_inet_csk_clone) BTF_ID(func, bpf_lsm_inet_conn_established) @@ -351,8 +350,10 @@ BTF_ID(func, bpf_lsm_bpf_prog_alloc_security) BTF_ID(func, bpf_lsm_bpf_prog_free_security) BTF_ID(func, bpf_lsm_file_alloc_security) BTF_ID(func, bpf_lsm_file_free_security) +#ifdef CONFIG_SECURITY_NETWORK BTF_ID(func, bpf_lsm_sk_alloc_security) BTF_ID(func, bpf_lsm_sk_free_security) +#endif /* CONFIG_SECURITY_NETWORK */ BTF_ID(func, bpf_lsm_task_free) BTF_SET_END(untrusted_lsm_hooks) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index f7dd8af06413..b7017cae6fd1 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7782,9 +7782,9 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL); - return 0; end: - btf_free_dtor_kfunc_tab(btf); + if (ret) + btf_free_dtor_kfunc_tab(btf); btf_put(btf); return ret; } diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 5aa2b5525f79..66bded144377 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -152,7 +152,7 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab, { unsigned long flags; - hash = hash & HASHTAB_MAP_LOCK_MASK; + hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1); preempt_disable(); if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) { @@ -171,7 +171,7 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab, struct bucket *b, u32 hash, unsigned long flags) { - hash = hash & HASHTAB_MAP_LOCK_MASK; + hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1); raw_spin_unlock_irqrestore(&b->raw_lock, flags); __this_cpu_dec(*(htab->map_locked[hash])); preempt_enable(); diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index ebcc3dd0fa19..1db156405b68 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -71,7 +71,7 @@ static int bpf_mem_cache_idx(size_t size) if (size <= 192) return size_index[(size - 1) / 8] - 1; - return fls(size - 1) - 1; + return fls(size - 1) - 2; } #define NUM_CACHES 11 diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 13e4efc971e6..190d9f9dc987 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -216,9 +216,6 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog) if (offload->dev_state) offload->offdev->ops->destroy(prog); - /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */ - bpf_prog_free_id(prog, true); - list_del_init(&offload->offloads); kfree(offload); prog->aux->offload = NULL; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 64131f88c553..ecca9366c7a6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1972,7 +1972,7 @@ static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) return; if (audit_enabled == AUDIT_OFF) return; - if (op == BPF_AUDIT_LOAD) + if (!in_irq() && !irqs_disabled()) ctx = audit_context(); ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); if (unlikely(!ab)) @@ -2001,7 +2001,7 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog) return id > 0 ? 0 : id; } -void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) +void bpf_prog_free_id(struct bpf_prog *prog) { unsigned long flags; @@ -2013,18 +2013,10 @@ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) if (!prog->aux->id) return; - if (do_idr_lock) - spin_lock_irqsave(&prog_idr_lock, flags); - else - __acquire(&prog_idr_lock); - + spin_lock_irqsave(&prog_idr_lock, flags); idr_remove(&prog_idr, prog->aux->id); prog->aux->id = 0; - - if (do_idr_lock) - spin_unlock_irqrestore(&prog_idr_lock, flags); - else - __release(&prog_idr_lock); + spin_unlock_irqrestore(&prog_idr_lock, flags); } static void __bpf_prog_put_rcu(struct rcu_head *rcu) @@ -2067,17 +2059,15 @@ static void bpf_prog_put_deferred(struct work_struct *work) prog = aux->prog; perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); + bpf_prog_free_id(prog); __bpf_prog_put_noref(prog, true); } -static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) +static void __bpf_prog_put(struct bpf_prog *prog) { struct bpf_prog_aux *aux = prog->aux; if (atomic64_dec_and_test(&aux->refcnt)) { - /* bpf_prog_free_id() must be called first */ - bpf_prog_free_id(prog, do_idr_lock); - if (in_irq() || irqs_disabled()) { INIT_WORK(&aux->work, bpf_prog_put_deferred); schedule_work(&aux->work); @@ -2089,7 +2079,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) void bpf_prog_put(struct bpf_prog *prog) { - __bpf_prog_put(prog, true); + __bpf_prog_put(prog); } EXPORT_SYMBOL_GPL(bpf_prog_put); diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index c2a2182ce570..c4ab9d6cdbe9 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -438,6 +438,7 @@ struct bpf_iter_seq_task_vma_info { */ struct bpf_iter_seq_task_common common; struct task_struct *task; + struct mm_struct *mm; struct vm_area_struct *vma; u32 tid; unsigned long prev_vm_start; @@ -456,16 +457,19 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) enum bpf_task_vma_iter_find_op op; struct vm_area_struct *curr_vma; struct task_struct *curr_task; + struct mm_struct *curr_mm; u32 saved_tid = info->tid; /* If this function returns a non-NULL vma, it holds a reference to - * the task_struct, and holds read lock on vma->mm->mmap_lock. + * the task_struct, holds a refcount on mm->mm_users, and holds + * read lock on vma->mm->mmap_lock. * If this function returns NULL, it does not hold any reference or * lock. */ if (info->task) { curr_task = info->task; curr_vma = info->vma; + curr_mm = info->mm; /* In case of lock contention, drop mmap_lock to unblock * the writer. * @@ -504,13 +508,15 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) * 4.2) VMA2 and VMA2' covers different ranges, process * VMA2'. */ - if (mmap_lock_is_contended(curr_task->mm)) { + if (mmap_lock_is_contended(curr_mm)) { info->prev_vm_start = curr_vma->vm_start; info->prev_vm_end = curr_vma->vm_end; op = task_vma_iter_find_vma; - mmap_read_unlock(curr_task->mm); - if (mmap_read_lock_killable(curr_task->mm)) + mmap_read_unlock(curr_mm); + if (mmap_read_lock_killable(curr_mm)) { + mmput(curr_mm); goto finish; + } } else { op = task_vma_iter_next_vma; } @@ -535,42 +541,47 @@ again: op = task_vma_iter_find_vma; } - if (!curr_task->mm) + curr_mm = get_task_mm(curr_task); + if (!curr_mm) goto next_task; - if (mmap_read_lock_killable(curr_task->mm)) + if (mmap_read_lock_killable(curr_mm)) { + mmput(curr_mm); goto finish; + } } switch (op) { case task_vma_iter_first_vma: - curr_vma = find_vma(curr_task->mm, 0); + curr_vma = find_vma(curr_mm, 0); break; case task_vma_iter_next_vma: - curr_vma = find_vma(curr_task->mm, curr_vma->vm_end); + curr_vma = find_vma(curr_mm, curr_vma->vm_end); break; case task_vma_iter_find_vma: /* We dropped mmap_lock so it is necessary to use find_vma * to find the next vma. This is similar to the mechanism * in show_smaps_rollup(). */ - curr_vma = find_vma(curr_task->mm, info->prev_vm_end - 1); + curr_vma = find_vma(curr_mm, info->prev_vm_end - 1); /* case 1) and 4.2) above just use curr_vma */ /* check for case 2) or case 4.1) above */ if (curr_vma && curr_vma->vm_start == info->prev_vm_start && curr_vma->vm_end == info->prev_vm_end) - curr_vma = find_vma(curr_task->mm, curr_vma->vm_end); + curr_vma = find_vma(curr_mm, curr_vma->vm_end); break; } if (!curr_vma) { /* case 3) above, or case 2) 4.1) with vma->next == NULL */ - mmap_read_unlock(curr_task->mm); + mmap_read_unlock(curr_mm); + mmput(curr_mm); goto next_task; } info->task = curr_task; info->vma = curr_vma; + info->mm = curr_mm; return curr_vma; next_task: @@ -579,6 +590,7 @@ next_task: put_task_struct(curr_task); info->task = NULL; + info->mm = NULL; info->tid++; goto again; @@ -587,6 +599,7 @@ finish: put_task_struct(curr_task); info->task = NULL; info->vma = NULL; + info->mm = NULL; return NULL; } @@ -658,7 +671,9 @@ static void task_vma_seq_stop(struct seq_file *seq, void *v) */ info->prev_vm_start = ~0UL; info->prev_vm_end = info->vma->vm_end; - mmap_read_unlock(info->task->mm); + mmap_read_unlock(info->mm); + mmput(info->mm); + info->mm = NULL; put_task_struct(info->task); info->task = NULL; } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 11f5ec0b8016..d0ed7d6f5eec 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -488,6 +488,10 @@ again: /* reset fops->func and fops->trampoline for re-register */ tr->fops->func = NULL; tr->fops->trampoline = 0; + + /* reset im->image memory attr for arch_prepare_bpf_trampoline */ + set_memory_nx((long)im->image, 1); + set_memory_rw((long)im->image, 1); goto again; } #endif diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a5255a0dcbb6..7ee218827259 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1054,6 +1054,8 @@ static void print_insn_state(struct bpf_verifier_env *env, */ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags) { + size_t alloc_bytes; + void *orig = dst; size_t bytes; if (ZERO_OR_NULL_PTR(src)) @@ -1062,11 +1064,11 @@ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; - if (ksize(dst) < ksize(src)) { - kfree(dst); - dst = kmalloc_track_caller(kmalloc_size_roundup(bytes), flags); - if (!dst) - return NULL; + alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes)); + dst = krealloc(orig, alloc_bytes, flags); + if (!dst) { + kfree(orig); + return NULL; } memcpy(dst, src, bytes); @@ -2746,6 +2748,12 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, */ if (insn->src_reg == 0 && is_callback_calling_function(insn->imm)) return -ENOTSUPP; + /* kfunc with imm==0 is invalid and fixup_kfunc_call will + * catch this error later. Make backtracking conservative + * with ENOTSUPP. + */ + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0) + return -ENOTSUPP; /* regular helper call sets R0 */ *reg_mask &= ~1; if (*reg_mask & 0x3f) { @@ -3235,13 +3243,24 @@ static bool __is_pointer_value(bool allow_ptr_leaks, return reg->type != SCALAR_VALUE; } +/* Copy src state preserving dst->parent and dst->live fields */ +static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src) +{ + struct bpf_reg_state *parent = dst->parent; + enum bpf_reg_liveness live = dst->live; + + *dst = *src; + dst->parent = parent; + dst->live = live; +} + static void save_register_state(struct bpf_func_state *state, int spi, struct bpf_reg_state *reg, int size) { int i; - state->stack[spi].spilled_ptr = *reg; + copy_register_state(&state->stack[spi].spilled_ptr, reg); if (size == BPF_REG_SIZE) state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; @@ -3287,7 +3306,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, bool sanitize = reg && is_spillable_regtype(reg->type); for (i = 0; i < size; i++) { - if (state->stack[spi].slot_type[i] == STACK_INVALID) { + u8 type = state->stack[spi].slot_type[i]; + + if (type != STACK_MISC && type != STACK_ZERO) { sanitize = true; break; } @@ -3567,7 +3588,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, */ s32 subreg_def = state->regs[dst_regno].subreg_def; - state->regs[dst_regno] = *reg; + copy_register_state(&state->regs[dst_regno], reg); state->regs[dst_regno].subreg_def = subreg_def; } else { for (i = 0; i < size; i++) { @@ -3588,7 +3609,7 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, if (dst_regno >= 0) { /* restore register state from stack */ - state->regs[dst_regno] = *reg; + copy_register_state(&state->regs[dst_regno], reg); /* mark reg as written since spilled pointer state likely * has its liveness marks cleared by is_state_visited() * which resets stack/reg liveness for state transitions @@ -9582,7 +9603,7 @@ do_sim: */ if (!ptr_is_dst_reg) { tmp = *dst_reg; - *dst_reg = *ptr_reg; + copy_register_state(dst_reg, ptr_reg); } ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1, env->insn_idx); @@ -10835,7 +10856,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) * to propagate min/max range. */ src_reg->id = ++env->id_gen; - *dst_reg = *src_reg; + copy_register_state(dst_reg, src_reg); dst_reg->live |= REG_LIVE_WRITTEN; dst_reg->subreg_def = DEF_NOT_SUBREG; } else { @@ -10846,7 +10867,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->src_reg); return -EACCES; } else if (src_reg->type == SCALAR_VALUE) { - *dst_reg = *src_reg; + copy_register_state(dst_reg, src_reg); /* Make sure ID is cleared otherwise * dst_reg min/max could be incorrectly * propagated into src_reg by find_equal_scalars() @@ -11645,7 +11666,7 @@ static void find_equal_scalars(struct bpf_verifier_state *vstate, bpf_for_each_reg_in_vstate(vstate, state, reg, ({ if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) - *reg = *known_reg; + copy_register_state(reg, known_reg); })); } @@ -11822,10 +11843,17 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, * register B - not null * for JNE A, B, ... - A is not null in the false branch; * for JEQ A, B, ... - A is not null in the true branch. + * + * Since PTR_TO_BTF_ID points to a kernel struct that does + * not need to be null checked by the BPF program, i.e., + * could be null even without PTR_MAYBE_NULL marking, so + * only propagate nullness when neither reg is that type. */ if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X && __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) && - type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type)) { + type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) && + base_type(src_reg->type) != PTR_TO_BTF_ID && + base_type(dst_reg->type) != PTR_TO_BTF_ID) { eq_branch_regs = NULL; switch (opcode) { case BPF_JEQ: |