diff options
Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r-- | kernel/bpf/verifier.c | 1046 |
1 files changed, 816 insertions, 230 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5871aa78d01a..11e54dd8b6dd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -197,6 +197,7 @@ static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg); static void specialize_kfunc(struct bpf_verifier_env *env, u32 func_id, u16 offset, unsigned long *addr); +static bool is_trusted_reg(const struct bpf_reg_state *reg); static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux) { @@ -240,6 +241,12 @@ static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state) (poisoned ? BPF_MAP_KEY_POISON : 0ULL); } +static bool bpf_helper_call(const struct bpf_insn *insn) +{ + return insn->code == (BPF_JMP | BPF_CALL) && + insn->src_reg == 0; +} + static bool bpf_pseudo_call(const struct bpf_insn *insn) { return insn->code == (BPF_JMP | BPF_CALL) && @@ -273,11 +280,6 @@ struct bpf_call_arg_meta { struct btf_field *kptr_field; }; -struct btf_and_id { - struct btf *btf; - u32 btf_id; -}; - struct bpf_kfunc_call_arg_meta { /* In parameters */ struct btf *btf; @@ -296,10 +298,21 @@ struct bpf_kfunc_call_arg_meta { u64 value; bool found; } arg_constant; - union { - struct btf_and_id arg_obj_drop; - struct btf_and_id arg_refcount_acquire; - }; + + /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling, + * generally to pass info about user-defined local kptr types to later + * verification logic + * bpf_obj_drop + * Record the local kptr type to be drop'd + * bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type) + * Record the local kptr type to be refcount_incr'd and use + * arg_owning_ref to determine whether refcount_acquire should be + * fallible + */ + struct btf *arg_btf; + u32 arg_btf_id; + bool arg_owning_ref; + struct { struct btf_field *field; } arg_list_head; @@ -309,6 +322,7 @@ struct bpf_kfunc_call_arg_meta { struct { enum bpf_dynptr_type type; u32 id; + u32 ref_obj_id; } initialized_dynptr; struct { u8 spi; @@ -429,8 +443,11 @@ static bool type_may_be_null(u32 type) return type & PTR_MAYBE_NULL; } -static bool reg_type_not_null(enum bpf_reg_type type) +static bool reg_not_null(const struct bpf_reg_state *reg) { + enum bpf_reg_type type; + + type = reg->type; if (type_may_be_null(type)) return false; @@ -440,6 +457,7 @@ static bool reg_type_not_null(enum bpf_reg_type type) type == PTR_TO_MAP_VALUE || type == PTR_TO_MAP_KEY || type == PTR_TO_SOCK_COMMON || + (type == PTR_TO_BTF_ID && is_trusted_reg(reg)) || type == PTR_TO_MEM; } @@ -468,6 +486,13 @@ static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg) return rec; } +static bool subprog_is_global(const struct bpf_verifier_env *env, int subprog) +{ + struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux; + + return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL; +} + static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) { return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK); @@ -515,6 +540,8 @@ static bool is_dynptr_ref_function(enum bpf_func_id func_id) return func_id == BPF_FUNC_dynptr_data; } +static bool is_callback_calling_kfunc(u32 btf_id); + static bool is_callback_calling_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_for_each_map_elem || @@ -524,6 +551,11 @@ static bool is_callback_calling_function(enum bpf_func_id func_id) func_id == BPF_FUNC_user_ringbuf_drain; } +static bool is_async_callback_calling_function(enum bpf_func_id func_id) +{ + return func_id == BPF_FUNC_timer_set_callback; +} + static bool is_storage_get_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_sk_storage_get || @@ -604,9 +636,9 @@ static const char *reg_type_str(struct bpf_verifier_env *env, type & PTR_TRUSTED ? "trusted_" : "" ); - snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", + snprintf(env->tmp_str_buf, TMP_STR_BUF_LEN, "%s%s%s", prefix, str[base_type(type)], postfix); - return env->type_str_buf; + return env->tmp_str_buf; } static char slot_type_char[] = { @@ -847,11 +879,11 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi); static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg, - enum bpf_arg_type arg_type, int insn_idx) + enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id) { struct bpf_func_state *state = func(env, reg); enum bpf_dynptr_type type; - int spi, i, id, err; + int spi, i, err; spi = dynptr_get_spi(env, reg); if (spi < 0) @@ -887,7 +919,13 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_ if (dynptr_type_refcounted(type)) { /* The id is used to track proper releasing */ - id = acquire_reference_state(env, insn_idx); + int id; + + if (clone_ref_obj_id) + id = clone_ref_obj_id; + else + id = acquire_reference_state(env, insn_idx); + if (id < 0) return id; @@ -901,24 +939,15 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_ return 0; } -static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi) { - struct bpf_func_state *state = func(env, reg); - int spi, i; - - spi = dynptr_get_spi(env, reg); - if (spi < 0) - return spi; + int i; for (i = 0; i < BPF_REG_SIZE; i++) { state->stack[spi].slot_type[i] = STACK_INVALID; state->stack[spi - 1].slot_type[i] = STACK_INVALID; } - /* Invalidate any slices associated with this dynptr */ - if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) - WARN_ON_ONCE(release_reference(env, state->stack[spi].spilled_ptr.ref_obj_id)); - __mark_reg_not_init(env, &state->stack[spi].spilled_ptr); __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr); @@ -945,6 +974,50 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re */ state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN; +} + +static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + int spi, ref_obj_id, i; + + spi = dynptr_get_spi(env, reg); + if (spi < 0) + return spi; + + if (!dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) { + invalidate_dynptr(env, state, spi); + return 0; + } + + ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id; + + /* If the dynptr has a ref_obj_id, then we need to invalidate + * two things: + * + * 1) Any dynptrs with a matching ref_obj_id (clones) + * 2) Any slices derived from this dynptr. + */ + + /* Invalidate any slices associated with this dynptr */ + WARN_ON_ONCE(release_reference(env, ref_obj_id)); + + /* Invalidate any dynptr clones */ + for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) { + if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id) + continue; + + /* it should always be the case that if the ref obj id + * matches then the stack slot also belongs to a + * dynptr + */ + if (state->stack[i].slot_type[0] != STACK_DYNPTR) { + verbose(env, "verifier internal error: misconfigured ref_obj_id\n"); + return -EFAULT; + } + if (state->stack[i].spilled_ptr.dynptr.first_slot) + invalidate_dynptr(env, state, i); + } return 0; } @@ -1254,6 +1327,12 @@ static bool is_spilled_reg(const struct bpf_stack_state *stack) return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL; } +static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack) +{ + return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL && + stack->spilled_ptr.type == SCALAR_VALUE; +} + static void scrub_spilled_slot(u8 *stype) { if (*stype != STACK_INVALID) @@ -3144,12 +3223,172 @@ static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn) return btf_name_by_offset(desc_btf, func->name_off); } +static inline void bt_init(struct backtrack_state *bt, u32 frame) +{ + bt->frame = frame; +} + +static inline void bt_reset(struct backtrack_state *bt) +{ + struct bpf_verifier_env *env = bt->env; + + memset(bt, 0, sizeof(*bt)); + bt->env = env; +} + +static inline u32 bt_empty(struct backtrack_state *bt) +{ + u64 mask = 0; + int i; + + for (i = 0; i <= bt->frame; i++) + mask |= bt->reg_masks[i] | bt->stack_masks[i]; + + return mask == 0; +} + +static inline int bt_subprog_enter(struct backtrack_state *bt) +{ + if (bt->frame == MAX_CALL_FRAMES - 1) { + verbose(bt->env, "BUG subprog enter from frame %d\n", bt->frame); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + bt->frame++; + return 0; +} + +static inline int bt_subprog_exit(struct backtrack_state *bt) +{ + if (bt->frame == 0) { + verbose(bt->env, "BUG subprog exit from frame 0\n"); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + bt->frame--; + return 0; +} + +static inline void bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg) +{ + bt->reg_masks[frame] |= 1 << reg; +} + +static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg) +{ + bt->reg_masks[frame] &= ~(1 << reg); +} + +static inline void bt_set_reg(struct backtrack_state *bt, u32 reg) +{ + bt_set_frame_reg(bt, bt->frame, reg); +} + +static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg) +{ + bt_clear_frame_reg(bt, bt->frame, reg); +} + +static inline void bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot) +{ + bt->stack_masks[frame] |= 1ull << slot; +} + +static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot) +{ + bt->stack_masks[frame] &= ~(1ull << slot); +} + +static inline void bt_set_slot(struct backtrack_state *bt, u32 slot) +{ + bt_set_frame_slot(bt, bt->frame, slot); +} + +static inline void bt_clear_slot(struct backtrack_state *bt, u32 slot) +{ + bt_clear_frame_slot(bt, bt->frame, slot); +} + +static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame) +{ + return bt->reg_masks[frame]; +} + +static inline u32 bt_reg_mask(struct backtrack_state *bt) +{ + return bt->reg_masks[bt->frame]; +} + +static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame) +{ + return bt->stack_masks[frame]; +} + +static inline u64 bt_stack_mask(struct backtrack_state *bt) +{ + return bt->stack_masks[bt->frame]; +} + +static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg) +{ + return bt->reg_masks[bt->frame] & (1 << reg); +} + +static inline bool bt_is_slot_set(struct backtrack_state *bt, u32 slot) +{ + return bt->stack_masks[bt->frame] & (1ull << slot); +} + +/* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */ +static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask) +{ + DECLARE_BITMAP(mask, 64); + bool first = true; + int i, n; + + buf[0] = '\0'; + + bitmap_from_u64(mask, reg_mask); + for_each_set_bit(i, mask, 32) { + n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i); + first = false; + buf += n; + buf_sz -= n; + if (buf_sz < 0) + break; + } +} +/* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */ +static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask) +{ + DECLARE_BITMAP(mask, 64); + bool first = true; + int i, n; + + buf[0] = '\0'; + + bitmap_from_u64(mask, stack_mask); + for_each_set_bit(i, mask, 64) { + n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8); + first = false; + buf += n; + buf_sz -= n; + if (buf_sz < 0) + break; + } +} + /* For given verifier state backtrack_insn() is called from the last insn to * the first insn. Its purpose is to compute a bitmask of registers and * stack slots that needs precision in the parent verifier state. + * + * @idx is an index of the instruction we are currently processing; + * @subseq_idx is an index of the subsequent instruction that: + * - *would be* executed next, if jump history is viewed in forward order; + * - *was* processed previously during backtracking. */ -static int backtrack_insn(struct bpf_verifier_env *env, int idx, - u32 *reg_mask, u64 *stack_mask) +static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx, + struct backtrack_state *bt) { const struct bpf_insn_cbs cbs = { .cb_call = disasm_kfunc_name, @@ -3160,20 +3399,24 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, u8 class = BPF_CLASS(insn->code); u8 opcode = BPF_OP(insn->code); u8 mode = BPF_MODE(insn->code); - u32 dreg = 1u << insn->dst_reg; - u32 sreg = 1u << insn->src_reg; - u32 spi; + u32 dreg = insn->dst_reg; + u32 sreg = insn->src_reg; + u32 spi, i; if (insn->code == 0) return 0; if (env->log.level & BPF_LOG_LEVEL2) { - verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask); + fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt)); + verbose(env, "mark_precise: frame%d: regs=%s ", + bt->frame, env->tmp_str_buf); + fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt)); + verbose(env, "stack=%s before ", env->tmp_str_buf); verbose(env, "%d: ", idx); print_bpf_insn(&cbs, insn, env->allow_ptr_leaks); } if (class == BPF_ALU || class == BPF_ALU64) { - if (!(*reg_mask & dreg)) + if (!bt_is_reg_set(bt, dreg)) return 0; if (opcode == BPF_MOV) { if (BPF_SRC(insn->code) == BPF_X) { @@ -3181,8 +3424,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, * dreg needs precision after this insn * sreg needs precision before this insn */ - *reg_mask &= ~dreg; - *reg_mask |= sreg; + bt_clear_reg(bt, dreg); + bt_set_reg(bt, sreg); } else { /* dreg = K * dreg needs precision after this insn. @@ -3190,7 +3433,7 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, * as precise=true in this verifier state. * No further markings in parent are necessary */ - *reg_mask &= ~dreg; + bt_clear_reg(bt, dreg); } } else { if (BPF_SRC(insn->code) == BPF_X) { @@ -3198,15 +3441,15 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, * both dreg and sreg need precision * before this insn */ - *reg_mask |= sreg; + bt_set_reg(bt, sreg); } /* else dreg += K * dreg still needs precision before this insn */ } } else if (class == BPF_LDX) { - if (!(*reg_mask & dreg)) + if (!bt_is_reg_set(bt, dreg)) return 0; - *reg_mask &= ~dreg; + bt_clear_reg(bt, dreg); /* scalars can only be spilled into stack w/o losing precision. * Load from any other memory can be zero extended. @@ -3227,9 +3470,9 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; } - *stack_mask |= 1ull << spi; + bt_set_slot(bt, spi); } else if (class == BPF_STX || class == BPF_ST) { - if (*reg_mask & dreg) + if (bt_is_reg_set(bt, dreg)) /* stx & st shouldn't be using _scalar_ dst_reg * to access memory. It means backtracking * encountered a case of pointer subtraction. @@ -3244,20 +3487,92 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; } - if (!(*stack_mask & (1ull << spi))) + if (!bt_is_slot_set(bt, spi)) return 0; - *stack_mask &= ~(1ull << spi); + bt_clear_slot(bt, spi); if (class == BPF_STX) - *reg_mask |= sreg; + bt_set_reg(bt, sreg); } else if (class == BPF_JMP || class == BPF_JMP32) { - if (opcode == BPF_CALL) { - if (insn->src_reg == BPF_PSEUDO_CALL) - return -ENOTSUPP; - /* BPF helpers that invoke callback subprogs are - * equivalent to BPF_PSEUDO_CALL above + if (bpf_pseudo_call(insn)) { + int subprog_insn_idx, subprog; + + subprog_insn_idx = idx + insn->imm + 1; + subprog = find_subprog(env, subprog_insn_idx); + if (subprog < 0) + return -EFAULT; + + if (subprog_is_global(env, subprog)) { + /* check that jump history doesn't have any + * extra instructions from subprog; the next + * instruction after call to global subprog + * should be literally next instruction in + * caller program + */ + WARN_ONCE(idx + 1 != subseq_idx, "verifier backtracking bug"); + /* r1-r5 are invalidated after subprog call, + * so for global func call it shouldn't be set + * anymore + */ + if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { + verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + /* global subprog always sets R0 */ + bt_clear_reg(bt, BPF_REG_0); + return 0; + } else { + /* static subprog call instruction, which + * means that we are exiting current subprog, + * so only r1-r5 could be still requested as + * precise, r0 and r6-r10 or any stack slot in + * the current frame should be zero by now + */ + if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) { + verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + /* we don't track register spills perfectly, + * so fallback to force-precise instead of failing */ + if (bt_stack_mask(bt) != 0) + return -ENOTSUPP; + /* propagate r1-r5 to the caller */ + for (i = BPF_REG_1; i <= BPF_REG_5; i++) { + if (bt_is_reg_set(bt, i)) { + bt_clear_reg(bt, i); + bt_set_frame_reg(bt, bt->frame - 1, i); + } + } + if (bt_subprog_exit(bt)) + return -EFAULT; + return 0; + } + } else if ((bpf_helper_call(insn) && + is_callback_calling_function(insn->imm) && + !is_async_callback_calling_function(insn->imm)) || + (bpf_pseudo_kfunc_call(insn) && is_callback_calling_kfunc(insn->imm))) { + /* callback-calling helper or kfunc call, which means + * we are exiting from subprog, but unlike the subprog + * call handling above, we shouldn't propagate + * precision of r1-r5 (if any requested), as they are + * not actually arguments passed directly to callback + * subprogs */ - if (insn->src_reg == 0 && is_callback_calling_function(insn->imm)) + if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) { + verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + if (bt_stack_mask(bt) != 0) return -ENOTSUPP; + /* clear r1-r5 in callback subprog's mask */ + for (i = BPF_REG_1; i <= BPF_REG_5; i++) + bt_clear_reg(bt, i); + if (bt_subprog_exit(bt)) + return -EFAULT; + return 0; + } else if (opcode == BPF_CALL) { /* kfunc with imm==0 is invalid and fixup_kfunc_call will * catch this error later. Make backtracking conservative * with ENOTSUPP. @@ -3265,19 +3580,51 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0) return -ENOTSUPP; /* regular helper call sets R0 */ - *reg_mask &= ~1; - if (*reg_mask & 0x3f) { + bt_clear_reg(bt, BPF_REG_0); + if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { /* if backtracing was looking for registers R1-R5 * they should have been found already. */ - verbose(env, "BUG regs %x\n", *reg_mask); + verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; } } else if (opcode == BPF_EXIT) { - return -ENOTSUPP; + bool r0_precise; + + if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) { + /* if backtracing was looking for registers R1-R5 + * they should have been found already. + */ + verbose(env, "BUG regs %x\n", bt_reg_mask(bt)); + WARN_ONCE(1, "verifier backtracking bug"); + return -EFAULT; + } + + /* BPF_EXIT in subprog or callback always returns + * right after the call instruction, so by checking + * whether the instruction at subseq_idx-1 is subprog + * call or not we can distinguish actual exit from + * *subprog* from exit from *callback*. In the former + * case, we need to propagate r0 precision, if + * necessary. In the former we never do that. + */ + r0_precise = subseq_idx - 1 >= 0 && + bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) && + bt_is_reg_set(bt, BPF_REG_0); + + bt_clear_reg(bt, BPF_REG_0); + if (bt_subprog_enter(bt)) + return -EFAULT; + + if (r0_precise) + bt_set_reg(bt, BPF_REG_0); + /* r6-r9 and stack slots will stay set in caller frame + * bitmasks until we return back from callee(s) + */ + return 0; } else if (BPF_SRC(insn->code) == BPF_X) { - if (!(*reg_mask & (dreg | sreg))) + if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg)) return 0; /* dreg <cond> sreg * Both dreg and sreg need precision before @@ -3285,7 +3632,8 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, * before it would be equally necessary to * propagate it to dreg. */ - *reg_mask |= (sreg | dreg); + bt_set_reg(bt, dreg); + bt_set_reg(bt, sreg); /* else dreg <cond> K * Only dreg still needs precision before * this insn, so for the K-based conditional @@ -3293,9 +3641,9 @@ static int backtrack_insn(struct bpf_verifier_env *env, int idx, */ } } else if (class == BPF_LD) { - if (!(*reg_mask & dreg)) + if (!bt_is_reg_set(bt, dreg)) return 0; - *reg_mask &= ~dreg; + bt_clear_reg(bt, dreg); /* It's ld_imm64 or ld_abs or ld_ind. * For ld_imm64 no further tracking of precision * into parent is necessary @@ -3366,6 +3714,11 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env, struct bpf_reg_state *reg; int i, j; + if (env->log.level & BPF_LOG_LEVEL2) { + verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n", + st->curframe); + } + /* big hammer: mark all scalars precise in this path. * pop_stack may still get !precise scalars. * We also skip current state and go straight to first parent state, @@ -3377,17 +3730,25 @@ static void mark_all_scalars_precise(struct bpf_verifier_env *env, func = st->frame[i]; for (j = 0; j < BPF_REG_FP; j++) { reg = &func->regs[j]; - if (reg->type != SCALAR_VALUE) + if (reg->type != SCALAR_VALUE || reg->precise) continue; reg->precise = true; + if (env->log.level & BPF_LOG_LEVEL2) { + verbose(env, "force_precise: frame%d: forcing r%d to be precise\n", + i, j); + } } for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) { if (!is_spilled_reg(&func->stack[j])) continue; reg = &func->stack[j].spilled_ptr; - if (reg->type != SCALAR_VALUE) + if (reg->type != SCALAR_VALUE || reg->precise) continue; reg->precise = true; + if (env->log.level & BPF_LOG_LEVEL2) { + verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n", + i, -(j + 1) * 8); + } } } } @@ -3418,6 +3779,96 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_ } } +static bool idset_contains(struct bpf_idset *s, u32 id) +{ + u32 i; + + for (i = 0; i < s->count; ++i) + if (s->ids[i] == id) + return true; + + return false; +} + +static int idset_push(struct bpf_idset *s, u32 id) +{ + if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids))) + return -EFAULT; + s->ids[s->count++] = id; + return 0; +} + +static void idset_reset(struct bpf_idset *s) +{ + s->count = 0; +} + +/* Collect a set of IDs for all registers currently marked as precise in env->bt. + * Mark all registers with these IDs as precise. + */ +static int mark_precise_scalar_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st) +{ + struct bpf_idset *precise_ids = &env->idset_scratch; + struct backtrack_state *bt = &env->bt; + struct bpf_func_state *func; + struct bpf_reg_state *reg; + DECLARE_BITMAP(mask, 64); + int i, fr; + + idset_reset(precise_ids); + + for (fr = bt->frame; fr >= 0; fr--) { + func = st->frame[fr]; + + bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr)); + for_each_set_bit(i, mask, 32) { + reg = &func->regs[i]; + if (!reg->id || reg->type != SCALAR_VALUE) + continue; + if (idset_push(precise_ids, reg->id)) + return -EFAULT; + } + + bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr)); + for_each_set_bit(i, mask, 64) { + if (i >= func->allocated_stack / BPF_REG_SIZE) + break; + if (!is_spilled_scalar_reg(&func->stack[i])) + continue; + reg = &func->stack[i].spilled_ptr; + if (!reg->id) + continue; + if (idset_push(precise_ids, reg->id)) + return -EFAULT; + } + } + + for (fr = 0; fr <= st->curframe; ++fr) { + func = st->frame[fr]; + + for (i = BPF_REG_0; i < BPF_REG_10; ++i) { + reg = &func->regs[i]; + if (!reg->id) + continue; + if (!idset_contains(precise_ids, reg->id)) + continue; + bt_set_frame_reg(bt, fr, i); + } + for (i = 0; i < func->allocated_stack / BPF_REG_SIZE; ++i) { + if (!is_spilled_scalar_reg(&func->stack[i])) + continue; + reg = &func->stack[i].spilled_ptr; + if (!reg->id) + continue; + if (!idset_contains(precise_ids, reg->id)) + continue; + bt_set_frame_slot(bt, fr, i); + } + } + + return 0; +} + /* * __mark_chain_precision() backtracks BPF program instruction sequence and * chain of verifier states making sure that register *regno* (if regno >= 0) @@ -3505,62 +3956,74 @@ static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_ * mark_all_scalars_imprecise() to hopefully get more permissive and generic * finalized states which help in short circuiting more future states. */ -static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int regno, - int spi) +static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) { + struct backtrack_state *bt = &env->bt; struct bpf_verifier_state *st = env->cur_state; int first_idx = st->first_insn_idx; int last_idx = env->insn_idx; + int subseq_idx = -1; struct bpf_func_state *func; struct bpf_reg_state *reg; - u32 reg_mask = regno >= 0 ? 1u << regno : 0; - u64 stack_mask = spi >= 0 ? 1ull << spi : 0; bool skip_first = true; - bool new_marks = false; - int i, err; + int i, fr, err; if (!env->bpf_capable) return 0; + /* set frame number from which we are starting to backtrack */ + bt_init(bt, env->cur_state->curframe); + /* Do sanity checks against current state of register and/or stack * slot, but don't set precise flag in current state, as precision * tracking in the current state is unnecessary. */ - func = st->frame[frame]; + func = st->frame[bt->frame]; if (regno >= 0) { reg = &func->regs[regno]; if (reg->type != SCALAR_VALUE) { WARN_ONCE(1, "backtracing misuse"); return -EFAULT; } - new_marks = true; + bt_set_reg(bt, regno); } - while (spi >= 0) { - if (!is_spilled_reg(&func->stack[spi])) { - stack_mask = 0; - break; - } - reg = &func->stack[spi].spilled_ptr; - if (reg->type != SCALAR_VALUE) { - stack_mask = 0; - break; - } - new_marks = true; - break; - } - - if (!new_marks) - return 0; - if (!reg_mask && !stack_mask) + if (bt_empty(bt)) return 0; for (;;) { DECLARE_BITMAP(mask, 64); u32 history = st->jmp_history_cnt; - if (env->log.level & BPF_LOG_LEVEL2) - verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx); + if (env->log.level & BPF_LOG_LEVEL2) { + verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n", + bt->frame, last_idx, first_idx, subseq_idx); + } + + /* If some register with scalar ID is marked as precise, + * make sure that all registers sharing this ID are also precise. + * This is needed to estimate effect of find_equal_scalars(). + * Do this at the last instruction of each state, + * bpf_reg_state::id fields are valid for these instructions. + * + * Allows to track precision in situation like below: + * + * r2 = unknown value + * ... + * --- state #0 --- + * ... + * r1 = r2 // r1 and r2 now share the same ID + * ... + * --- state #1 {r1.id = A, r2.id = A} --- + * ... + * if (r2 > 10) goto exit; // find_equal_scalars() assigns range to r1 + * ... + * --- state #2 {r1.id = A, r2.id = A} --- + * r3 = r10 + * r3 += r1 // need to mark both r1 and r2 + */ + if (mark_precise_scalar_ids(env, st)) + return -EFAULT; if (last_idx < 0) { /* we are at the entry into subprog, which @@ -3571,12 +4034,13 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r if (st->curframe == 0 && st->frame[0]->subprogno > 0 && st->frame[0]->callsite == BPF_MAIN_FUNC && - stack_mask == 0 && (reg_mask & ~0x3e) == 0) { - bitmap_from_u64(mask, reg_mask); + bt_stack_mask(bt) == 0 && + (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) { + bitmap_from_u64(mask, bt_reg_mask(bt)); for_each_set_bit(i, mask, 32) { reg = &st->frame[0]->regs[i]; if (reg->type != SCALAR_VALUE) { - reg_mask &= ~(1u << i); + bt_clear_reg(bt, i); continue; } reg->precise = true; @@ -3584,8 +4048,8 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r return 0; } - verbose(env, "BUG backtracing func entry subprog %d reg_mask %x stack_mask %llx\n", - st->frame[0]->subprogno, reg_mask, stack_mask); + verbose(env, "BUG backtracking func entry subprog %d reg_mask %x stack_mask %llx\n", + st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt)); WARN_ONCE(1, "verifier backtracking bug"); return -EFAULT; } @@ -3595,15 +4059,16 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r err = 0; skip_first = false; } else { - err = backtrack_insn(env, i, ®_mask, &stack_mask); + err = backtrack_insn(env, i, subseq_idx, bt); } if (err == -ENOTSUPP) { - mark_all_scalars_precise(env, st); + mark_all_scalars_precise(env, env->cur_state); + bt_reset(bt); return 0; } else if (err) { return err; } - if (!reg_mask && !stack_mask) + if (bt_empty(bt)) /* Found assignment(s) into tracked register in this state. * Since this state is already marked, just return. * Nothing to be tracked further in the parent state. @@ -3611,6 +4076,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r return 0; if (i == first_idx) break; + subseq_idx = i; i = get_prev_insn_idx(st, i, &history); if (i >= env->prog->len) { /* This can happen if backtracking reached insn 0 @@ -3628,84 +4094,95 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int r if (!st) break; - new_marks = false; - func = st->frame[frame]; - bitmap_from_u64(mask, reg_mask); - for_each_set_bit(i, mask, 32) { - reg = &func->regs[i]; - if (reg->type != SCALAR_VALUE) { - reg_mask &= ~(1u << i); - continue; + for (fr = bt->frame; fr >= 0; fr--) { + func = st->frame[fr]; + bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr)); + for_each_set_bit(i, mask, 32) { + reg = &func->regs[i]; + if (reg->type != SCALAR_VALUE) { + bt_clear_frame_reg(bt, fr, i); + continue; + } + if (reg->precise) + bt_clear_frame_reg(bt, fr, i); + else + reg->precise = true; } - if (!reg->precise) - new_marks = true; - reg->precise = true; - } - bitmap_from_u64(mask, stack_mask); - for_each_set_bit(i, mask, 64) { - if (i >= func->allocated_stack / BPF_REG_SIZE) { - /* the sequence of instructions: - * 2: (bf) r3 = r10 - * 3: (7b) *(u64 *)(r3 -8) = r0 - * 4: (79) r4 = *(u64 *)(r10 -8) - * doesn't contain jmps. It's backtracked - * as a single block. - * During backtracking insn 3 is not recognized as - * stack access, so at the end of backtracking - * stack slot fp-8 is still marked in stack_mask. - * However the parent state may not have accessed - * fp-8 and it's "unallocated" stack space. - * In such case fallback to conservative. - */ - mark_all_scalars_precise(env, st); - return 0; - } + bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr)); + for_each_set_bit(i, mask, 64) { + if (i >= func->allocated_stack / BPF_REG_SIZE) { + /* the sequence of instructions: + * 2: (bf) r3 = r10 + * 3: (7b) *(u64 *)(r3 -8) = r0 + * 4: (79) r4 = *(u64 *)(r10 -8) + * doesn't contain jmps. It's backtracked + * as a single block. + * During backtracking insn 3 is not recognized as + * stack access, so at the end of backtracking + * stack slot fp-8 is still marked in stack_mask. + * However the parent state may not have accessed + * fp-8 and it's "unallocated" stack space. + * In such case fallback to conservative. + */ + mark_all_scalars_precise(env, env->cur_state); + bt_reset(bt); + return 0; + } - if (!is_spilled_reg(&func->stack[i])) { - stack_mask &= ~(1ull << i); - continue; + if (!is_spilled_scalar_reg(&func->stack[i])) { + bt_clear_frame_slot(bt, fr, i); + continue; + } + reg = &func->stack[i].spilled_ptr; + if (reg->precise) + bt_clear_frame_slot(bt, fr, i); + else + reg->precise = true; } - reg = &func->stack[i].spilled_ptr; - if (reg->type != SCALAR_VALUE) { - stack_mask &= ~(1ull << i); - continue; + if (env->log.level & BPF_LOG_LEVEL2) { + fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, + bt_frame_reg_mask(bt, fr)); + verbose(env, "mark_precise: frame%d: parent state regs=%s ", + fr, env->tmp_str_buf); + fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, + bt_frame_stack_mask(bt, fr)); + verbose(env, "stack=%s: ", env->tmp_str_buf); + print_verifier_state(env, func, true); } - if (!reg->precise) - new_marks = true; - reg->precise = true; - } - if (env->log.level & BPF_LOG_LEVEL2) { - verbose(env, "parent %s regs=%x stack=%llx marks:", - new_marks ? "didn't have" : "already had", - reg_mask, stack_mask); - print_verifier_state(env, func, true); } - if (!reg_mask && !stack_mask) - break; - if (!new_marks) - break; + if (bt_empty(bt)) + return 0; + subseq_idx = first_idx; last_idx = st->last_insn_idx; first_idx = st->first_insn_idx; } + + /* if we still have requested precise regs or slots, we missed + * something (e.g., stack access through non-r10 register), so + * fallback to marking all precise + */ + if (!bt_empty(bt)) { + mark_all_scalars_precise(env, env->cur_state); + bt_reset(bt); + } + return 0; } int mark_chain_precision(struct bpf_verifier_env *env, int regno) { - return __mark_chain_precision(env, env->cur_state->curframe, regno, -1); -} - -static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno) -{ - return __mark_chain_precision(env, frame, regno, -1); + return __mark_chain_precision(env, regno); } -static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi) +/* mark_chain_precision_batch() assumes that env->bt is set in the caller to + * desired reg and stack masks across all relevant frames + */ +static int mark_chain_precision_batch(struct bpf_verifier_env *env) { - return __mark_chain_precision(env, frame, -1, spi); + return __mark_chain_precision(env, -1); } static bool is_spillable_regtype(enum bpf_reg_type type) @@ -3868,6 +4345,9 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, return err; } save_register_state(state, spi, reg, size); + /* Break the relation on a narrowing spill. */ + if (fls64(reg->umax_value) > BITS_PER_BYTE * size) + state->stack[spi].spilled_ptr.id = 0; } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) && insn->imm != 0 && env->bpf_capable) { struct bpf_reg_state fake_reg = {}; @@ -4067,6 +4547,7 @@ static void mark_reg_stack_read(struct bpf_verifier_env *env, for (i = min_off; i < max_off; i++) { slot = -i - 1; spi = slot / BPF_REG_SIZE; + mark_stack_slot_scratched(env, spi); stype = ptr_state->stack[spi].slot_type; if (stype[slot % BPF_REG_SIZE] != STACK_ZERO) break; @@ -4118,6 +4599,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, stype = reg_state->stack[spi].slot_type; reg = ®_state->stack[spi].spilled_ptr; + mark_stack_slot_scratched(env, spi); + if (is_spilled_reg(®_state->stack[spi])) { u8 spill_size = 1; @@ -5534,7 +6017,7 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env, * program allocated objects (which always have ref_obj_id > 0), * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC. */ - if (atype != BPF_READ && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) { + if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) { verbose(env, "only read is supported\n"); return -EACCES; } @@ -6677,7 +7160,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno, * type, and declare it as 'const struct bpf_dynptr *' in their prototype. */ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx, - enum bpf_arg_type arg_type) + enum bpf_arg_type arg_type, int clone_ref_obj_id) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; int err; @@ -6721,7 +7204,7 @@ static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn return err; } - err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx); + err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, clone_ref_obj_id); } else /* MEM_RDONLY and None case from above */ { /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */ if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) { @@ -7143,14 +7626,18 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno, * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL, * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL * + * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type. + * * Therefore we fold these flags depending on the arg_type before comparison. */ if (arg_type & MEM_RDONLY) type &= ~MEM_RDONLY; if (arg_type & PTR_MAYBE_NULL) type &= ~PTR_MAYBE_NULL; + if (base_type(arg_type) == ARG_PTR_TO_MEM) + type &= ~DYNPTR_TYPE_FLAG_MASK; - if (meta->func_id == BPF_FUNC_kptr_xchg && type & MEM_ALLOC) + if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) type &= ~MEM_ALLOC; for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { @@ -7631,7 +8118,7 @@ skip_type_check: err = check_mem_size_reg(env, reg, regno, true, meta); break; case ARG_PTR_TO_DYNPTR: - err = process_dynptr_func(env, regno, insn_idx, arg_type); + err = process_dynptr_func(env, regno, insn_idx, arg_type, 0); if (err) return err; break; @@ -8178,17 +8665,13 @@ static int set_callee_state(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee, int insn_idx); -static bool is_callback_calling_kfunc(u32 btf_id); - static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx, int subprog, set_callee_state_fn set_callee_state_cb) { struct bpf_verifier_state *state = env->cur_state; - struct bpf_func_info_aux *func_info_aux; struct bpf_func_state *caller, *callee; int err; - bool is_global = false; if (state->curframe + 1 >= MAX_CALL_FRAMES) { verbose(env, "the call stack of %d frames is too deep\n", @@ -8203,13 +8686,10 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EFAULT; } - func_info_aux = env->prog->aux->func_info_aux; - if (func_info_aux) - is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL; err = btf_check_subprog_call(env, subprog, caller->regs); if (err == -EFAULT) return err; - if (is_global) { + if (subprog_is_global(env, subprog)) { if (err) { verbose(env, "Caller passes invalid args into func#%d\n", subprog); @@ -9324,11 +9804,6 @@ static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta) return meta->kfunc_flags & KF_ACQUIRE; } -static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) -{ - return meta->kfunc_flags & KF_RET_NULL; -} - static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta) { return meta->kfunc_flags & KF_RELEASE; @@ -9398,6 +9873,11 @@ static bool is_kfunc_arg_const_mem_size(const struct btf *btf, return __kfunc_param_match_suffix(btf, arg, "__szk"); } +static bool is_kfunc_arg_optional(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__opt"); +} + static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg) { return __kfunc_param_match_suffix(btf, arg, "__k"); @@ -9595,6 +10075,7 @@ enum special_kfunc_type { KF_bpf_dynptr_from_xdp, KF_bpf_dynptr_slice, KF_bpf_dynptr_slice_rdwr, + KF_bpf_dynptr_clone, }; BTF_SET_START(special_kfunc_set) @@ -9614,6 +10095,7 @@ BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_xdp) BTF_ID(func, bpf_dynptr_slice) BTF_ID(func, bpf_dynptr_slice_rdwr) +BTF_ID(func, bpf_dynptr_clone) BTF_SET_END(special_kfunc_set) BTF_ID_LIST(special_kfunc_list) @@ -9635,6 +10117,17 @@ BTF_ID(func, bpf_dynptr_from_skb) BTF_ID(func, bpf_dynptr_from_xdp) BTF_ID(func, bpf_dynptr_slice) BTF_ID(func, bpf_dynptr_slice_rdwr) +BTF_ID(func, bpf_dynptr_clone) + +static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) +{ + if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] && + meta->arg_owning_ref) { + return false; + } + + return meta->kfunc_flags & KF_RET_NULL; +} static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta) { @@ -10113,6 +10606,8 @@ __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env, node_off, btf_name_by_offset(reg->btf, t->name_off)); return -EINVAL; } + meta->arg_btf = reg->btf; + meta->arg_btf_id = reg->btf_id; if (node_off != field->graph_root.node_offset) { verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n", @@ -10323,13 +10818,14 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } if (meta->btf == btf_vmlinux && meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { - meta->arg_obj_drop.btf = reg->btf; - meta->arg_obj_drop.btf_id = reg->btf_id; + meta->arg_btf = reg->btf; + meta->arg_btf_id = reg->btf_id; } break; case KF_ARG_PTR_TO_DYNPTR: { enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR; + int clone_ref_obj_id = 0; if (reg->type != PTR_TO_STACK && reg->type != CONST_PTR_TO_DYNPTR) { @@ -10343,12 +10839,28 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (is_kfunc_arg_uninit(btf, &args[i])) dynptr_arg_type |= MEM_UNINIT; - if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) + if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) { dynptr_arg_type |= DYNPTR_TYPE_SKB; - else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) + } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) { dynptr_arg_type |= DYNPTR_TYPE_XDP; + } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] && + (dynptr_arg_type & MEM_UNINIT)) { + enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type; + + if (parent_type == BPF_DYNPTR_TYPE_INVALID) { + verbose(env, "verifier internal error: no dynptr type for parent of clone\n"); + return -EFAULT; + } + + dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type); + clone_ref_obj_id = meta->initialized_dynptr.ref_obj_id; + if (dynptr_type_refcounted(parent_type) && !clone_ref_obj_id) { + verbose(env, "verifier internal error: missing ref obj id for parent of clone\n"); + return -EFAULT; + } + } - ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type); + ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type, clone_ref_obj_id); if (ret < 0) return ret; @@ -10361,6 +10873,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ } meta->initialized_dynptr.id = id; meta->initialized_dynptr.type = dynptr_get_type(env, reg); + meta->initialized_dynptr.ref_obj_id = dynptr_ref_obj_id(env, reg); } break; @@ -10464,13 +10977,17 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; case KF_ARG_PTR_TO_MEM_SIZE: { + struct bpf_reg_state *buff_reg = ®s[regno]; + const struct btf_param *buff_arg = &args[i]; struct bpf_reg_state *size_reg = ®s[regno + 1]; const struct btf_param *size_arg = &args[i + 1]; - ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1); - if (ret < 0) { - verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1); - return ret; + if (!register_is_null(buff_reg) || !is_kfunc_arg_optional(meta->btf, buff_arg)) { + ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1); + if (ret < 0) { + verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1); + return ret; + } } if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) { @@ -10494,10 +11011,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ meta->subprogno = reg->subprogno; break; case KF_ARG_PTR_TO_REFCOUNTED_KPTR: - if (!type_is_ptr_alloc_obj(reg->type) && !type_is_non_owning_ref(reg->type)) { + if (!type_is_ptr_alloc_obj(reg->type)) { verbose(env, "arg#%d is neither owning or non-owning ref\n", i); return -EINVAL; } + if (!type_is_non_owning_ref(reg->type)) + meta->arg_owning_ref = true; rec = reg_btf_record(reg); if (!rec) { @@ -10513,8 +11032,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ verbose(env, "bpf_refcount_acquire calls are disabled for now\n"); return -EINVAL; } - meta->arg_refcount_acquire.btf = reg->btf; - meta->arg_refcount_acquire.btf_id = reg->btf_id; + meta->arg_btf = reg->btf; + meta->arg_btf_id = reg->btf_id; break; } } @@ -10555,7 +11074,7 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env, *kfunc_name = func_name; func_proto = btf_type_by_id(desc_btf, func->type); - kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id); + kfunc_flags = btf_kfunc_id_set_contains(desc_btf, func_id, env->prog); if (!kfunc_flags) { return -EACCES; } @@ -10660,6 +11179,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { release_ref_obj_id = regs[BPF_REG_2].ref_obj_id; insn_aux->insert_off = regs[BPF_REG_2].off; + insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id); err = ref_convert_owning_non_owning(env, release_ref_obj_id); if (err) { verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n", @@ -10746,12 +11266,12 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) { mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC; - regs[BPF_REG_0].btf = meta.arg_refcount_acquire.btf; - regs[BPF_REG_0].btf_id = meta.arg_refcount_acquire.btf_id; + regs[BPF_REG_0].btf = meta.arg_btf; + regs[BPF_REG_0].btf_id = meta.arg_btf_id; insn_aux->kptr_struct_meta = - btf_find_struct_meta(meta.arg_refcount_acquire.btf, - meta.arg_refcount_acquire.btf_id); + btf_find_struct_meta(meta.arg_btf, + meta.arg_btf_id); } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] || meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) { struct btf_field *field = meta.arg_list_head.field; @@ -10881,8 +11401,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) { if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) { insn_aux->kptr_struct_meta = - btf_find_struct_meta(meta.arg_obj_drop.btf, - meta.arg_obj_drop.btf_id); + btf_find_struct_meta(meta.arg_btf, + meta.arg_btf_id); } } } @@ -12417,12 +12937,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) if (BPF_SRC(insn->code) == BPF_X) { struct bpf_reg_state *src_reg = regs + insn->src_reg; struct bpf_reg_state *dst_reg = regs + insn->dst_reg; + bool need_id = src_reg->type == SCALAR_VALUE && !src_reg->id && + !tnum_is_const(src_reg->var_off); if (BPF_CLASS(insn->code) == BPF_ALU64) { /* case: R1 = R2 * copy register state to dest reg */ - if (src_reg->type == SCALAR_VALUE && !src_reg->id) + if (need_id) /* Assign src and dst registers the same ID * that will be used by find_equal_scalars() * to propagate min/max range. @@ -12441,7 +12963,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } else if (src_reg->type == SCALAR_VALUE) { bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX; - if (is_src_reg_u32 && !src_reg->id) + if (is_src_reg_u32 && need_id) src_reg->id = ++env->id_gen; copy_register_state(dst_reg, src_reg); /* Make sure ID is cleared if src_reg is not in u32 range otherwise @@ -12773,7 +13295,7 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, bool is_jmp32) { if (__is_pointer_value(false, reg)) { - if (!reg_type_not_null(reg->type)) + if (!reg_not_null(reg)) return -1; /* If pointer is valid tests against zero will fail so we can @@ -14597,8 +15119,9 @@ static bool range_within(struct bpf_reg_state *old, * So we look through our idmap to see if this old id has been seen before. If * so, we require the new id to match; otherwise, we add the id pair to the map. */ -static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap) +static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap) { + struct bpf_id_pair *map = idmap->map; unsigned int i; /* either both IDs should be set or both should be zero */ @@ -14609,20 +15132,34 @@ static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap) return true; for (i = 0; i < BPF_ID_MAP_SIZE; i++) { - if (!idmap[i].old) { + if (!map[i].old) { /* Reached an empty slot; haven't seen this id before */ - idmap[i].old = old_id; - idmap[i].cur = cur_id; + map[i].old = old_id; + map[i].cur = cur_id; return true; } - if (idmap[i].old == old_id) - return idmap[i].cur == cur_id; + if (map[i].old == old_id) + return map[i].cur == cur_id; + if (map[i].cur == cur_id) + return false; } /* We ran out of idmap slots, which should be impossible */ WARN_ON_ONCE(1); return false; } +/* Similar to check_ids(), but allocate a unique temporary ID + * for 'old_id' or 'cur_id' of zero. + * This makes pairs like '0 vs unique ID', 'unique ID vs 0' valid. + */ +static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap) +{ + old_id = old_id ? old_id : ++idmap->tmp_id_gen; + cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen; + + return check_ids(old_id, cur_id, idmap); +} + static void clean_func_state(struct bpf_verifier_env *env, struct bpf_func_state *st) { @@ -14721,7 +15258,7 @@ next: static bool regs_exact(const struct bpf_reg_state *rold, const struct bpf_reg_state *rcur, - struct bpf_id_pair *idmap) + struct bpf_idmap *idmap) { return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && check_ids(rold->id, rcur->id, idmap) && @@ -14730,7 +15267,7 @@ static bool regs_exact(const struct bpf_reg_state *rold, /* Returns true if (rold safe implies rcur safe) */ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, - struct bpf_reg_state *rcur, struct bpf_id_pair *idmap) + struct bpf_reg_state *rcur, struct bpf_idmap *idmap) { if (!(rold->live & REG_LIVE_READ)) /* explored state didn't use this */ @@ -14767,15 +15304,42 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, switch (base_type(rold->type)) { case SCALAR_VALUE: - if (regs_exact(rold, rcur, idmap)) - return true; - if (env->explore_alu_limits) - return false; + if (env->explore_alu_limits) { + /* explore_alu_limits disables tnum_in() and range_within() + * logic and requires everything to be strict + */ + return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && + check_scalar_ids(rold->id, rcur->id, idmap); + } if (!rold->precise) return true; - /* new val must satisfy old val knowledge */ + /* Why check_ids() for scalar registers? + * + * Consider the following BPF code: + * 1: r6 = ... unbound scalar, ID=a ... + * 2: r7 = ... unbound scalar, ID=b ... + * 3: if (r6 > r7) goto +1 + * 4: r6 = r7 + * 5: if (r6 > X) goto ... + * 6: ... memory operation using r7 ... + * + * First verification path is [1-6]: + * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7; + * - at (5) r6 would be marked <= X, find_equal_scalars() would also mark + * r7 <= X, because r6 and r7 share same id. + * Next verification path is [1-4, 6]. + * + * Instruction (6) would be reached in two states: + * I. r6{.id=b}, r7{.id=b} via path 1-6; + * II. r6{.id=a}, r7{.id=b} via path 1-4, 6. + * + * Use check_ids() to distinguish these states. + * --- + * Also verify that new value satisfies old value range knowledge. + */ return range_within(rold, rcur) && - tnum_in(rold->var_off, rcur->var_off); + tnum_in(rold->var_off, rcur->var_off) && + check_scalar_ids(rold->id, rcur->id, idmap); case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: case PTR_TO_MEM: @@ -14821,7 +15385,7 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, } static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, - struct bpf_func_state *cur, struct bpf_id_pair *idmap) + struct bpf_func_state *cur, struct bpf_idmap *idmap) { int i, spi; @@ -14924,7 +15488,7 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, } static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur, - struct bpf_id_pair *idmap) + struct bpf_idmap *idmap) { int i; @@ -14972,13 +15536,13 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat for (i = 0; i < MAX_BPF_REG; i++) if (!regsafe(env, &old->regs[i], &cur->regs[i], - env->idmap_scratch)) + &env->idmap_scratch)) return false; - if (!stacksafe(env, old, cur, env->idmap_scratch)) + if (!stacksafe(env, old, cur, &env->idmap_scratch)) return false; - if (!refsafe(old, cur, env->idmap_scratch)) + if (!refsafe(old, cur, &env->idmap_scratch)) return false; return true; @@ -14993,7 +15557,8 @@ static bool states_equal(struct bpf_verifier_env *env, if (old->curframe != cur->curframe) return false; - memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch)); + env->idmap_scratch.tmp_id_gen = env->id_gen; + memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map)); /* Verification state from speculative execution simulation * must never prune a non-speculative execution one. @@ -15011,7 +15576,7 @@ static bool states_equal(struct bpf_verifier_env *env, return false; if (old->active_lock.id && - !check_ids(old->active_lock.id, cur->active_lock.id, env->idmap_scratch)) + !check_ids(old->active_lock.id, cur->active_lock.id, &env->idmap_scratch)) return false; if (old->active_rcu_lock != cur->active_rcu_lock) @@ -15118,20 +15683,25 @@ static int propagate_precision(struct bpf_verifier_env *env, struct bpf_reg_state *state_reg; struct bpf_func_state *state; int i, err = 0, fr; + bool first; for (fr = old->curframe; fr >= 0; fr--) { state = old->frame[fr]; state_reg = state->regs; + first = true; for (i = 0; i < BPF_REG_FP; i++, state_reg++) { if (state_reg->type != SCALAR_VALUE || !state_reg->precise || !(state_reg->live & REG_LIVE_READ)) continue; - if (env->log.level & BPF_LOG_LEVEL2) - verbose(env, "frame %d: propagating r%d\n", fr, i); - err = mark_chain_precision_frame(env, fr, i); - if (err < 0) - return err; + if (env->log.level & BPF_LOG_LEVEL2) { + if (first) + verbose(env, "frame %d: propagating r%d", fr, i); + else + verbose(env, ",r%d", i); + } + bt_set_frame_reg(&env->bt, fr, i); + first = false; } for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { @@ -15142,14 +15712,24 @@ static int propagate_precision(struct bpf_verifier_env *env, !state_reg->precise || !(state_reg->live & REG_LIVE_READ)) continue; - if (env->log.level & BPF_LOG_LEVEL2) - verbose(env, "frame %d: propagating fp%d\n", - fr, (-i - 1) * BPF_REG_SIZE); - err = mark_chain_precision_stack_frame(env, fr, i); - if (err < 0) - return err; + if (env->log.level & BPF_LOG_LEVEL2) { + if (first) + verbose(env, "frame %d: propagating fp%d", + fr, (-i - 1) * BPF_REG_SIZE); + else + verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE); + } + bt_set_frame_slot(&env->bt, fr, i); + first = false; } + if (!first) + verbose(env, "\n"); } + + err = mark_chain_precision_batch(env); + if (err < 0) + return err; + return 0; } @@ -17214,9 +17794,10 @@ static int jit_subprogs(struct bpf_verifier_env *env) } /* finally lock prog and jit images for all functions and - * populate kallsysm + * populate kallsysm. Begin at the first subprogram, since + * bpf_prog_load will add the kallsyms for the main program. */ - for (i = 0; i < env->subprog_cnt; i++) { + for (i = 1; i < env->subprog_cnt; i++) { bpf_prog_lock_ro(func[i]); bpf_prog_kallsyms_add(func[i]); } @@ -17242,6 +17823,8 @@ static int jit_subprogs(struct bpf_verifier_env *env) prog->jited = 1; prog->bpf_func = func[0]->bpf_func; prog->jited_len = func[0]->jited_len; + prog->aux->extable = func[0]->aux->extable; + prog->aux->num_exentries = func[0]->aux->num_exentries; prog->aux->func = func; prog->aux->func_cnt = env->subprog_cnt; bpf_prog_jit_attempt_done(prog); @@ -18611,7 +19194,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, * in the fmodret id set with the KF_SLEEPABLE flag. */ else { - u32 *flags = btf_kfunc_is_modify_return(btf, btf_id); + u32 *flags = btf_kfunc_is_modify_return(btf, btf_id, + prog); if (flags && (*flags & KF_SLEEPABLE)) ret = 0; @@ -18639,7 +19223,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, return -EINVAL; } ret = -EINVAL; - if (btf_kfunc_is_modify_return(btf, btf_id) || + if (btf_kfunc_is_modify_return(btf, btf_id, prog) || !check_attach_modify_return(addr, tname)) ret = 0; if (ret) { @@ -18806,6 +19390,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (!env) return -ENOMEM; + env->bt.env = env; + len = (*prog)->len; env->insn_aux_data = vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len)); |