diff options
Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r-- | kernel/bpf/verifier.c | 817 |
1 files changed, 644 insertions, 173 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index db2574e7b8b0..668e07903c8f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1,4 +1,5 @@ /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * Copyright (c) 2016 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -136,13 +137,32 @@ enum bpf_reg_type { FRAME_PTR, /* reg == frame_pointer */ PTR_TO_STACK, /* reg == frame_pointer + imm */ CONST_IMM, /* constant integer value */ + + /* PTR_TO_PACKET represents: + * skb->data + * skb->data + imm + * skb->data + (u16) var + * skb->data + (u16) var + imm + * if (range > 0) then [ptr, ptr + range - off) is safe to access + * if (id > 0) means that some 'var' was added + * if (off > 0) menas that 'imm' was added + */ + PTR_TO_PACKET, + PTR_TO_PACKET_END, /* skb->data + headlen */ }; struct reg_state { enum bpf_reg_type type; union { - /* valid when type == CONST_IMM | PTR_TO_STACK */ - int imm; + /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ + s64 imm; + + /* valid when type == PTR_TO_PACKET* */ + struct { + u32 id; + u16 off; + u16 range; + }; /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | * PTR_TO_MAP_VALUE_OR_NULL @@ -202,6 +222,16 @@ struct verifier_env { bool allow_ptr_leaks; }; +#define BPF_COMPLEXITY_LIMIT_INSNS 65536 +#define BPF_COMPLEXITY_LIMIT_STACK 1024 + +struct bpf_call_arg_meta { + struct bpf_map *map_ptr; + bool raw_mode; + int regno; + int access_size; +}; + /* verbose verifier prints what it's seeing * bpf_check() is called under lock, so no race to access these global vars */ @@ -237,40 +267,39 @@ static const char * const reg_type_str[] = { [FRAME_PTR] = "fp", [PTR_TO_STACK] = "fp", [CONST_IMM] = "imm", + [PTR_TO_PACKET] = "pkt", + [PTR_TO_PACKET_END] = "pkt_end", }; -static const struct { - int map_type; - int func_id; -} func_limit[] = { - {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call}, - {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read}, - {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output}, - {BPF_MAP_TYPE_STACK_TRACE, BPF_FUNC_get_stackid}, -}; - -static void print_verifier_state(struct verifier_env *env) +static void print_verifier_state(struct verifier_state *state) { + struct reg_state *reg; enum bpf_reg_type t; int i; for (i = 0; i < MAX_BPF_REG; i++) { - t = env->cur_state.regs[i].type; + reg = &state->regs[i]; + t = reg->type; if (t == NOT_INIT) continue; verbose(" R%d=%s", i, reg_type_str[t]); if (t == CONST_IMM || t == PTR_TO_STACK) - verbose("%d", env->cur_state.regs[i].imm); + verbose("%lld", reg->imm); + else if (t == PTR_TO_PACKET) + verbose("(id=%d,off=%d,r=%d)", + reg->id, reg->off, reg->range); + else if (t == UNKNOWN_VALUE && reg->imm) + verbose("%lld", reg->imm); else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE || t == PTR_TO_MAP_VALUE_OR_NULL) verbose("(ks=%d,vs=%d)", - env->cur_state.regs[i].map_ptr->key_size, - env->cur_state.regs[i].map_ptr->value_size); + reg->map_ptr->key_size, + reg->map_ptr->value_size); } for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { - if (env->cur_state.stack_slot_type[i] == STACK_SPILL) + if (state->stack_slot_type[i] == STACK_SPILL) verbose(" fp%d=%s", -MAX_BPF_STACK + i, - reg_type_str[env->cur_state.spilled_regs[i / BPF_REG_SIZE].type]); + reg_type_str[state->spilled_regs[i / BPF_REG_SIZE].type]); } verbose("\n"); } @@ -454,7 +483,7 @@ static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx, elem->next = env->head; env->head = elem; env->stack_size++; - if (env->stack_size > 1024) { + if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) { verbose("BPF program is too complex\n"); goto err; } @@ -477,7 +506,6 @@ static void init_reg_state(struct reg_state *regs) for (i = 0; i < MAX_BPF_REG; i++) { regs[i].type = NOT_INIT; regs[i].imm = 0; - regs[i].map_ptr = NULL; } /* frame pointer */ @@ -492,7 +520,6 @@ static void mark_reg_unknown_value(struct reg_state *regs, u32 regno) BUG_ON(regno >= MAX_BPF_REG); regs[regno].type = UNKNOWN_VALUE; regs[regno].imm = 0; - regs[regno].map_ptr = NULL; } enum reg_arg_type { @@ -548,6 +575,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_MAP_VALUE_OR_NULL: case PTR_TO_STACK: case PTR_TO_CTX: + case PTR_TO_PACKET: + case PTR_TO_PACKET_END: case FRAME_PTR: case CONST_PTR_TO_MAP: return true; @@ -647,13 +676,34 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off, return 0; } +#define MAX_PACKET_OFF 0xffff + +static int check_packet_access(struct verifier_env *env, u32 regno, int off, + int size) +{ + struct reg_state *regs = env->cur_state.regs; + struct reg_state *reg = ®s[regno]; + + off += reg->off; + if (off < 0 || off + size > reg->range) { + verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", + off, size, regno, reg->id, reg->off, reg->range); + return -EACCES; + } + return 0; +} + /* check access to 'struct bpf_context' fields */ static int check_ctx_access(struct verifier_env *env, int off, int size, enum bpf_access_type t) { if (env->prog->aux->ops->is_valid_access && - env->prog->aux->ops->is_valid_access(off, size, t)) + env->prog->aux->ops->is_valid_access(off, size, t)) { + /* remember the offset of last byte accessed in ctx */ + if (env->prog->aux->max_ctx_offset < off + size) + env->prog->aux->max_ctx_offset = off + size; return 0; + } verbose("invalid bpf_context access off=%d size=%d\n", off, size); return -EACCES; @@ -673,6 +723,45 @@ static bool is_pointer_value(struct verifier_env *env, int regno) } } +static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg, + int off, int size) +{ + if (reg->type != PTR_TO_PACKET) { + if (off % size != 0) { + verbose("misaligned access off %d size %d\n", off, size); + return -EACCES; + } else { + return 0; + } + } + + switch (env->prog->type) { + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: + break; + default: + verbose("verifier is misconfigured\n"); + return -EACCES; + } + + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) + /* misaligned access to packet is ok on x86,arm,arm64 */ + return 0; + + if (reg->id && size != 1) { + verbose("Unknown packet alignment. Only byte-sized access allowed\n"); + return -EACCES; + } + + /* skb->data is NET_IP_ALIGN-ed */ + if ((NET_IP_ALIGN + reg->off + off) % size != 0) { + verbose("misaligned packet access off %d+%d+%d size %d\n", + NET_IP_ALIGN, reg->off, off, size); + return -EACCES; + } + return 0; +} + /* check whether memory at (regno + off) is accessible for t = (read | write) * if t==write, value_regno is a register which value is stored into memory * if t==read, value_regno is a register which will receive the value from memory @@ -684,21 +773,21 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, int value_regno) { struct verifier_state *state = &env->cur_state; + struct reg_state *reg = &state->regs[regno]; int size, err = 0; - if (state->regs[regno].type == PTR_TO_STACK) - off += state->regs[regno].imm; + if (reg->type == PTR_TO_STACK) + off += reg->imm; size = bpf_size_to_bytes(bpf_size); if (size < 0) return size; - if (off % size != 0) { - verbose("misaligned access off %d size %d\n", off, size); - return -EACCES; - } + err = check_ptr_alignment(env, reg, off, size); + if (err) + return err; - if (state->regs[regno].type == PTR_TO_MAP_VALUE) { + if (reg->type == PTR_TO_MAP_VALUE) { if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose("R%d leaks addr into map\n", value_regno); @@ -708,18 +797,25 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown_value(state->regs, value_regno); - } else if (state->regs[regno].type == PTR_TO_CTX) { + } else if (reg->type == PTR_TO_CTX) { if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose("R%d leaks addr into ctx\n", value_regno); return -EACCES; } err = check_ctx_access(env, off, size, t); - if (!err && t == BPF_READ && value_regno >= 0) + if (!err && t == BPF_READ && value_regno >= 0) { mark_reg_unknown_value(state->regs, value_regno); + if (off == offsetof(struct __sk_buff, data) && + env->allow_ptr_leaks) + /* note that reg.[id|off|range] == 0 */ + state->regs[value_regno].type = PTR_TO_PACKET; + else if (off == offsetof(struct __sk_buff, data_end) && + env->allow_ptr_leaks) + state->regs[value_regno].type = PTR_TO_PACKET_END; + } - } else if (state->regs[regno].type == FRAME_PTR || - state->regs[regno].type == PTR_TO_STACK) { + } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) { if (off >= 0 || off < -MAX_BPF_STACK) { verbose("invalid stack off=%d size=%d\n", off, size); return -EACCES; @@ -735,11 +831,28 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, } else { err = check_stack_read(state, off, size, value_regno); } + } else if (state->regs[regno].type == PTR_TO_PACKET) { + if (t == BPF_WRITE) { + verbose("cannot write into packet\n"); + return -EACCES; + } + err = check_packet_access(env, regno, off, size); + if (!err && t == BPF_READ && value_regno >= 0) + mark_reg_unknown_value(state->regs, value_regno); } else { verbose("R%d invalid mem access '%s'\n", - regno, reg_type_str[state->regs[regno].type]); + regno, reg_type_str[reg->type]); return -EACCES; } + + if (!err && size <= 2 && value_regno >= 0 && env->allow_ptr_leaks && + state->regs[value_regno].type == UNKNOWN_VALUE) { + /* 1 or 2 byte load zero-extends, determine the number of + * zero upper bits. Not doing it fo 4 byte load, since + * such values cannot be added to ptr_to_packet anyway. + */ + state->regs[value_regno].imm = 64 - size * 8; + } return err; } @@ -780,7 +893,8 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) * and all elements of stack are initialized */ static int check_stack_boundary(struct verifier_env *env, int regno, - int access_size, bool zero_size_allowed) + int access_size, bool zero_size_allowed, + struct bpf_call_arg_meta *meta) { struct verifier_state *state = &env->cur_state; struct reg_state *regs = state->regs; @@ -806,6 +920,12 @@ static int check_stack_boundary(struct verifier_env *env, int regno, return -EACCES; } + if (meta && meta->raw_mode) { + meta->access_size = access_size; + meta->regno = regno; + return 0; + } + for (i = 0; i < access_size; i++) { if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) { verbose("invalid indirect read from stack off %d+%d size %d\n", @@ -817,7 +937,8 @@ static int check_stack_boundary(struct verifier_env *env, int regno, } static int check_func_arg(struct verifier_env *env, u32 regno, - enum bpf_arg_type arg_type, struct bpf_map **mapp) + enum bpf_arg_type arg_type, + struct bpf_call_arg_meta *meta) { struct reg_state *reg = env->cur_state.regs + regno; enum bpf_reg_type expected_type; @@ -849,7 +970,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno, expected_type = CONST_PTR_TO_MAP; } else if (arg_type == ARG_PTR_TO_CTX) { expected_type = PTR_TO_CTX; - } else if (arg_type == ARG_PTR_TO_STACK) { + } else if (arg_type == ARG_PTR_TO_STACK || + arg_type == ARG_PTR_TO_RAW_STACK) { expected_type = PTR_TO_STACK; /* One exception here. In case function allows for NULL to be * passed in as argument, it's a CONST_IMM type. Final test @@ -857,6 +979,7 @@ static int check_func_arg(struct verifier_env *env, u32 regno, */ if (reg->type == CONST_IMM && reg->imm == 0) expected_type = CONST_IMM; + meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK; } else { verbose("unsupported arg_type %d\n", arg_type); return -EFAULT; @@ -870,14 +993,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno, if (arg_type == ARG_CONST_MAP_PTR) { /* bpf_map_xxx(map_ptr) call: remember that map_ptr */ - *mapp = reg->map_ptr; - + meta->map_ptr = reg->map_ptr; } else if (arg_type == ARG_PTR_TO_MAP_KEY) { /* bpf_map_xxx(..., map_ptr, ..., key) call: * check that [key, key + map->key_size) are within * stack limits and initialized */ - if (!*mapp) { + if (!meta->map_ptr) { /* in function declaration map_ptr must come before * map_key, so that it's verified and known before * we have to check map_key here. Otherwise it means @@ -886,19 +1008,20 @@ static int check_func_arg(struct verifier_env *env, u32 regno, verbose("invalid map_ptr to access map->key\n"); return -EACCES; } - err = check_stack_boundary(env, regno, (*mapp)->key_size, - false); + err = check_stack_boundary(env, regno, meta->map_ptr->key_size, + false, NULL); } else if (arg_type == ARG_PTR_TO_MAP_VALUE) { /* bpf_map_xxx(..., map_ptr, ..., value) call: * check [value, value + map->value_size) validity */ - if (!*mapp) { + if (!meta->map_ptr) { /* kernel subsystem misconfigured verifier */ verbose("invalid map_ptr to access map->value\n"); return -EACCES; } - err = check_stack_boundary(env, regno, (*mapp)->value_size, - false); + err = check_stack_boundary(env, regno, + meta->map_ptr->value_size, + false, NULL); } else if (arg_type == ARG_CONST_STACK_SIZE || arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO); @@ -913,7 +1036,7 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return -EACCES; } err = check_stack_boundary(env, regno - 1, reg->imm, - zero_size_allowed); + zero_size_allowed, meta); } return err; @@ -921,27 +1044,93 @@ static int check_func_arg(struct verifier_env *env, u32 regno, static int check_map_func_compatibility(struct bpf_map *map, int func_id) { - bool bool_map, bool_func; - int i; - if (!map) return 0; - for (i = 0; i < ARRAY_SIZE(func_limit); i++) { - bool_map = (map->map_type == func_limit[i].map_type); - bool_func = (func_id == func_limit[i].func_id); - /* only when map & func pair match it can continue. - * don't allow any other map type to be passed into - * the special func; - */ - if (bool_func && bool_map != bool_func) { - verbose("cannot pass map_type %d into func %d\n", - map->map_type, func_id); - return -EINVAL; - } + /* We need a two way check, first is from map perspective ... */ + switch (map->map_type) { + case BPF_MAP_TYPE_PROG_ARRAY: + if (func_id != BPF_FUNC_tail_call) + goto error; + break; + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + if (func_id != BPF_FUNC_perf_event_read && + func_id != BPF_FUNC_perf_event_output) + goto error; + break; + case BPF_MAP_TYPE_STACK_TRACE: + if (func_id != BPF_FUNC_get_stackid) + goto error; + break; + default: + break; + } + + /* ... and second from the function itself. */ + switch (func_id) { + case BPF_FUNC_tail_call: + if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + goto error; + break; + case BPF_FUNC_perf_event_read: + case BPF_FUNC_perf_event_output: + if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) + goto error; + break; + case BPF_FUNC_get_stackid: + if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) + goto error; + break; + default: + break; } return 0; +error: + verbose("cannot pass map_type %d into func %d\n", + map->map_type, func_id); + return -EINVAL; +} + +static int check_raw_mode(const struct bpf_func_proto *fn) +{ + int count = 0; + + if (fn->arg1_type == ARG_PTR_TO_RAW_STACK) + count++; + if (fn->arg2_type == ARG_PTR_TO_RAW_STACK) + count++; + if (fn->arg3_type == ARG_PTR_TO_RAW_STACK) + count++; + if (fn->arg4_type == ARG_PTR_TO_RAW_STACK) + count++; + if (fn->arg5_type == ARG_PTR_TO_RAW_STACK) + count++; + + return count > 1 ? -EINVAL : 0; +} + +static void clear_all_pkt_pointers(struct verifier_env *env) +{ + struct verifier_state *state = &env->cur_state; + struct reg_state *regs = state->regs, *reg; + int i; + + for (i = 0; i < MAX_BPF_REG; i++) + if (regs[i].type == PTR_TO_PACKET || + regs[i].type == PTR_TO_PACKET_END) + mark_reg_unknown_value(regs, i); + + for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { + if (state->stack_slot_type[i] != STACK_SPILL) + continue; + reg = &state->spilled_regs[i / BPF_REG_SIZE]; + if (reg->type != PTR_TO_PACKET && + reg->type != PTR_TO_PACKET_END) + continue; + reg->type = UNKNOWN_VALUE; + reg->imm = 0; + } } static int check_call(struct verifier_env *env, int func_id) @@ -949,8 +1138,9 @@ static int check_call(struct verifier_env *env, int func_id) struct verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; struct reg_state *regs = state->regs; - struct bpf_map *map = NULL; struct reg_state *reg; + struct bpf_call_arg_meta meta; + bool changes_data; int i, err; /* find function prototype */ @@ -973,23 +1163,45 @@ static int check_call(struct verifier_env *env, int func_id) return -EINVAL; } + changes_data = bpf_helper_changes_skb_data(fn->func); + + memset(&meta, 0, sizeof(meta)); + + /* We only support one arg being in raw mode at the moment, which + * is sufficient for the helper functions we have right now. + */ + err = check_raw_mode(fn); + if (err) { + verbose("kernel subsystem misconfigured func %d\n", func_id); + return err; + } + /* check args */ - err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &map); + err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta); if (err) return err; - err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &map); + err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); if (err) return err; - err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &map); + err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); if (err) return err; - err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &map); + err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta); if (err) return err; - err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &map); + err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta); if (err) return err; + /* Mark slots with STACK_MISC in case of raw mode, stack offset + * is inferred from register state. + */ + for (i = 0; i < meta.access_size; i++) { + err = check_mem_access(env, meta.regno, i, BPF_B, BPF_WRITE, -1); + if (err) + return err; + } + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { reg = regs + caller_saved[i]; @@ -1008,28 +1220,225 @@ static int check_call(struct verifier_env *env, int func_id) * can check 'value_size' boundary of memory access * to map element returned from bpf_map_lookup_elem() */ - if (map == NULL) { + if (meta.map_ptr == NULL) { verbose("kernel subsystem misconfigured verifier\n"); return -EINVAL; } - regs[BPF_REG_0].map_ptr = map; + regs[BPF_REG_0].map_ptr = meta.map_ptr; } else { verbose("unknown return type %d of func %d\n", fn->ret_type, func_id); return -EINVAL; } - err = check_map_func_compatibility(map, func_id); + err = check_map_func_compatibility(meta.map_ptr, func_id); if (err) return err; + if (changes_data) + clear_all_pkt_pointers(env); + return 0; +} + +static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn) +{ + struct reg_state *regs = env->cur_state.regs; + struct reg_state *dst_reg = ®s[insn->dst_reg]; + struct reg_state *src_reg = ®s[insn->src_reg]; + struct reg_state tmp_reg; + s32 imm; + + if (BPF_SRC(insn->code) == BPF_K) { + /* pkt_ptr += imm */ + imm = insn->imm; + +add_imm: + if (imm <= 0) { + verbose("addition of negative constant to packet pointer is not allowed\n"); + return -EACCES; + } + if (imm >= MAX_PACKET_OFF || + imm + dst_reg->off >= MAX_PACKET_OFF) { + verbose("constant %d is too large to add to packet pointer\n", + imm); + return -EACCES; + } + /* a constant was added to pkt_ptr. + * Remember it while keeping the same 'id' + */ + dst_reg->off += imm; + } else { + if (src_reg->type == PTR_TO_PACKET) { + /* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */ + tmp_reg = *dst_reg; /* save r7 state */ + *dst_reg = *src_reg; /* copy pkt_ptr state r6 into r7 */ + src_reg = &tmp_reg; /* pretend it's src_reg state */ + /* if the checks below reject it, the copy won't matter, + * since we're rejecting the whole program. If all ok, + * then imm22 state will be added to r7 + * and r7 will be pkt(id=0,off=22,r=62) while + * r6 will stay as pkt(id=0,off=0,r=62) + */ + } + + if (src_reg->type == CONST_IMM) { + /* pkt_ptr += reg where reg is known constant */ + imm = src_reg->imm; + goto add_imm; + } + /* disallow pkt_ptr += reg + * if reg is not uknown_value with guaranteed zero upper bits + * otherwise pkt_ptr may overflow and addition will become + * subtraction which is not allowed + */ + if (src_reg->type != UNKNOWN_VALUE) { + verbose("cannot add '%s' to ptr_to_packet\n", + reg_type_str[src_reg->type]); + return -EACCES; + } + if (src_reg->imm < 48) { + verbose("cannot add integer value with %lld upper zero bits to ptr_to_packet\n", + src_reg->imm); + return -EACCES; + } + /* dst_reg stays as pkt_ptr type and since some positive + * integer value was added to the pointer, increment its 'id' + */ + dst_reg->id++; + + /* something was added to pkt_ptr, set range and off to zero */ + dst_reg->off = 0; + dst_reg->range = 0; + } + return 0; +} + +static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) +{ + struct reg_state *regs = env->cur_state.regs; + struct reg_state *dst_reg = ®s[insn->dst_reg]; + u8 opcode = BPF_OP(insn->code); + s64 imm_log2; + + /* for type == UNKNOWN_VALUE: + * imm > 0 -> number of zero upper bits + * imm == 0 -> don't track which is the same as all bits can be non-zero + */ + + if (BPF_SRC(insn->code) == BPF_X) { + struct reg_state *src_reg = ®s[insn->src_reg]; + + if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 && + dst_reg->imm && opcode == BPF_ADD) { + /* dreg += sreg + * where both have zero upper bits. Adding them + * can only result making one more bit non-zero + * in the larger value. + * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47) + * 0xffff (imm=48) + 0xffff = 0x1fffe (imm=47) + */ + dst_reg->imm = min(dst_reg->imm, src_reg->imm); + dst_reg->imm--; + return 0; + } + if (src_reg->type == CONST_IMM && src_reg->imm > 0 && + dst_reg->imm && opcode == BPF_ADD) { + /* dreg += sreg + * where dreg has zero upper bits and sreg is const. + * Adding them can only result making one more bit + * non-zero in the larger value. + */ + imm_log2 = __ilog2_u64((long long)src_reg->imm); + dst_reg->imm = min(dst_reg->imm, 63 - imm_log2); + dst_reg->imm--; + return 0; + } + /* all other cases non supported yet, just mark dst_reg */ + dst_reg->imm = 0; + return 0; + } + + /* sign extend 32-bit imm into 64-bit to make sure that + * negative values occupy bit 63. Note ilog2() would have + * been incorrect, since sizeof(insn->imm) == 4 + */ + imm_log2 = __ilog2_u64((long long)insn->imm); + + if (dst_reg->imm && opcode == BPF_LSH) { + /* reg <<= imm + * if reg was a result of 2 byte load, then its imm == 48 + * which means that upper 48 bits are zero and shifting this reg + * left by 4 would mean that upper 44 bits are still zero + */ + dst_reg->imm -= insn->imm; + } else if (dst_reg->imm && opcode == BPF_MUL) { + /* reg *= imm + * if multiplying by 14 subtract 4 + * This is conservative calculation of upper zero bits. + * It's not trying to special case insn->imm == 1 or 0 cases + */ + dst_reg->imm -= imm_log2 + 1; + } else if (opcode == BPF_AND) { + /* reg &= imm */ + dst_reg->imm = 63 - imm_log2; + } else if (dst_reg->imm && opcode == BPF_ADD) { + /* reg += imm */ + dst_reg->imm = min(dst_reg->imm, 63 - imm_log2); + dst_reg->imm--; + } else if (opcode == BPF_RSH) { + /* reg >>= imm + * which means that after right shift, upper bits will be zero + * note that verifier already checked that + * 0 <= imm < 64 for shift insn + */ + dst_reg->imm += insn->imm; + if (unlikely(dst_reg->imm > 64)) + /* some dumb code did: + * r2 = *(u32 *)mem; + * r2 >>= 32; + * and all bits are zero now */ + dst_reg->imm = 64; + } else { + /* all other alu ops, means that we don't know what will + * happen to the value, mark it with unknown number of zero bits + */ + dst_reg->imm = 0; + } + + if (dst_reg->imm < 0) { + /* all 64 bits of the register can contain non-zero bits + * and such value cannot be added to ptr_to_packet, since it + * may overflow, mark it as unknown to avoid further eval + */ + dst_reg->imm = 0; + } + return 0; +} + +static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn) +{ + struct reg_state *regs = env->cur_state.regs; + struct reg_state *dst_reg = ®s[insn->dst_reg]; + struct reg_state *src_reg = ®s[insn->src_reg]; + u8 opcode = BPF_OP(insn->code); + + /* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn. + * Don't care about overflow or negative values, just add them + */ + if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_K) + dst_reg->imm += insn->imm; + else if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_X && + src_reg->type == CONST_IMM) + dst_reg->imm += src_reg->imm; + else + mark_reg_unknown_value(regs, insn->dst_reg); return 0; } /* check validity of 32-bit and 64-bit arithmetic operations */ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct reg_state *regs = env->cur_state.regs, *dst_reg; u8 opcode = BPF_OP(insn->code); int err; @@ -1118,8 +1527,6 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) } else { /* all other ALU ops: and, sub, xor, add, ... */ - bool stack_relative = false; - if (BPF_SRC(insn->code) == BPF_X) { if (insn->imm != 0 || insn->off != 0) { verbose("BPF_ALU uses reserved fields\n"); @@ -1157,11 +1564,36 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) } } + /* check dest operand */ + err = check_reg_arg(regs, insn->dst_reg, DST_OP_NO_MARK); + if (err) + return err; + + dst_reg = ®s[insn->dst_reg]; + /* pattern match 'bpf_add Rx, imm' instruction */ if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && - regs[insn->dst_reg].type == FRAME_PTR && - BPF_SRC(insn->code) == BPF_K) { - stack_relative = true; + dst_reg->type == FRAME_PTR && BPF_SRC(insn->code) == BPF_K) { + dst_reg->type = PTR_TO_STACK; + dst_reg->imm = insn->imm; + return 0; + } else if (opcode == BPF_ADD && + BPF_CLASS(insn->code) == BPF_ALU64 && + (dst_reg->type == PTR_TO_PACKET || + (BPF_SRC(insn->code) == BPF_X && + regs[insn->src_reg].type == PTR_TO_PACKET))) { + /* ptr_to_packet += K|X */ + return check_packet_ptr_add(env, insn); + } else if (BPF_CLASS(insn->code) == BPF_ALU64 && + dst_reg->type == UNKNOWN_VALUE && + env->allow_ptr_leaks) { + /* unknown += K|X */ + return evaluate_reg_alu(env, insn); + } else if (BPF_CLASS(insn->code) == BPF_ALU64 && + dst_reg->type == CONST_IMM && + env->allow_ptr_leaks) { + /* reg_imm += K|X */ + return evaluate_reg_imm_alu(env, insn); } else if (is_pointer_value(env, insn->dst_reg)) { verbose("R%d pointer arithmetic prohibited\n", insn->dst_reg); @@ -1173,24 +1605,45 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) return -EACCES; } - /* check dest operand */ - err = check_reg_arg(regs, insn->dst_reg, DST_OP); - if (err) - return err; - - if (stack_relative) { - regs[insn->dst_reg].type = PTR_TO_STACK; - regs[insn->dst_reg].imm = insn->imm; - } + /* mark dest operand */ + mark_reg_unknown_value(regs, insn->dst_reg); } return 0; } +static void find_good_pkt_pointers(struct verifier_env *env, + struct reg_state *dst_reg) +{ + struct verifier_state *state = &env->cur_state; + struct reg_state *regs = state->regs, *reg; + int i; + /* r2 = r3; + * r2 += 8 + * if (r2 > pkt_end) goto somewhere + * r2 == dst_reg, pkt_end == src_reg, + * r2=pkt(id=n,off=8,r=0) + * r3=pkt(id=n,off=0,r=0) + * find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) + * so that range of bytes [r3, r3 + 8) is safe to access + */ + for (i = 0; i < MAX_BPF_REG; i++) + if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) + regs[i].range = dst_reg->off; + + for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { + if (state->stack_slot_type[i] != STACK_SPILL) + continue; + reg = &state->spilled_regs[i / BPF_REG_SIZE]; + if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) + reg->range = dst_reg->off; + } +} + static int check_cond_jmp_op(struct verifier_env *env, struct bpf_insn *insn, int *insn_idx) { - struct reg_state *regs = env->cur_state.regs; + struct reg_state *regs = env->cur_state.regs, *dst_reg; struct verifier_state *other_branch; u8 opcode = BPF_OP(insn->code); int err; @@ -1228,11 +1681,12 @@ static int check_cond_jmp_op(struct verifier_env *env, if (err) return err; + dst_reg = ®s[insn->dst_reg]; + /* detect if R == 0 where R was initialized to zero earlier */ if (BPF_SRC(insn->code) == BPF_K && (opcode == BPF_JEQ || opcode == BPF_JNE) && - regs[insn->dst_reg].type == CONST_IMM && - regs[insn->dst_reg].imm == insn->imm) { + dst_reg->type == CONST_IMM && dst_reg->imm == insn->imm) { if (opcode == BPF_JEQ) { /* if (imm == imm) goto pc+off; * only follow the goto, ignore fall-through @@ -1254,44 +1708,30 @@ static int check_cond_jmp_op(struct verifier_env *env, /* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */ if (BPF_SRC(insn->code) == BPF_K && - insn->imm == 0 && (opcode == BPF_JEQ || - opcode == BPF_JNE) && - regs[insn->dst_reg].type == PTR_TO_MAP_VALUE_OR_NULL) { + insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && + dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { if (opcode == BPF_JEQ) { /* next fallthrough insn can access memory via * this register */ regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; /* branch targer cannot access it, since reg == 0 */ - other_branch->regs[insn->dst_reg].type = CONST_IMM; - other_branch->regs[insn->dst_reg].imm = 0; + mark_reg_unknown_value(other_branch->regs, + insn->dst_reg); } else { other_branch->regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; - regs[insn->dst_reg].type = CONST_IMM; - regs[insn->dst_reg].imm = 0; + mark_reg_unknown_value(regs, insn->dst_reg); } + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && + dst_reg->type == PTR_TO_PACKET && + regs[insn->src_reg].type == PTR_TO_PACKET_END) { + find_good_pkt_pointers(env, dst_reg); } else if (is_pointer_value(env, insn->dst_reg)) { verbose("R%d pointer comparison prohibited\n", insn->dst_reg); return -EACCES; - } else if (BPF_SRC(insn->code) == BPF_K && - (opcode == BPF_JEQ || opcode == BPF_JNE)) { - - if (opcode == BPF_JEQ) { - /* detect if (R == imm) goto - * and in the target state recognize that R = imm - */ - other_branch->regs[insn->dst_reg].type = CONST_IMM; - other_branch->regs[insn->dst_reg].imm = insn->imm; - } else { - /* detect if (R != imm) goto - * and in the fall-through state recognize that R = imm - */ - regs[insn->dst_reg].type = CONST_IMM; - regs[insn->dst_reg].imm = insn->imm; - } } if (log_level) - print_verifier_state(env); + print_verifier_state(&env->cur_state); return 0; } @@ -1369,14 +1809,14 @@ static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn) int i, err; if (!may_access_skb(env->prog->type)) { - verbose("BPF_LD_ABS|IND instructions not allowed for this program type\n"); + verbose("BPF_LD_[ABS|IND] instructions not allowed for this program type\n"); return -EINVAL; } if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || BPF_SIZE(insn->code) == BPF_DW || (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { - verbose("BPF_LD_ABS uses reserved fields\n"); + verbose("BPF_LD_[ABS|IND] uses reserved fields\n"); return -EINVAL; } @@ -1540,6 +1980,8 @@ peek_stack: goto peek_stack; else if (ret < 0) goto err_free; + if (t + 1 < insn_cnt) + env->explored_states[t + 1] = STATE_LIST_MARK; } else if (opcode == BPF_JA) { if (BPF_SRC(insns[t].code) != BPF_K) { ret = -EINVAL; @@ -1607,6 +2049,58 @@ err_free: return ret; } +/* the following conditions reduce the number of explored insns + * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet + */ +static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur) +{ + if (old->id != cur->id) + return false; + + /* old ptr_to_packet is more conservative, since it allows smaller + * range. Ex: + * old(off=0,r=10) is equal to cur(off=0,r=20), because + * old(off=0,r=10) means that with range=10 the verifier proceeded + * further and found no issues with the program. Now we're in the same + * spot with cur(off=0,r=20), so we're safe too, since anything further + * will only be looking at most 10 bytes after this pointer. + */ + if (old->off == cur->off && old->range < cur->range) + return true; + + /* old(off=20,r=10) is equal to cur(off=22,re=22 or 5 or 0) + * since both cannot be used for packet access and safe(old) + * pointer has smaller off that could be used for further + * 'if (ptr > data_end)' check + * Ex: + * old(off=20,r=10) and cur(off=22,r=22) and cur(off=22,r=0) mean + * that we cannot access the packet. + * The safe range is: + * [ptr, ptr + range - off) + * so whenever off >=range, it means no safe bytes from this pointer. + * When comparing old->off <= cur->off, it means that older code + * went with smaller offset and that offset was later + * used to figure out the safe range after 'if (ptr > data_end)' check + * Say, 'old' state was explored like: + * ... R3(off=0, r=0) + * R4 = R3 + 20 + * ... now R4(off=20,r=0) <-- here + * if (R4 > data_end) + * ... R4(off=20,r=20), R3(off=0,r=20) and R3 can be used to access. + * ... the code further went all the way to bpf_exit. + * Now the 'cur' state at the mark 'here' has R4(off=30,r=0). + * old_R4(off=20,r=0) equal to cur_R4(off=30,r=0), since if the verifier + * goes further, such cur_R4 will give larger safe packet range after + * 'if (R4 > data_end)' and all further insn were already good with r=20, + * so they will be good with r=30 and we can prune the search. + */ + if (old->off <= cur->off && + old->off >= old->range && cur->off >= cur->range) + return true; + + return false; +} + /* compare two verifier states * * all states stored in state_list are known to be valid, since @@ -1635,17 +2129,25 @@ err_free: */ static bool states_equal(struct verifier_state *old, struct verifier_state *cur) { + struct reg_state *rold, *rcur; int i; for (i = 0; i < MAX_BPF_REG; i++) { - if (memcmp(&old->regs[i], &cur->regs[i], - sizeof(old->regs[0])) != 0) { - if (old->regs[i].type == NOT_INIT || - (old->regs[i].type == UNKNOWN_VALUE && - cur->regs[i].type != NOT_INIT)) - continue; - return false; - } + rold = &old->regs[i]; + rcur = &cur->regs[i]; + + if (memcmp(rold, rcur, sizeof(*rold)) == 0) + continue; + + if (rold->type == NOT_INIT || + (rold->type == UNKNOWN_VALUE && rcur->type != NOT_INIT)) + continue; + + if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET && + compare_ptrs_to_packet(rold, rcur)) + continue; + + return false; } for (i = 0; i < MAX_BPF_STACK; i++) { @@ -1744,7 +2246,7 @@ static int do_check(struct verifier_env *env) insn = &insns[insn_idx]; class = BPF_CLASS(insn->code); - if (++insn_processed > 32768) { + if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { verbose("BPF program is too large. Proccessed %d insn\n", insn_processed); return -E2BIG; @@ -1767,7 +2269,7 @@ static int do_check(struct verifier_env *env) if (log_level && do_print_state) { verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx); - print_verifier_state(env); + print_verifier_state(&env->cur_state); do_print_state = false; } @@ -1979,6 +2481,7 @@ process_bpf_exit: insn_idx++; } + verbose("processed %d insns\n", insn_processed); return 0; } @@ -2049,15 +2552,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) return -E2BIG; } - /* remember this map */ - env->used_maps[env->used_map_cnt++] = map; - /* hold the map. If the program is rejected by verifier, * the map will be released by release_maps() or it * will be used by the valid program until it's unloaded * and all maps are released in free_bpf_prog_info() */ - bpf_map_inc(map, false); + map = bpf_map_inc(map, false); + if (IS_ERR(map)) { + fdput(f); + return PTR_ERR(map); + } + env->used_maps[env->used_map_cnt++] = map; + fdput(f); next_insn: insn++; @@ -2093,26 +2599,6 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) insn->src_reg = 0; } -static void adjust_branches(struct bpf_prog *prog, int pos, int delta) -{ - struct bpf_insn *insn = prog->insnsi; - int insn_cnt = prog->len; - int i; - - for (i = 0; i < insn_cnt; i++, insn++) { - if (BPF_CLASS(insn->code) != BPF_JMP || - BPF_OP(insn->code) == BPF_CALL || - BPF_OP(insn->code) == BPF_EXIT) - continue; - - /* adjust offset of jmps if necessary */ - if (i < pos && i + insn->off + 1 > pos) - insn->off += delta; - else if (i > pos + delta && i + insn->off + 1 <= pos + delta) - insn->off -= delta; - } -} - /* convert load instructions that access fields of 'struct __sk_buff' * into sequence of instructions that access fields of 'struct sk_buff' */ @@ -2122,14 +2608,15 @@ static int convert_ctx_accesses(struct verifier_env *env) int insn_cnt = env->prog->len; struct bpf_insn insn_buf[16]; struct bpf_prog *new_prog; - u32 cnt; - int i; enum bpf_access_type type; + int i; if (!env->prog->aux->ops->convert_ctx_access) return 0; for (i = 0; i < insn_cnt; i++, insn++) { + u32 insn_delta, cnt; + if (insn->code == (BPF_LDX | BPF_MEM | BPF_W)) type = BPF_READ; else if (insn->code == (BPF_STX | BPF_MEM | BPF_W)) @@ -2151,34 +2638,18 @@ static int convert_ctx_accesses(struct verifier_env *env) return -EINVAL; } - if (cnt == 1) { - memcpy(insn, insn_buf, sizeof(*insn)); - continue; - } - - /* several new insns need to be inserted. Make room for them */ - insn_cnt += cnt - 1; - new_prog = bpf_prog_realloc(env->prog, - bpf_prog_size(insn_cnt), - GFP_USER); + new_prog = bpf_patch_insn_single(env->prog, i, insn_buf, cnt); if (!new_prog) return -ENOMEM; - new_prog->len = insn_cnt; - - memmove(new_prog->insnsi + i + cnt, new_prog->insns + i + 1, - sizeof(*insn) * (insn_cnt - i - cnt)); - - /* copy substitute insns in place of load instruction */ - memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt); - - /* adjust branches in the whole program */ - adjust_branches(new_prog, i, cnt - 1); + insn_delta = cnt - 1; /* keep walking new program and skip insns we just inserted */ env->prog = new_prog; - insn = new_prog->insnsi + i + cnt - 1; - i += cnt - 1; + insn = new_prog->insnsi + i + insn_delta; + + insn_cnt += insn_delta; + i += insn_delta; } return 0; |