From 8fe2d6ccd52b086268f2f36e5e2fc0fe3aeffa80 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 5 Oct 2017 16:20:56 -0700 Subject: bpf: fix liveness marking while processing Rx = Ry instruction the verifier does regs[insn->dst_reg] = regs[insn->src_reg] which often clears write mark (when Ry doesn't have it) that was just set by check_reg_arg(Rx) prior to the assignment. That causes mark_reg_read() to keep marking Rx in this block as REG_LIVE_READ (since the logic incorrectly misses that it's screened by the write) and in many of its parents (until lucky write into the same Rx or beginning of the program). That causes is_state_visited() logic to miss many pruning opportunities. Furthermore mark_reg_read() logic propagates the read mark for BPF_REG_FP as well (though it's readonly) which causes harmless but unnecssary work during is_state_visited(). Note that do_propagate_liveness() skips FP correctly, so do the same in mark_reg_read() as well. It saves 0.2 seconds for the test below program before after bpf_lb-DLB_L3.o 2604 2304 bpf_lb-DLB_L4.o 11159 3723 bpf_lb-DUNKNOWN.o 1116 1110 bpf_lxc-DDROP_ALL.o 34566 28004 bpf_lxc-DUNKNOWN.o 53267 39026 bpf_netdev.o 17843 16943 bpf_overlay.o 8672 7929 time ~11 sec ~4 sec Fixes: dc503a8ad984 ("bpf/verifier: track liveness for pruning") Signed-off-by: Alexei Starovoitov Acked-by: Edward Cree Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/bpf/verifier.c') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b914fbe1383e..8b8d6ba39e23 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -653,6 +653,10 @@ static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno) { struct bpf_verifier_state *parent = state->parent; + if (regno == BPF_REG_FP) + /* We don't need to worry about FP liveness because it's read-only */ + return; + while (parent) { /* if read wasn't screened by an earlier write ... */ if (state->regs[regno].live & REG_LIVE_WRITTEN) @@ -2345,6 +2349,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) * copy register state to dest reg */ regs[insn->dst_reg] = regs[insn->src_reg]; + regs[insn->dst_reg].live |= REG_LIVE_WRITTEN; } else { /* R1 = (u32) R2 */ if (is_pointer_value(env, insn->src_reg)) { -- cgit v1.2.3 From 28e33f9d78eefe98ea86673ab31e988b37a9a738 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Oct 2017 11:16:55 -0700 Subject: bpf: disallow arithmetic operations on context pointer Commit f1174f77b50c ("bpf/verifier: rework value tracking") removed the crafty selection of which pointer types are allowed to be modified. This is OK for most pointer types since adjust_ptr_min_max_vals() will catch operations on immutable pointers. One exception is PTR_TO_CTX which is now allowed to be offseted freely. The intent of aforementioned commit was to allow context access via modified registers. The offset passed to ->is_valid_access() verifier callback has been adjusted by the value of the variable offset. What is missing, however, is taking the variable offset into account when the context register is used. Or in terms of the code adding the offset to the value passed to the ->convert_ctx_access() callback. This leads to the following eBPF user code: r1 += 68 r0 = *(u32 *)(r1 + 8) exit being translated to this in kernel space: 0: (07) r1 += 68 1: (61) r0 = *(u32 *)(r1 +180) 2: (95) exit Offset 8 is corresponding to 180 in the kernel, but offset 76 is valid too. Verifier will "accept" access to offset 68+8=76 but then "convert" access to offset 8 as 180. Effective access to offset 248 is beyond the kernel context. (This is a __sk_buff example on a debug-heavy kernel - packet mark is 8 -> 180, 76 would be data.) Dereferencing the modified context pointer is not as easy as dereferencing other types, because we have to translate the access to reading a field in kernel structures which is usually at a different offset and often of a different size. To allow modifying the pointer we would have to make sure that given eBPF instruction will always access the same field or the fields accessed are "compatible" in terms of offset and size... Disallow dereferencing modified context pointers and add to selftests the test case described here. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Edward Cree Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 8 ++++++-- tools/testing/selftests/bpf/test_verifier.c | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'kernel/bpf/verifier.c') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8b8d6ba39e23..20f3889c006e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1116,7 +1116,12 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn /* ctx accesses must be at a fixed offset, so that we can * determine what type of data were returned. */ - if (!tnum_is_const(reg->var_off)) { + if (reg->off) { + verbose("dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n", + regno, reg->off, off - reg->off); + return -EACCES; + } + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); @@ -1124,7 +1129,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn tn_buf, off, size); return -EACCES; } - off += reg->var_off.value; err = check_ctx_access(env, insn_idx, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { /* ctx access returns either a scalar, or a diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 26f3250bdcd2..3c7d3a45a3c5 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -6645,6 +6645,20 @@ static struct bpf_test tests[] = { .errstr = "BPF_END uses reserved fields", .result = REJECT, }, + { + "arithmetic ops make PTR_TO_CTX unusable", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, + offsetof(struct __sk_buff, data) - + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .errstr = "dereference of modified ctx ptr R1 off=68+8, ctx+const is allowed, ctx+const+const is not", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From fb2a311a31d3457fe8c3ee16f5609877e2ead9f7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 21 Oct 2017 02:34:21 +0200 Subject: bpf: fix off by one for range markings with L{T, E} patterns During review I noticed that the current logic for direct packet access marking in check_cond_jmp_op() has an off by one for the upper right range border when marking in find_good_pkt_pointers() with BPF_JLT and BPF_JLE. It's not really harmful given access up to pkt_end is always safe, but we should nevertheless correct the range marking before it becomes ABI. If pkt_data' denotes a pkt_data derived pointer (pkt_data + X), then for pkt_data' < pkt_end in the true branch as well as for pkt_end <= pkt_data' in the false branch we mark the range with X although it should really be X - 1 in these cases. For example, X could be pkt_end - pkt_data, then when testing for pkt_data' < pkt_end the verifier simulation cannot deduce that a byte load of pkt_data' - 1 would succeed in this branch. Fixes: b4e432f1000a ("bpf: enable BPF_J{LT, LE, SLT, SLE} opcodes in verifier") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'kernel/bpf/verifier.c') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 20f3889c006e..49cb5ad14746 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2430,12 +2430,15 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) } static void find_good_pkt_pointers(struct bpf_verifier_state *state, - struct bpf_reg_state *dst_reg) + struct bpf_reg_state *dst_reg, + bool range_right_open) { struct bpf_reg_state *regs = state->regs, *reg; + u16 new_range; int i; - if (dst_reg->off < 0) + if (dst_reg->off < 0 || + (dst_reg->off == 0 && range_right_open)) /* This doesn't give us any range */ return; @@ -2446,9 +2449,13 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, */ return; - /* LLVM can generate four kind of checks: + new_range = dst_reg->off; + if (range_right_open) + new_range--; + + /* Examples for register markings: * - * Type 1/2: + * pkt_data in dst register: * * r2 = r3; * r2 += 8; @@ -2465,7 +2472,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, * r2=pkt(id=n,off=8,r=0) * r3=pkt(id=n,off=0,r=0) * - * Type 3/4: + * pkt_data in src register: * * r2 = r3; * r2 += 8; @@ -2483,7 +2490,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, * r3=pkt(id=n,off=0,r=0) * * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) - * so that range of bytes [r3, r3 + 8) is safe to access. + * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8) + * and [r3, r3 + 8-1) respectively is safe to access depending on + * the check. */ /* If our ids match, then we must have the same max_value. And we @@ -2494,14 +2503,14 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, for (i = 0; i < MAX_BPF_REG; i++) if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) /* keep the maximum range already checked */ - regs[i].range = max_t(u16, regs[i].range, dst_reg->off); + regs[i].range = max(regs[i].range, new_range); for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] != STACK_SPILL) continue; reg = &state->spilled_regs[i / BPF_REG_SIZE]; if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) - reg->range = max_t(u16, reg->range, dst_reg->off); + reg->range = max(reg->range, new_range); } } @@ -2865,19 +2874,19 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { - find_good_pkt_pointers(this_branch, dst_reg); + find_good_pkt_pointers(this_branch, dst_reg, false); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { - find_good_pkt_pointers(other_branch, dst_reg); + find_good_pkt_pointers(other_branch, dst_reg, true); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { - find_good_pkt_pointers(other_branch, ®s[insn->src_reg]); + find_good_pkt_pointers(other_branch, ®s[insn->src_reg], false); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { - find_good_pkt_pointers(this_branch, ®s[insn->src_reg]); + find_good_pkt_pointers(this_branch, ®s[insn->src_reg], true); } else if (is_pointer_value(env, insn->dst_reg)) { verbose("R%d pointer comparison prohibited\n", insn->dst_reg); return -EACCES; -- cgit v1.2.3 From 0fd4759c5515b7f2297d7fee5c45e5d9dd733001 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 21 Oct 2017 02:34:22 +0200 Subject: bpf: fix pattern matches for direct packet access Alexander had a test program with direct packet access, where the access test was in the form of data + X > data_end. In an unrelated change to the program LLVM decided to swap the branches and emitted code for the test in form of data + X <= data_end. We hadn't seen these being generated previously, thus verifier would reject the program. Therefore, fix up the verifier to detect all test cases, so we don't run into such issues in the future. Fixes: b4e432f1000a ("bpf: enable BPF_J{LT, LE, SLT, SLE} opcodes in verifier") Reported-by: Alexander Alemayhu Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'kernel/bpf/verifier.c') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 49cb5ad14746..c48ca2a34b5e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2874,18 +2874,42 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { + /* pkt_data' > pkt_end */ find_good_pkt_pointers(this_branch, dst_reg, false); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && + dst_reg->type == PTR_TO_PACKET_END && + regs[insn->src_reg].type == PTR_TO_PACKET) { + /* pkt_end > pkt_data' */ + find_good_pkt_pointers(other_branch, ®s[insn->src_reg], true); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { + /* pkt_data' < pkt_end */ find_good_pkt_pointers(other_branch, dst_reg, true); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT && + dst_reg->type == PTR_TO_PACKET_END && + regs[insn->src_reg].type == PTR_TO_PACKET) { + /* pkt_end < pkt_data' */ + find_good_pkt_pointers(this_branch, ®s[insn->src_reg], false); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && + dst_reg->type == PTR_TO_PACKET && + regs[insn->src_reg].type == PTR_TO_PACKET_END) { + /* pkt_data' >= pkt_end */ + find_good_pkt_pointers(this_branch, dst_reg, true); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { + /* pkt_end >= pkt_data' */ find_good_pkt_pointers(other_branch, ®s[insn->src_reg], false); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && + dst_reg->type == PTR_TO_PACKET && + regs[insn->src_reg].type == PTR_TO_PACKET_END) { + /* pkt_data' <= pkt_end */ + find_good_pkt_pointers(other_branch, dst_reg, false); } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE && dst_reg->type == PTR_TO_PACKET_END && regs[insn->src_reg].type == PTR_TO_PACKET) { + /* pkt_end <= pkt_data' */ find_good_pkt_pointers(this_branch, ®s[insn->src_reg], true); } else if (is_pointer_value(env, insn->dst_reg)) { verbose("R%d pointer comparison prohibited\n", insn->dst_reg); -- cgit v1.2.3