| field | value | date |
|---|---|---|
| author | Alexei Starovoitov <ast@kernel.org> | 2020-02-25 03:12:21 +0300 |
| committer | Alexei Starovoitov <ast@kernel.org> | 2020-02-25 03:20:10 +0300 |
| commit | 80a836c2506b2b249a9934fbe373eb7a4a98db86 (patch) | |
| tree | 38d6b44dc8654900cfb0e735cd8d027ce00d4e53 /include/linux | |
| parent | 8eece07c011f88da0ccf4127fca9a4e4faaf58ae (diff) | |
| parent | 099bfaa731ec347d3f16a463ae53b88a1700c0af (diff) | |
| download | linux-80a836c2506b2b249a9934fbe373eb7a4a98db86.tar.xz | |
Merge branch 'BPF_and_RT'
Thomas Gleixner says:
====================
This is the third version of the BPF/RT patch set which makes both coexist
nicely. The long explanation can be found in the cover letter of the V1
submission:
https://lore.kernel.org/r/20200214133917.304937432@linutronix.de
V2 is here:
https://lore.kernel.org/r/20200220204517.863202864@linutronix.de
The following changes vs. V2 have been made:
- Rebased to bpf-next, adjusted to the lock changes in the hashmap code.
- Split the preallocation enforcement patch for instrumentation-type BPF
  programs into two pieces:

  1) Emit a one-time warning on !RT kernels when any instrumentation-type
     BPF program uses run-time allocation. Also emit a corresponding
     warning in the verifier log, but allow the program to run for
     backward compatibility's sake. After a grace period this should be
     enforced.

  2) On RT, reject such programs because the memory allocator cannot be
     called from truly atomic contexts.
- Fixed the fallout from V2 as reported by Alexei and 0-day
- Removed the redundant preempt_disable() from trace_call_bpf()
- Removed the unused export of trace_call_bpf()
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
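
To make the two-piece enforcement concrete, here is a minimal hedged sketch of the policy described above. It is not the actual verifier change from this series; the helper name and both boolean parameters are invented for illustration.

```c
#include <linux/bug.h>
#include <linux/errno.h>
#include <linux/kconfig.h>

/*
 * Illustrative sketch only: a non-preallocated map used by an
 * instrumentation-type program draws a one-time warning on !RT kernels,
 * but is rejected on RT kernels where the memory allocator cannot be
 * called from truly atomic contexts.
 */
static int check_instrumentation_map_alloc(bool map_is_prealloc,
					   bool prog_is_instrumentation)
{
	if (!prog_is_instrumentation || map_is_prealloc)
		return 0;

	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
		/* RT: run-time allocation from atomic context is not possible. */
		return -EINVAL;
	}

	/* !RT: warn once, keep the program running for backward compatibility. */
	WARN_ONCE(1, "instrumentation-type BPF program uses run-time allocation\n");
	return 0;
}
```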
Diffstat (limited to 'include/linux')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | include/linux/bpf.h | 38 |
| -rw-r--r-- | include/linux/filter.h | 37 |
2 files changed, 63 insertions, 12 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 49b1a70e12c8..1acd5bf70350 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -885,7 +885,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 		struct bpf_prog *_prog;			\
 		struct bpf_prog_array *_array;		\
 		u32 _ret = 1;				\
-		preempt_disable();			\
+		migrate_disable();			\
 		rcu_read_lock();			\
 		_array = rcu_dereference(array);	\
 		if (unlikely(check_non_null && !_array))\
@@ -898,7 +898,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
 		}					\
 _out:							\
 		rcu_read_unlock();			\
-		preempt_enable();			\
+		migrate_enable();			\
 		_ret;					\
 	 })
 
@@ -932,7 +932,7 @@ _out:							\
 		u32 ret;				\
 		u32 _ret = 1;				\
 		u32 _cn = 0;				\
-		preempt_disable();			\
+		migrate_disable();			\
 		rcu_read_lock();			\
 		_array = rcu_dereference(array);	\
 		_item = &_array->items[0];		\
@@ -944,7 +944,7 @@ _out:							\
 			_item++;			\
 		}					\
 		rcu_read_unlock();			\
-		preempt_enable();			\
+		migrate_enable();			\
 		if (_ret)				\
 			_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);	\
 		else					\
@@ -961,6 +961,36 @@ _out:							\
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
 
+/*
+ * Block execution of BPF programs attached to instrumentation (perf,
+ * kprobes, tracepoints) to prevent deadlocks on map operations as any of
+ * these events can happen inside a region which holds a map bucket lock
+ * and can deadlock on it.
+ *
+ * Use the preemption safe inc/dec variants on RT because migrate disable
+ * is preemptible on RT and preemption in the middle of the RMW operation
+ * might lead to inconsistent state. Use the raw variants for non RT
+ * kernels as migrate_disable() maps to preempt_disable() so the slightly
+ * more expensive save operation can be avoided.
+ */
+static inline void bpf_disable_instrumentation(void)
+{
+	migrate_disable();
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		this_cpu_inc(bpf_prog_active);
+	else
+		__this_cpu_inc(bpf_prog_active);
+}
+
+static inline void bpf_enable_instrumentation(void)
+{
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		this_cpu_dec(bpf_prog_active);
+	else
+		__this_cpu_dec(bpf_prog_active);
+	migrate_enable();
+}
+
 extern const struct file_operations bpf_map_fops;
 extern const struct file_operations bpf_prog_fops;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f349e2c0884c..43b5e455d2f5 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -561,7 +561,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 
 #define __BPF_PROG_RUN(prog, ctx, dfunc)	({			\
 	u32 ret;							\
-	cant_sleep();							\
+	cant_migrate();							\
 	if (static_branch_unlikely(&bpf_stats_enabled_key)) {		\
 		struct bpf_prog_stats *stats;				\
 		u64 start = sched_clock();				\
@@ -576,8 +576,30 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 	}								\
 	ret; })
 
-#define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx,		\
-					       bpf_dispatcher_nopfunc)
+#define BPF_PROG_RUN(prog, ctx)						\
+	__BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc)
+
+/*
+ * Use in preemptible and therefore migratable context to make sure that
+ * the execution of the BPF program runs on one CPU.
+ *
+ * This uses migrate_disable/enable() explicitly to document that the
+ * invocation of a BPF program does not require reentrancy protection
+ * against a BPF program which is invoked from a preempting task.
+ *
+ * For non RT enabled kernels migrate_disable/enable() maps to
+ * preempt_disable/enable(), i.e. it disables also preemption.
+ */
+static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog,
+					  const void *ctx)
+{
+	u32 ret;
+
+	migrate_disable();
+	ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc);
+	migrate_enable();
+	return ret;
+}
 
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
 
@@ -655,6 +677,7 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 	return qdisc_skb_cb(skb)->data;
 }
 
+/* Must be invoked with migration disabled */
 static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
 					 struct sk_buff *skb)
 {
@@ -680,9 +703,9 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
 {
 	u32 res;
 
-	preempt_disable();
+	migrate_disable();
 	res = __bpf_prog_run_save_cb(prog, skb);
-	preempt_enable();
+	migrate_enable();
 
 	return res;
 }
@@ -695,9 +718,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
 	if (unlikely(prog->cb_access))
 		memset(cb_data, 0, BPF_SKB_CB_LEN);
 
-	preempt_disable();
-	res = BPF_PROG_RUN(prog, skb);
-	preempt_enable();
+	res = bpf_prog_run_pin_on_cpu(prog, skb);
 
 	return res;
 }
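
For context on how the helpers added by this diff are meant to be used, below is a rough usage sketch; the two caller functions are hypothetical and not part of the patch. bpf_disable_instrumentation()/bpf_enable_instrumentation() bracket map operations that may run under a bucket lock so an instrumentation-attached program cannot recurse into the same map and deadlock, and bpf_prog_run_pin_on_cpu() replaces an open-coded preempt_disable()/preempt_enable() pair around BPF_PROG_RUN() in preemptible context.

```c
#include <linux/bpf.h>
#include <linux/filter.h>

/*
 * Hypothetical syscall-path map update: block instrumentation-attached BPF
 * programs (perf, kprobes, tracepoints) while the map code may hold a
 * bucket lock.
 */
static int example_map_update(struct bpf_map *map, void *key, void *value,
			      u64 flags)
{
	int ret;

	bpf_disable_instrumentation();
	ret = map->ops->map_update_elem(map, key, value, flags);
	bpf_enable_instrumentation();

	return ret;
}

/*
 * Hypothetical caller in preemptible context: keep the program run on one
 * CPU; the helper does the migrate_disable()/migrate_enable() bracketing
 * internally.
 */
static u32 example_run_prog(const struct bpf_prog *prog, const void *ctx)
{
	return bpf_prog_run_pin_on_cpu(prog, ctx);
}
```

On !RT kernels migrate_disable() still maps to preempt_disable(), so callers written this way behave as before; only RT kernels gain the ability to preempt the BPF program while it stays pinned to one CPU.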
