author	Alexei Starovoitov <ast@kernel.org>	2020-02-25 03:12:21 +0300
committer	Alexei Starovoitov <ast@kernel.org>	2020-02-25 03:20:10 +0300
commit	80a836c2506b2b249a9934fbe373eb7a4a98db86 (patch)
tree	38d6b44dc8654900cfb0e735cd8d027ce00d4e53 /include/linux
parent	8eece07c011f88da0ccf4127fca9a4e4faaf58ae (diff)
parent	099bfaa731ec347d3f16a463ae53b88a1700c0af (diff)
download	linux-80a836c2506b2b249a9934fbe373eb7a4a98db86.tar.xz
Merge branch 'BPF_and_RT'
Thomas Gleixner says:

====================
This is the third version of the BPF/RT patch set which makes both coexist
nicely. The long explanation can be found in the cover letter of the V1
submission:

  https://lore.kernel.org/r/20200214133917.304937432@linutronix.de

V2 is here:

  https://lore.kernel.org/r/20200220204517.863202864@linutronix.de

The following changes vs. V2 have been made:

  - Rebased to bpf-next, adjusted to the lock changes in the hashmap code.

  - Split the preallocation enforcement patch for instrumentation type BPF
    programs into two pieces:

    1) Emit a one-time warning on !RT kernels when any instrumentation type
       BPF program uses run-time allocation. Emit also a corresponding
       warning in the verifier log. But allow the program to run for
       backward compatibility sake. After a grace period this should be
       enforced.

    2) On RT reject such programs because on RT the memory allocator cannot
       be called from truly atomic contexts.

  - Fixed the fallout from V2 as reported by Alexei and 0-day

  - Removed the redundant preempt_disable() from trace_call_bpf()

  - Removed the unused export of trace_call_bpf()
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
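[Editor's note, not part of the commit: a minimal sketch of how a map update
path can use the bpf_disable_instrumentation()/bpf_enable_instrumentation()
helpers introduced below to fence off BPF programs attached to
instrumentation while map internals are being touched. The helper name
update_elem_guarded() is hypothetical.]

/* Editor's sketch, hypothetical helper -- not part of this commit. */
#include <linux/bpf.h>

static int update_elem_guarded(struct bpf_map *map, void *key,
			       void *value, u64 flags)
{
	int err;

	/*
	 * Disable migration and bump the per-CPU bpf_prog_active count so
	 * that BPF programs attached to instrumentation (perf, kprobes,
	 * tracepoints) back off while a map bucket lock may be held.
	 */
	bpf_disable_instrumentation();
	err = map->ops->map_update_elem(map, key, value, flags);
	bpf_enable_instrumentation();

	return err;
}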
Diffstat (limited to 'include/linux')
-rw-r--r--	include/linux/bpf.h	38
-rw-r--r--	include/linux/filter.h	37
2 files changed, 63 insertions, 12 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 49b1a70e12c8..1acd5bf70350 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -885,7 +885,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *_prog; \
struct bpf_prog_array *_array; \
u32 _ret = 1; \
- preempt_disable(); \
+ migrate_disable(); \
rcu_read_lock(); \
_array = rcu_dereference(array); \
if (unlikely(check_non_null && !_array))\
@@ -898,7 +898,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
} \
_out: \
rcu_read_unlock(); \
- preempt_enable(); \
+ migrate_enable(); \
_ret; \
})
@@ -932,7 +932,7 @@ _out: \
u32 ret; \
u32 _ret = 1; \
u32 _cn = 0; \
- preempt_disable(); \
+ migrate_disable(); \
rcu_read_lock(); \
_array = rcu_dereference(array); \
_item = &_array->items[0]; \
@@ -944,7 +944,7 @@ _out: \
_item++; \
} \
rcu_read_unlock(); \
- preempt_enable(); \
+ migrate_enable(); \
if (_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
else \
@@ -961,6 +961,36 @@ _out: \
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
+/*
+ * Block execution of BPF programs attached to instrumentation (perf,
+ * kprobes, tracepoints) to prevent deadlocks on map operations as any of
+ * these events can happen inside a region which holds a map bucket lock
+ * and can deadlock on it.
+ *
+ * Use the preemption safe inc/dec variants on RT because migrate disable
+ * is preemptible on RT and preemption in the middle of the RMW operation
+ * might lead to inconsistent state. Use the raw variants for non RT
+ * kernels as migrate_disable() maps to preempt_disable() so the slightly
+ * more expensive save operation can be avoided.
+ */
+static inline void bpf_disable_instrumentation(void)
+{
+ migrate_disable();
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ this_cpu_inc(bpf_prog_active);
+ else
+ __this_cpu_inc(bpf_prog_active);
+}
+
+static inline void bpf_enable_instrumentation(void)
+{
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ this_cpu_dec(bpf_prog_active);
+ else
+ __this_cpu_dec(bpf_prog_active);
+ migrate_enable();
+}
+
extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f349e2c0884c..43b5e455d2f5 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -561,7 +561,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
#define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \
u32 ret; \
- cant_sleep(); \
+ cant_migrate(); \
if (static_branch_unlikely(&bpf_stats_enabled_key)) { \
struct bpf_prog_stats *stats; \
u64 start = sched_clock(); \
@@ -576,8 +576,30 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
} \
ret; })
-#define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx, \
- bpf_dispatcher_nopfunc)
+#define BPF_PROG_RUN(prog, ctx) \
+ __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc)
+
+/*
+ * Use in preemptible and therefore migratable context to make sure that
+ * the execution of the BPF program runs on one CPU.
+ *
+ * This uses migrate_disable/enable() explicitly to document that the
+ * invocation of a BPF program does not require reentrancy protection
+ * against a BPF program which is invoked from a preempting task.
+ *
+ * For non RT enabled kernels migrate_disable/enable() maps to
+ * preempt_disable/enable(), i.e. it disables also preemption.
+ */
+static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog,
+ const void *ctx)
+{
+ u32 ret;
+
+ migrate_disable();
+ ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc);
+ migrate_enable();
+ return ret;
+}
#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
@@ -655,6 +677,7 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb)
return qdisc_skb_cb(skb)->data;
}
+/* Must be invoked with migration disabled */
static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
struct sk_buff *skb)
{
@@ -680,9 +703,9 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
{
u32 res;
- preempt_disable();
+ migrate_disable();
res = __bpf_prog_run_save_cb(prog, skb);
- preempt_enable();
+ migrate_enable();
return res;
}
@@ -695,9 +718,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
if (unlikely(prog->cb_access))
memset(cb_data, 0, BPF_SKB_CB_LEN);
- preempt_disable();
- res = BPF_PROG_RUN(prog, skb);
- preempt_enable();
+ res = bpf_prog_run_pin_on_cpu(prog, skb);
return res;
}
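[Editor's note, not part of the commit: a usage sketch of the new
bpf_prog_run_pin_on_cpu() helper from preemptible (process) context, which is
the case the comment above describes. The caller skb_matches_filter() is
hypothetical.]

/* Editor's sketch, hypothetical caller -- not part of this commit. */
#include <linux/filter.h>
#include <linux/skbuff.h>

static bool skb_matches_filter(const struct bpf_prog *prog,
			       struct sk_buff *skb)
{
	/*
	 * Callable while preemptible; migration is disabled internally so
	 * the program runs on one CPU, without disabling preemption on
	 * PREEMPT_RT kernels.
	 */
	return bpf_prog_run_pin_on_cpu(prog, skb) != 0;
}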