summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/linux/ftrace_event.h 9
-rw-r--r-- include/trace/ftrace.h 39
-rw-r--r-- kernel/trace/trace_event_profile.c 41
-rw-r--r-- kernel/trace/trace_kprobe.c 50
-rw-r--r-- kernel/trace/trace_syscalls.c 44
5 files changed, 133 insertions, 50 deletions
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index f7b47c336703..43360c1d8f70 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -137,8 +137,13 @@ struct ftrace_event_call {
#define FTRACE_MAX_PROFILE_SIZE 2048
-extern char *trace_profile_buf;
-extern char *trace_profile_buf_nmi;
+struct perf_trace_buf { /* scratch buffer for perf trace events; instances are obtained with alloc_percpu() and per_cpu_ptr() */
+ char buf[FTRACE_MAX_PROFILE_SIZE]; /* raw storage in which a profile handler builds one event record */
+ int recursion; /* nesting count: handlers increment it on entry, skip the event if it was already non-zero, and decrement it on exit */
+};
+
+extern struct perf_trace_buf *perf_trace_buf;
+extern struct perf_trace_buf *perf_trace_buf_nmi;
#define MAX_FILTER_PRED 32
#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index a7f946094128..4945d1c99864 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -649,6 +649,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
* struct ftrace_event_call *event_call = &event_<call>;
* extern void perf_tp_event(int, u64, u64, void *, int);
* struct ftrace_raw_##call *entry;
+ * struct perf_trace_buf *trace_buf;
* u64 __addr = 0, __count = 1;
* unsigned long irq_flags;
* struct trace_entry *ent;
@@ -673,14 +674,25 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
* __cpu = smp_processor_id();
*
* if (in_nmi())
- * raw_data = rcu_dereference(trace_profile_buf_nmi);
+ * trace_buf = rcu_dereference(perf_trace_buf_nmi);
* else
- * raw_data = rcu_dereference(trace_profile_buf);
+ * trace_buf = rcu_dereference(perf_trace_buf);
*
- * if (!raw_data)
+ * if (!trace_buf)
* goto end;
*
- * raw_data = per_cpu_ptr(raw_data, __cpu);
+ * trace_buf = per_cpu_ptr(trace_buf, __cpu);
+ *
+ * // Avoid recursion from perf that could mess up the buffer
+ * if (trace_buf->recursion++)
+ * goto end_recursion;
+ *
+ * // Make recursion update visible before entering perf_tp_event
+ * // so that we protect from perf recursions.
+ *
+ * barrier();
+ *
+ * raw_data = trace_buf->buf;
*
* //zero dead bytes from alignment to avoid stack leak to userspace:
* *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
@@ -713,8 +725,9 @@ static void ftrace_profile_##call(proto) \
{ \
struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
struct ftrace_event_call *event_call = &event_##call; \
- extern void perf_tp_event(int, u64, u64, void *, int); \
+ extern void perf_tp_event(int, u64, u64, void *, int); \
struct ftrace_raw_##call *entry; \
+ struct perf_trace_buf *trace_buf; \
u64 __addr = 0, __count = 1; \
unsigned long irq_flags; \
struct trace_entry *ent; \
@@ -739,14 +752,20 @@ static void ftrace_profile_##call(proto) \
__cpu = smp_processor_id(); \
\
if (in_nmi()) \
- raw_data = rcu_dereference(trace_profile_buf_nmi); \
+ trace_buf = rcu_dereference(perf_trace_buf_nmi); \
else \
- raw_data = rcu_dereference(trace_profile_buf); \
+ trace_buf = rcu_dereference(perf_trace_buf); \
\
- if (!raw_data) \
+ if (!trace_buf) \
goto end; \
\
- raw_data = per_cpu_ptr(raw_data, __cpu); \
+ trace_buf = per_cpu_ptr(trace_buf, __cpu); \
+ if (trace_buf->recursion++) \
+ goto end_recursion; \
+ \
+ barrier(); \
+ \
+ raw_data = trace_buf->buf; \
\
*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
entry = (struct ftrace_raw_##call *)raw_data; \
@@ -761,6 +780,8 @@ static void ftrace_profile_##call(proto) \
perf_tp_event(event_call->id, __addr, __count, entry, \
__entry_size); \
\
+end_recursion: \
+ trace_buf->recursion--; \
end: \
local_irq_restore(irq_flags); \
\
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index c9f687ab0d4f..e0d351b01f5a 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -8,41 +8,36 @@
#include <linux/module.h>
#include "trace.h"
-/*
- * We can't use a size but a type in alloc_percpu()
- * So let's create a dummy type that matches the desired size
- */
-typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t;
-char *trace_profile_buf;
-EXPORT_SYMBOL_GPL(trace_profile_buf);
+struct perf_trace_buf *perf_trace_buf;
+EXPORT_SYMBOL_GPL(perf_trace_buf);
-char *trace_profile_buf_nmi;
-EXPORT_SYMBOL_GPL(trace_profile_buf_nmi);
+struct perf_trace_buf *perf_trace_buf_nmi;
+EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
/* Count the events in use (per event id, not per instance) */
static int total_profile_count;
static int ftrace_profile_enable_event(struct ftrace_event_call *event)
{
- char *buf;
+ struct perf_trace_buf *buf;
int ret = -ENOMEM;
if (atomic_inc_return(&event->profile_count))
return 0;
if (!total_profile_count) {
- buf = (char *)alloc_percpu(profile_buf_t);
+ buf = alloc_percpu(struct perf_trace_buf);
if (!buf)
goto fail_buf;
- rcu_assign_pointer(trace_profile_buf, buf);
+ rcu_assign_pointer(perf_trace_buf, buf);
- buf = (char *)alloc_percpu(profile_buf_t);
+ buf = alloc_percpu(struct perf_trace_buf);
if (!buf)
goto fail_buf_nmi;
- rcu_assign_pointer(trace_profile_buf_nmi, buf);
+ rcu_assign_pointer(perf_trace_buf_nmi, buf);
}
ret = event->profile_enable(event);
@@ -53,10 +48,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
fail_buf_nmi:
if (!total_profile_count) {
- free_percpu(trace_profile_buf_nmi);
- free_percpu(trace_profile_buf);
- trace_profile_buf_nmi = NULL;
- trace_profile_buf = NULL;
+ free_percpu(perf_trace_buf_nmi);
+ free_percpu(perf_trace_buf);
+ perf_trace_buf_nmi = NULL;
+ perf_trace_buf = NULL;
}
fail_buf:
atomic_dec(&event->profile_count);
@@ -84,7 +79,7 @@ int ftrace_profile_enable(int event_id)
static void ftrace_profile_disable_event(struct ftrace_event_call *event)
{
- char *buf, *nmi_buf;
+ struct perf_trace_buf *buf, *nmi_buf;
if (!atomic_add_negative(-1, &event->profile_count))
return;
@@ -92,11 +87,11 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
event->profile_disable(event);
if (!--total_profile_count) {
- buf = trace_profile_buf;
- rcu_assign_pointer(trace_profile_buf, NULL);
+ buf = perf_trace_buf;
+ rcu_assign_pointer(perf_trace_buf, NULL);
- nmi_buf = trace_profile_buf_nmi;
- rcu_assign_pointer(trace_profile_buf_nmi, NULL);
+ nmi_buf = perf_trace_buf_nmi;
+ rcu_assign_pointer(perf_trace_buf_nmi, NULL);
/*
* Ensure every events in profiling have finished before
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index cf17a6694f32..3696476f307d 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1208,6 +1208,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct ftrace_event_call *call = &tp->call;
struct kprobe_trace_entry *entry;
+ struct perf_trace_buf *trace_buf;
struct trace_entry *ent;
int size, __size, i, pc, __cpu;
unsigned long irq_flags;
@@ -1229,14 +1230,26 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
__cpu = smp_processor_id();
if (in_nmi())
- raw_data = rcu_dereference(trace_profile_buf_nmi);
+ trace_buf = rcu_dereference(perf_trace_buf_nmi);
else
- raw_data = rcu_dereference(trace_profile_buf);
+ trace_buf = rcu_dereference(perf_trace_buf);
- if (!raw_data)
+ if (!trace_buf)
goto end;
- raw_data = per_cpu_ptr(raw_data, __cpu);
+ trace_buf = per_cpu_ptr(trace_buf, __cpu);
+
+ if (trace_buf->recursion++)
+ goto end_recursion;
+
+ /*
+ * Make recursion update visible before entering perf_tp_event
+ * so that we protect from perf recursions.
+ */
+ barrier();
+
+ raw_data = trace_buf->buf;
+
/* Zero dead bytes from alignment to avoid buffer leak to userspace */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
entry = (struct kprobe_trace_entry *)raw_data;
@@ -1249,8 +1262,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
perf_tp_event(call->id, entry->ip, 1, entry, size);
+
+end_recursion:
+ trace_buf->recursion--;
end:
local_irq_restore(irq_flags);
+
return 0;
}
@@ -1261,6 +1278,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
struct ftrace_event_call *call = &tp->call;
struct kretprobe_trace_entry *entry;
+ struct perf_trace_buf *trace_buf;
struct trace_entry *ent;
int size, __size, i, pc, __cpu;
unsigned long irq_flags;
@@ -1282,14 +1300,26 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
__cpu = smp_processor_id();
if (in_nmi())
- raw_data = rcu_dereference(trace_profile_buf_nmi);
+ trace_buf = rcu_dereference(perf_trace_buf_nmi);
else
- raw_data = rcu_dereference(trace_profile_buf);
+ trace_buf = rcu_dereference(perf_trace_buf);
- if (!raw_data)
+ if (!trace_buf)
goto end;
- raw_data = per_cpu_ptr(raw_data, __cpu);
+ trace_buf = per_cpu_ptr(trace_buf, __cpu);
+
+ if (trace_buf->recursion++)
+ goto end_recursion;
+
+ /*
+ * Make recursion update visible before entering perf_tp_event
+ * so that we protect from perf recursions.
+ */
+ barrier();
+
+ raw_data = trace_buf->buf;
+
/* Zero dead bytes from alignment to avoid buffer leak to userspace */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
entry = (struct kretprobe_trace_entry *)raw_data;
@@ -1303,8 +1333,12 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
for (i = 0; i < tp->nr_args; i++)
entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
+
+end_recursion:
+ trace_buf->recursion--;
end:
local_irq_restore(irq_flags);
+
return 0;
}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 58b8e5370767..51213b0aa81b 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -477,6 +477,7 @@ static int sys_prof_refcount_exit;
static void prof_syscall_enter(struct pt_regs *regs, long id)
{
struct syscall_metadata *sys_data;
+ struct perf_trace_buf *trace_buf;
struct syscall_trace_enter *rec;
unsigned long flags;
char *raw_data;
@@ -507,14 +508,25 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
cpu = smp_processor_id();
if (in_nmi())
- raw_data = rcu_dereference(trace_profile_buf_nmi);
+ trace_buf = rcu_dereference(perf_trace_buf_nmi);
else
- raw_data = rcu_dereference(trace_profile_buf);
+ trace_buf = rcu_dereference(perf_trace_buf);
- if (!raw_data)
+ if (!trace_buf)
goto end;
- raw_data = per_cpu_ptr(raw_data, cpu);
+ trace_buf = per_cpu_ptr(trace_buf, cpu);
+
+ if (trace_buf->recursion++)
+ goto end_recursion;
+
+ /*
+ * Make recursion update visible before entering perf_tp_event
+ * so that we protect from perf recursions.
+ */
+ barrier();
+
+ raw_data = trace_buf->buf;
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -527,6 +539,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
(unsigned long *)&rec->args);
perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
+end_recursion:
+ trace_buf->recursion--;
end:
local_irq_restore(flags);
}
@@ -574,6 +588,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
{
struct syscall_metadata *sys_data;
struct syscall_trace_exit *rec;
+ struct perf_trace_buf *trace_buf;
unsigned long flags;
int syscall_nr;
char *raw_data;
@@ -605,14 +620,25 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
cpu = smp_processor_id();
if (in_nmi())
- raw_data = rcu_dereference(trace_profile_buf_nmi);
+ trace_buf = rcu_dereference(perf_trace_buf_nmi);
else
- raw_data = rcu_dereference(trace_profile_buf);
+ trace_buf = rcu_dereference(perf_trace_buf);
- if (!raw_data)
+ if (!trace_buf)
goto end;
- raw_data = per_cpu_ptr(raw_data, cpu);
+ trace_buf = per_cpu_ptr(trace_buf, cpu);
+
+ if (trace_buf->recursion++)
+ goto end_recursion;
+
+ /*
+ * Make recursion update visible before entering perf_tp_event
+ * so that we protect from perf recursions.
+ */
+ barrier();
+
+ raw_data = trace_buf->buf;
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -626,6 +652,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
+end_recursion:
+ trace_buf->recursion--;
end:
local_irq_restore(flags);
}