Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/Kconfig                        |  22
-rw-r--r--  kernel/trace/blktrace.c                     |  36
-rw-r--r--  kernel/trace/bpf_trace.c                    |  97
-rw-r--r--  kernel/trace/fgraph.c                       |  76
-rw-r--r--  kernel/trace/fprobe.c                       | 662
-rw-r--r--  kernel/trace/ftrace.c                       | 266
-rw-r--r--  kernel/trace/pid_list.c                     |   2
-rw-r--r--  kernel/trace/ring_buffer.c                  | 106
-rw-r--r--  kernel/trace/rv/Kconfig                     |  27
-rw-r--r--  kernel/trace/rv/Makefile                    |   3
-rw-r--r--  kernel/trace/rv/monitors/wip/Kconfig        |  12
-rw-r--r--  kernel/trace/rv/monitors/wip/wip.c          |   2
-rw-r--r--  kernel/trace/rv/monitors/wip/wip_trace.h    |  15
-rw-r--r--  kernel/trace/rv/monitors/wwnr/Kconfig       |  11
-rw-r--r--  kernel/trace/rv/monitors/wwnr/wwnr.c        |   2
-rw-r--r--  kernel/trace/rv/monitors/wwnr/wwnr_trace.h  |  16
-rw-r--r--  kernel/trace/rv/rv.c                        |   2
-rw-r--r--  kernel/trace/rv/rv_trace.h                  | 130
-rw-r--r--  kernel/trace/trace.c                        | 585
-rw-r--r--  kernel/trace/trace.h                        |  32
-rw-r--r--  kernel/trace/trace_dynevent.c               |  23
-rw-r--r--  kernel/trace/trace_entries.h                |   8
-rw-r--r--  kernel/trace/trace_eprobe.c                 |  36
-rw-r--r--  kernel/trace/trace_events.c                 | 709
-rw-r--r--  kernel/trace/trace_events_filter.c          |  23
-rw-r--r--  kernel/trace/trace_events_hist.c            | 119
-rw-r--r--  kernel/trace/trace_events_synth.c           |  17
-rw-r--r--  kernel/trace/trace_events_trigger.c         |  67
-rw-r--r--  kernel/trace/trace_events_user.c            |   2
-rw-r--r--  kernel/trace/trace_fprobe.c                 | 270
-rw-r--r--  kernel/trace/trace_functions.c              |   9
-rw-r--r--  kernel/trace/trace_functions_graph.c        |  82
-rw-r--r--  kernel/trace/trace_irqsoff.c                |  23
-rw-r--r--  kernel/trace/trace_kprobe.c                 | 151
-rw-r--r--  kernel/trace/trace_osnoise.c                |  57
-rw-r--r--  kernel/trace/trace_output.c                 |   6
-rw-r--r--  kernel/trace/trace_probe.c                  |  51
-rw-r--r--  kernel/trace/trace_probe_tmpl.h             |   2
-rw-r--r--  kernel/trace/trace_sched_switch.c           |   2
-rw-r--r--  kernel/trace/trace_sched_wakeup.c           |  24
-rw-r--r--  kernel/trace/trace_selftest.c               |  11
-rw-r--r--  kernel/trace/trace_stack.c                  |   6
-rw-r--r--  kernel/trace/trace_stat.c                   |  26
-rw-r--r--  kernel/trace/trace_uprobe.c                 |  15
44 files changed, 2305 insertions, 1538 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 74c2b1d43bb9..d570b8b9c0a9 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -31,9 +31,14 @@ config HAVE_FUNCTION_GRAPH_TRACER
help
See Documentation/trace/ftrace-design.rst
-config HAVE_FUNCTION_GRAPH_RETVAL
+config HAVE_FUNCTION_GRAPH_FREGS
bool
+config HAVE_FTRACE_GRAPH_FUNC
+ bool
+ help
+ True if ftrace_graph_func() is defined.
+
config HAVE_DYNAMIC_FTRACE
bool
help
@@ -57,6 +62,12 @@ config HAVE_DYNAMIC_FTRACE_WITH_ARGS
This allows for use of ftrace_regs_get_argument() and
ftrace_regs_get_stack_pointer().
+config HAVE_FTRACE_REGS_HAVING_PT_REGS
+ bool
+ help
+ If this is set, ftrace_regs has pt_regs, thus it can be converted to
+ pt_regs without allocating memory.
+
config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
bool
help
@@ -232,7 +243,7 @@ config FUNCTION_GRAPH_TRACER
config FUNCTION_GRAPH_RETVAL
bool "Kernel Function Graph Return Value"
- depends on HAVE_FUNCTION_GRAPH_RETVAL
+ depends on HAVE_FUNCTION_GRAPH_FREGS
depends on FUNCTION_GRAPH_TRACER
default n
help
@@ -296,10 +307,9 @@ config DYNAMIC_FTRACE_WITH_ARGS
config FPROBE
bool "Kernel Function Probe (fprobe)"
- depends on FUNCTION_TRACER
- depends on DYNAMIC_FTRACE_WITH_REGS
- depends on HAVE_RETHOOK
- select RETHOOK
+ depends on HAVE_FUNCTION_GRAPH_FREGS && HAVE_FTRACE_GRAPH_FUNC
+ depends on DYNAMIC_FTRACE_WITH_ARGS
+ select FUNCTION_GRAPH_TRACER
default n
help
This option enables kernel function probe (fprobe) based on ftrace.
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 8fd292d34d89..3679a6d18934 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -617,8 +617,9 @@ err:
return ret;
}
-static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
- struct block_device *bdev, char __user *arg)
+int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+ struct block_device *bdev,
+ char __user *arg)
{
struct blk_user_trace_setup buts;
int ret;
@@ -627,29 +628,18 @@ static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (ret)
return -EFAULT;
+ mutex_lock(&q->debugfs_mutex);
ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
+ mutex_unlock(&q->debugfs_mutex);
if (ret)
return ret;
if (copy_to_user(arg, &buts, sizeof(buts))) {
- __blk_trace_remove(q);
+ blk_trace_remove(q);
return -EFAULT;
}
return 0;
}
-
-int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
- struct block_device *bdev,
- char __user *arg)
-{
- int ret;
-
- mutex_lock(&q->debugfs_mutex);
- ret = __blk_trace_setup(q, name, dev, bdev, arg);
- mutex_unlock(&q->debugfs_mutex);
-
- return ret;
-}
EXPORT_SYMBOL_GPL(blk_trace_setup);
#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
@@ -673,12 +663,14 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
.pid = cbuts.pid,
};
+ mutex_lock(&q->debugfs_mutex);
ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
+ mutex_unlock(&q->debugfs_mutex);
if (ret)
return ret;
if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) {
- __blk_trace_remove(q);
+ blk_trace_remove(q);
return -EFAULT;
}
@@ -732,12 +724,10 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
int ret, start = 0;
char b[BDEVNAME_SIZE];
- mutex_lock(&q->debugfs_mutex);
-
switch (cmd) {
case BLKTRACESETUP:
snprintf(b, sizeof(b), "%pg", bdev);
- ret = __blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
+ ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
break;
#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
case BLKTRACESETUP32:
@@ -749,17 +739,15 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
start = 1;
fallthrough;
case BLKTRACESTOP:
- ret = __blk_trace_startstop(q, start);
+ ret = blk_trace_startstop(q, start);
break;
case BLKTRACETEARDOWN:
- ret = __blk_trace_remove(q);
+ ret = blk_trace_remove(q);
break;
default:
ret = -ENOTTY;
break;
}
-
- mutex_unlock(&q->debugfs_mutex);
return ret;
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 1b8db5aee9d3..adc947587eb8 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -357,17 +357,6 @@ static const struct bpf_func_proto bpf_probe_write_user_proto = {
.arg3_type = ARG_CONST_SIZE,
};
-static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
-{
- if (!capable(CAP_SYS_ADMIN))
- return NULL;
-
- pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
- current->comm, task_pid_nr(current));
-
- return &bpf_probe_write_user_proto;
-}
-
#define MAX_TRACE_PRINTK_VARARGS 3
#define BPF_TRACE_PRINTK_SIZE 1024
@@ -619,7 +608,8 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
- u64 flags, struct perf_sample_data *sd)
+ u64 flags, struct perf_raw_record *raw,
+ struct perf_sample_data *sd)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
unsigned int cpu = smp_processor_id();
@@ -644,6 +634,8 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
if (unlikely(event->oncpu != cpu))
return -EOPNOTSUPP;
+ perf_sample_save_raw_data(sd, event, raw);
+
return perf_event_output(event, sd, regs);
}
@@ -687,9 +679,8 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
}
perf_sample_data_init(sd, 0, 0);
- perf_sample_save_raw_data(sd, &raw);
- err = __bpf_perf_event_output(regs, map, flags, sd);
+ err = __bpf_perf_event_output(regs, map, flags, &raw, sd);
out:
this_cpu_dec(bpf_trace_nest_level);
preempt_enable();
@@ -748,9 +739,8 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
perf_fetch_caller_regs(regs);
perf_sample_data_init(sd, 0, 0);
- perf_sample_save_raw_data(sd, &raw);
- ret = __bpf_perf_event_output(regs, map, flags, sd);
+ ret = __bpf_perf_event_output(regs, map, flags, &raw, sd);
out:
this_cpu_dec(bpf_event_output_nest_level);
preempt_enable();
@@ -853,7 +843,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struc
if (unlikely(is_global_init(task)))
return -EPERM;
- if (irqs_disabled()) {
+ if (!preemptible()) {
/* Do an early check on signal validity. Otherwise,
* the error is lost in deferred irq_work.
*/
@@ -1444,6 +1434,8 @@ late_initcall(bpf_key_sig_kfuncs_init);
static const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
+ const struct bpf_func_proto *func_proto;
+
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
return &bpf_map_lookup_elem_proto;
@@ -1485,9 +1477,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_perf_event_read_proto;
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
- case BPF_FUNC_probe_write_user:
- return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
- NULL : bpf_get_probe_write_proto();
case BPF_FUNC_probe_read_user:
return &bpf_probe_read_user_proto;
case BPF_FUNC_probe_read_kernel:
@@ -1566,7 +1555,22 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_trace_vprintk:
return bpf_get_trace_vprintk_proto();
default:
- return bpf_base_func_proto(func_id, prog);
+ break;
+ }
+
+ func_proto = bpf_base_func_proto(func_id, prog);
+ if (func_proto)
+ return func_proto;
+
+ if (!bpf_token_capable(prog->aux->token, CAP_SYS_ADMIN))
+ return NULL;
+
+ switch (func_id) {
+ case BPF_FUNC_probe_write_user:
+ return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ?
+ NULL : &bpf_probe_write_user_proto;
+ default:
+ return NULL;
}
}
@@ -2242,6 +2246,7 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
{
struct bpf_prog_array *old_array;
struct bpf_prog_array *new_array;
+ struct bpf_prog *prog = NULL;
int ret;
mutex_lock(&bpf_event_mutex);
@@ -2262,18 +2267,22 @@ void perf_event_detach_bpf_prog(struct perf_event *event)
}
put:
- /*
- * It could be that the bpf_prog is not sleepable (and will be freed
- * via normal RCU), but is called from a point that supports sleepable
- * programs and uses tasks-trace-RCU.
- */
- synchronize_rcu_tasks_trace();
-
- bpf_prog_put(event->prog);
+ prog = event->prog;
event->prog = NULL;
unlock:
mutex_unlock(&bpf_event_mutex);
+
+ if (prog) {
+ /*
+ * It could be that the bpf_prog is not sleepable (and will be freed
+ * via normal RCU), but is called from a point that supports sleepable
+ * programs and uses tasks-trace-RCU.
+ */
+ synchronize_rcu_tasks_trace();
+
+ bpf_prog_put(prog);
+ }
}
int perf_event_query_prog_array(struct perf_event *event, void __user *info)
@@ -2584,6 +2593,20 @@ struct user_syms {
char *buf;
};
+#ifndef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS
+static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs);
+#define bpf_kprobe_multi_pt_regs_ptr() this_cpu_ptr(&bpf_kprobe_multi_pt_regs)
+#else
+#define bpf_kprobe_multi_pt_regs_ptr() (NULL)
+#endif
+
+static unsigned long ftrace_get_entry_ip(unsigned long fentry_ip)
+{
+ unsigned long ip = ftrace_get_symaddr(fentry_ip);
+
+ return ip ? : fentry_ip;
+}
+
static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32 cnt)
{
unsigned long __user usymbol;
@@ -2778,7 +2801,7 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
static int
kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
- unsigned long entry_ip, struct pt_regs *regs,
+ unsigned long entry_ip, struct ftrace_regs *fregs,
bool is_return, void *data)
{
struct bpf_kprobe_multi_run_ctx run_ctx = {
@@ -2790,16 +2813,18 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
.entry_ip = entry_ip,
};
struct bpf_run_ctx *old_run_ctx;
+ struct pt_regs *regs;
int err;
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
bpf_prog_inc_misses_counter(link->link.prog);
- err = 0;
+ err = 1;
goto out;
}
migrate_disable();
rcu_read_lock();
+ regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr());
old_run_ctx = bpf_set_run_ctx(&run_ctx.session_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
@@ -2813,26 +2838,28 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
static int
kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *data)
{
struct bpf_kprobe_multi_link *link;
int err;
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
- err = kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, false, data);
+ err = kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip),
+ fregs, false, data);
return is_kprobe_session(link->link.prog) ? err : 0;
}
static void
kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *data)
{
struct bpf_kprobe_multi_link *link;
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
- kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs, true, data);
+ kprobe_multi_link_prog_run(link, ftrace_get_entry_ip(fentry_ip),
+ fregs, true, data);
}
static int symbols_cmp_r(const void *a, const void *b, const void *priv)
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 0bf78517b5d4..5dddfc2149f6 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -292,13 +292,15 @@ static inline unsigned long make_data_type_val(int idx, int size, int offset)
}
/* ftrace_graph_entry set to this to tell some archs to run function graph */
-static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops)
+static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops,
+ struct ftrace_regs *fregs)
{
return 0;
}
/* ftrace_graph_return set to this to tell some archs to run function graph */
-static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops)
+static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops,
+ struct ftrace_regs *fregs)
{
}
@@ -520,13 +522,15 @@ int __weak ftrace_disable_ftrace_graph_caller(void)
#endif
int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
return 0;
}
static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
}
@@ -644,14 +648,20 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
#endif
/* If the caller does not use ftrace, call this function. */
-int function_graph_enter(unsigned long ret, unsigned long func,
- unsigned long frame_pointer, unsigned long *retp)
+int function_graph_enter_regs(unsigned long ret, unsigned long func,
+ unsigned long frame_pointer, unsigned long *retp,
+ struct ftrace_regs *fregs)
{
struct ftrace_graph_ent trace;
unsigned long bitmap = 0;
int offset;
+ int bit;
int i;
+ bit = ftrace_test_recursion_trylock(func, ret);
+ if (bit < 0)
+ return -EBUSY;
+
trace.func = func;
trace.depth = ++current->curr_ret_depth;
@@ -663,7 +673,7 @@ int function_graph_enter(unsigned long ret, unsigned long func,
if (static_branch_likely(&fgraph_do_direct)) {
int save_curr_ret_stack = current->curr_ret_stack;
- if (static_call(fgraph_func)(&trace, fgraph_direct_gops))
+ if (static_call(fgraph_func)(&trace, fgraph_direct_gops, fregs))
bitmap |= BIT(fgraph_direct_gops->idx);
else
/* Clear out any saved storage */
@@ -681,7 +691,7 @@ int function_graph_enter(unsigned long ret, unsigned long func,
save_curr_ret_stack = current->curr_ret_stack;
if (ftrace_ops_test(&gops->ops, func, NULL) &&
- gops->entryfunc(&trace, gops))
+ gops->entryfunc(&trace, gops, fregs))
bitmap |= BIT(i);
else
/* Clear out any saved storage */
@@ -697,12 +707,13 @@ int function_graph_enter(unsigned long ret, unsigned long func,
* flag, set that bit always.
*/
set_bitmap(current, offset, bitmap | BIT(0));
-
+ ftrace_test_recursion_unlock(bit);
return 0;
out_ret:
current->curr_ret_stack -= FGRAPH_FRAME_OFFSET + 1;
out:
current->curr_ret_depth--;
+ ftrace_test_recursion_unlock(bit);
return -EBUSY;
}
@@ -792,15 +803,12 @@ static struct notifier_block ftrace_suspend_notifier = {
.notifier_call = ftrace_suspend_notifier_call,
};
-/* fgraph_ret_regs is not defined without CONFIG_FUNCTION_GRAPH_RETVAL */
-struct fgraph_ret_regs;
-
/*
* Send the trace to the ring-buffer.
* @return the original return address.
*/
-static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs,
- unsigned long frame_pointer)
+static inline unsigned long
+__ftrace_return_to_handler(struct ftrace_regs *fregs, unsigned long frame_pointer)
{
struct ftrace_ret_stack *ret_stack;
struct ftrace_graph_ret trace;
@@ -818,9 +826,11 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs
return (unsigned long)panic;
}
- trace.rettime = trace_clock_local();
+ if (fregs)
+ ftrace_regs_set_instruction_pointer(fregs, ret);
+
#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
- trace.retval = fgraph_ret_regs_return_value(ret_regs);
+ trace.retval = ftrace_regs_get_return_value(fregs);
#endif
bitmap = get_bitmap_bits(current, offset);
@@ -828,17 +838,17 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs
#ifdef CONFIG_HAVE_STATIC_CALL
if (static_branch_likely(&fgraph_do_direct)) {
if (test_bit(fgraph_direct_gops->idx, &bitmap))
- static_call(fgraph_retfunc)(&trace, fgraph_direct_gops);
+ static_call(fgraph_retfunc)(&trace, fgraph_direct_gops, fregs);
} else
#endif
{
for_each_set_bit(i, &bitmap, sizeof(bitmap) * BITS_PER_BYTE) {
- struct fgraph_ops *gops = fgraph_array[i];
+ struct fgraph_ops *gops = READ_ONCE(fgraph_array[i]);
if (gops == &fgraph_stub)
continue;
- gops->retfunc(&trace, gops);
+ gops->retfunc(&trace, gops, fregs);
}
}
@@ -855,14 +865,14 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs
}
/*
- * After all architecures have selected HAVE_FUNCTION_GRAPH_RETVAL, we can
- * leave only ftrace_return_to_handler(ret_regs).
+ * After all architectures have selected HAVE_FUNCTION_GRAPH_FREGS, we can
+ * leave only ftrace_return_to_handler(fregs).
*/
-#ifdef CONFIG_HAVE_FUNCTION_GRAPH_RETVAL
-unsigned long ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs)
+#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FREGS
+unsigned long ftrace_return_to_handler(struct ftrace_regs *fregs)
{
- return __ftrace_return_to_handler(ret_regs,
- fgraph_ret_regs_frame_pointer(ret_regs));
+ return __ftrace_return_to_handler(fregs,
+ ftrace_regs_get_frame_pointer(fregs));
}
#else
unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
@@ -1010,7 +1020,8 @@ void ftrace_graph_sleep_time_control(bool enable)
* Simply points to ftrace_stub, but with the proper protocol.
* Defined by the linker script in linux/vmlinux.lds.h
*/
-void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops *gops);
+void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops *gops,
+ struct ftrace_regs *fregs);
/* The callbacks that hook a function */
trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph;
@@ -1174,7 +1185,8 @@ void ftrace_graph_exit_task(struct task_struct *t)
#ifdef CONFIG_DYNAMIC_FTRACE
static int fgraph_pid_func(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct trace_array *tr = gops->ops.private;
int pid;
@@ -1188,7 +1200,7 @@ static int fgraph_pid_func(struct ftrace_graph_ent *trace,
return 0;
}
- return gops->saved_func(trace, gops);
+ return gops->saved_func(trace, gops, fregs);
}
void fgraph_update_pid_func(void)
@@ -1215,7 +1227,7 @@ void fgraph_update_pid_func(void)
static int start_graph_tracing(void)
{
unsigned long **ret_stack_list;
- int ret;
+ int ret, cpu;
ret_stack_list = kcalloc(FTRACE_RETSTACK_ALLOC_SIZE,
sizeof(*ret_stack_list), GFP_KERNEL);
@@ -1223,6 +1235,12 @@ static int start_graph_tracing(void)
if (!ret_stack_list)
return -ENOMEM;
+ /* The cpu_boot init_task->ret_stack will never be freed */
+ for_each_online_cpu(cpu) {
+ if (!idle_task(cpu)->ret_stack)
+ ftrace_graph_init_idle_task(idle_task(cpu), cpu);
+ }
+
do {
ret = alloc_retstack_tasklist(ret_stack_list);
} while (ret == -EAGAIN);
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 9ff018245840..33082c4e8154 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -8,98 +8,224 @@
#include <linux/fprobe.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
-#include <linux/rethook.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/sort.h>
+#include <asm/fprobe.h>
+
#include "trace.h"
-struct fprobe_rethook_node {
- struct rethook_node node;
- unsigned long entry_ip;
- unsigned long entry_parent_ip;
- char data[];
-};
+#define FPROBE_IP_HASH_BITS 8
+#define FPROBE_IP_TABLE_SIZE (1 << FPROBE_IP_HASH_BITS)
-static inline void __fprobe_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct ftrace_regs *fregs)
-{
- struct fprobe_rethook_node *fpr;
- struct rethook_node *rh = NULL;
- struct fprobe *fp;
- void *entry_data = NULL;
- int ret = 0;
+#define FPROBE_HASH_BITS 6
+#define FPROBE_TABLE_SIZE (1 << FPROBE_HASH_BITS)
- fp = container_of(ops, struct fprobe, ops);
+#define SIZE_IN_LONG(x) ((x + sizeof(long) - 1) >> (sizeof(long) == 8 ? 3 : 2))
- if (fp->exit_handler) {
- rh = rethook_try_get(fp->rethook);
- if (!rh) {
- fp->nmissed++;
- return;
- }
- fpr = container_of(rh, struct fprobe_rethook_node, node);
- fpr->entry_ip = ip;
- fpr->entry_parent_ip = parent_ip;
- if (fp->entry_data_size)
- entry_data = fpr->data;
+/*
+ * fprobe_table: holds 'fprobe_hlist::hlist' for checking whether the fprobe
+ * still exists. The key is the address of the fprobe instance.
+ * fprobe_ip_table: holds 'fprobe_hlist::array[*]' for searching the fprobe
+ * instance related to the function address. The key is the ftrace IP
+ * address.
+ *
+ * When unregistering the fprobe, fprobe_hlist::fp and fprobe_hlist::array[*].fp
+ * are set to NULL and deleted from both hash tables (by hlist_del_rcu).
+ * After an RCU grace period, the fprobe_hlist itself will be released.
+ *
+ * fprobe_table and fprobe_ip_table can be accessed from either
+ * - Normal hlist traversal and RCU add/del while 'fprobe_mutex' is held.
+ * - RCU hlist traversal with preemption disabled
+ */
+static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE];
+static struct hlist_head fprobe_ip_table[FPROBE_IP_TABLE_SIZE];
+static DEFINE_MUTEX(fprobe_mutex);
+
+/*
+ * Find first fprobe in the hlist. It will be iterated twice in the entry
+ * probe, once for correcting the total required size, the second time is
+ * calling back the user handlers.
+ * Thus the hlist in the fprobe_table must be sorted and new probe needs to
+ * be added *before* the first fprobe.
+ */
+static struct fprobe_hlist_node *find_first_fprobe_node(unsigned long ip)
+{
+ struct fprobe_hlist_node *node;
+ struct hlist_head *head;
+
+ head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
+ hlist_for_each_entry_rcu(node, head, hlist,
+ lockdep_is_held(&fprobe_mutex)) {
+ if (node->addr == ip)
+ return node;
}
+ return NULL;
+}
+NOKPROBE_SYMBOL(find_first_fprobe_node);
+
+/* Node insertion and deletion requires the fprobe_mutex */
+static void insert_fprobe_node(struct fprobe_hlist_node *node)
+{
+ unsigned long ip = node->addr;
+ struct fprobe_hlist_node *next;
+ struct hlist_head *head;
- if (fp->entry_handler)
- ret = fp->entry_handler(fp, ip, parent_ip, ftrace_get_regs(fregs), entry_data);
+ lockdep_assert_held(&fprobe_mutex);
- /* If entry_handler returns !0, nmissed is not counted. */
- if (rh) {
- if (ret)
- rethook_recycle(rh);
- else
- rethook_hook(rh, ftrace_get_regs(fregs), true);
+ next = find_first_fprobe_node(ip);
+ if (next) {
+ hlist_add_before_rcu(&node->hlist, &next->hlist);
+ return;
}
+ head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
+ hlist_add_head_rcu(&node->hlist, head);
}
-static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct ftrace_regs *fregs)
+/* Return true if there are synonyms */
+static bool delete_fprobe_node(struct fprobe_hlist_node *node)
{
- struct fprobe *fp;
- int bit;
+ lockdep_assert_held(&fprobe_mutex);
- fp = container_of(ops, struct fprobe, ops);
- if (fprobe_disabled(fp))
- return;
+ WRITE_ONCE(node->fp, NULL);
+ hlist_del_rcu(&node->hlist);
+ return !!find_first_fprobe_node(node->addr);
+}
- /* recursion detection has to go before any traceable function and
- * all functions before this point should be marked as notrace
- */
- bit = ftrace_test_recursion_trylock(ip, parent_ip);
- if (bit < 0) {
- fp->nmissed++;
- return;
+/* Check existence of the fprobe */
+static bool is_fprobe_still_exist(struct fprobe *fp)
+{
+ struct hlist_head *head;
+ struct fprobe_hlist *fph;
+
+ head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
+ hlist_for_each_entry_rcu(fph, head, hlist,
+ lockdep_is_held(&fprobe_mutex)) {
+ if (fph->fp == fp)
+ return true;
}
- __fprobe_handler(ip, parent_ip, ops, fregs);
- ftrace_test_recursion_unlock(bit);
+ return false;
+}
+NOKPROBE_SYMBOL(is_fprobe_still_exist);
+
+static int add_fprobe_hash(struct fprobe *fp)
+{
+ struct fprobe_hlist *fph = fp->hlist_array;
+ struct hlist_head *head;
+
+ lockdep_assert_held(&fprobe_mutex);
+
+ if (WARN_ON_ONCE(!fph))
+ return -EINVAL;
+
+ if (is_fprobe_still_exist(fp))
+ return -EEXIST;
+
+ head = &fprobe_table[hash_ptr(fp, FPROBE_HASH_BITS)];
+ hlist_add_head_rcu(&fp->hlist_array->hlist, head);
+ return 0;
+}
+
+static int del_fprobe_hash(struct fprobe *fp)
+{
+ struct fprobe_hlist *fph = fp->hlist_array;
+
+ lockdep_assert_held(&fprobe_mutex);
+
+ if (WARN_ON_ONCE(!fph))
+ return -EINVAL;
+
+ if (!is_fprobe_still_exist(fp))
+ return -ENOENT;
+
+ fph->fp = NULL;
+ hlist_del_rcu(&fph->hlist);
+ return 0;
+}
+
+#ifdef ARCH_DEFINE_ENCODE_FPROBE_HEADER
+
+/* The arch should encode fprobe_header info into one unsigned long */
+#define FPROBE_HEADER_SIZE_IN_LONG 1
+
+static inline bool write_fprobe_header(unsigned long *stack,
+ struct fprobe *fp, unsigned int size_words)
+{
+ if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD ||
+ !arch_fprobe_header_encodable(fp)))
+ return false;
+ *stack = arch_encode_fprobe_header(fp, size_words);
+ return true;
}
-NOKPROBE_SYMBOL(fprobe_handler);
-static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct ftrace_regs *fregs)
+static inline void read_fprobe_header(unsigned long *stack,
+ struct fprobe **fp, unsigned int *size_words)
{
+ *fp = arch_decode_fprobe_header_fp(*stack);
+ *size_words = arch_decode_fprobe_header_size(*stack);
+}
+
+#else
+
+/* Generic fprobe_header */
+struct __fprobe_header {
struct fprobe *fp;
- int bit;
+ unsigned long size_words;
+} __packed;
- fp = container_of(ops, struct fprobe, ops);
- if (fprobe_disabled(fp))
- return;
+#define FPROBE_HEADER_SIZE_IN_LONG SIZE_IN_LONG(sizeof(struct __fprobe_header))
- /* recursion detection has to go before any traceable function and
- * all functions called before this point should be marked as notrace
- */
- bit = ftrace_test_recursion_trylock(ip, parent_ip);
- if (bit < 0) {
- fp->nmissed++;
- return;
- }
+static inline bool write_fprobe_header(unsigned long *stack,
+ struct fprobe *fp, unsigned int size_words)
+{
+ struct __fprobe_header *fph = (struct __fprobe_header *)stack;
+
+ if (WARN_ON_ONCE(size_words > MAX_FPROBE_DATA_SIZE_WORD))
+ return false;
+
+ fph->fp = fp;
+ fph->size_words = size_words;
+ return true;
+}
+
+static inline void read_fprobe_header(unsigned long *stack,
+ struct fprobe **fp, unsigned int *size_words)
+{
+ struct __fprobe_header *fph = (struct __fprobe_header *)stack;
+
+ *fp = fph->fp;
+ *size_words = fph->size_words;
+}
+
+#endif
+
+/*
+ * fprobe shadow stack management:
+ * Since fprobe shares a single fgraph_ops, it needs to share the stack entry
+ * among the probes on the same function exit. Note that since a new probe can
+ * be registered before a target function returns, we cannot use the hash
+ * table to find the corresponding probes. Thus the probe address is stored on
+ * the shadow stack with its entry data size.
+ *
+ */
+static inline int __fprobe_handler(unsigned long ip, unsigned long parent_ip,
+ struct fprobe *fp, struct ftrace_regs *fregs,
+ void *data)
+{
+ if (!fp->entry_handler)
+ return 0;
+ return fp->entry_handler(fp, ip, parent_ip, fregs, data);
+}
+
+static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
+ struct fprobe *fp, struct ftrace_regs *fregs,
+ void *data)
+{
+ int ret;
/*
* This user handler is shared with other kprobes and is not expected to be
* called recursively. So if any other kprobe handler is running, this will
@@ -108,44 +234,182 @@ static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
*/
if (unlikely(kprobe_running())) {
fp->nmissed++;
- goto recursion_unlock;
+ return 0;
}
kprobe_busy_begin();
- __fprobe_handler(ip, parent_ip, ops, fregs);
+ ret = __fprobe_handler(ip, parent_ip, fp, fregs, data);
kprobe_busy_end();
-
-recursion_unlock:
- ftrace_test_recursion_unlock(bit);
+ return ret;
}
-static void fprobe_exit_handler(struct rethook_node *rh, void *data,
- unsigned long ret_ip, struct pt_regs *regs)
+static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
- struct fprobe *fp = (struct fprobe *)data;
- struct fprobe_rethook_node *fpr;
- int bit;
+ struct fprobe_hlist_node *node, *first;
+ unsigned long *fgraph_data = NULL;
+ unsigned long func = trace->func;
+ unsigned long ret_ip;
+ int reserved_words;
+ struct fprobe *fp;
+ int used, ret;
- if (!fp || fprobe_disabled(fp))
- return;
+ if (WARN_ON_ONCE(!fregs))
+ return 0;
+
+ first = node = find_first_fprobe_node(func);
+ if (unlikely(!first))
+ return 0;
- fpr = container_of(rh, struct fprobe_rethook_node, node);
+ reserved_words = 0;
+ hlist_for_each_entry_from_rcu(node, hlist) {
+ if (node->addr != func)
+ break;
+ fp = READ_ONCE(node->fp);
+ if (!fp || !fp->exit_handler)
+ continue;
+ /*
+ * Since fprobe can be enabled until the next loop, we ignore the
+ * fprobe's disabled flag in this loop.
+ */
+ reserved_words +=
+ FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
+ }
+ node = first;
+ if (reserved_words) {
+ fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
+ if (unlikely(!fgraph_data)) {
+ hlist_for_each_entry_from_rcu(node, hlist) {
+ if (node->addr != func)
+ break;
+ fp = READ_ONCE(node->fp);
+ if (fp && !fprobe_disabled(fp))
+ fp->nmissed++;
+ }
+ return 0;
+ }
+ }
/*
- * we need to assure no calls to traceable functions in-between the
- * end of fprobe_handler and the beginning of fprobe_exit_handler.
+ * TODO: recursion detection has been done in the fgraph. Thus we need
+ * to add a callback to increment the missed counter.
*/
- bit = ftrace_test_recursion_trylock(fpr->entry_ip, fpr->entry_parent_ip);
- if (bit < 0) {
- fp->nmissed++;
+ ret_ip = ftrace_regs_get_return_address(fregs);
+ used = 0;
+ hlist_for_each_entry_from_rcu(node, hlist) {
+ int data_size;
+ void *data;
+
+ if (node->addr != func)
+ break;
+ fp = READ_ONCE(node->fp);
+ if (!fp || fprobe_disabled(fp))
+ continue;
+
+ data_size = fp->entry_data_size;
+ if (data_size && fp->exit_handler)
+ data = fgraph_data + used + FPROBE_HEADER_SIZE_IN_LONG;
+ else
+ data = NULL;
+
+ if (fprobe_shared_with_kprobes(fp))
+ ret = __fprobe_kprobe_handler(func, ret_ip, fp, fregs, data);
+ else
+ ret = __fprobe_handler(func, ret_ip, fp, fregs, data);
+
+ /* If entry_handler returns !0, nmissed is not counted but skips exit_handler. */
+ if (!ret && fp->exit_handler) {
+ int size_words = SIZE_IN_LONG(data_size);
+
+ if (write_fprobe_header(&fgraph_data[used], fp, size_words))
+ used += FPROBE_HEADER_SIZE_IN_LONG + size_words;
+ }
+ }
+ if (used < reserved_words)
+ memset(fgraph_data + used, 0, reserved_words - used);
+
+ /* If any exit_handler is set, data must be used. */
+ return used != 0;
+}
+NOKPROBE_SYMBOL(fprobe_entry);
+
+static void fprobe_return(struct ftrace_graph_ret *trace,
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
+{
+ unsigned long *fgraph_data = NULL;
+ unsigned long ret_ip;
+ struct fprobe *fp;
+ int size, curr;
+ int size_words;
+
+ fgraph_data = (unsigned long *)fgraph_retrieve_data(gops->idx, &size);
+ if (WARN_ON_ONCE(!fgraph_data))
return;
+ size_words = SIZE_IN_LONG(size);
+ ret_ip = ftrace_regs_get_instruction_pointer(fregs);
+
+ preempt_disable();
+
+ curr = 0;
+ while (size_words > curr) {
+ read_fprobe_header(&fgraph_data[curr], &fp, &size);
+ if (!fp)
+ break;
+ curr += FPROBE_HEADER_SIZE_IN_LONG;
+ if (is_fprobe_still_exist(fp) && !fprobe_disabled(fp)) {
+ if (WARN_ON_ONCE(curr + size > size_words))
+ break;
+ fp->exit_handler(fp, trace->func, ret_ip, fregs,
+ size ? fgraph_data + curr : NULL);
+ }
+ curr += size;
+ }
+ preempt_enable();
+}
+NOKPROBE_SYMBOL(fprobe_return);
+
+static struct fgraph_ops fprobe_graph_ops = {
+ .entryfunc = fprobe_entry,
+ .retfunc = fprobe_return,
+};
+static int fprobe_graph_active;
+
+/* Add @addrs to the ftrace filter and register fgraph if needed. */
+static int fprobe_graph_add_ips(unsigned long *addrs, int num)
+{
+ int ret;
+
+ lockdep_assert_held(&fprobe_mutex);
+
+ ret = ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 0, 0);
+ if (ret)
+ return ret;
+
+ if (!fprobe_graph_active) {
+ ret = register_ftrace_graph(&fprobe_graph_ops);
+ if (WARN_ON_ONCE(ret)) {
+ ftrace_free_filter(&fprobe_graph_ops.ops);
+ return ret;
+ }
}
+ fprobe_graph_active++;
+ return 0;
+}
- fp->exit_handler(fp, fpr->entry_ip, ret_ip, regs,
- fp->entry_data_size ? (void *)fpr->data : NULL);
- ftrace_test_recursion_unlock(bit);
+/* Remove @addrs from the ftrace filter and unregister fgraph if possible. */
+static void fprobe_graph_remove_ips(unsigned long *addrs, int num)
+{
+ lockdep_assert_held(&fprobe_mutex);
+
+ fprobe_graph_active--;
+ /* Q: should we unregister it ? */
+ if (!fprobe_graph_active)
+ unregister_ftrace_graph(&fprobe_graph_ops);
+
+ if (num)
+ ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0);
}
-NOKPROBE_SYMBOL(fprobe_exit_handler);
static int symbols_cmp(const void *a, const void *b)
{
@@ -175,53 +439,97 @@ static unsigned long *get_ftrace_locations(const char **syms, int num)
return ERR_PTR(-ENOENT);
}
-static void fprobe_init(struct fprobe *fp)
-{
- fp->nmissed = 0;
- if (fprobe_shared_with_kprobes(fp))
- fp->ops.func = fprobe_kprobe_handler;
- else
- fp->ops.func = fprobe_handler;
- fp->ops.flags |= FTRACE_OPS_FL_SAVE_REGS;
-}
+struct filter_match_data {
+ const char *filter;
+ const char *notfilter;
+ size_t index;
+ size_t size;
+ unsigned long *addrs;
+};
-static int fprobe_init_rethook(struct fprobe *fp, int num)
+static int filter_match_callback(void *data, const char *name, unsigned long addr)
{
- int size;
+ struct filter_match_data *match = data;
- if (!fp->exit_handler) {
- fp->rethook = NULL;
+ if (!glob_match(match->filter, name) ||
+ (match->notfilter && glob_match(match->notfilter, name)))
return 0;
- }
- /* Initialize rethook if needed */
- if (fp->nr_maxactive)
- num = fp->nr_maxactive;
- else
- num *= num_possible_cpus() * 2;
- if (num <= 0)
- return -EINVAL;
+ if (!ftrace_location(addr))
+ return 0;
+
+ if (match->addrs)
+ match->addrs[match->index] = addr;
- size = sizeof(struct fprobe_rethook_node) + fp->entry_data_size;
+ match->index++;
+ return match->index == match->size;
+}
- /* Initialize rethook */
- fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler, size, num);
- if (IS_ERR(fp->rethook))
- return PTR_ERR(fp->rethook);
+/*
+ * Make IP list from the filter/no-filter glob patterns.
+ * Return the number of matched symbols, or -ENOENT.
+ */
+static int ip_list_from_filter(const char *filter, const char *notfilter,
+ unsigned long *addrs, size_t size)
+{
+ struct filter_match_data match = { .filter = filter, .notfilter = notfilter,
+ .index = 0, .size = size, .addrs = addrs};
+ int ret;
- return 0;
+ ret = kallsyms_on_each_symbol(filter_match_callback, &match);
+ if (ret < 0)
+ return ret;
+ ret = module_kallsyms_on_each_symbol(NULL, filter_match_callback, &match);
+ if (ret < 0)
+ return ret;
+
+ return match.index ?: -ENOENT;
}
static void fprobe_fail_cleanup(struct fprobe *fp)
{
- if (!IS_ERR_OR_NULL(fp->rethook)) {
- /* Don't need to cleanup rethook->handler because this is not used. */
- rethook_free(fp->rethook);
- fp->rethook = NULL;
+ kfree(fp->hlist_array);
+ fp->hlist_array = NULL;
+}
+
+/* Initialize the fprobe data structure. */
+static int fprobe_init(struct fprobe *fp, unsigned long *addrs, int num)
+{
+ struct fprobe_hlist *hlist_array;
+ unsigned long addr;
+ int size, i;
+
+ if (!fp || !addrs || num <= 0)
+ return -EINVAL;
+
+ size = ALIGN(fp->entry_data_size, sizeof(long));
+ if (size > MAX_FPROBE_DATA_SIZE)
+ return -E2BIG;
+ fp->entry_data_size = size;
+
+ hlist_array = kzalloc(struct_size(hlist_array, array, num), GFP_KERNEL);
+ if (!hlist_array)
+ return -ENOMEM;
+
+ fp->nmissed = 0;
+
+ hlist_array->size = num;
+ fp->hlist_array = hlist_array;
+ hlist_array->fp = fp;
+ for (i = 0; i < num; i++) {
+ hlist_array->array[i].fp = fp;
+ addr = ftrace_location(addrs[i]);
+ if (!addr) {
+ fprobe_fail_cleanup(fp);
+ return -ENOENT;
+ }
+ hlist_array->array[i].addr = addr;
}
- ftrace_free_filter(&fp->ops);
+ return 0;
}
+#define FPROBE_IPS_MAX INT_MAX
+
/**
* register_fprobe() - Register fprobe to ftrace by pattern.
* @fp: A fprobe data structure to be registered.
@@ -235,46 +543,24 @@ static void fprobe_fail_cleanup(struct fprobe *fp)
*/
int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter)
{
- struct ftrace_hash *hash;
- unsigned char *str;
- int ret, len;
+ unsigned long *addrs;
+ int ret;
if (!fp || !filter)
return -EINVAL;
- fprobe_init(fp);
-
- len = strlen(filter);
- str = kstrdup(filter, GFP_KERNEL);
- ret = ftrace_set_filter(&fp->ops, str, len, 0);
- kfree(str);
- if (ret)
+ ret = ip_list_from_filter(filter, notfilter, NULL, FPROBE_IPS_MAX);
+ if (ret < 0)
return ret;
- if (notfilter) {
- len = strlen(notfilter);
- str = kstrdup(notfilter, GFP_KERNEL);
- ret = ftrace_set_notrace(&fp->ops, str, len, 0);
- kfree(str);
- if (ret)
- goto out;
- }
-
- /* TODO:
- * correctly calculate the total number of filtered symbols
- * from both filter and notfilter.
- */
- hash = rcu_access_pointer(fp->ops.local_hash.filter_hash);
- if (WARN_ON_ONCE(!hash))
- goto out;
-
- ret = fprobe_init_rethook(fp, (int)hash->count);
- if (!ret)
- ret = register_ftrace_function(&fp->ops);
+ addrs = kcalloc(ret, sizeof(unsigned long), GFP_KERNEL);
+ if (!addrs)
+ return -ENOMEM;
+ ret = ip_list_from_filter(filter, notfilter, addrs, ret);
+ if (ret > 0)
+ ret = register_fprobe_ips(fp, addrs, ret);
-out:
- if (ret)
- fprobe_fail_cleanup(fp);
+ kfree(addrs);
return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe);
@@ -282,7 +568,7 @@ EXPORT_SYMBOL_GPL(register_fprobe);
/**
* register_fprobe_ips() - Register fprobe to ftrace by address.
* @fp: A fprobe data structure to be registered.
- * @addrs: An array of target ftrace location addresses.
+ * @addrs: An array of target function addresses.
* @num: The number of entries of @addrs.
*
* Register @fp to ftrace for enabling the probe on the address given by @addrs.
@@ -294,23 +580,27 @@ EXPORT_SYMBOL_GPL(register_fprobe);
*/
int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
{
- int ret;
-
- if (!fp || !addrs || num <= 0)
- return -EINVAL;
-
- fprobe_init(fp);
+ struct fprobe_hlist *hlist_array;
+ int ret, i;
- ret = ftrace_set_filter_ips(&fp->ops, addrs, num, 0, 0);
+ ret = fprobe_init(fp, addrs, num);
if (ret)
return ret;
- ret = fprobe_init_rethook(fp, num);
- if (!ret)
- ret = register_ftrace_function(&fp->ops);
+ mutex_lock(&fprobe_mutex);
+
+ hlist_array = fp->hlist_array;
+ ret = fprobe_graph_add_ips(addrs, num);
+ if (!ret) {
+ add_fprobe_hash(fp);
+ for (i = 0; i < hlist_array->size; i++)
+ insert_fprobe_node(&hlist_array->array[i]);
+ }
+ mutex_unlock(&fprobe_mutex);
if (ret)
fprobe_fail_cleanup(fp);
+
return ret;
}
EXPORT_SYMBOL_GPL(register_fprobe_ips);
@@ -348,14 +638,13 @@ EXPORT_SYMBOL_GPL(register_fprobe_syms);
bool fprobe_is_registered(struct fprobe *fp)
{
- if (!fp || (fp->ops.saved_func != fprobe_handler &&
- fp->ops.saved_func != fprobe_kprobe_handler))
+ if (!fp || !fp->hlist_array)
return false;
return true;
}
/**
- * unregister_fprobe() - Unregister fprobe from ftrace
+ * unregister_fprobe() - Unregister fprobe.
* @fp: A fprobe data structure to be unregistered.
*
* Unregister fprobe (and remove ftrace hooks from the function entries).
@@ -364,23 +653,40 @@ bool fprobe_is_registered(struct fprobe *fp)
*/
int unregister_fprobe(struct fprobe *fp)
{
- int ret;
+ struct fprobe_hlist *hlist_array;
+ unsigned long *addrs = NULL;
+ int ret = 0, i, count;
- if (!fprobe_is_registered(fp))
- return -EINVAL;
+ mutex_lock(&fprobe_mutex);
+ if (!fp || !is_fprobe_still_exist(fp)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ hlist_array = fp->hlist_array;
+ addrs = kcalloc(hlist_array->size, sizeof(unsigned long), GFP_KERNEL);
+ if (!addrs) {
+ ret = -ENOMEM; /* TODO: Fallback to one-by-one loop */
+ goto out;
+ }
- if (!IS_ERR_OR_NULL(fp->rethook))
- rethook_stop(fp->rethook);
+ /* Remove non-synonym ips from table and hash */
+ count = 0;
+ for (i = 0; i < hlist_array->size; i++) {
+ if (!delete_fprobe_node(&hlist_array->array[i]))
+ addrs[count++] = hlist_array->array[i].addr;
+ }
+ del_fprobe_hash(fp);
- ret = unregister_ftrace_function(&fp->ops);
- if (ret < 0)
- return ret;
+ fprobe_graph_remove_ips(addrs, count);
- if (!IS_ERR_OR_NULL(fp->rethook))
- rethook_free(fp->rethook);
+ kfree_rcu(hlist_array, rcu);
+ fp->hlist_array = NULL;
- ftrace_free_filter(&fp->ops);
+out:
+ mutex_unlock(&fprobe_mutex);
+ kfree(addrs);
return ret;
}
EXPORT_SYMBOL_GPL(unregister_fprobe);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9b17efb1a87d..6b0c25761ccb 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -536,24 +536,21 @@ static int function_stat_show(struct seq_file *m, void *v)
{
struct ftrace_profile *rec = v;
char str[KSYM_SYMBOL_LEN];
- int ret = 0;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static struct trace_seq s;
unsigned long long avg;
unsigned long long stddev;
#endif
- mutex_lock(&ftrace_profile_lock);
+ guard(mutex)(&ftrace_profile_lock);
/* we raced with function_profile_reset() */
- if (unlikely(rec->counter == 0)) {
- ret = -EBUSY;
- goto out;
- }
+ if (unlikely(rec->counter == 0))
+ return -EBUSY;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
avg = div64_ul(rec->time, rec->counter);
if (tracing_thresh && (avg < tracing_thresh))
- goto out;
+ return 0;
#endif
kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
@@ -590,10 +587,8 @@ static int function_stat_show(struct seq_file *m, void *v)
trace_print_seq(m, &s);
#endif
seq_putc(m, '\n');
-out:
- mutex_unlock(&ftrace_profile_lock);
- return ret;
+ return 0;
}
static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
@@ -789,27 +784,24 @@ function_profile_call(unsigned long ip, unsigned long parent_ip,
{
struct ftrace_profile_stat *stat;
struct ftrace_profile *rec;
- unsigned long flags;
if (!ftrace_profile_enabled)
return;
- local_irq_save(flags);
+ guard(preempt_notrace)();
stat = this_cpu_ptr(&ftrace_profile_stats);
if (!stat->hash || !ftrace_profile_enabled)
- goto out;
+ return;
rec = ftrace_find_profiled_func(stat, ip);
if (!rec) {
rec = ftrace_profile_alloc(stat, ip);
if (!rec)
- goto out;
+ return;
}
rec->counter++;
- out:
- local_irq_restore(flags);
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -827,7 +819,8 @@ struct profile_fgraph_data {
};
static int profile_graph_entry(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct profile_fgraph_data *profile_data;
@@ -849,26 +842,27 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace,
}
static void profile_graph_return(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct profile_fgraph_data *profile_data;
struct ftrace_profile_stat *stat;
unsigned long long calltime;
unsigned long long rettime = trace_clock_local();
struct ftrace_profile *rec;
- unsigned long flags;
int size;
- local_irq_save(flags);
+ guard(preempt_notrace)();
+
stat = this_cpu_ptr(&ftrace_profile_stats);
if (!stat->hash || !ftrace_profile_enabled)
- goto out;
+ return;
profile_data = fgraph_retrieve_data(gops->idx, &size);
/* If the calltime was zero'd ignore it */
if (!profile_data || !profile_data->calltime)
- goto out;
+ return;
calltime = rettime - profile_data->calltime;
@@ -896,22 +890,16 @@ static void profile_graph_return(struct ftrace_graph_ret *trace,
rec->time += calltime;
rec->time_squared += calltime * calltime;
}
-
- out:
- local_irq_restore(flags);
}
static struct fgraph_ops fprofiler_ops = {
- .ops = {
- .flags = FTRACE_OPS_FL_INITIALIZED,
- INIT_OPS_HASH(fprofiler_ops.ops)
- },
.entryfunc = &profile_graph_entry,
.retfunc = &profile_graph_return,
};
static int register_ftrace_profiler(void)
{
+ ftrace_ops_set_global_filter(&fprofiler_ops.ops);
return register_ftrace_graph(&fprofiler_ops);
}
@@ -922,12 +910,11 @@ static void unregister_ftrace_profiler(void)
#else
static struct ftrace_ops ftrace_profile_ops __read_mostly = {
.func = function_profile_call,
- .flags = FTRACE_OPS_FL_INITIALIZED,
- INIT_OPS_HASH(ftrace_profile_ops)
};
static int register_ftrace_profiler(void)
{
+ ftrace_ops_set_global_filter(&ftrace_profile_ops);
return register_ftrace_function(&ftrace_profile_ops);
}
@@ -950,20 +937,16 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
val = !!val;
- mutex_lock(&ftrace_profile_lock);
+ guard(mutex)(&ftrace_profile_lock);
if (ftrace_profile_enabled ^ val) {
if (val) {
ret = ftrace_profile_init();
- if (ret < 0) {
- cnt = ret;
- goto out;
- }
+ if (ret < 0)
+ return ret;
ret = register_ftrace_profiler();
- if (ret < 0) {
- cnt = ret;
- goto out;
- }
+ if (ret < 0)
+ return ret;
ftrace_profile_enabled = 1;
} else {
ftrace_profile_enabled = 0;
@@ -974,8 +957,6 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
unregister_ftrace_profiler();
}
}
- out:
- mutex_unlock(&ftrace_profile_lock);
*ppos += cnt;
@@ -1675,14 +1656,12 @@ unsigned long ftrace_location(unsigned long ip)
loc = ftrace_location_range(ip, ip);
if (!loc) {
if (!kallsyms_lookup_size_offset(ip, &size, &offset))
- goto out;
+ return 0;
/* map sym+0 to __fentry__ */
if (!offset)
loc = ftrace_location_range(ip, ip + size - 1);
}
-
-out:
return loc;
}
@@ -2077,7 +2056,7 @@ rollback:
continue;
if (rec == end)
- goto err_out;
+ return -EBUSY;
in_old = !!ftrace_lookup_ip(old_hash, rec->ip);
in_new = !!ftrace_lookup_ip(new_hash, rec->ip);
@@ -2090,7 +2069,6 @@ rollback:
rec->flags |= FTRACE_FL_IPMODIFY;
} while_for_each_ftrace_rec();
-err_out:
return -EBUSY;
}
@@ -3242,15 +3220,22 @@ static struct ftrace_hash *copy_hash(struct ftrace_hash *src)
* The filter_hash updates uses just the append_hash() function
* and the notrace_hash does not.
*/
-static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash)
+static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash,
+ int size_bits)
{
struct ftrace_func_entry *entry;
int size;
int i;
- /* An empty hash does everything */
- if (ftrace_hash_empty(*hash))
- return 0;
+ if (*hash) {
+ /* An empty hash does everything */
+ if (ftrace_hash_empty(*hash))
+ return 0;
+ } else {
+ *hash = alloc_ftrace_hash(size_bits);
+ if (!*hash)
+ return -ENOMEM;
+ }
/* If new_hash has everything make hash have everything */
if (ftrace_hash_empty(new_hash)) {
@@ -3314,16 +3299,18 @@ static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_has
/* Return a new hash that has a union of all @ops->filter_hash entries */
static struct ftrace_hash *append_hashes(struct ftrace_ops *ops)
{
- struct ftrace_hash *new_hash;
+ struct ftrace_hash *new_hash = NULL;
struct ftrace_ops *subops;
+ int size_bits;
int ret;
- new_hash = alloc_ftrace_hash(ops->func_hash->filter_hash->size_bits);
- if (!new_hash)
- return NULL;
+ if (ops->func_hash->filter_hash)
+ size_bits = ops->func_hash->filter_hash->size_bits;
+ else
+ size_bits = FTRACE_HASH_DEFAULT_BITS;
list_for_each_entry(subops, &ops->subop_list, list) {
- ret = append_hash(&new_hash, subops->func_hash->filter_hash);
+ ret = append_hash(&new_hash, subops->func_hash->filter_hash, size_bits);
if (ret < 0) {
free_ftrace_hash(new_hash);
return NULL;
@@ -3332,7 +3319,8 @@ static struct ftrace_hash *append_hashes(struct ftrace_ops *ops)
if (ftrace_hash_empty(new_hash))
break;
}
- return new_hash;
+ /* Can't return NULL as that means this failed */
+ return new_hash ? : EMPTY_HASH;
}
/* Make @ops trace evenything except what all its subops do not trace */
@@ -3527,7 +3515,8 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int
filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash);
if (!filter_hash)
return -ENOMEM;
- ret = append_hash(&filter_hash, subops->func_hash->filter_hash);
+ ret = append_hash(&filter_hash, subops->func_hash->filter_hash,
+ size_bits);
if (ret < 0) {
free_ftrace_hash(filter_hash);
return ret;
@@ -4930,23 +4919,6 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops,
return __ftrace_hash_move_and_update_ops(ops, orig_hash, hash, enable);
}
-static bool module_exists(const char *module)
-{
- /* All modules have the symbol __this_module */
- static const char this_mod[] = "__this_module";
- char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
- unsigned long val;
- int n;
-
- n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
-
- if (n > sizeof(modname) - 1)
- return false;
-
- val = module_kallsyms_lookup_name(modname);
- return val != 0;
-}
-
static int cache_mod(struct trace_array *tr,
const char *func, char *module, int enable)
{
@@ -4986,10 +4958,6 @@ static int cache_mod(struct trace_array *tr,
return ftrace_add_mod(tr, func, module, enable);
}
-static int
-ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
- int reset, int enable);
-
#ifdef CONFIG_MODULES
static void process_mod_list(struct list_head *head, struct ftrace_ops *ops,
char *mod, bool enable)
@@ -5619,20 +5587,15 @@ static DEFINE_MUTEX(ftrace_cmd_mutex);
__init int register_ftrace_command(struct ftrace_func_command *cmd)
{
struct ftrace_func_command *p;
- int ret = 0;
- mutex_lock(&ftrace_cmd_mutex);
+ guard(mutex)(&ftrace_cmd_mutex);
list_for_each_entry(p, &ftrace_commands, list) {
- if (strcmp(cmd->name, p->name) == 0) {
- ret = -EBUSY;
- goto out_unlock;
- }
+ if (strcmp(cmd->name, p->name) == 0)
+ return -EBUSY;
}
list_add(&cmd->list, &ftrace_commands);
- out_unlock:
- mutex_unlock(&ftrace_cmd_mutex);
- return ret;
+ return 0;
}
/*
@@ -5642,20 +5605,17 @@ __init int register_ftrace_command(struct ftrace_func_command *cmd)
__init int unregister_ftrace_command(struct ftrace_func_command *cmd)
{
struct ftrace_func_command *p, *n;
- int ret = -ENODEV;
- mutex_lock(&ftrace_cmd_mutex);
+ guard(mutex)(&ftrace_cmd_mutex);
+
list_for_each_entry_safe(p, n, &ftrace_commands, list) {
if (strcmp(cmd->name, p->name) == 0) {
- ret = 0;
list_del_init(&p->list);
- goto out_unlock;
+ return 0;
}
}
- out_unlock:
- mutex_unlock(&ftrace_cmd_mutex);
- return ret;
+ return -ENODEV;
}
static int ftrace_process_regex(struct ftrace_iterator *iter,
@@ -5665,7 +5625,7 @@ static int ftrace_process_regex(struct ftrace_iterator *iter,
struct trace_array *tr = iter->ops->private;
char *func, *command, *next = buff;
struct ftrace_func_command *p;
- int ret = -EINVAL;
+ int ret;
func = strsep(&next, ":");
@@ -5682,17 +5642,14 @@ static int ftrace_process_regex(struct ftrace_iterator *iter,
command = strsep(&next, ":");
- mutex_lock(&ftrace_cmd_mutex);
+ guard(mutex)(&ftrace_cmd_mutex);
+
list_for_each_entry(p, &ftrace_commands, list) {
- if (strcmp(p->name, command) == 0) {
- ret = p->func(tr, hash, func, command, next, enable);
- goto out_unlock;
- }
+ if (strcmp(p->name, command) == 0)
+ return p->func(tr, hash, func, command, next, enable);
}
- out_unlock:
- mutex_unlock(&ftrace_cmd_mutex);
- return ret;
+ return -EINVAL;
}
static ssize_t
@@ -5726,12 +5683,10 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
parser->idx, enable);
trace_parser_clear(parser);
if (ret < 0)
- goto out;
+ return ret;
}
- ret = read;
- out:
- return ret;
+ return read;
}
ssize_t
@@ -5763,6 +5718,9 @@ __ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove)
return -ENOENT;
free_hash_entry(hash, entry);
return 0;
+ } else if (__ftrace_lookup_ip(hash, ip) != NULL) {
+ /* Already exists */
+ return 0;
}
entry = add_hash_entry(hash, ip);
@@ -5792,7 +5750,7 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long *ips,
static int
ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
unsigned long *ips, unsigned int cnt,
- int remove, int reset, int enable)
+ int remove, int reset, int enable, char *mod)
{
struct ftrace_hash **orig_hash;
struct ftrace_hash *hash;
@@ -5818,7 +5776,15 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
goto out_regex_unlock;
}
- if (buf && !ftrace_match_records(hash, buf, len)) {
+ if (buf && !match_records(hash, buf, len, mod)) {
+ /* If this was for a module and nothing was enabled, flag it */
+ if (mod)
+ (*orig_hash)->flags |= FTRACE_HASH_FL_MOD;
+
+ /*
+ * Even if it is a mod, return error to let caller know
+ * nothing was added
+ */
ret = -EINVAL;
goto out_regex_unlock;
}
@@ -5843,7 +5809,7 @@ static int
ftrace_set_addr(struct ftrace_ops *ops, unsigned long *ips, unsigned int cnt,
int remove, int reset, int enable)
{
- return ftrace_set_hash(ops, NULL, 0, ips, cnt, remove, reset, enable);
+ return ftrace_set_hash(ops, NULL, 0, ips, cnt, remove, reset, enable, NULL);
}
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
@@ -6221,7 +6187,38 @@ static int
ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
int reset, int enable)
{
- return ftrace_set_hash(ops, buf, len, NULL, 0, 0, reset, enable);
+ char *mod = NULL, *func, *command, *next = buf;
+ char *tmp __free(kfree) = NULL;
+ struct trace_array *tr = ops->private;
+ int ret;
+
+ func = strsep(&next, ":");
+
+ /* This can also handle :mod: parsing */
+ if (next) {
+ if (!tr)
+ return -EINVAL;
+
+ command = strsep(&next, ":");
+ if (strcmp(command, "mod") != 0)
+ return -EINVAL;
+
+ mod = next;
+ len = command - func;
+ /* Save the original func as ftrace_set_hash() can modify it */
+ tmp = kstrdup(func, GFP_KERNEL);
+ }
+
+ ret = ftrace_set_hash(ops, func, len, NULL, 0, 0, reset, enable, mod);
+
+ if (tr && mod && ret < 0) {
+ /* Did tmp fail to allocate? */
+ if (!tmp)
+ return -ENOMEM;
+ ret = cache_mod(tr, tmp, mod, enable);
+ }
+
+ return ret;
}
/**
@@ -6385,6 +6382,14 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable)
ftrace_ops_init(ops);
+ /* The trace_array is needed for caching module function filters */
+ if (!ops->private) {
+ struct trace_array *tr = trace_get_global_array();
+
+ ops->private = tr;
+ ftrace_init_trace_array(tr);
+ }
+
while (buf) {
func = strsep(&buf, ",");
ftrace_set_regex(ops, func, strlen(func), 0, enable);
@@ -7818,9 +7823,14 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops)
void ftrace_init_trace_array(struct trace_array *tr)
{
+ if (tr->flags & TRACE_ARRAY_FL_MOD_INIT)
+ return;
+
INIT_LIST_HEAD(&tr->func_probes);
INIT_LIST_HEAD(&tr->mod_trace);
INIT_LIST_HEAD(&tr->mod_notrace);
+
+ tr->flags |= TRACE_ARRAY_FL_MOD_INIT;
}
#else
@@ -7849,7 +7859,8 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops)
__init void ftrace_init_global_array_ops(struct trace_array *tr)
{
tr->ops = &global_ops;
- tr->ops->private = tr;
+ if (!global_ops.private)
+ global_ops.private = tr;
ftrace_init_trace_array(tr);
init_array_fgraph_ops(tr, tr->ops);
}
@@ -8291,7 +8302,7 @@ pid_write(struct file *filp, const char __user *ubuf,
if (!cnt)
return 0;
- mutex_lock(&ftrace_lock);
+ guard(mutex)(&ftrace_lock);
switch (type) {
case TRACE_PIDS:
@@ -8307,14 +8318,13 @@ pid_write(struct file *filp, const char __user *ubuf,
lockdep_is_held(&ftrace_lock));
break;
default:
- ret = -EINVAL;
WARN_ON_ONCE(1);
- goto out;
+ return -EINVAL;
}
ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
if (ret < 0)
- goto out;
+ return ret;
switch (type) {
case TRACE_PIDS:
@@ -8343,11 +8353,8 @@ pid_write(struct file *filp, const char __user *ubuf,
ftrace_update_pid_func();
ftrace_startup_all(0);
- out:
- mutex_unlock(&ftrace_lock);
- if (ret > 0)
- *ppos += ret;
+ *ppos += ret;
return ret;
}
@@ -8750,17 +8757,17 @@ static int
ftrace_enable_sysctl(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- int ret = -ENODEV;
+ int ret;
- mutex_lock(&ftrace_lock);
+ guard(mutex)(&ftrace_lock);
if (unlikely(ftrace_disabled))
- goto out;
+ return -ENODEV;
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
- goto out;
+ return ret;
if (ftrace_enabled) {
@@ -8774,8 +8781,7 @@ ftrace_enable_sysctl(const struct ctl_table *table, int write,
} else {
if (is_permanent_ops_registered()) {
ftrace_enabled = true;
- ret = -EBUSY;
- goto out;
+ return -EBUSY;
}
/* stopping ftrace calls (just send to ftrace_stub) */
@@ -8785,12 +8791,10 @@ ftrace_enable_sysctl(const struct ctl_table *table, int write,
}
last_ftrace_enabled = !!ftrace_enabled;
- out:
- mutex_unlock(&ftrace_lock);
- return ret;
+ return 0;
}
-static struct ctl_table ftrace_sysctls[] = {
+static const struct ctl_table ftrace_sysctls[] = {
{
.procname = "ftrace_enabled",
.data = &ftrace_enabled,
diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c
index 4966e6bbdf6f..c62b9b3cfb3d 100644
--- a/kernel/trace/pid_list.c
+++ b/kernel/trace/pid_list.c
@@ -414,7 +414,7 @@ struct trace_pid_list *trace_pid_list_alloc(void)
int i;
/* According to linux/thread.h, pids can be no bigger than 30 bits */
- WARN_ON_ONCE(pid_max > (1 << 30));
+ WARN_ON_ONCE(init_pid_ns.pid_max > (1 << 30));
pid_list = kzalloc(sizeof(*pid_list), GFP_KERNEL);
if (!pid_list)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 7e257e855dd1..bb6089c2951e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1672,7 +1672,8 @@ static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx)
* must be the same.
*/
static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu,
- struct trace_buffer *buffer, int nr_pages)
+ struct trace_buffer *buffer, int nr_pages,
+ unsigned long *subbuf_mask)
{
int subbuf_size = PAGE_SIZE;
struct buffer_data_page *subbuf;
@@ -1680,6 +1681,9 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu,
unsigned long buffers_end;
int i;
+ if (!subbuf_mask)
+ return false;
+
/* Check the meta magic and meta struct size */
if (meta->magic != RING_BUFFER_META_MAGIC ||
meta->struct_size != sizeof(*meta)) {
@@ -1712,6 +1716,8 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu,
subbuf = rb_subbufs_from_meta(meta);
+ bitmap_clear(subbuf_mask, 0, meta->nr_subbufs);
+
/* Do the meta buffers and the subbufs themselves have correct data? */
for (i = 0; i < meta->nr_subbufs; i++) {
if (meta->buffers[i] < 0 ||
@@ -1725,6 +1731,12 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu,
return false;
}
+ if (test_bit(meta->buffers[i], subbuf_mask)) {
+ pr_info("Ring buffer boot meta [%d] array has duplicates\n", cpu);
+ return false;
+ }
+
+ set_bit(meta->buffers[i], subbuf_mask);
subbuf = (void *)subbuf + subbuf_size;
}
@@ -1838,6 +1850,11 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->cpu);
goto invalid;
}
+
+ /* If the buffer has content, update pages_touched */
+ if (ret)
+ local_inc(&cpu_buffer->pages_touched);
+
entries += ret;
entry_bytes += local_read(&head_page->page->commit);
local_set(&cpu_buffer->head_page->entries, ret);
@@ -1889,17 +1906,22 @@ static void rb_meta_init_text_addr(struct ring_buffer_meta *meta)
static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages)
{
struct ring_buffer_meta *meta;
+ unsigned long *subbuf_mask;
unsigned long delta;
void *subbuf;
int cpu;
int i;
+ /* Create a mask to test the subbuf array */
+ subbuf_mask = bitmap_alloc(nr_pages + 1, GFP_KERNEL);
+ /* If subbuf_mask fails to allocate, then rb_meta_valid() will return false */
+
for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
void *next_meta;
meta = rb_range_meta(buffer, nr_pages, cpu);
- if (rb_meta_valid(meta, cpu, buffer, nr_pages)) {
+ if (rb_meta_valid(meta, cpu, buffer, nr_pages, subbuf_mask)) {
/* Make the mappings match the current address */
subbuf = rb_subbufs_from_meta(meta);
delta = (unsigned long)subbuf - meta->first_buffer;
@@ -1943,6 +1965,7 @@ static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages)
subbuf += meta->subbuf_size;
}
}
+ bitmap_free(subbuf_mask);
}
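
The rb_meta_valid() changes above use a bitmap (bitmap_alloc(), bitmap_clear(), test_bit() and set_bit()) to reject boot-mapped meta data whose subbuffer array names the same subbuffer twice. A minimal userspace analog of that duplicate check, with a made-up index array, looks like this:

#include <stdbool.h>
#include <stdio.h>

#define BITS_PER_ULONG	(8 * sizeof(unsigned long))

static bool indices_unique(const int *buffers, int nr, unsigned long *mask)
{
	for (int i = 0; i < nr; i++) {
		int idx = buffers[i];
		unsigned long bit = 1UL << (idx % BITS_PER_ULONG);

		if (mask[idx / BITS_PER_ULONG] & bit)
			return false;		/* duplicate index: reject the meta data */
		mask[idx / BITS_PER_ULONG] |= bit;
	}
	return true;
}

int main(void)
{
	int buffers[] = { 0, 2, 1, 2 };		/* "2" appears twice */
	unsigned long mask[1] = { 0 };

	printf("%s\n", indices_unique(buffers, 4, mask) ? "valid" : "duplicates");
	return 0;
}
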
static void *rbm_start(struct seq_file *m, loff_t *pos)
@@ -4398,8 +4421,13 @@ rb_reserve_next_event(struct trace_buffer *buffer,
int nr_loops = 0;
int add_ts_default;
- /* ring buffer does cmpxchg, make sure it is safe in NMI context */
- if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) &&
+ /*
+ * ring buffer does cmpxchg as well as atomic64 operations
+	 * (some archs implement atomic64 with locking), make sure this
+ * is safe in NMI context
+ */
+ if ((!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) ||
+ IS_ENABLED(CONFIG_GENERIC_ATOMIC64)) &&
(unlikely(in_nmi()))) {
return NULL;
}
@@ -4682,40 +4710,22 @@ int ring_buffer_write(struct trace_buffer *buffer,
}
EXPORT_SYMBOL_GPL(ring_buffer_write);
-static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+/*
+ * The total entries in the ring buffer is the running counter
+ * of entries entered into the ring buffer, minus the sum of
+ * the entries read from the ring buffer and the number of
+ * entries that were overwritten.
+ */
+static inline unsigned long
+rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
{
- struct buffer_page *reader = cpu_buffer->reader_page;
- struct buffer_page *head = rb_set_head_page(cpu_buffer);
- struct buffer_page *commit = cpu_buffer->commit_page;
-
- /* In case of error, head will be NULL */
- if (unlikely(!head))
- return true;
-
- /* Reader should exhaust content in reader page */
- if (reader->read != rb_page_size(reader))
- return false;
-
- /*
- * If writers are committing on the reader page, knowing all
- * committed content has been read, the ring buffer is empty.
- */
- if (commit == reader)
- return true;
-
- /*
- * If writers are committing on a page other than reader page
- * and head page, there should always be content to read.
- */
- if (commit != head)
- return false;
+ return local_read(&cpu_buffer->entries) -
+ (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
+}
- /*
- * Writers are committing on the head page, we just need
- * to care about there're committed data, and the reader will
- * swap reader page with head page when it is to read data.
- */
- return rb_page_commit(commit) == 0;
+static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
+{
+ return !rb_num_of_entries(cpu_buffer);
}
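
With rb_num_of_entries() now shared, rb_per_cpu_empty() reduces to a single arithmetic check: if, say, 10 events were written, 2 were overwritten and 3 have been read, then 10 - (2 + 3) = 5 entries remain and the buffer is non-empty; the buffer is empty exactly when that difference reaches zero, which replaces the earlier reader/commit/head page comparisons.
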
/**
@@ -4861,19 +4871,6 @@ void ring_buffer_record_enable_cpu(struct trace_buffer *buffer, int cpu)
}
EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu);
-/*
- * The total entries in the ring buffer is the running counter
- * of entries entered into the ring buffer, minus the sum of
- * the entries read from the ring buffer and the number of
- * entries that were overwritten.
- */
-static inline unsigned long
-rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
-{
- return local_read(&cpu_buffer->entries) -
- (local_read(&cpu_buffer->overrun) + cpu_buffer->read);
-}
-
/**
* ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer
* @buffer: The ring buffer
@@ -7019,7 +7016,11 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
lockdep_assert_held(&cpu_buffer->mapping_lock);
nr_subbufs = cpu_buffer->nr_pages + 1; /* + reader-subbuf */
- nr_pages = ((nr_subbufs + 1) << subbuf_order) - pgoff; /* + meta-page */
+ nr_pages = ((nr_subbufs + 1) << subbuf_order); /* + meta-page */
+ if (nr_pages <= pgoff)
+ return -EINVAL;
+
+ nr_pages -= pgoff;
nr_vma_pages = vma_pages(vma);
if (!nr_vma_pages || nr_vma_pages > nr_pages)
@@ -7055,7 +7056,7 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
}
while (p < nr_pages) {
- struct page *page = virt_to_page((void *)cpu_buffer->subbuf_ids[s]);
+ struct page *page;
int off = 0;
if (WARN_ON_ONCE(s >= nr_subbufs)) {
@@ -7063,6 +7064,8 @@ static int __rb_map_vma(struct ring_buffer_per_cpu *cpu_buffer,
goto out;
}
+ page = virt_to_page((void *)cpu_buffer->subbuf_ids[s]);
+
for (; off < (1 << (subbuf_order)); off++, page++) {
if (p >= nr_pages)
break;
@@ -7146,6 +7149,7 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu,
kfree(cpu_buffer->subbuf_ids);
cpu_buffer->subbuf_ids = NULL;
rb_free_meta_page(cpu_buffer);
+ atomic_dec(&cpu_buffer->resize_disabled);
}
unlock:
diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig
index 831779607e84..8226352a0062 100644
--- a/kernel/trace/rv/Kconfig
+++ b/kernel/trace/rv/Kconfig
@@ -25,30 +25,9 @@ menuconfig RV
For further information, see:
Documentation/trace/rv/runtime-verification.rst
-config RV_MON_WIP
- depends on RV
- depends on PREEMPT_TRACER
- select DA_MON_EVENTS_IMPLICIT
- bool "wip monitor"
- help
- Enable wip (wakeup in preemptive) sample monitor that illustrates
- the usage of per-cpu monitors, and one limitation of the
- preempt_disable/enable events.
-
- For further information, see:
- Documentation/trace/rv/monitor_wip.rst
-
-config RV_MON_WWNR
- depends on RV
- select DA_MON_EVENTS_ID
- bool "wwnr monitor"
- help
- Enable wwnr (wakeup while not running) sample monitor, this is a
- sample monitor that illustrates the usage of per-task monitor.
- The model is borken on purpose: it serves to test reactors.
-
- For further information, see:
- Documentation/trace/rv/monitor_wwnr.rst
+source "kernel/trace/rv/monitors/wip/Kconfig"
+source "kernel/trace/rv/monitors/wwnr/Kconfig"
+# Add new monitors here
config RV_REACTORS
bool "Runtime verification reactors"
diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile
index 963d14875b45..188b64668e1f 100644
--- a/kernel/trace/rv/Makefile
+++ b/kernel/trace/rv/Makefile
@@ -1,8 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
+ccflags-y += -I $(src) # needed for trace events
+
obj-$(CONFIG_RV) += rv.o
obj-$(CONFIG_RV_MON_WIP) += monitors/wip/wip.o
obj-$(CONFIG_RV_MON_WWNR) += monitors/wwnr/wwnr.o
+# Add new monitors here
obj-$(CONFIG_RV_REACTORS) += rv_reactors.o
obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o
obj-$(CONFIG_RV_REACT_PANIC) += reactor_panic.o
diff --git a/kernel/trace/rv/monitors/wip/Kconfig b/kernel/trace/rv/monitors/wip/Kconfig
new file mode 100644
index 000000000000..3ef664b5cd90
--- /dev/null
+++ b/kernel/trace/rv/monitors/wip/Kconfig
@@ -0,0 +1,12 @@
+config RV_MON_WIP
+ depends on RV
+ depends on PREEMPT_TRACER
+ select DA_MON_EVENTS_IMPLICIT
+ bool "wip monitor"
+ help
+ Enable wip (wakeup in preemptive) sample monitor that illustrates
+ the usage of per-cpu monitors, and one limitation of the
+ preempt_disable/enable events.
+
+ For further information, see:
+ Documentation/trace/rv/monitor_wip.rst
diff --git a/kernel/trace/rv/monitors/wip/wip.c b/kernel/trace/rv/monitors/wip/wip.c
index b2b49a27e886..db7389157c87 100644
--- a/kernel/trace/rv/monitors/wip/wip.c
+++ b/kernel/trace/rv/monitors/wip/wip.c
@@ -10,7 +10,7 @@
#define MODULE_NAME "wip"
-#include <trace/events/rv.h>
+#include <rv_trace.h>
#include <trace/events/sched.h>
#include <trace/events/preemptirq.h>
diff --git a/kernel/trace/rv/monitors/wip/wip_trace.h b/kernel/trace/rv/monitors/wip/wip_trace.h
new file mode 100644
index 000000000000..aa2162f47a4c
--- /dev/null
+++ b/kernel/trace/rv/monitors/wip/wip_trace.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_WIP
+DEFINE_EVENT(event_da_monitor, event_wip,
+ TP_PROTO(char *state, char *event, char *next_state, bool final_state),
+ TP_ARGS(state, event, next_state, final_state));
+
+DEFINE_EVENT(error_da_monitor, error_wip,
+ TP_PROTO(char *state, char *event),
+ TP_ARGS(state, event));
+#endif /* CONFIG_RV_MON_WIP */
diff --git a/kernel/trace/rv/monitors/wwnr/Kconfig b/kernel/trace/rv/monitors/wwnr/Kconfig
new file mode 100644
index 000000000000..ee741aa6d6b8
--- /dev/null
+++ b/kernel/trace/rv/monitors/wwnr/Kconfig
@@ -0,0 +1,11 @@
+config RV_MON_WWNR
+ depends on RV
+ select DA_MON_EVENTS_ID
+ bool "wwnr monitor"
+ help
+	  Enable wwnr (wakeup while not running) sample monitor. This is a
+	  sample monitor that illustrates the usage of per-task monitors.
+	  The model is broken on purpose: it serves to test reactors.
+
+ For further information, see:
+ Documentation/trace/rv/monitor_wwnr.rst
diff --git a/kernel/trace/rv/monitors/wwnr/wwnr.c b/kernel/trace/rv/monitors/wwnr/wwnr.c
index 0e43dd2db685..3b16994a9984 100644
--- a/kernel/trace/rv/monitors/wwnr/wwnr.c
+++ b/kernel/trace/rv/monitors/wwnr/wwnr.c
@@ -10,7 +10,7 @@
#define MODULE_NAME "wwnr"
-#include <trace/events/rv.h>
+#include <rv_trace.h>
#include <trace/events/sched.h>
#include "wwnr.h"
diff --git a/kernel/trace/rv/monitors/wwnr/wwnr_trace.h b/kernel/trace/rv/monitors/wwnr/wwnr_trace.h
new file mode 100644
index 000000000000..fc97ec7476ad
--- /dev/null
+++ b/kernel/trace/rv/monitors/wwnr/wwnr_trace.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Snippet to be included in rv_trace.h
+ */
+
+#ifdef CONFIG_RV_MON_WWNR
+/* id is the pid of the task */
+DEFINE_EVENT(event_da_monitor_id, event_wwnr,
+ TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state),
+ TP_ARGS(id, state, event, next_state, final_state));
+
+DEFINE_EVENT(error_da_monitor_id, error_wwnr,
+ TP_PROTO(int id, char *state, char *event),
+ TP_ARGS(id, state, event));
+#endif /* CONFIG_RV_MON_WWNR */
diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c
index 279c70e1bd74..8657fc8806e7 100644
--- a/kernel/trace/rv/rv.c
+++ b/kernel/trace/rv/rv.c
@@ -145,7 +145,7 @@
#ifdef CONFIG_DA_MON_EVENTS
#define CREATE_TRACE_POINTS
-#include <trace/events/rv.h>
+#include <rv_trace.h>
#endif
#include "rv.h"
diff --git a/kernel/trace/rv/rv_trace.h b/kernel/trace/rv/rv_trace.h
new file mode 100644
index 000000000000..5e65097423ba
--- /dev/null
+++ b/kernel/trace/rv/rv_trace.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rv
+
+#if !defined(_TRACE_RV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RV_H
+
+#include <linux/rv.h>
+#include <linux/tracepoint.h>
+
+#ifdef CONFIG_DA_MON_EVENTS_IMPLICIT
+DECLARE_EVENT_CLASS(event_da_monitor,
+
+ TP_PROTO(char *state, char *event, char *next_state, bool final_state),
+
+ TP_ARGS(state, event, next_state, final_state),
+
+ TP_STRUCT__entry(
+ __array( char, state, MAX_DA_NAME_LEN )
+ __array( char, event, MAX_DA_NAME_LEN )
+ __array( char, next_state, MAX_DA_NAME_LEN )
+ __field( bool, final_state )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->state, state, MAX_DA_NAME_LEN);
+ memcpy(__entry->event, event, MAX_DA_NAME_LEN);
+ memcpy(__entry->next_state, next_state, MAX_DA_NAME_LEN);
+ __entry->final_state = final_state;
+ ),
+
+ TP_printk("%s x %s -> %s %s",
+ __entry->state,
+ __entry->event,
+ __entry->next_state,
+ __entry->final_state ? "(final)" : "")
+);
+
+DECLARE_EVENT_CLASS(error_da_monitor,
+
+ TP_PROTO(char *state, char *event),
+
+ TP_ARGS(state, event),
+
+ TP_STRUCT__entry(
+ __array( char, state, MAX_DA_NAME_LEN )
+ __array( char, event, MAX_DA_NAME_LEN )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->state, state, MAX_DA_NAME_LEN);
+ memcpy(__entry->event, event, MAX_DA_NAME_LEN);
+ ),
+
+ TP_printk("event %s not expected in the state %s",
+ __entry->event,
+ __entry->state)
+);
+
+#include <monitors/wip/wip_trace.h>
+// Add new monitors based on CONFIG_DA_MON_EVENTS_IMPLICIT here
+
+#endif /* CONFIG_DA_MON_EVENTS_IMPLICIT */
+
+#ifdef CONFIG_DA_MON_EVENTS_ID
+DECLARE_EVENT_CLASS(event_da_monitor_id,
+
+ TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state),
+
+ TP_ARGS(id, state, event, next_state, final_state),
+
+ TP_STRUCT__entry(
+ __field( int, id )
+ __array( char, state, MAX_DA_NAME_LEN )
+ __array( char, event, MAX_DA_NAME_LEN )
+ __array( char, next_state, MAX_DA_NAME_LEN )
+ __field( bool, final_state )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->state, state, MAX_DA_NAME_LEN);
+ memcpy(__entry->event, event, MAX_DA_NAME_LEN);
+ memcpy(__entry->next_state, next_state, MAX_DA_NAME_LEN);
+ __entry->id = id;
+ __entry->final_state = final_state;
+ ),
+
+ TP_printk("%d: %s x %s -> %s %s",
+ __entry->id,
+ __entry->state,
+ __entry->event,
+ __entry->next_state,
+ __entry->final_state ? "(final)" : "")
+);
+
+DECLARE_EVENT_CLASS(error_da_monitor_id,
+
+ TP_PROTO(int id, char *state, char *event),
+
+ TP_ARGS(id, state, event),
+
+ TP_STRUCT__entry(
+ __field( int, id )
+ __array( char, state, MAX_DA_NAME_LEN )
+ __array( char, event, MAX_DA_NAME_LEN )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->state, state, MAX_DA_NAME_LEN);
+ memcpy(__entry->event, event, MAX_DA_NAME_LEN);
+ __entry->id = id;
+ ),
+
+ TP_printk("%d: event %s not expected in the state %s",
+ __entry->id,
+ __entry->event,
+ __entry->state)
+);
+
+#include <monitors/wwnr/wwnr_trace.h>
+// Add new monitors based on CONFIG_DA_MON_EVENTS_ID here
+
+#endif /* CONFIG_DA_MON_EVENTS_ID */
+#endif /* _TRACE_RV_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE rv_trace
+#include <trace/define_trace.h>
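
With the per-monitor snippets moved out of trace/events/rv.h, adding a new monitor only needs a Kconfig and Makefile entry, a <name>_trace.h snippet, and one include in this header. A hypothetical per-cpu monitor "foo" (the name and the CONFIG_RV_MON_FOO symbol are illustrative, not part of this patch) would add "#include <monitors/foo/foo_trace.h>" next to the wip include above, with a snippet defining its events against the classes declared here:

#ifdef CONFIG_RV_MON_FOO		/* hypothetical Kconfig symbol */
DEFINE_EVENT(event_da_monitor, event_foo,
	     TP_PROTO(char *state, char *event, char *next_state, bool final_state),
	     TP_ARGS(state, event, next_state, final_state));

DEFINE_EVENT(error_da_monitor, error_foo,
	     TP_PROTO(char *state, char *event),
	     TP_ARGS(state, event));
#endif /* CONFIG_RV_MON_FOO */
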
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index be62f0ea1814..0e6d517e74e0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -26,6 +26,7 @@
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
+#include <linux/cleanup.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/module.h>
@@ -535,19 +536,16 @@ LIST_HEAD(ftrace_trace_arrays);
int trace_array_get(struct trace_array *this_tr)
{
struct trace_array *tr;
- int ret = -ENODEV;
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&trace_types_lock);
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
if (tr == this_tr) {
tr->ref++;
- ret = 0;
- break;
+ return 0;
}
}
- mutex_unlock(&trace_types_lock);
- return ret;
+ return -ENODEV;
}
static void __trace_array_put(struct trace_array *this_tr)
@@ -1443,22 +1441,20 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
cond_update_fn_t update)
{
- struct cond_snapshot *cond_snapshot;
- int ret = 0;
+ struct cond_snapshot *cond_snapshot __free(kfree) =
+ kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
+ int ret;
- cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
if (!cond_snapshot)
return -ENOMEM;
cond_snapshot->cond_data = cond_data;
cond_snapshot->update = update;
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&trace_types_lock);
- if (tr->current_trace->use_max_tr) {
- ret = -EBUSY;
- goto fail_unlock;
- }
+ if (tr->current_trace->use_max_tr)
+ return -EBUSY;
/*
* The cond_snapshot can only change to NULL without the
@@ -1468,29 +1464,20 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
* do safely with only holding the trace_types_lock and not
* having to take the max_lock.
*/
- if (tr->cond_snapshot) {
- ret = -EBUSY;
- goto fail_unlock;
- }
+ if (tr->cond_snapshot)
+ return -EBUSY;
ret = tracing_arm_snapshot_locked(tr);
if (ret)
- goto fail_unlock;
+ return ret;
local_irq_disable();
arch_spin_lock(&tr->max_lock);
- tr->cond_snapshot = cond_snapshot;
+ tr->cond_snapshot = no_free_ptr(cond_snapshot);
arch_spin_unlock(&tr->max_lock);
local_irq_enable();
- mutex_unlock(&trace_types_lock);
-
- return ret;
-
- fail_unlock:
- mutex_unlock(&trace_types_lock);
- kfree(cond_snapshot);
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
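
The tracing_snapshot_cond_enable() rewrite above leans on the scope-based allocation helpers from <linux/cleanup.h>: the __free(kfree) annotation frees the allocation on any early return, and no_free_ptr() hands ownership over once the pointer is published. A minimal sketch of the pattern, using a placeholder struct and condition rather than anything from this file:

static struct widget *global_widget;	/* placeholder, not a kernel API */

static int install_new_widget(void)
{
	struct widget *w __free(kfree) = kzalloc(sizeof(*w), GFP_KERNEL);

	if (!w)
		return -ENOMEM;

	if (global_widget)		/* early return: w is freed automatically */
		return -EBUSY;

	/* Success: no_free_ptr() disarms the cleanup and transfers ownership. */
	global_widget = no_free_ptr(w);
	return 0;
}
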
@@ -2203,10 +2190,10 @@ static __init int init_trace_selftests(void)
selftests_can_run = true;
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&trace_types_lock);
if (list_empty(&postponed_selftests))
- goto out;
+ return 0;
pr_info("Running postponed tracer tests:\n");
@@ -2235,9 +2222,6 @@ static __init int init_trace_selftests(void)
}
tracing_selftest_running = false;
- out:
- mutex_unlock(&trace_types_lock);
-
return 0;
}
core_initcall(init_trace_selftests);
@@ -2807,7 +2791,7 @@ int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
int save_tracepoint_printk;
int ret;
- mutex_lock(&tracepoint_printk_mutex);
+ guard(mutex)(&tracepoint_printk_mutex);
save_tracepoint_printk = tracepoint_printk;
ret = proc_dointvec(table, write, buffer, lenp, ppos);
@@ -2820,16 +2804,13 @@ int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
tracepoint_printk = 0;
if (save_tracepoint_printk == tracepoint_printk)
- goto out;
+ return ret;
if (tracepoint_printk)
static_key_enable(&tracepoint_printk_key.key);
else
static_key_disable(&tracepoint_printk_key.key);
- out:
- mutex_unlock(&tracepoint_printk_mutex);
-
return ret;
}
@@ -3611,17 +3592,12 @@ char *trace_iter_expand_format(struct trace_iterator *iter)
}
/* Returns true if the string is safe to dereference from an event */
-static bool trace_safe_str(struct trace_iterator *iter, const char *str,
- bool star, int len)
+static bool trace_safe_str(struct trace_iterator *iter, const char *str)
{
unsigned long addr = (unsigned long)str;
struct trace_event *trace_event;
struct trace_event_call *event;
- /* Ignore strings with no length */
- if (star && !len)
- return true;
-
/* OK if part of the event data */
if ((addr >= (unsigned long)iter->ent) &&
(addr < (unsigned long)iter->ent + iter->ent_size))
@@ -3661,181 +3637,69 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str,
return false;
}
-static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
-
-static int test_can_verify_check(const char *fmt, ...)
-{
- char buf[16];
- va_list ap;
- int ret;
-
- /*
- * The verifier is dependent on vsnprintf() modifies the va_list
- * passed to it, where it is sent as a reference. Some architectures
- * (like x86_32) passes it by value, which means that vsnprintf()
- * does not modify the va_list passed to it, and the verifier
- * would then need to be able to understand all the values that
- * vsnprintf can use. If it is passed by value, then the verifier
- * is disabled.
- */
- va_start(ap, fmt);
- vsnprintf(buf, 16, "%d", ap);
- ret = va_arg(ap, int);
- va_end(ap);
-
- return ret;
-}
-
-static void test_can_verify(void)
-{
- if (!test_can_verify_check("%d %d", 0, 1)) {
- pr_info("trace event string verifier disabled\n");
- static_branch_inc(&trace_no_verify);
- }
-}
-
/**
- * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
+ * ignore_event - Check dereferenced fields while writing to the seq buffer
* @iter: The iterator that holds the seq buffer and the event being printed
- * @fmt: The format used to print the event
- * @ap: The va_list holding the data to print from @fmt.
*
- * This writes the data into the @iter->seq buffer using the data from
- * @fmt and @ap. If the format has a %s, then the source of the string
- * is examined to make sure it is safe to print, otherwise it will
- * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
- * pointer.
+ * At boot up, test_event_printk() will flag any event that dereferences
+ * a string with "%s" that does exist in the ring buffer. It may still
+ * be valid, as the string may point to a static string in the kernel
+ * rodata that never gets freed. But if the string pointer is pointing
+ * to something that was allocated, there's a chance that it can be freed
+ * by the time the user reads the trace. This would cause a bad memory
+ * access by the kernel and possibly crash the system.
+ *
+ * This function will check if the event has any fields flagged as needing
+ * to be checked at runtime and perform those checks.
+ *
+ * If it is found that a field is unsafe, it will write into the @iter->seq
+ * a message stating what was found to be unsafe.
+ *
+ * @return: true if the event is unsafe and should be ignored,
+ * false otherwise.
*/
-void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
- va_list ap)
+bool ignore_event(struct trace_iterator *iter)
{
- long text_delta = 0;
- long data_delta = 0;
- const char *p = fmt;
- const char *str;
- bool good;
- int i, j;
+ struct ftrace_event_field *field;
+ struct trace_event *trace_event;
+ struct trace_event_call *event;
+ struct list_head *head;
+ struct trace_seq *seq;
+ const void *ptr;
- if (WARN_ON_ONCE(!fmt))
- return;
+ trace_event = ftrace_find_event(iter->ent->type);
- if (static_branch_unlikely(&trace_no_verify))
- goto print;
+ seq = &iter->seq;
- /*
- * When the kernel is booted with the tp_printk command line
- * parameter, trace events go directly through to printk().
- * It also is checked by this function, but it does not
- * have an associated trace_array (tr) for it.
- */
- if (iter->tr) {
- text_delta = iter->tr->text_delta;
- data_delta = iter->tr->data_delta;
+ if (!trace_event) {
+ trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
+ return true;
}
- /* Don't bother checking when doing a ftrace_dump() */
- if (iter->fmt == static_fmt_buf)
- goto print;
-
- while (*p) {
- bool star = false;
- int len = 0;
-
- j = 0;
-
- /*
- * We only care about %s and variants
- * as well as %p[sS] if delta is non-zero
- */
- for (i = 0; p[i]; i++) {
- if (i + 1 >= iter->fmt_size) {
- /*
- * If we can't expand the copy buffer,
- * just print it.
- */
- if (!trace_iter_expand_format(iter))
- goto print;
- }
-
- if (p[i] == '\\' && p[i+1]) {
- i++;
- continue;
- }
- if (p[i] == '%') {
- /* Need to test cases like %08.*s */
- for (j = 1; p[i+j]; j++) {
- if (isdigit(p[i+j]) ||
- p[i+j] == '.')
- continue;
- if (p[i+j] == '*') {
- star = true;
- continue;
- }
- break;
- }
- if (p[i+j] == 's')
- break;
-
- if (text_delta && p[i+1] == 'p' &&
- ((p[i+2] == 's' || p[i+2] == 'S')))
- break;
-
- star = false;
- }
- j = 0;
- }
- /* If no %s found then just print normally */
- if (!p[i])
- break;
-
- /* Copy up to the %s, and print that */
- strncpy(iter->fmt, p, i);
- iter->fmt[i] = '\0';
- trace_seq_vprintf(&iter->seq, iter->fmt, ap);
+ event = container_of(trace_event, struct trace_event_call, event);
+ if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
+ return false;
- /* Add delta to %pS pointers */
- if (p[i+1] == 'p') {
- unsigned long addr;
- char fmt[4];
+ head = trace_get_fields(event);
+ if (!head) {
+ trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
+ trace_event_name(event));
+ return true;
+ }
- fmt[0] = '%';
- fmt[1] = 'p';
- fmt[2] = p[i+2]; /* Either %ps or %pS */
- fmt[3] = '\0';
+ /* Offsets are from the iter->ent that points to the raw event */
+ ptr = iter->ent;
- addr = va_arg(ap, unsigned long);
- addr += text_delta;
- trace_seq_printf(&iter->seq, fmt, (void *)addr);
+ list_for_each_entry(field, head, link) {
+ const char *str;
+ bool good;
- p += i + 3;
+ if (!field->needs_test)
continue;
- }
- /*
- * If iter->seq is full, the above call no longer guarantees
- * that ap is in sync with fmt processing, and further calls
- * to va_arg() can return wrong positional arguments.
- *
- * Ensure that ap is no longer used in this case.
- */
- if (iter->seq.full) {
- p = "";
- break;
- }
-
- if (star)
- len = va_arg(ap, int);
-
- /* The ap now points to the string data of the %s */
- str = va_arg(ap, const char *);
+ str = *(const char **)(ptr + field->offset);
- good = trace_safe_str(iter, str, star, len);
-
- /* Could be from the last boot */
- if (data_delta && !good) {
- str += data_delta;
- good = trace_safe_str(iter, str, star, len);
- }
+ good = trace_safe_str(iter, str);
/*
* If you hit this warning, it is likely that the
@@ -3846,44 +3710,14 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
* instead. See samples/trace_events/trace-events-sample.h
* for reference.
*/
- if (WARN_ONCE(!good, "fmt: '%s' current_buffer: '%s'",
- fmt, seq_buf_str(&iter->seq.seq))) {
- int ret;
-
- /* Try to safely read the string */
- if (star) {
- if (len + 1 > iter->fmt_size)
- len = iter->fmt_size - 1;
- if (len < 0)
- len = 0;
- ret = copy_from_kernel_nofault(iter->fmt, str, len);
- iter->fmt[len] = 0;
- star = false;
- } else {
- ret = strncpy_from_kernel_nofault(iter->fmt, str,
- iter->fmt_size);
- }
- if (ret < 0)
- trace_seq_printf(&iter->seq, "(0x%px)", str);
- else
- trace_seq_printf(&iter->seq, "(0x%px:%s)",
- str, iter->fmt);
- str = "[UNSAFE-MEMORY]";
- strcpy(iter->fmt, "%s");
- } else {
- strncpy(iter->fmt, p + i, j + 1);
- iter->fmt[j+1] = '\0';
+ if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
+ trace_event_name(event), field->name)) {
+ trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
+ trace_event_name(event), field->name);
+ return true;
}
- if (star)
- trace_seq_printf(&iter->seq, iter->fmt, len, str);
- else
- trace_seq_printf(&iter->seq, iter->fmt, str);
-
- p += i + j + 1;
}
- print:
- if (*p)
- trace_seq_vprintf(&iter->seq, p, ap);
+ return false;
}
const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
@@ -4269,6 +4103,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
preempt_model_none() ? "server" :
preempt_model_voluntary() ? "desktop" :
preempt_model_full() ? "preempt" :
+ preempt_model_lazy() ? "lazy" :
preempt_model_rt() ? "preempt_rt" :
"unknown",
/* These are reserved for later use */
@@ -4353,6 +4188,15 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
if (event) {
if (tr->trace_flags & TRACE_ITER_FIELDS)
return print_event_fields(iter, event);
+ /*
+ * For TRACE_EVENT() events, the print_fmt is not
+ * safe to use if the array has delta offsets
+ * Force printing via the fields.
+ */
+ if ((tr->text_delta || tr->data_delta) &&
+ event->type > __TRACE_LAST_TYPE)
+ return print_event_fields(iter, event);
+
return event->funcs->trace(iter, sym_flags, event);
}
@@ -5225,6 +5069,9 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
cpumask_var_t tracing_cpumask_new;
int err;
+ if (count == 0 || count > KMALLOC_MAX_SIZE)
+ return -EINVAL;
+
if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
return -ENOMEM;
@@ -5261,7 +5108,8 @@ static int tracing_trace_options_show(struct seq_file *m, void *v)
u32 tracer_flags;
int i;
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&trace_types_lock);
+
tracer_flags = tr->current_trace->flags->val;
trace_opts = tr->current_trace->flags->opts;
@@ -5278,7 +5126,6 @@ static int tracing_trace_options_show(struct seq_file *m, void *v)
else
seq_printf(m, "no%s\n", trace_opts[i].name);
}
- mutex_unlock(&trace_types_lock);
return 0;
}
@@ -5671,6 +5518,8 @@ static const char readme_msg[] =
"\t efield: For event probes ('e' types), the field is on of the fields\n"
"\t of the <attached-group>/<attached-event>.\n"
#endif
+ " set_event\t\t- Enables events by name written into it\n"
+ "\t\t\t Can enable module events via: :mod:<module>\n"
" events/\t\t- Directory containing all trace event subsystems:\n"
" enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
" events/<system>/\t- Directory containing all trace events for <system>:\n"
@@ -5943,7 +5792,7 @@ trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
return;
}
- mutex_lock(&trace_eval_mutex);
+ guard(mutex)(&trace_eval_mutex);
if (!trace_eval_maps)
trace_eval_maps = map_array;
@@ -5967,8 +5816,6 @@ trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
map_array++;
}
memset(map_array, 0, sizeof(*map_array));
-
- mutex_unlock(&trace_eval_mutex);
}
static void trace_create_eval_file(struct dentry *d_tracer)
@@ -6130,26 +5977,15 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
unsigned long size, int cpu_id)
{
- int ret;
-
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&trace_types_lock);
if (cpu_id != RING_BUFFER_ALL_CPUS) {
/* make sure, this cpu is enabled in the mask */
- if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
- ret = -EINVAL;
- goto out;
- }
+ if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
+ return -EINVAL;
}
- ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
- if (ret < 0)
- ret = -ENOMEM;
-
-out:
- mutex_unlock(&trace_types_lock);
-
- return ret;
+ return __tracing_resize_ring_buffer(tr, size, cpu_id);
}
static void update_last_data(struct trace_array *tr)
@@ -6240,9 +6076,9 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
#ifdef CONFIG_TRACER_MAX_TRACE
bool had_max_tr;
#endif
- int ret = 0;
+ int ret;
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&trace_types_lock);
update_last_data(tr);
@@ -6250,7 +6086,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
RING_BUFFER_ALL_CPUS);
if (ret < 0)
- goto out;
+ return ret;
ret = 0;
}
@@ -6258,43 +6094,37 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
if (strcmp(t->name, buf) == 0)
break;
}
- if (!t) {
- ret = -EINVAL;
- goto out;
- }
+ if (!t)
+ return -EINVAL;
+
if (t == tr->current_trace)
- goto out;
+ return 0;
#ifdef CONFIG_TRACER_SNAPSHOT
if (t->use_max_tr) {
local_irq_disable();
arch_spin_lock(&tr->max_lock);
- if (tr->cond_snapshot)
- ret = -EBUSY;
+ ret = tr->cond_snapshot ? -EBUSY : 0;
arch_spin_unlock(&tr->max_lock);
local_irq_enable();
if (ret)
- goto out;
+ return ret;
}
#endif
/* Some tracers won't work on kernel command line */
if (system_state < SYSTEM_RUNNING && t->noboot) {
pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
t->name);
- goto out;
+ return -EINVAL;
}
/* Some tracers are only allowed for the top level buffer */
- if (!trace_ok_for_array(t, tr)) {
- ret = -EINVAL;
- goto out;
- }
+ if (!trace_ok_for_array(t, tr))
+ return -EINVAL;
/* If trace pipe files are being read, we can't change the tracer */
- if (tr->trace_ref) {
- ret = -EBUSY;
- goto out;
- }
+ if (tr->trace_ref)
+ return -EBUSY;
trace_branch_disable();
@@ -6325,7 +6155,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
if (!had_max_tr && t->use_max_tr) {
ret = tracing_arm_snapshot_locked(tr);
if (ret)
- goto out;
+ return ret;
}
#else
tr->current_trace = &nop_trace;
@@ -6338,17 +6168,15 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
if (t->use_max_tr)
tracing_disarm_snapshot(tr);
#endif
- goto out;
+ return ret;
}
}
tr->current_trace = t;
tr->current_trace->enabled++;
trace_branch_enable(tr);
- out:
- mutex_unlock(&trace_types_lock);
- return ret;
+ return 0;
}
static ssize_t
@@ -6426,22 +6254,18 @@ tracing_thresh_write(struct file *filp, const char __user *ubuf,
struct trace_array *tr = filp->private_data;
int ret;
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&trace_types_lock);
ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
if (ret < 0)
- goto out;
+ return ret;
if (tr->current_trace->update_thresh) {
ret = tr->current_trace->update_thresh(tr);
if (ret < 0)
- goto out;
+ return ret;
}
- ret = cnt;
-out:
- mutex_unlock(&trace_types_lock);
-
- return ret;
+ return cnt;
}
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -6660,31 +6484,29 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
* This is just a matter of trace coherency; the ring buffer itself
* is protected.
*/
- mutex_lock(&iter->mutex);
+ guard(mutex)(&iter->mutex);
/* return any leftover data */
sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
if (sret != -EBUSY)
- goto out;
+ return sret;
trace_seq_init(&iter->seq);
if (iter->trace->read) {
sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
if (sret)
- goto out;
+ return sret;
}
waitagain:
sret = tracing_wait_pipe(filp);
if (sret <= 0)
- goto out;
+ return sret;
/* stop when tracing is finished */
- if (trace_empty(iter)) {
- sret = 0;
- goto out;
- }
+ if (trace_empty(iter))
+ return 0;
if (cnt >= TRACE_SEQ_BUFFER_SIZE)
cnt = TRACE_SEQ_BUFFER_SIZE - 1;
@@ -6748,9 +6570,6 @@ waitagain:
if (sret == -EBUSY)
goto waitagain;
-out:
- mutex_unlock(&iter->mutex);
-
return sret;
}
@@ -7342,25 +7161,19 @@ u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_eve
*/
int tracing_set_filter_buffering(struct trace_array *tr, bool set)
{
- int ret = 0;
-
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&trace_types_lock);
if (set && tr->no_filter_buffering_ref++)
- goto out;
+ return 0;
if (!set) {
- if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
- ret = -EINVAL;
- goto out;
- }
+ if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
+ return -EINVAL;
--tr->no_filter_buffering_ref;
}
- out:
- mutex_unlock(&trace_types_lock);
- return ret;
+ return 0;
}
struct ftrace_buffer_info {
@@ -7436,12 +7249,10 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (ret)
return ret;
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&trace_types_lock);
- if (tr->current_trace->use_max_tr) {
- ret = -EBUSY;
- goto out;
- }
+ if (tr->current_trace->use_max_tr)
+ return -EBUSY;
local_irq_disable();
arch_spin_lock(&tr->max_lock);
@@ -7450,24 +7261,20 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
arch_spin_unlock(&tr->max_lock);
local_irq_enable();
if (ret)
- goto out;
+ return ret;
switch (val) {
case 0:
- if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
- ret = -EINVAL;
- break;
- }
+ if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
+ return -EINVAL;
if (tr->allocated_snapshot)
free_snapshot(tr);
break;
case 1:
/* Only allow per-cpu swap if the ring buffer supports it */
#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
- if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
- ret = -EINVAL;
- break;
- }
+ if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
+ return -EINVAL;
#endif
if (tr->allocated_snapshot)
ret = resize_buffer_duplicate_size(&tr->max_buffer,
@@ -7475,7 +7282,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
ret = tracing_arm_snapshot_locked(tr);
if (ret)
- break;
+ return ret;
/* Now, we're going to swap */
if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
@@ -7502,8 +7309,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
*ppos += cnt;
ret = cnt;
}
-out:
- mutex_unlock(&trace_types_lock);
+
return ret;
}
@@ -7889,12 +7695,11 @@ void tracing_log_err(struct trace_array *tr,
len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
- mutex_lock(&tracing_err_log_lock);
+ guard(mutex)(&tracing_err_log_lock);
+
err = get_tracing_log_err(tr, len);
- if (PTR_ERR(err) == -ENOMEM) {
- mutex_unlock(&tracing_err_log_lock);
+ if (PTR_ERR(err) == -ENOMEM)
return;
- }
snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
@@ -7905,7 +7710,6 @@ void tracing_log_err(struct trace_array *tr,
err->info.ts = local_clock();
list_add_tail(&err->list, &tr->err_log);
- mutex_unlock(&tracing_err_log_lock);
}
static void clear_tracing_err_log(struct trace_array *tr)
@@ -8475,6 +8279,10 @@ static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
struct trace_iterator *iter = &info->iter;
int ret = 0;
+ /* Currently the boot mapped buffer is not supported for mmap */
+ if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
+ return -ENODEV;
+
ret = get_snapshot_map(iter->tr);
if (ret)
return ret;
@@ -9601,6 +9409,10 @@ trace_array_create_systems(const char *name, const char *systems,
INIT_LIST_HEAD(&tr->hist_vars);
INIT_LIST_HEAD(&tr->err_log);
+#ifdef CONFIG_MODULES
+ INIT_LIST_HEAD(&tr->mod_events);
+#endif
+
if (allocate_trace_buffers(tr, trace_buf_size) < 0)
goto out_free_tr;
@@ -9649,20 +9461,17 @@ static int instance_mkdir(const char *name)
struct trace_array *tr;
int ret;
- mutex_lock(&event_mutex);
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&event_mutex);
+ guard(mutex)(&trace_types_lock);
ret = -EEXIST;
if (trace_array_find(name))
- goto out_unlock;
+ return -EEXIST;
tr = trace_array_create(name);
ret = PTR_ERR_OR_ZERO(tr);
-out_unlock:
- mutex_unlock(&trace_types_lock);
- mutex_unlock(&event_mutex);
return ret;
}
@@ -9712,24 +9521,23 @@ struct trace_array *trace_array_get_by_name(const char *name, const char *system
{
struct trace_array *tr;
- mutex_lock(&event_mutex);
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&event_mutex);
+ guard(mutex)(&trace_types_lock);
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
- if (tr->name && strcmp(tr->name, name) == 0)
- goto out_unlock;
+ if (tr->name && strcmp(tr->name, name) == 0) {
+ tr->ref++;
+ return tr;
+ }
}
tr = trace_array_create_systems(name, systems, 0, 0);
if (IS_ERR(tr))
tr = NULL;
-out_unlock:
- if (tr)
+ else
tr->ref++;
- mutex_unlock(&trace_types_lock);
- mutex_unlock(&event_mutex);
return tr;
}
EXPORT_SYMBOL_GPL(trace_array_get_by_name);
@@ -9780,48 +9588,36 @@ static int __remove_instance(struct trace_array *tr)
int trace_array_destroy(struct trace_array *this_tr)
{
struct trace_array *tr;
- int ret;
if (!this_tr)
return -EINVAL;
- mutex_lock(&event_mutex);
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&event_mutex);
+ guard(mutex)(&trace_types_lock);
- ret = -ENODEV;
/* Making sure trace array exists before destroying it. */
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
- if (tr == this_tr) {
- ret = __remove_instance(tr);
- break;
- }
+ if (tr == this_tr)
+ return __remove_instance(tr);
}
- mutex_unlock(&trace_types_lock);
- mutex_unlock(&event_mutex);
-
- return ret;
+ return -ENODEV;
}
EXPORT_SYMBOL_GPL(trace_array_destroy);
static int instance_rmdir(const char *name)
{
struct trace_array *tr;
- int ret;
- mutex_lock(&event_mutex);
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&event_mutex);
+ guard(mutex)(&trace_types_lock);
- ret = -ENODEV;
tr = trace_array_find(name);
- if (tr)
- ret = __remove_instance(tr);
-
- mutex_unlock(&trace_types_lock);
- mutex_unlock(&event_mutex);
+ if (!tr)
+ return -ENODEV;
- return ret;
+ return __remove_instance(tr);
}
static __init void create_trace_instances(struct dentry *d_tracer)
@@ -9834,19 +9630,16 @@ static __init void create_trace_instances(struct dentry *d_tracer)
if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
return;
- mutex_lock(&event_mutex);
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&event_mutex);
+ guard(mutex)(&trace_types_lock);
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
if (!tr->name)
continue;
if (MEM_FAIL(trace_array_create_dir(tr) < 0,
"Failed to create instance directory\n"))
- break;
+ return;
}
-
- mutex_unlock(&trace_types_lock);
- mutex_unlock(&event_mutex);
}
static void
@@ -10036,6 +9829,24 @@ late_initcall_sync(trace_eval_sync);
#ifdef CONFIG_MODULES
+
+bool module_exists(const char *module)
+{
+ /* All modules have the symbol __this_module */
+ static const char this_mod[] = "__this_module";
+ char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
+ unsigned long val;
+ int n;
+
+ n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
+
+ if (n > sizeof(modname) - 1)
+ return false;
+
+ val = module_kallsyms_lookup_name(modname);
+ return val != 0;
+}
+
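
module_exists() above builds "<module>:__this_module" with snprintf() and treats truncation as "not found", since a truncated name could match the wrong symbol. A small userspace sketch of that truncation guard, with made-up buffer size and module names:

#include <stdbool.h>
#include <stdio.h>

static bool format_fits(char *dst, size_t len, const char *mod)
{
	int n = snprintf(dst, len, "%s:%s", mod, "__this_module");

	/* snprintf() reports the length it wanted; more than len - 1 means
	 * the result was truncated and must not be used for a lookup. */
	return n >= 0 && (size_t)n <= len - 1;
}

int main(void)
{
	char buf[32];

	printf("%d\n", format_fits(buf, sizeof(buf), "nf_conntrack"));
	printf("%d\n", format_fits(buf, sizeof(buf), "a_very_long_module_name_here"));
	return 0;
}
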
static void trace_module_add_evals(struct module *mod)
{
if (!mod->num_trace_evals)
@@ -10060,7 +9871,7 @@ static void trace_module_remove_evals(struct module *mod)
if (!mod->num_trace_evals)
return;
- mutex_lock(&trace_eval_mutex);
+ guard(mutex)(&trace_eval_mutex);
map = trace_eval_maps;
@@ -10072,12 +9883,10 @@ static void trace_module_remove_evals(struct module *mod)
map = map->tail.next;
}
if (!map)
- goto out;
+ return;
*last = trace_eval_jmp_to_tail(map)->tail.next;
kfree(map);
- out:
- mutex_unlock(&trace_eval_mutex);
}
#else
static inline void trace_module_remove_evals(struct module *mod) { }
@@ -10750,6 +10559,10 @@ __init static int tracer_alloc_buffers(void)
#endif
ftrace_init_global_array_ops(&global_trace);
+#ifdef CONFIG_MODULES
+ INIT_LIST_HEAD(&global_trace.mod_events);
+#endif
+
init_trace_flags_index(&global_trace);
register_tracer(&nop_trace);
@@ -10777,8 +10590,6 @@ __init static int tracer_alloc_buffers(void)
register_snapshot_cmd();
- test_can_verify();
-
return 0;
out_free_pipe_cpumask:
@@ -10797,6 +10608,14 @@ out:
return ret;
}
+#ifdef CONFIG_FUNCTION_TRACER
+/* Used to set module cached ftrace filtering at boot up */
+__init struct trace_array *trace_get_global_array(void)
+{
+ return &global_trace;
+}
+#endif
+
void __init ftrace_boot_snapshot(void)
{
#ifdef CONFIG_TRACER_MAX_TRACE
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 266740b4e121..9c21ba45b7af 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -400,6 +400,9 @@ struct trace_array {
cpumask_var_t pipe_cpumask;
int ref;
int trace_ref;
+#ifdef CONFIG_MODULES
+ struct list_head mod_events;
+#endif
#ifdef CONFIG_FUNCTION_TRACER
struct ftrace_ops *ops;
struct trace_pid_list __rcu *function_pids;
@@ -432,8 +435,18 @@ struct trace_array {
enum {
TRACE_ARRAY_FL_GLOBAL = BIT(0),
TRACE_ARRAY_FL_BOOT = BIT(1),
+ TRACE_ARRAY_FL_MOD_INIT = BIT(2),
};
+#ifdef CONFIG_MODULES
+bool module_exists(const char *module);
+#else
+static inline bool module_exists(const char *module)
+{
+ return false;
+}
+#endif
+
extern struct list_head ftrace_trace_arrays;
extern struct mutex trace_types_lock;
@@ -667,9 +680,8 @@ void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
bool trace_is_tracepoint_string(const char *str);
const char *trace_event_format(struct trace_iterator *iter, const char *fmt);
-void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
- va_list ap) __printf(2, 0);
char *trace_iter_expand_format(struct trace_iterator *iter);
+bool ignore_event(struct trace_iterator *iter);
int trace_empty(struct trace_iterator *iter);
@@ -694,8 +706,10 @@ void trace_latency_header(struct seq_file *m);
void trace_default_header(struct seq_file *m);
void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
-void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops);
-int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops);
+void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops,
+ struct ftrace_regs *fregs);
+int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
+ struct ftrace_regs *fregs);
void tracing_start_cmdline_record(void);
void tracing_stop_cmdline_record(void);
@@ -718,8 +732,6 @@ extern unsigned long tracing_thresh;
/* PID filtering */
-extern int pid_max;
-
bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids,
pid_t search_pid);
bool trace_ignore_this_task(struct trace_pid_list *filtered_pids,
@@ -912,7 +924,9 @@ extern int __trace_graph_retaddr_entry(struct trace_array *tr,
unsigned long retaddr);
extern void __trace_graph_return(struct trace_array *tr,
struct ftrace_graph_ret *trace,
- unsigned int trace_ctx);
+ unsigned int trace_ctx,
+ u64 calltime, u64 rettime);
+
extern void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
extern int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
extern void free_fgraph_ops(struct trace_array *tr);
@@ -1115,6 +1129,7 @@ void ftrace_destroy_function_files(struct trace_array *tr);
int ftrace_allocate_ftrace_ops(struct trace_array *tr);
void ftrace_free_ftrace_ops(struct trace_array *tr);
void ftrace_init_global_array_ops(struct trace_array *tr);
+struct trace_array *trace_get_global_array(void);
void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func);
void ftrace_reset_array_ops(struct trace_array *tr);
void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer);
@@ -1413,7 +1428,8 @@ struct ftrace_event_field {
int filter_type;
int offset;
int size;
- int is_signed;
+ unsigned int is_signed:1;
+ unsigned int needs_test:1;
int len;
};
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index 4376887e0d8a..a322e4f249a5 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -74,24 +74,19 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type
struct dyn_event *pos, *n;
char *system = NULL, *event, *p;
int argc, ret = -ENOENT;
- char **argv;
+ char **argv __free(argv_free) = argv_split(GFP_KERNEL, raw_command, &argc);
- argv = argv_split(GFP_KERNEL, raw_command, &argc);
if (!argv)
return -ENOMEM;
if (argv[0][0] == '-') {
- if (argv[0][1] != ':') {
- ret = -EINVAL;
- goto out;
- }
+ if (argv[0][1] != ':')
+ return -EINVAL;
event = &argv[0][2];
} else {
event = strchr(argv[0], ':');
- if (!event) {
- ret = -EINVAL;
- goto out;
- }
+ if (!event)
+ return -EINVAL;
event++;
}
@@ -101,10 +96,8 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type
event = p + 1;
*p = '\0';
}
- if (!system && event[0] == '\0') {
- ret = -EINVAL;
- goto out;
- }
+ if (!system && event[0] == '\0')
+ return -EINVAL;
mutex_lock(&event_mutex);
for_each_dyn_event_safe(pos, n) {
@@ -120,8 +113,6 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type
}
tracing_reset_all_online_cpus();
mutex_unlock(&event_mutex);
-out:
- argv_free(argv);
return ret;
}
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 82fd174ebbe0..fbfb396905a6 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -124,8 +124,8 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
__field_packed( unsigned long, ret, retval )
__field_packed( int, ret, depth )
__field_packed( unsigned int, ret, overrun )
- __field_packed( unsigned long long, ret, calltime)
- __field_packed( unsigned long long, ret, rettime )
+ __field(unsigned long long, calltime )
+ __field(unsigned long long, rettime )
),
F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d retval: %lx",
@@ -146,8 +146,8 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
__field_packed( unsigned long, ret, func )
__field_packed( int, ret, depth )
__field_packed( unsigned int, ret, overrun )
- __field_packed( unsigned long long, ret, calltime)
- __field_packed( unsigned long long, ret, rettime )
+ __field(unsigned long long, calltime )
+ __field(unsigned long long, rettime )
),
F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d",
diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
index be8be0c1aaf0..82fd637cfc19 100644
--- a/kernel/trace/trace_eprobe.c
+++ b/kernel/trace/trace_eprobe.c
@@ -917,10 +917,10 @@ static int __trace_eprobe_create(int argc, const char *argv[])
goto error;
}
- mutex_lock(&event_mutex);
- event_call = find_and_get_event(sys_name, sys_event);
- ep = alloc_event_probe(group, event, event_call, argc - 2);
- mutex_unlock(&event_mutex);
+ scoped_guard(mutex, &event_mutex) {
+ event_call = find_and_get_event(sys_name, sys_event);
+ ep = alloc_event_probe(group, event, event_call, argc - 2);
+ }
if (IS_ERR(ep)) {
ret = PTR_ERR(ep);
@@ -952,23 +952,21 @@ static int __trace_eprobe_create(int argc, const char *argv[])
if (ret < 0)
goto error;
init_trace_eprobe_call(ep);
- mutex_lock(&event_mutex);
- ret = trace_probe_register_event_call(&ep->tp);
- if (ret) {
- if (ret == -EEXIST) {
- trace_probe_log_set_index(0);
- trace_probe_log_err(0, EVENT_EXIST);
+ scoped_guard(mutex, &event_mutex) {
+ ret = trace_probe_register_event_call(&ep->tp);
+ if (ret) {
+ if (ret == -EEXIST) {
+ trace_probe_log_set_index(0);
+ trace_probe_log_err(0, EVENT_EXIST);
+ }
+ goto error;
+ }
+ ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
+ if (ret < 0) {
+ trace_probe_unregister_event_call(&ep->tp);
+ goto error;
}
- mutex_unlock(&event_mutex);
- goto error;
- }
- ret = dyn_event_add(&ep->devent, &ep->tp.event->call);
- if (ret < 0) {
- trace_probe_unregister_event_call(&ep->tp);
- mutex_unlock(&event_mutex);
- goto error;
}
- mutex_unlock(&event_mutex);
return ret;
parse_error:
ret = -EINVAL;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 77e68efbd43e..513de9ceb80e 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -82,7 +82,7 @@ static int system_refcount_dec(struct event_subsystem *system)
}
static struct ftrace_event_field *
-__find_event_field(struct list_head *head, char *name)
+__find_event_field(struct list_head *head, const char *name)
{
struct ftrace_event_field *field;
@@ -114,7 +114,8 @@ trace_find_event_field(struct trace_event_call *call, char *name)
static int __trace_define_field(struct list_head *head, const char *type,
const char *name, int offset, int size,
- int is_signed, int filter_type, int len)
+ int is_signed, int filter_type, int len,
+ int need_test)
{
struct ftrace_event_field *field;
@@ -133,6 +134,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
field->offset = offset;
field->size = size;
field->is_signed = is_signed;
+ field->needs_test = need_test;
field->len = len;
list_add(&field->link, head);
@@ -151,13 +153,13 @@ int trace_define_field(struct trace_event_call *call, const char *type,
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
- is_signed, filter_type, 0);
+ is_signed, filter_type, 0, 0);
}
EXPORT_SYMBOL_GPL(trace_define_field);
static int trace_define_field_ext(struct trace_event_call *call, const char *type,
const char *name, int offset, int size, int is_signed,
- int filter_type, int len)
+ int filter_type, int len, int need_test)
{
struct list_head *head;
@@ -166,13 +168,13 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ
head = trace_get_fields(call);
return __trace_define_field(head, type, name, offset, size,
- is_signed, filter_type, len);
+ is_signed, filter_type, len, need_test);
}
#define __generic_field(type, item, filter_type) \
ret = __trace_define_field(&ftrace_generic_fields, #type, \
#item, 0, 0, is_signed_type(type), \
- filter_type, 0); \
+ filter_type, 0, 0); \
if (ret) \
return ret;
@@ -181,7 +183,8 @@ static int trace_define_field_ext(struct trace_event_call *call, const char *typ
"common_" #item, \
offsetof(typeof(ent), item), \
sizeof(ent.item), \
- is_signed_type(type), FILTER_OTHER, 0); \
+ is_signed_type(type), FILTER_OTHER, \
+ 0, 0); \
if (ret) \
return ret;
@@ -244,19 +247,16 @@ int trace_event_get_offsets(struct trace_event_call *call)
return tail->offset + tail->size;
}
-/*
- * Check if the referenced field is an array and return true,
- * as arrays are OK to dereference.
- */
-static bool test_field(const char *fmt, struct trace_event_call *call)
+
+static struct trace_event_fields *find_event_field(const char *fmt,
+ struct trace_event_call *call)
{
struct trace_event_fields *field = call->class->fields_array;
- const char *array_descriptor;
const char *p = fmt;
int len;
if (!(len = str_has_prefix(fmt, "REC->")))
- return false;
+ return NULL;
fmt += len;
for (p = fmt; *p; p++) {
if (!isalnum(*p) && *p != '_')
@@ -265,16 +265,141 @@ static bool test_field(const char *fmt, struct trace_event_call *call)
len = p - fmt;
for (; field->type; field++) {
- if (strncmp(field->name, fmt, len) ||
- field->name[len])
+ if (strncmp(field->name, fmt, len) || field->name[len])
continue;
- array_descriptor = strchr(field->type, '[');
- /* This is an array and is OK to dereference. */
- return array_descriptor != NULL;
+
+ return field;
+ }
+ return NULL;
+}
+
+/*
+ * Check if the referenced field is an array and return true,
+ * as arrays are OK to dereference.
+ */
+static bool test_field(const char *fmt, struct trace_event_call *call)
+{
+ struct trace_event_fields *field;
+
+ field = find_event_field(fmt, call);
+ if (!field)
+ return false;
+
+ /* This is an array and is OK to dereference. */
+ return strchr(field->type, '[') != NULL;
+}
+
+/* Look for a string within an argument */
+static bool find_print_string(const char *arg, const char *str, const char *end)
+{
+ const char *r;
+
+ r = strstr(arg, str);
+ return r && r < end;
+}
+
+/* Return true if the argument pointer is safe */
+static bool process_pointer(const char *fmt, int len, struct trace_event_call *call)
+{
+ const char *r, *e, *a;
+
+ e = fmt + len;
+
+ /* Find the REC-> in the argument */
+ r = strstr(fmt, "REC->");
+ if (r && r < e) {
+ /*
+	 * Addresses of events on the buffer, or an array on the buffer, are
+	 * OK to dereference. There are ways to fool this, but it is meant
+	 * to catch common mistakes, not malicious code.
+ */
+ a = strchr(fmt, '&');
+ if ((a && (a < r)) || test_field(r, call))
+ return true;
+ } else if (find_print_string(fmt, "__get_dynamic_array(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_rel_dynamic_array(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_dynamic_array_len(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_rel_dynamic_array_len(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_sockaddr(", e)) {
+ return true;
+ } else if (find_print_string(fmt, "__get_rel_sockaddr(", e)) {
+ return true;
}
return false;
}
+/* Return true if the string is safe */
+static bool process_string(const char *fmt, int len, struct trace_event_call *call)
+{
+ struct trace_event_fields *field;
+ const char *r, *e, *s;
+
+ e = fmt + len;
+
+ /*
+ * There are several helper functions that return strings.
+ * If the argument contains a function call, assume its field is valid.
+ * The argument is treated as containing a function call if an
+ * alphanumeric character or '_' appears just before a parenthesis.
+ */
+ s = fmt;
+ do {
+ r = strstr(s, "(");
+ if (!r || r >= e)
+ break;
+ for (int i = 1; r - i >= s; i++) {
+ char ch = *(r - i);
+ if (isspace(ch))
+ continue;
+ if (isalnum(ch) || ch == '_')
+ return true;
+ /* Anything else, this isn't a function */
+ break;
+ }
+ /* A function could be wrapped in parentheses, try the next one */
+ s = r + 1;
+ } while (s < e);
+
+ /*
+ * Check for arrays. If the argument has: foo[REC->val]
+ * then it is very likely that foo is an array of strings
+ * that are safe to use.
+ */
+ r = strstr(s, "[");
+ if (r && r < e) {
+ r = strstr(r, "REC->");
+ if (r && r < e)
+ return true;
+ }
+
+ /*
+ * If there are any strings in the argument, consider this arg OK as it
+ * could be: REC->field ? "foo" : "bar" and we don't want to get into
+ * verifying that logic here.
+ */
+ if (find_print_string(fmt, "\"", e))
+ return true;
+
+ /* Dereferenced strings are also valid like any other pointer */
+ if (process_pointer(fmt, len, call))
+ return true;
+
+ /* Make sure the field is found */
+ field = find_event_field(fmt, call);
+ if (!field)
+ return false;
+
+ /* Test this field's string before printing the event */
+ call->flags |= TRACE_EVENT_FL_TEST_STR;
+ field->needs_test = 1;
+
+ return true;
+}
+
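The heuristic in process_string() above treats an argument as safe when it looks like a helper-function call, that is, when an identifier character sits just before an opening parenthesis. A minimal userspace sketch of that scan, runnable with plain libc; the helper name is made up:

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Does the argument look like a function call, i.e. is there an
 * identifier character (alphanumeric or '_') just before a '('?
 */
static bool arg_looks_like_call(const char *arg)
{
        const char *s = arg, *r;

        while ((r = strstr(s, "(")) != NULL) {
                for (int i = 1; r - i >= s; i++) {
                        unsigned char ch = *(r - i);

                        if (isspace(ch))
                                continue;
                        if (isalnum(ch) || ch == '_')
                                return true;    /* e.g. __get_str(msg) */
                        break;                  /* not a call, try the next '(' */
                }
                s = r + 1;
        }
        return false;
}

int main(void)
{
        printf("%d\n", arg_looks_like_call("__get_str(msg)"));  /* 1 */
        printf("%d\n", arg_looks_like_call("(REC->flags)"));    /* 0 */
        return 0;
}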
/*
* Examine the print fmt of the event looking for unsafe dereference
* pointers using %p* that could be recorded in the trace event and
@@ -284,13 +409,14 @@ static bool test_field(const char *fmt, struct trace_event_call *call)
static void test_event_printk(struct trace_event_call *call)
{
u64 dereference_flags = 0;
+ u64 string_flags = 0;
bool first = true;
- const char *fmt, *c, *r, *a;
+ const char *fmt;
int parens = 0;
char in_quote = 0;
int start_arg = 0;
int arg = 0;
- int i;
+ int i, e;
fmt = call->print_fmt;
@@ -374,8 +500,16 @@ static void test_event_printk(struct trace_event_call *call)
star = true;
continue;
}
- if ((fmt[i + j] == 's') && star)
- arg++;
+ if ((fmt[i + j] == 's')) {
+ if (star)
+ arg++;
+ if (WARN_ONCE(arg == 63,
+ "Too many args for event: %s",
+ trace_event_name(call)))
+ return;
+ dereference_flags |= 1ULL << arg;
+ string_flags |= 1ULL << arg;
+ }
break;
}
break;
@@ -403,42 +537,47 @@ static void test_event_printk(struct trace_event_call *call)
case ',':
if (in_quote || parens)
continue;
+ e = i;
i++;
while (isspace(fmt[i]))
i++;
- start_arg = i;
- if (!(dereference_flags & (1ULL << arg)))
- goto next_arg;
- /* Find the REC-> in the argument */
- c = strchr(fmt + i, ',');
- r = strstr(fmt + i, "REC->");
- if (r && (!c || r < c)) {
- /*
- * Addresses of events on the buffer,
- * or an array on the buffer is
- * OK to dereference.
- * There's ways to fool this, but
- * this is to catch common mistakes,
- * not malicious code.
- */
- a = strchr(fmt + i, '&');
- if ((a && (a < r)) || test_field(r, call))
+ /*
+ * If start_arg is zero, then this is the start of the
+ * first argument. The processing of the argument happens
+ * when the end of the argument is found, as it needs to
+ * handle parentheses and such.
+ */
+ if (!start_arg) {
+ start_arg = i;
+ /* Balance out the i++ in the for loop */
+ i--;
+ continue;
+ }
+
+ if (dereference_flags & (1ULL << arg)) {
+ if (string_flags & (1ULL << arg)) {
+ if (process_string(fmt + start_arg, e - start_arg, call))
+ dereference_flags &= ~(1ULL << arg);
+ } else if (process_pointer(fmt + start_arg, e - start_arg, call))
dereference_flags &= ~(1ULL << arg);
- } else if ((r = strstr(fmt + i, "__get_dynamic_array(")) &&
- (!c || r < c)) {
- dereference_flags &= ~(1ULL << arg);
- } else if ((r = strstr(fmt + i, "__get_sockaddr(")) &&
- (!c || r < c)) {
- dereference_flags &= ~(1ULL << arg);
}
- next_arg:
- i--;
+ start_arg = i;
arg++;
+ /* Balance out the i++ in the for loop */
+ i--;
}
}
+ if (dereference_flags & (1ULL << arg)) {
+ if (string_flags & (1ULL << arg)) {
+ if (process_string(fmt + start_arg, i - start_arg, call))
+ dereference_flags &= ~(1ULL << arg);
+ } else if (process_pointer(fmt + start_arg, i - start_arg, call))
+ dereference_flags &= ~(1ULL << arg);
+ }
+
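The rework above can no longer judge an argument the moment a comma is seen: an argument such as __get_str(msg) or a string literal may itself contain commas, so the code now remembers start_arg and only processes the argument once its real end is found. A simplified, runnable userspace sketch of splitting an argument list on top-level commas only (it leaves out the %-conversion bookkeeping the kernel code also does):

#include <stdio.h>
#include <string.h>

/* Print each top-level argument of a list such as
 * "__get_str(msg), REC->pid, \"a,b\"" - commas inside parentheses or
 * string literals do not end an argument.
 */
static void split_args(const char *args)
{
        int parens = 0;
        char in_quote = 0;
        const char *start = args;

        for (const char *p = args; ; p++) {
                char c = *p;

                if (in_quote) {
                        if (c == '\\' && p[1])
                                p++;            /* skip the escaped character */
                        else if (c == in_quote)
                                in_quote = 0;
                } else if (c == '"' || c == '\'') {
                        in_quote = c;
                } else if (c == '(') {
                        parens++;
                } else if (c == ')') {
                        parens--;
                }

                if (c == '\0' || (c == ',' && !in_quote && !parens)) {
                        printf("arg: %.*s\n", (int)(p - start), start);
                        if (c == '\0')
                                break;
                        start = p + 1;
                        while (*start == ' ')
                                start++;
                        p = start - 1;          /* loop's p++ lands on start */
                }
        }
}

int main(void)
{
        split_args("__get_str(msg), REC->pid, \"a,b\"");
        return 0;
}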
/*
* If you triggered the below warning, the trace event reported
* uses an unsafe dereference pointer %p*. As the data stored
@@ -730,6 +869,120 @@ static int ftrace_event_enable_disable(struct trace_event_file *file,
return __ftrace_event_enable_disable(file, enable, 0);
}
+#ifdef CONFIG_MODULES
+struct event_mod_load {
+ struct list_head list;
+ char *module;
+ char *match;
+ char *system;
+ char *event;
+};
+
+static void free_event_mod(struct event_mod_load *event_mod)
+{
+ list_del(&event_mod->list);
+ kfree(event_mod->module);
+ kfree(event_mod->match);
+ kfree(event_mod->system);
+ kfree(event_mod->event);
+ kfree(event_mod);
+}
+
+static void clear_mod_events(struct trace_array *tr)
+{
+ struct event_mod_load *event_mod, *n;
+
+ list_for_each_entry_safe(event_mod, n, &tr->mod_events, list) {
+ free_event_mod(event_mod);
+ }
+}
+
+static int remove_cache_mod(struct trace_array *tr, const char *mod,
+ const char *match, const char *system, const char *event)
+{
+ struct event_mod_load *event_mod, *n;
+ int ret = -EINVAL;
+
+ list_for_each_entry_safe(event_mod, n, &tr->mod_events, list) {
+ if (strcmp(event_mod->module, mod) != 0)
+ continue;
+
+ if (match && strcmp(event_mod->match, match) != 0)
+ continue;
+
+ if (system &&
+ (!event_mod->system || strcmp(event_mod->system, system) != 0))
+ continue;
+
+ if (event &&
+ (!event_mod->event || strcmp(event_mod->event, event) != 0))
+ continue;
+
+ free_event_mod(event_mod);
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static int cache_mod(struct trace_array *tr, const char *mod, int set,
+ const char *match, const char *system, const char *event)
+{
+ struct event_mod_load *event_mod;
+
+ /* If the module exists, then this just failed to find an event */
+ if (module_exists(mod))
+ return -EINVAL;
+
+ /* See if this is to remove a cached filter */
+ if (!set)
+ return remove_cache_mod(tr, mod, match, system, event);
+
+ event_mod = kzalloc(sizeof(*event_mod), GFP_KERNEL);
+ if (!event_mod)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&event_mod->list);
+ event_mod->module = kstrdup(mod, GFP_KERNEL);
+ if (!event_mod->module)
+ goto out_free;
+
+ if (match) {
+ event_mod->match = kstrdup(match, GFP_KERNEL);
+ if (!event_mod->match)
+ goto out_free;
+ }
+
+ if (system) {
+ event_mod->system = kstrdup(system, GFP_KERNEL);
+ if (!event_mod->system)
+ goto out_free;
+ }
+
+ if (event) {
+ event_mod->event = kstrdup(event, GFP_KERNEL);
+ if (!event_mod->event)
+ goto out_free;
+ }
+
+ list_add(&event_mod->list, &tr->mod_events);
+
+ return 0;
+
+ out_free:
+ free_event_mod(event_mod);
+
+ return -ENOMEM;
+}
+#else /* CONFIG_MODULES */
+static inline void clear_mod_events(struct trace_array *tr) { }
+static int cache_mod(struct trace_array *tr, const char *mod, int set,
+ const char *match, const char *system, const char *event)
+{
+ return -EINVAL;
+}
+#endif
+
static void ftrace_clear_events(struct trace_array *tr)
{
struct trace_event_file *file;
@@ -738,6 +991,7 @@ static void ftrace_clear_events(struct trace_array *tr)
list_for_each_entry(file, &tr->events, list) {
ftrace_event_enable_disable(file, 0);
}
+ clear_mod_events(tr);
mutex_unlock(&event_mutex);
}
@@ -1026,17 +1280,36 @@ static void remove_event_file_dir(struct trace_event_file *file)
*/
static int
__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
- const char *sub, const char *event, int set)
+ const char *sub, const char *event, int set,
+ const char *mod)
{
struct trace_event_file *file;
struct trace_event_call *call;
+ char *module __free(kfree) = NULL;
const char *name;
int ret = -EINVAL;
int eret = 0;
+ if (mod) {
+ char *p;
+
+ module = kstrdup(mod, GFP_KERNEL);
+ if (!module)
+ return -ENOMEM;
+
+ /* Replace all '-' with '_' as that's what modules do */
+ for (p = strchr(module, '-'); p; p = strchr(p + 1, '-'))
+ *p = '_';
+ }
+
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
+
+ /* If a module is specified, skip events that do not belong to that module */
+ if (module && (!call->module || strcmp(module_name(call->module), module)))
+ continue;
+
name = trace_event_name(call);
if (!name || !call->class || !call->class->reg)
@@ -1069,16 +1342,24 @@ __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
ret = eret;
}
+ /*
+ * If this is a module setting and nothing was found,
+ * check if the module was loaded. If it wasn't, cache it.
+ */
+ if (module && ret == -EINVAL && !eret)
+ ret = cache_mod(tr, module, set, match, sub, event);
+
return ret;
}
static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
- const char *sub, const char *event, int set)
+ const char *sub, const char *event, int set,
+ const char *mod)
{
int ret;
mutex_lock(&event_mutex);
- ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
+ ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set, mod);
mutex_unlock(&event_mutex);
return ret;
@@ -1086,11 +1367,20 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
{
- char *event = NULL, *sub = NULL, *match;
+ char *event = NULL, *sub = NULL, *match, *mod;
int ret;
if (!tr)
return -ENOENT;
+
+ /* Module events can be appended with :mod:<module> */
+ mod = strstr(buf, ":mod:");
+ if (mod) {
+ *mod = '\0';
+ /* move to the module name */
+ mod += 5;
+ }
+
/*
* The buf format can be <subsystem>:<event-name>
* *:<event-name> means any event by that name.
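The hunk above strips an optional ":mod:<module>" suffix before the usual <subsystem>:<event-name> split described in this comment, and an earlier hunk normalises '-' to '_' in the module name. A minimal userspace sketch of that parsing; the module name "my-module" and the helper name are just examples:

#include <stdio.h>
#include <string.h>

static void parse_set_event(char *buf)
{
        char *mod, *p, *match = NULL, *sub = NULL, *event = NULL;

        /* Strip an optional ":mod:<module>" suffix first */
        mod = strstr(buf, ":mod:");
        if (mod) {
                *mod = '\0';
                mod += 5;       /* move past ":mod:" to the module name */
                /* Module names use '_' where a file name may use '-' */
                for (p = strchr(mod, '-'); p; p = strchr(p + 1, '-'))
                        *p = '_';
        }

        /* Then the usual <subsystem>:<event-name> split */
        p = strchr(buf, ':');
        if (p) {
                *p = '\0';
                sub = buf;
                event = p + 1;
        } else {
                match = buf;
        }

        printf("match=%s sub=%s event=%s mod=%s\n",
               match ? match : "*", sub ? sub : "*",
               event ? event : "*", mod ? mod : "(none)");
}

int main(void)
{
        char a[] = "sched:sched_switch:mod:my-module";
        char b[] = "*:*:mod:my-module";

        parse_set_event(a);
        parse_set_event(b);
        return 0;
}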
@@ -1113,9 +1403,13 @@ int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
sub = NULL;
if (!strlen(event) || strcmp(event, "*") == 0)
event = NULL;
+ } else if (mod) {
+ /* Allow wildcard for no length or star */
+ if (!strlen(match) || strcmp(match, "*") == 0)
+ match = NULL;
}
- ret = __ftrace_set_clr_event(tr, match, sub, event, set);
+ ret = __ftrace_set_clr_event(tr, match, sub, event, set, mod);
/* Put back the colon to allow this to be called again */
if (buf)
@@ -1143,7 +1437,7 @@ int trace_set_clr_event(const char *system, const char *event, int set)
if (!tr)
return -ENODEV;
- return __ftrace_set_clr_event(tr, NULL, system, event, set);
+ return __ftrace_set_clr_event(tr, NULL, system, event, set, NULL);
}
EXPORT_SYMBOL_GPL(trace_set_clr_event);
@@ -1169,7 +1463,7 @@ int trace_array_set_clr_event(struct trace_array *tr, const char *system,
return -ENOENT;
set = (enable == true) ? 1 : 0;
- return __ftrace_set_clr_event(tr, NULL, system, event, set);
+ return __ftrace_set_clr_event(tr, NULL, system, event, set, NULL);
}
EXPORT_SYMBOL_GPL(trace_array_set_clr_event);
@@ -1256,37 +1550,78 @@ static void *t_start(struct seq_file *m, loff_t *pos)
return file;
}
+enum set_event_iter_type {
+ SET_EVENT_FILE,
+ SET_EVENT_MOD,
+};
+
+struct set_event_iter {
+ enum set_event_iter_type type;
+ union {
+ struct trace_event_file *file;
+ struct event_mod_load *event_mod;
+ };
+};
+
static void *
s_next(struct seq_file *m, void *v, loff_t *pos)
{
- struct trace_event_file *file = v;
+ struct set_event_iter *iter = v;
+ struct trace_event_file *file;
struct trace_array *tr = m->private;
(*pos)++;
- list_for_each_entry_continue(file, &tr->events, list) {
- if (file->flags & EVENT_FILE_FL_ENABLED)
- return file;
+ if (iter->type == SET_EVENT_FILE) {
+ file = iter->file;
+ list_for_each_entry_continue(file, &tr->events, list) {
+ if (file->flags & EVENT_FILE_FL_ENABLED) {
+ iter->file = file;
+ return iter;
+ }
+ }
+#ifdef CONFIG_MODULES
+ iter->type = SET_EVENT_MOD;
+ iter->event_mod = list_entry(&tr->mod_events, struct event_mod_load, list);
+#endif
}
+#ifdef CONFIG_MODULES
+ list_for_each_entry_continue(iter->event_mod, &tr->mod_events, list)
+ return iter;
+#endif
+
+ /*
+ * The iter is allocated in s_start() and passed via the 'v'
+ * parameter. To stop the iterator, NULL must be returned. But
+ * the return value is what the 'v' parameter in s_stop() receives
+ * and frees. Free iter here as it will no longer be used.
+ */
+ kfree(iter);
return NULL;
}
static void *s_start(struct seq_file *m, loff_t *pos)
{
- struct trace_event_file *file;
struct trace_array *tr = m->private;
+ struct set_event_iter *iter;
loff_t l;
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return NULL;
+
mutex_lock(&event_mutex);
- file = list_entry(&tr->events, struct trace_event_file, list);
+ iter->type = SET_EVENT_FILE;
+ iter->file = list_entry(&tr->events, struct trace_event_file, list);
+
for (l = 0; l <= *pos; ) {
- file = s_next(m, file, &l);
- if (!file)
+ iter = s_next(m, iter, &l);
+ if (!iter)
break;
}
- return file;
+ return iter;
}
static int t_show(struct seq_file *m, void *v)
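The comment added to s_next() above spells out the cursor's ownership rule: s_start() allocates it, s_next() frees it when it returns NULL at the end of the walk, and s_stop() frees whatever non-NULL cursor it is still handed on an early stop. A toy, runnable sketch of that contract with made-up names; the real code hangs the cursor off a seq_file, which is omitted here:

#include <stdio.h>
#include <stdlib.h>

struct cursor { int idx; };

static const char *items[] = { "event_a", "event_b", "cached_mod_entry" };
#define NITEMS 3

static void *my_start(void)
{
        return calloc(1, sizeof(struct cursor));
}

static void *my_next(void *v)
{
        struct cursor *c = v;

        if (++c->idx < NITEMS)
                return c;
        free(c);                /* end of the walk: next() does the free */
        return NULL;
}

static void my_stop(void *v)
{
        free(v);                /* non-NULL only if the walk stopped early */
}

int main(void)
{
        void *v = my_start();

        while (v) {
                printf("%s\n", items[((struct cursor *)v)->idx]);
                v = my_next(v);
        }
        my_stop(v);             /* NULL after a full walk; free(NULL) is a no-op */
        return 0;
}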
@@ -1306,6 +1641,45 @@ static void t_stop(struct seq_file *m, void *p)
mutex_unlock(&event_mutex);
}
+#ifdef CONFIG_MODULES
+static int s_show(struct seq_file *m, void *v)
+{
+ struct set_event_iter *iter = v;
+ const char *system;
+ const char *event;
+
+ if (iter->type == SET_EVENT_FILE)
+ return t_show(m, iter->file);
+
+ /* When match is set, system and event are not */
+ if (iter->event_mod->match) {
+ seq_printf(m, "%s:mod:%s\n", iter->event_mod->match,
+ iter->event_mod->module);
+ return 0;
+ }
+
+ system = iter->event_mod->system ? : "*";
+ event = iter->event_mod->event ? : "*";
+
+ seq_printf(m, "%s:%s:mod:%s\n", system, event, iter->event_mod->module);
+
+ return 0;
+}
+#else /* CONFIG_MODULES */
+static int s_show(struct seq_file *m, void *v)
+{
+ struct set_event_iter *iter = v;
+
+ return t_show(m, iter->file);
+}
+#endif
+
+static void s_stop(struct seq_file *m, void *v)
+{
+ kfree(v);
+ t_stop(m, NULL);
+}
+
static void *
__next(struct seq_file *m, void *v, loff_t *pos, int type)
{
@@ -1419,21 +1793,20 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (ret)
return ret;
+ guard(mutex)(&event_mutex);
+
switch (val) {
case 0:
case 1:
- ret = -ENODEV;
- mutex_lock(&event_mutex);
file = event_file_file(filp);
- if (likely(file)) {
- ret = tracing_update_buffers(file->tr);
- if (ret < 0) {
- mutex_unlock(&event_mutex);
- return ret;
- }
- ret = ftrace_event_enable_disable(file, val);
- }
- mutex_unlock(&event_mutex);
+ if (!file)
+ return -ENODEV;
+ ret = tracing_update_buffers(file->tr);
+ if (ret < 0)
+ return ret;
+ ret = ftrace_event_enable_disable(file, val);
+ if (ret < 0)
+ return ret;
break;
default:
@@ -1442,7 +1815,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
*ppos += cnt;
- return ret ? ret : cnt;
+ return cnt;
}
static ssize_t
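This hunk, like many that follow, swaps an explicit mutex_lock()/mutex_unlock() pair for the scoped guard() helper from <linux/cleanup.h>: the lock is released automatically when the scope is left, so early error returns no longer need an unlock label. A minimal kernel-style sketch of the pattern; example_mutex and example_update() are made up:

#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/mutex.h>
#include <linux/types.h>

static DEFINE_MUTEX(example_mutex);

static int example_update(bool ok)
{
        guard(mutex)(&example_mutex);   /* dropped automatically on any return */

        if (!ok)
                return -ENODEV;         /* no unlock label or goto needed */

        /* ... work done under example_mutex ... */
        return 0;
}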
@@ -1520,7 +1893,7 @@ system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (system)
name = system->name;
- ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
+ ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val, NULL);
if (ret)
goto out;
@@ -2018,7 +2391,7 @@ event_pid_write(struct file *filp, const char __user *ubuf,
if (ret < 0)
return ret;
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
if (type == TRACE_PIDS) {
filtered_pids = rcu_dereference_protected(tr->filtered_pids,
@@ -2034,7 +2407,7 @@ event_pid_write(struct file *filp, const char __user *ubuf,
ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
if (ret < 0)
- goto out;
+ return ret;
if (type == TRACE_PIDS)
rcu_assign_pointer(tr->filtered_pids, pid_list);
@@ -2059,11 +2432,7 @@ event_pid_write(struct file *filp, const char __user *ubuf,
*/
on_each_cpu(ignore_task_cpu, tr, 1);
- out:
- mutex_unlock(&event_mutex);
-
- if (ret > 0)
- *ppos += ret;
+ *ppos += ret;
return ret;
}
@@ -2098,8 +2467,8 @@ static const struct seq_operations show_event_seq_ops = {
static const struct seq_operations show_set_event_seq_ops = {
.start = s_start,
.next = s_next,
- .show = t_show,
- .stop = t_stop,
+ .show = s_show,
+ .stop = s_stop,
};
static const struct seq_operations show_set_pid_seq_ops = {
@@ -2471,7 +2840,7 @@ event_define_fields(struct trace_event_call *call)
ret = trace_define_field_ext(call, field->type, field->name,
offset, field->size,
field->is_signed, field->filter_type,
- field->len);
+ field->len, field->needs_test);
if (WARN_ON_ONCE(ret)) {
pr_err("error code is %d\n", ret);
break;
@@ -2972,6 +3341,20 @@ static bool event_in_systems(struct trace_event_call *call,
return !*p || isspace(*p) || *p == ',';
}
+#ifdef CONFIG_HIST_TRIGGERS
+/*
+ * Wake up waiters on the hist_poll_wq from irq_work because the hist trigger
+ * may happen in any context.
+ */
+static void hist_poll_event_irq_work(struct irq_work *work)
+{
+ wake_up_all(&hist_poll_wq);
+}
+
+DEFINE_IRQ_WORK(hist_poll_work, hist_poll_event_irq_work);
+DECLARE_WAIT_QUEUE_HEAD(hist_poll_wq);
+#endif
+
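The histogram trigger can fire in any context, so waking pollers is deferred to an irq_work rather than calling wake_up_all() directly from the hot path; the hist_poll_wakeup() helper used by event_hist_trigger() further down is defined elsewhere in this series. A minimal kernel-style sketch of the general pattern with made-up names (the real helper may differ, for instance in how it checks for sleepers):

#include <linux/irq_work.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(example_wq);

static void example_wake_fn(struct irq_work *work)
{
        wake_up_all(&example_wq);       /* runs later, in a safe context */
}

static DEFINE_IRQ_WORK(example_wake_work, example_wake_fn);

/* Called from the hot path, possibly with locks held */
static void example_event_hit(void)
{
        if (wq_has_sleeper(&example_wq))
                irq_work_queue(&example_wake_work);
}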
static struct trace_event_file *
trace_create_new_event(struct trace_event_call *call,
struct trace_array *tr)
@@ -3130,13 +3513,13 @@ int trace_add_event_call(struct trace_event_call *call)
int ret;
lockdep_assert_held(&event_mutex);
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&trace_types_lock);
ret = __register_event(call, NULL);
- if (ret >= 0)
- __add_event_to_tracers(call);
+ if (ret < 0)
+ return ret;
- mutex_unlock(&trace_types_lock);
+ __add_event_to_tracers(call);
return ret;
}
EXPORT_SYMBOL_GPL(trace_add_event_call);
@@ -3216,6 +3599,28 @@ EXPORT_SYMBOL_GPL(trace_remove_event_call);
event++)
#ifdef CONFIG_MODULES
+static void update_mod_cache(struct trace_array *tr, struct module *mod)
+{
+ struct event_mod_load *event_mod, *n;
+
+ list_for_each_entry_safe(event_mod, n, &tr->mod_events, list) {
+ if (strcmp(event_mod->module, mod->name) != 0)
+ continue;
+
+ __ftrace_set_clr_event_nolock(tr, event_mod->match,
+ event_mod->system,
+ event_mod->event, 1, mod->name);
+ free_event_mod(event_mod);
+ }
+}
+
+static void update_cache_events(struct module *mod)
+{
+ struct trace_array *tr;
+
+ list_for_each_entry(tr, &ftrace_trace_arrays, list)
+ update_mod_cache(tr, mod);
+}
static void trace_module_add_events(struct module *mod)
{
@@ -3238,6 +3643,8 @@ static void trace_module_add_events(struct module *mod)
__register_event(*call, mod);
__add_event_to_tracers(*call);
}
+
+ update_cache_events(mod);
}
static void trace_module_remove_events(struct module *mod)
@@ -3390,30 +3797,21 @@ struct trace_event_file *trace_get_event_file(const char *instance,
return ERR_PTR(ret);
}
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
file = find_event_file(tr, system, event);
if (!file) {
trace_array_put(tr);
- ret = -EINVAL;
- goto out;
+ return ERR_PTR(-EINVAL);
}
/* Don't let event modules unload while in use */
ret = trace_event_try_get_ref(file->event_call);
if (!ret) {
trace_array_put(tr);
- ret = -EBUSY;
- goto out;
+ return ERR_PTR(-EBUSY);
}
- ret = 0;
- out:
- mutex_unlock(&event_mutex);
-
- if (ret)
- file = ERR_PTR(ret);
-
return file;
}
EXPORT_SYMBOL_GPL(trace_get_event_file);
@@ -3631,6 +4029,7 @@ event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
struct trace_event_file *file;
struct ftrace_probe_ops *ops;
struct event_probe_data *data;
+ unsigned long count = -1;
const char *system;
const char *event;
char *number;
@@ -3650,12 +4049,11 @@ event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
event = strsep(&param, ":");
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
- ret = -EINVAL;
file = find_event_file(tr, system, event);
if (!file)
- goto out;
+ return -EINVAL;
enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
@@ -3664,74 +4062,62 @@ event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
else
ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops;
- if (glob[0] == '!') {
- ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
- goto out;
- }
+ if (glob[0] == '!')
+ return unregister_ftrace_function_probe_func(glob+1, tr, ops);
- ret = -ENOMEM;
+ if (param) {
+ number = strsep(&param, ":");
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- goto out;
+ if (!strlen(number))
+ return -EINVAL;
- data->enable = enable;
- data->count = -1;
- data->file = file;
-
- if (!param)
- goto out_reg;
-
- number = strsep(&param, ":");
-
- ret = -EINVAL;
- if (!strlen(number))
- goto out_free;
-
- /*
- * We use the callback data field (which is a pointer)
- * as our counter.
- */
- ret = kstrtoul(number, 0, &data->count);
- if (ret)
- goto out_free;
+ /*
+ * We use the callback data field (which is a pointer)
+ * as our counter.
+ */
+ ret = kstrtoul(number, 0, &count);
+ if (ret)
+ return ret;
+ }
- out_reg:
/* Don't let event modules unload while probe registered */
ret = trace_event_try_get_ref(file->event_call);
- if (!ret) {
- ret = -EBUSY;
- goto out_free;
- }
+ if (!ret)
+ return -EBUSY;
ret = __ftrace_event_enable_disable(file, 1, 1);
if (ret < 0)
goto out_put;
+ ret = -ENOMEM;
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto out_put;
+
+ data->enable = enable;
+ data->count = count;
+ data->file = file;
+
ret = register_ftrace_function_probe(glob, tr, ops, data);
/*
* The above returns on success the # of functions enabled,
* but if it didn't find any functions it returns zero.
* Consider no functions a failure too.
*/
- if (!ret) {
- ret = -ENOENT;
- goto out_disable;
- } else if (ret < 0)
- goto out_disable;
+
/* Just return zero, not the number of enabled functions */
- ret = 0;
- out:
- mutex_unlock(&event_mutex);
- return ret;
+ if (ret > 0)
+ return 0;
+
+ kfree(data);
+
+ if (!ret)
+ ret = -ENOENT;
- out_disable:
__ftrace_event_enable_disable(file, 0, 1);
out_put:
trace_event_put_ref(file->event_call);
- out_free:
- kfree(data);
- goto out;
+ return ret;
}
static struct ftrace_func_command event_enable_cmd = {
@@ -3954,20 +4340,17 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
{
int ret;
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
ret = create_event_toplevel_files(parent, tr);
if (ret)
- goto out_unlock;
+ return ret;
down_write(&trace_event_sem);
__trace_early_add_event_dirs(tr);
up_write(&trace_event_sem);
- out_unlock:
- mutex_unlock(&event_mutex);
-
- return ret;
+ return 0;
}
/* Must be called with event_mutex held */
@@ -3982,7 +4365,7 @@ int event_trace_del_tracer(struct trace_array *tr)
__ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS);
/* Disable any running events */
- __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
+ __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0, NULL);
/* Make sure no more events are being executed */
tracepoint_synchronize_unregister();
@@ -4266,7 +4649,7 @@ static __init void event_trace_self_tests(void)
pr_info("Testing event system %s: ", system->name);
- ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
+ ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1, NULL);
if (WARN_ON_ONCE(ret)) {
pr_warn("error enabling system %s\n",
system->name);
@@ -4275,7 +4658,7 @@ static __init void event_trace_self_tests(void)
event_test_stuff();
- ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
+ ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0, NULL);
if (WARN_ON_ONCE(ret)) {
pr_warn("error disabling system %s\n",
system->name);
@@ -4290,7 +4673,7 @@ static __init void event_trace_self_tests(void)
pr_info("Running tests on all trace events:\n");
pr_info("Testing all events: ");
- ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
+ ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1, NULL);
if (WARN_ON_ONCE(ret)) {
pr_warn("error enabling all events\n");
return;
@@ -4299,7 +4682,7 @@ static __init void event_trace_self_tests(void)
event_test_stuff();
/* reset sysname */
- ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
+ ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0, NULL);
if (WARN_ON_ONCE(ret)) {
pr_warn("error disabling all events\n");
return;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 78051de581e7..0993dfc1c5c1 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -2405,13 +2405,11 @@ int apply_subsystem_event_filter(struct trace_subsystem_dir *dir,
struct event_filter *filter = NULL;
int err = 0;
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
/* Make sure the system still has events */
- if (!dir->nr_events) {
- err = -ENODEV;
- goto out_unlock;
- }
+ if (!dir->nr_events)
+ return -ENODEV;
if (!strcmp(strstrip(filter_string), "0")) {
filter_free_subsystem_preds(dir, tr);
@@ -2422,7 +2420,7 @@ int apply_subsystem_event_filter(struct trace_subsystem_dir *dir,
tracepoint_synchronize_unregister();
filter_free_subsystem_filters(dir, tr);
__free_filter(filter);
- goto out_unlock;
+ return 0;
}
err = create_system_filter(dir, filter_string, &filter);
@@ -2434,8 +2432,6 @@ int apply_subsystem_event_filter(struct trace_subsystem_dir *dir,
__free_filter(system->filter);
system->filter = filter;
}
-out_unlock:
- mutex_unlock(&event_mutex);
return err;
}
@@ -2612,17 +2608,15 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
struct event_filter *filter = NULL;
struct trace_event_call *call;
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
call = event->tp_event;
- err = -EINVAL;
if (!call)
- goto out_unlock;
+ return -EINVAL;
- err = -EEXIST;
if (event->filter)
- goto out_unlock;
+ return -EEXIST;
err = create_filter(NULL, call, filter_str, false, &filter);
if (err)
@@ -2637,9 +2631,6 @@ free_filter:
if (err || ftrace_event_is_function(call))
__free_filter(filter);
-out_unlock:
- mutex_unlock(&event_mutex);
-
return err;
}
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 9c058aa8baf3..261163b00137 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -5311,6 +5311,8 @@ static void event_hist_trigger(struct event_trigger_data *data,
if (resolve_var_refs(hist_data, key, var_ref_vals, true))
hist_trigger_actions(hist_data, elt, buffer, rec, rbe, key, var_ref_vals);
+
+ hist_poll_wakeup();
}
static void hist_trigger_stacktrace_print(struct seq_file *m,
@@ -5590,49 +5592,128 @@ static void hist_trigger_show(struct seq_file *m,
n_entries, (u64)atomic64_read(&hist_data->map->drops));
}
+struct hist_file_data {
+ struct file *file;
+ u64 last_read;
+ u64 last_act;
+};
+
+static u64 get_hist_hit_count(struct trace_event_file *event_file)
+{
+ struct hist_trigger_data *hist_data;
+ struct event_trigger_data *data;
+ u64 ret = 0;
+
+ list_for_each_entry(data, &event_file->triggers, list) {
+ if (data->cmd_ops->trigger_type == ETT_EVENT_HIST) {
+ hist_data = data->private_data;
+ ret += atomic64_read(&hist_data->map->hits);
+ }
+ }
+ return ret;
+}
+
static int hist_show(struct seq_file *m, void *v)
{
+ struct hist_file_data *hist_file = m->private;
struct event_trigger_data *data;
struct trace_event_file *event_file;
- int n = 0, ret = 0;
+ int n = 0;
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
- event_file = event_file_file(m->private);
- if (unlikely(!event_file)) {
- ret = -ENODEV;
- goto out_unlock;
- }
+ event_file = event_file_file(hist_file->file);
+ if (unlikely(!event_file))
+ return -ENODEV;
list_for_each_entry(data, &event_file->triggers, list) {
if (data->cmd_ops->trigger_type == ETT_EVENT_HIST)
hist_trigger_show(m, data, n++);
}
+ hist_file->last_read = get_hist_hit_count(event_file);
+ /*
+ * Update last_act too so that poll()/POLLPRI can wait for the next
+ * event after any syscall on the hist file.
+ */
+ hist_file->last_act = hist_file->last_read;
+
+ return 0;
+}
+
+static __poll_t event_hist_poll(struct file *file, struct poll_table_struct *wait)
+{
+ struct trace_event_file *event_file;
+ struct seq_file *m = file->private_data;
+ struct hist_file_data *hist_file = m->private;
+ __poll_t ret = 0;
+ u64 cnt;
+
+ guard(mutex)(&event_mutex);
- out_unlock:
- mutex_unlock(&event_mutex);
+ event_file = event_file_data(file);
+ if (!event_file)
+ return EPOLLERR;
+
+ hist_poll_wait(file, wait);
+
+ cnt = get_hist_hit_count(event_file);
+ if (hist_file->last_read != cnt)
+ ret |= EPOLLIN | EPOLLRDNORM;
+ if (hist_file->last_act != cnt) {
+ hist_file->last_act = cnt;
+ ret |= EPOLLPRI;
+ }
return ret;
}
+static int event_hist_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *m = file->private_data;
+ struct hist_file_data *hist_file = m->private;
+
+ kfree(hist_file);
+ return tracing_single_release_file_tr(inode, file);
+}
+
static int event_hist_open(struct inode *inode, struct file *file)
{
+ struct trace_event_file *event_file;
+ struct hist_file_data *hist_file;
int ret;
ret = tracing_open_file_tr(inode, file);
if (ret)
return ret;
+ guard(mutex)(&event_mutex);
+
+ event_file = event_file_data(file);
+ if (!event_file)
+ return -ENODEV;
+
+ hist_file = kzalloc(sizeof(*hist_file), GFP_KERNEL);
+ if (!hist_file)
+ return -ENOMEM;
+
+ hist_file->file = file;
+ hist_file->last_act = get_hist_hit_count(event_file);
+
/* Clear private_data to avoid warning in single_open() */
file->private_data = NULL;
- return single_open(file, hist_show, file);
+ ret = single_open(file, hist_show, hist_file);
+ if (ret)
+ kfree(hist_file);
+
+ return ret;
}
const struct file_operations event_hist_fops = {
.open = event_hist_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = tracing_single_release_file_tr,
+ .release = event_hist_release,
+ .poll = event_hist_poll,
};
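With .poll wired into event_hist_fops, userspace can now sleep on an event's "hist" file: EPOLLIN/EPOLLRDNORM is reported when the hit count has changed since the file was last read, and EPOLLPRI on any new activity since the last poll. A small, runnable userspace example; the tracefs path is illustrative and assumes a hist trigger is already set on that event:

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* Example path - pick an event that already has a hist trigger */
        const char *path = "/sys/kernel/tracing/events/sched/sched_switch/hist";
        struct pollfd pfd;
        char buf[4096];
        ssize_t n;

        pfd.fd = open(path, O_RDONLY);
        if (pfd.fd < 0) {
                perror("open");
                return 1;
        }
        pfd.events = POLLPRI;           /* wait for new histogram activity */

        if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLPRI)) {
                while ((n = read(pfd.fd, buf, sizeof(buf))) > 0)
                        fwrite(buf, 1, n, stdout);
        }

        close(pfd.fd);
        return 0;
}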
#ifdef CONFIG_HIST_TRIGGERS_DEBUG
@@ -5873,25 +5954,19 @@ static int hist_debug_show(struct seq_file *m, void *v)
{
struct event_trigger_data *data;
struct trace_event_file *event_file;
- int n = 0, ret = 0;
+ int n = 0;
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
event_file = event_file_file(m->private);
- if (unlikely(!event_file)) {
- ret = -ENODEV;
- goto out_unlock;
- }
+ if (unlikely(!event_file))
+ return -ENODEV;
list_for_each_entry(data, &event_file->triggers, list) {
if (data->cmd_ops->trigger_type == ETT_EVENT_HIST)
hist_trigger_debug_show(m, data, n++);
}
-
- out_unlock:
- mutex_unlock(&event_mutex);
-
- return ret;
+ return 0;
}
static int event_hist_debug_open(struct inode *inode, struct file *file)
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index c82b401a294d..e3f7d09e5512 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -49,16 +49,11 @@ static char *last_cmd;
static int errpos(const char *str)
{
- int ret = 0;
-
- mutex_lock(&lastcmd_mutex);
+ guard(mutex)(&lastcmd_mutex);
if (!str || !last_cmd)
- goto out;
+ return 0;
- ret = err_pos(last_cmd, str);
- out:
- mutex_unlock(&lastcmd_mutex);
- return ret;
+ return err_pos(last_cmd, str);
}
static void last_cmd_set(const char *str)
@@ -74,14 +69,12 @@ static void last_cmd_set(const char *str)
static void synth_err(u8 err_type, u16 err_pos)
{
- mutex_lock(&lastcmd_mutex);
+ guard(mutex)(&lastcmd_mutex);
if (!last_cmd)
- goto out;
+ return;
tracing_log_err(NULL, "synthetic_events", last_cmd, err_text,
err_type, err_pos);
- out:
- mutex_unlock(&lastcmd_mutex);
}
static int create_synth_event(const char *raw_command);
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index a5e3d6acf1e1..d45448947094 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -211,12 +211,10 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file)
if (ret)
return ret;
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
- if (unlikely(!event_file_file(file))) {
- mutex_unlock(&event_mutex);
+ if (unlikely(!event_file_file(file)))
return -ENODEV;
- }
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC)) {
@@ -239,8 +237,6 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file)
}
}
- mutex_unlock(&event_mutex);
-
return ret;
}
@@ -248,7 +244,6 @@ int trigger_process_regex(struct trace_event_file *file, char *buff)
{
char *command, *next;
struct event_command *p;
- int ret = -EINVAL;
next = buff = skip_spaces(buff);
command = strsep(&next, ": \t");
@@ -259,17 +254,14 @@ int trigger_process_regex(struct trace_event_file *file, char *buff)
}
command = (command[0] != '!') ? command : command + 1;
- mutex_lock(&trigger_cmd_mutex);
+ guard(mutex)(&trigger_cmd_mutex);
+
list_for_each_entry(p, &trigger_commands, list) {
- if (strcmp(p->name, command) == 0) {
- ret = p->parse(p, file, buff, command, next);
- goto out_unlock;
- }
+ if (strcmp(p->name, command) == 0)
+ return p->parse(p, file, buff, command, next);
}
- out_unlock:
- mutex_unlock(&trigger_cmd_mutex);
- return ret;
+ return -EINVAL;
}
static ssize_t event_trigger_regex_write(struct file *file,
@@ -278,7 +270,7 @@ static ssize_t event_trigger_regex_write(struct file *file,
{
struct trace_event_file *event_file;
ssize_t ret;
- char *buf;
+ char *buf __free(kfree) = NULL;
if (!cnt)
return 0;
@@ -292,24 +284,18 @@ static ssize_t event_trigger_regex_write(struct file *file,
strim(buf);
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
+
event_file = event_file_file(file);
- if (unlikely(!event_file)) {
- mutex_unlock(&event_mutex);
- kfree(buf);
+ if (unlikely(!event_file))
return -ENODEV;
- }
- ret = trigger_process_regex(event_file, buf);
- mutex_unlock(&event_mutex);
- kfree(buf);
+ ret = trigger_process_regex(event_file, buf);
if (ret < 0)
- goto out;
+ return ret;
*ppos += cnt;
- ret = cnt;
- out:
- return ret;
+ return cnt;
}
static int event_trigger_regex_release(struct inode *inode, struct file *file)
@@ -359,20 +345,16 @@ const struct file_operations event_trigger_fops = {
__init int register_event_command(struct event_command *cmd)
{
struct event_command *p;
- int ret = 0;
- mutex_lock(&trigger_cmd_mutex);
+ guard(mutex)(&trigger_cmd_mutex);
+
list_for_each_entry(p, &trigger_commands, list) {
- if (strcmp(cmd->name, p->name) == 0) {
- ret = -EBUSY;
- goto out_unlock;
- }
+ if (strcmp(cmd->name, p->name) == 0)
+ return -EBUSY;
}
list_add(&cmd->list, &trigger_commands);
- out_unlock:
- mutex_unlock(&trigger_cmd_mutex);
- return ret;
+ return 0;
}
/*
@@ -382,20 +364,17 @@ __init int register_event_command(struct event_command *cmd)
__init int unregister_event_command(struct event_command *cmd)
{
struct event_command *p, *n;
- int ret = -ENODEV;
- mutex_lock(&trigger_cmd_mutex);
+ guard(mutex)(&trigger_cmd_mutex);
+
list_for_each_entry_safe(p, n, &trigger_commands, list) {
if (strcmp(cmd->name, p->name) == 0) {
- ret = 0;
list_del_init(&p->list);
- goto out_unlock;
+ return 0;
}
}
- out_unlock:
- mutex_unlock(&trigger_cmd_mutex);
- return ret;
+ return -ENODEV;
}
/**
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 17bcad8f79de..97325fbd6283 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -2899,7 +2899,7 @@ static int set_max_user_events_sysctl(const struct ctl_table *table, int write,
return ret;
}
-static struct ctl_table user_event_sysctls[] = {
+static const struct ctl_table user_event_sysctls[] = {
{
.procname = "user_events_max",
.data = &max_user_events,
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index c62d1629cffe..b8f3c4ba309b 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -134,7 +134,7 @@ static int
process_fetch_insn(struct fetch_insn *code, void *rec, void *edata,
void *dest, void *base)
{
- struct pt_regs *regs = rec;
+ struct ftrace_regs *fregs = rec;
unsigned long val;
int ret;
@@ -142,17 +142,17 @@ retry:
/* 1st stage: get value from context */
switch (code->op) {
case FETCH_OP_STACK:
- val = regs_get_kernel_stack_nth(regs, code->param);
+ val = ftrace_regs_get_kernel_stack_nth(fregs, code->param);
break;
case FETCH_OP_STACKP:
- val = kernel_stack_pointer(regs);
+ val = ftrace_regs_get_stack_pointer(fregs);
break;
case FETCH_OP_RETVAL:
- val = regs_return_value(regs);
+ val = ftrace_regs_get_return_value(fregs);
break;
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
case FETCH_OP_ARG:
- val = regs_get_kernel_argument(regs, code->param);
+ val = ftrace_regs_get_argument(fregs, code->param);
break;
case FETCH_OP_EDATA:
val = *(unsigned long *)((unsigned long)edata + code->offset);
@@ -175,7 +175,7 @@ NOKPROBE_SYMBOL(process_fetch_insn)
/* function entry handler */
static nokprobe_inline void
__fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
- struct pt_regs *regs,
+ struct ftrace_regs *fregs,
struct trace_event_file *trace_file)
{
struct fentry_trace_entry_head *entry;
@@ -189,41 +189,71 @@ __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
if (trace_trigger_soft_disabled(trace_file))
return;
- dsize = __get_data_size(&tf->tp, regs, NULL);
+ dsize = __get_data_size(&tf->tp, fregs, NULL);
entry = trace_event_buffer_reserve(&fbuffer, trace_file,
sizeof(*entry) + tf->tp.size + dsize);
if (!entry)
return;
- fbuffer.regs = regs;
+ fbuffer.regs = ftrace_get_regs(fregs);
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
entry->ip = entry_ip;
- store_trace_args(&entry[1], &tf->tp, regs, NULL, sizeof(*entry), dsize);
+ store_trace_args(&entry[1], &tf->tp, fregs, NULL, sizeof(*entry), dsize);
trace_event_buffer_commit(&fbuffer);
}
static void
fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
- struct pt_regs *regs)
+ struct ftrace_regs *fregs)
{
struct event_file_link *link;
trace_probe_for_each_link_rcu(link, &tf->tp)
- __fentry_trace_func(tf, entry_ip, regs, link->file);
+ __fentry_trace_func(tf, entry_ip, fregs, link->file);
}
NOKPROBE_SYMBOL(fentry_trace_func);
+static nokprobe_inline
+void store_fprobe_entry_data(void *edata, struct trace_probe *tp, struct ftrace_regs *fregs)
+{
+ struct probe_entry_arg *earg = tp->entry_arg;
+ unsigned long val = 0;
+ int i;
+
+ if (!earg)
+ return;
+
+ for (i = 0; i < earg->size; i++) {
+ struct fetch_insn *code = &earg->code[i];
+
+ switch (code->op) {
+ case FETCH_OP_ARG:
+ val = ftrace_regs_get_argument(fregs, code->param);
+ break;
+ case FETCH_OP_ST_EDATA:
+ *(unsigned long *)((unsigned long)edata + code->offset) = val;
+ break;
+ case FETCH_OP_END:
+ goto end;
+ default:
+ break;
+ }
+ }
+end:
+ return;
+}
+
/* function exit handler */
static int trace_fprobe_entry_handler(struct fprobe *fp, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *entry_data)
{
struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
if (tf->tp.entry_arg)
- store_trace_entry_data(entry_data, &tf->tp, regs);
+ store_fprobe_entry_data(entry_data, &tf->tp, fregs);
return 0;
}
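All fprobe callbacks in this file now take struct ftrace_regs instead of pt_regs, matching the fprobe-on-fgraph rework this series builds on. A minimal sketch of what an fprobe user's entry handler looks like with the new signature; the traced symbol, the argument index and the names are only examples, and ftrace_regs_get_argument() is available only on architectures with HAVE_DYNAMIC_FTRACE_WITH_ARGS:

#include <linux/fprobe.h>
#include <linux/ftrace.h>
#include <linux/printk.h>

/* Entry handler with the ftrace_regs-based signature used by this series */
static int my_entry(struct fprobe *fp, unsigned long entry_ip,
                    unsigned long ret_ip, struct ftrace_regs *fregs,
                    void *entry_data)
{
        /* first function argument, read through the ftrace_regs accessors */
        unsigned long arg0 = ftrace_regs_get_argument(fregs, 0);

        pr_info("hit %pS, arg0=%lx\n", (void *)entry_ip, arg0);
        return 0;
}

static struct fprobe my_fprobe = {
        .entry_handler = my_entry,
};

/* in module init, for example:
 *      err = register_fprobe(&my_fprobe, "kernel_clone", NULL);
 */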
@@ -231,7 +261,7 @@ NOKPROBE_SYMBOL(trace_fprobe_entry_handler)
static nokprobe_inline void
__fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *entry_data, struct trace_event_file *trace_file)
{
struct fexit_trace_entry_head *entry;
@@ -245,60 +275,63 @@ __fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
if (trace_trigger_soft_disabled(trace_file))
return;
- dsize = __get_data_size(&tf->tp, regs, entry_data);
+ dsize = __get_data_size(&tf->tp, fregs, entry_data);
entry = trace_event_buffer_reserve(&fbuffer, trace_file,
sizeof(*entry) + tf->tp.size + dsize);
if (!entry)
return;
- fbuffer.regs = regs;
+ fbuffer.regs = ftrace_get_regs(fregs);
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
entry->func = entry_ip;
entry->ret_ip = ret_ip;
- store_trace_args(&entry[1], &tf->tp, regs, entry_data, sizeof(*entry), dsize);
+ store_trace_args(&entry[1], &tf->tp, fregs, entry_data, sizeof(*entry), dsize);
trace_event_buffer_commit(&fbuffer);
}
static void
fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs, void *entry_data)
+ unsigned long ret_ip, struct ftrace_regs *fregs, void *entry_data)
{
struct event_file_link *link;
trace_probe_for_each_link_rcu(link, &tf->tp)
- __fexit_trace_func(tf, entry_ip, ret_ip, regs, entry_data, link->file);
+ __fexit_trace_func(tf, entry_ip, ret_ip, fregs, entry_data, link->file);
}
NOKPROBE_SYMBOL(fexit_trace_func);
#ifdef CONFIG_PERF_EVENTS
static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
- struct pt_regs *regs)
+ struct ftrace_regs *fregs)
{
struct trace_event_call *call = trace_probe_event_call(&tf->tp);
struct fentry_trace_entry_head *entry;
struct hlist_head *head;
int size, __size, dsize;
+ struct pt_regs *regs;
int rctx;
head = this_cpu_ptr(call->perf_events);
if (hlist_empty(head))
return 0;
- dsize = __get_data_size(&tf->tp, regs, NULL);
+ dsize = __get_data_size(&tf->tp, fregs, NULL);
__size = sizeof(*entry) + tf->tp.size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
- entry = perf_trace_buf_alloc(size, NULL, &rctx);
+ entry = perf_trace_buf_alloc(size, &regs, &rctx);
if (!entry)
return 0;
+ regs = ftrace_fill_perf_regs(fregs, regs);
+
entry->ip = entry_ip;
memset(&entry[1], 0, dsize);
- store_trace_args(&entry[1], &tf->tp, regs, NULL, sizeof(*entry), dsize);
+ store_trace_args(&entry[1], &tf->tp, fregs, NULL, sizeof(*entry), dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
return 0;
@@ -307,31 +340,34 @@ NOKPROBE_SYMBOL(fentry_perf_func);
static void
fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *entry_data)
{
struct trace_event_call *call = trace_probe_event_call(&tf->tp);
struct fexit_trace_entry_head *entry;
struct hlist_head *head;
int size, __size, dsize;
+ struct pt_regs *regs;
int rctx;
head = this_cpu_ptr(call->perf_events);
if (hlist_empty(head))
return;
- dsize = __get_data_size(&tf->tp, regs, entry_data);
+ dsize = __get_data_size(&tf->tp, fregs, entry_data);
__size = sizeof(*entry) + tf->tp.size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
- entry = perf_trace_buf_alloc(size, NULL, &rctx);
+ entry = perf_trace_buf_alloc(size, &regs, &rctx);
if (!entry)
return;
+ regs = ftrace_fill_perf_regs(fregs, regs);
+
entry->func = entry_ip;
entry->ret_ip = ret_ip;
- store_trace_args(&entry[1], &tf->tp, regs, entry_data, sizeof(*entry), dsize);
+ store_trace_args(&entry[1], &tf->tp, fregs, entry_data, sizeof(*entry), dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
}
@@ -339,33 +375,34 @@ NOKPROBE_SYMBOL(fexit_perf_func);
#endif /* CONFIG_PERF_EVENTS */
static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *entry_data)
{
struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
int ret = 0;
if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
- fentry_trace_func(tf, entry_ip, regs);
+ fentry_trace_func(tf, entry_ip, fregs);
+
#ifdef CONFIG_PERF_EVENTS
if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
- ret = fentry_perf_func(tf, entry_ip, regs);
+ ret = fentry_perf_func(tf, entry_ip, fregs);
#endif
return ret;
}
NOKPROBE_SYMBOL(fentry_dispatcher);
static void fexit_dispatcher(struct fprobe *fp, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
void *entry_data)
{
struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
- fexit_trace_func(tf, entry_ip, ret_ip, regs, entry_data);
+ fexit_trace_func(tf, entry_ip, ret_ip, fregs, entry_data);
#ifdef CONFIG_PERF_EVENTS
if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
- fexit_perf_func(tf, entry_ip, ret_ip, regs, entry_data);
+ fexit_perf_func(tf, entry_ip, ret_ip, fregs, entry_data);
#endif
}
NOKPROBE_SYMBOL(fexit_dispatcher);
@@ -379,6 +416,9 @@ static void free_trace_fprobe(struct trace_fprobe *tf)
}
}
+/* Since alloc_trace_fprobe() can return an error, also check whether the pointer is an ERR value. */
+DEFINE_FREE(free_trace_fprobe, struct trace_fprobe *, if (!IS_ERR_OR_NULL(_T)) free_trace_fprobe(_T))
+
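DEFINE_FREE() registers a cleanup routine that later hunks attach to locals with __free(free_trace_fprobe), so every early return frees a half-built probe automatically, while return_ptr() hands the pointer out without triggering the cleanup. A minimal sketch of the <linux/cleanup.h> idiom with a made-up type:

#include <linux/cleanup.h>
#include <linux/err.h>
#include <linux/slab.h>

struct widget { int id; };

static void widget_destroy(struct widget *w)
{
        kfree(w);
}

/* Run widget_destroy() on scope exit unless ownership was transferred */
DEFINE_FREE(widget_destroy, struct widget *, if (!IS_ERR_OR_NULL(_T)) widget_destroy(_T))

static struct widget *widget_create(int id)
{
        struct widget *w __free(widget_destroy) = kzalloc(sizeof(*w), GFP_KERNEL);

        if (!w)
                return ERR_PTR(-ENOMEM);

        if (id < 0)
                return ERR_PTR(-EINVAL);        /* w is freed automatically here */

        w->id = id;
        return_ptr(w);          /* transfer ownership; no automatic free */
}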
/*
* Allocate new trace_probe and initialize it (including fprobe).
*/
@@ -387,10 +427,9 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
const char *symbol,
struct tracepoint *tpoint,
struct module *mod,
- int maxactive,
int nargs, bool is_return)
{
- struct trace_fprobe *tf;
+ struct trace_fprobe *tf __free(free_trace_fprobe) = NULL;
int ret = -ENOMEM;
tf = kzalloc(struct_size(tf, tp.args, nargs), GFP_KERNEL);
@@ -399,7 +438,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
tf->symbol = kstrdup(symbol, GFP_KERNEL);
if (!tf->symbol)
- goto error;
+ return ERR_PTR(-ENOMEM);
if (is_return)
tf->fp.exit_handler = fexit_dispatcher;
@@ -408,17 +447,13 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
tf->tpoint = tpoint;
tf->mod = mod;
- tf->fp.nr_maxactive = maxactive;
ret = trace_probe_init(&tf->tp, event, group, false, nargs);
if (ret < 0)
- goto error;
+ return ERR_PTR(ret);
dyn_event_init(&tf->devent, &trace_fprobe_ops);
- return tf;
-error:
- free_trace_fprobe(tf);
- return ERR_PTR(ret);
+ return_ptr(tf);
}
static struct trace_fprobe *find_trace_fprobe(const char *event,
@@ -845,14 +880,12 @@ static int register_trace_fprobe(struct trace_fprobe *tf)
struct trace_fprobe *old_tf;
int ret;
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
old_tf = find_trace_fprobe(trace_probe_name(&tf->tp),
trace_probe_group_name(&tf->tp));
- if (old_tf) {
- ret = append_trace_fprobe(tf, old_tf);
- goto end;
- }
+ if (old_tf)
+ return append_trace_fprobe(tf, old_tf);
/* Register new event */
ret = register_fprobe_event(tf);
@@ -862,7 +895,7 @@ static int register_trace_fprobe(struct trace_fprobe *tf)
trace_probe_log_err(0, EVENT_EXIST);
} else
pr_warn("Failed to register probe event(%d)\n", ret);
- goto end;
+ return ret;
}
/* Register fprobe */
@@ -872,8 +905,6 @@ static int register_trace_fprobe(struct trace_fprobe *tf)
else
dyn_event_add(&tf->devent, trace_probe_event_call(&tf->tp));
-end:
- mutex_unlock(&event_mutex);
return ret;
}
@@ -1034,7 +1065,10 @@ static int parse_symbol_and_return(int argc, const char *argv[],
return 0;
}
-static int __trace_fprobe_create(int argc, const char *argv[])
+DEFINE_FREE(module_put, struct module *, if (_T) module_put(_T))
+
+static int trace_fprobe_create_internal(int argc, const char *argv[],
+ struct traceprobe_parse_context *ctx)
{
/*
* Argument syntax:
@@ -1060,24 +1094,20 @@ static int __trace_fprobe_create(int argc, const char *argv[])
* Type of args:
* FETCHARG:TYPE : use TYPE instead of unsigned long.
*/
- struct trace_fprobe *tf = NULL;
- int i, len, new_argc = 0, ret = 0;
+ struct trace_fprobe *tf __free(free_trace_fprobe) = NULL;
+ int i, new_argc = 0, ret = 0;
bool is_return = false;
- char *symbol = NULL;
+ char *symbol __free(kfree) = NULL;
const char *event = NULL, *group = FPROBE_EVENT_SYSTEM;
- const char **new_argv = NULL;
- int maxactive = 0;
+ const char **new_argv __free(kfree) = NULL;
char buf[MAX_EVENT_NAME_LEN];
char gbuf[MAX_EVENT_NAME_LEN];
char sbuf[KSYM_NAME_LEN];
char abuf[MAX_BTF_ARGS_LEN];
- char *dbuf = NULL;
+ char *dbuf __free(kfree) = NULL;
bool is_tracepoint = false;
- struct module *tp_mod = NULL;
+ struct module *tp_mod __free(module_put) = NULL;
struct tracepoint *tpoint = NULL;
- struct traceprobe_parse_context ctx = {
- .flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE,
- };
if ((argv[0][0] != 'f' && argv[0][0] != 't') || argc < 2)
return -ECANCELED;
@@ -1087,35 +1117,13 @@ static int __trace_fprobe_create(int argc, const char *argv[])
group = TRACEPOINT_EVENT_SYSTEM;
}
- trace_probe_log_init("trace_fprobe", argc, argv);
-
- event = strchr(&argv[0][1], ':');
- if (event)
- event++;
-
- if (isdigit(argv[0][1])) {
- if (event)
- len = event - &argv[0][1] - 1;
- else
- len = strlen(&argv[0][1]);
- if (len > MAX_EVENT_NAME_LEN - 1) {
- trace_probe_log_err(1, BAD_MAXACT);
- goto parse_error;
- }
- memcpy(buf, &argv[0][1], len);
- buf[len] = '\0';
- ret = kstrtouint(buf, 0, &maxactive);
- if (ret || !maxactive) {
+ if (argv[0][1] != '\0') {
+ if (argv[0][1] != ':') {
+ trace_probe_log_set_index(0);
trace_probe_log_err(1, BAD_MAXACT);
- goto parse_error;
- }
- /* fprobe rethook instances are iterated over via a list. The
- * maximum should stay reasonable.
- */
- if (maxactive > RETHOOK_MAXACTIVE_MAX) {
- trace_probe_log_err(1, MAXACT_TOO_BIG);
- goto parse_error;
+ return -EINVAL;
}
+ event = &argv[0][2];
}
trace_probe_log_set_index(1);
@@ -1123,20 +1131,14 @@ static int __trace_fprobe_create(int argc, const char *argv[])
/* a symbol(or tracepoint) must be specified */
ret = parse_symbol_and_return(argc, argv, &symbol, &is_return, is_tracepoint);
if (ret < 0)
- goto parse_error;
-
- if (!is_return && maxactive) {
- trace_probe_log_set_index(0);
- trace_probe_log_err(1, BAD_MAXACT_TYPE);
- goto parse_error;
- }
+ return -EINVAL;
trace_probe_log_set_index(0);
if (event) {
ret = traceprobe_parse_event_name(&event, &group, gbuf,
event - argv[0]);
if (ret)
- goto parse_error;
+ return -EINVAL;
}
if (!event) {
@@ -1152,67 +1154,62 @@ static int __trace_fprobe_create(int argc, const char *argv[])
}
if (is_return)
- ctx.flags |= TPARG_FL_RETURN;
+ ctx->flags |= TPARG_FL_RETURN;
else
- ctx.flags |= TPARG_FL_FENTRY;
+ ctx->flags |= TPARG_FL_FENTRY;
if (is_tracepoint) {
- ctx.flags |= TPARG_FL_TPOINT;
+ ctx->flags |= TPARG_FL_TPOINT;
tpoint = find_tracepoint(symbol, &tp_mod);
if (tpoint) {
- ctx.funcname = kallsyms_lookup(
+ ctx->funcname = kallsyms_lookup(
(unsigned long)tpoint->probestub,
NULL, NULL, NULL, sbuf);
} else if (IS_ENABLED(CONFIG_MODULES)) {
/* This *may* be loaded afterwards */
tpoint = TRACEPOINT_STUB;
- ctx.funcname = symbol;
+ ctx->funcname = symbol;
} else {
trace_probe_log_set_index(1);
trace_probe_log_err(0, NO_TRACEPOINT);
- goto parse_error;
+ return -EINVAL;
}
} else
- ctx.funcname = symbol;
+ ctx->funcname = symbol;
argc -= 2; argv += 2;
new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc,
- abuf, MAX_BTF_ARGS_LEN, &ctx);
- if (IS_ERR(new_argv)) {
- ret = PTR_ERR(new_argv);
- new_argv = NULL;
- goto out;
- }
+ abuf, MAX_BTF_ARGS_LEN, ctx);
+ if (IS_ERR(new_argv))
+ return PTR_ERR(new_argv);
if (new_argv) {
argc = new_argc;
argv = new_argv;
}
- if (argc > MAX_TRACE_ARGS) {
- ret = -E2BIG;
- goto out;
- }
+ if (argc > MAX_TRACE_ARGS)
+ return -E2BIG;
ret = traceprobe_expand_dentry_args(argc, argv, &dbuf);
if (ret)
- goto out;
+ return ret;
/* setup a probe */
tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod,
- maxactive, argc, is_return);
+ argc, is_return);
if (IS_ERR(tf)) {
ret = PTR_ERR(tf);
/* This must return -ENOMEM, else there is a bug */
WARN_ON_ONCE(ret != -ENOMEM);
- goto out; /* We know tf is not allocated */
+ return ret;
}
/* parse arguments */
for (i = 0; i < argc; i++) {
trace_probe_log_set_index(i + 2);
- ctx.offset = 0;
- ret = traceprobe_parse_probe_arg(&tf->tp, i, argv[i], &ctx);
+ ctx->offset = 0;
+ ret = traceprobe_parse_probe_arg(&tf->tp, i, argv[i], ctx);
if (ret)
- goto error; /* This can be -ENOMEM */
+ return ret; /* This can be -ENOMEM */
}
if (is_return && tf->tp.entry_arg) {
@@ -1223,7 +1220,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
ret = traceprobe_set_print_fmt(&tf->tp,
is_return ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL);
if (ret < 0)
- goto error;
+ return ret;
ret = register_trace_fprobe(tf);
if (ret) {
@@ -1234,29 +1231,32 @@ static int __trace_fprobe_create(int argc, const char *argv[])
trace_probe_log_err(0, BAD_PROBE_ADDR);
else if (ret != -ENOMEM && ret != -EEXIST)
trace_probe_log_err(0, FAIL_REG_PROBE);
- goto error;
+ return -EINVAL;
}
-out:
- if (tp_mod)
- module_put(tp_mod);
+ /* 'tf' is successfully registered. To avoid freeing, assign NULL. */
+ tf = NULL;
+
+ return 0;
+}
+
+static int trace_fprobe_create_cb(int argc, const char *argv[])
+{
+ struct traceprobe_parse_context ctx = {
+ .flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE,
+ };
+ int ret;
+
+ trace_probe_log_init("trace_fprobe", argc, argv);
+ ret = trace_fprobe_create_internal(argc, argv, &ctx);
traceprobe_finish_parse(&ctx);
trace_probe_log_clear();
- kfree(new_argv);
- kfree(symbol);
- kfree(dbuf);
return ret;
-
-parse_error:
- ret = -EINVAL;
-error:
- free_trace_fprobe(tf);
- goto out;
}
static int trace_fprobe_create(const char *raw_command)
{
- return trace_probe_create(raw_command, __trace_fprobe_create);
+ return trace_probe_create(raw_command, trace_fprobe_create_cb);
}
static int trace_fprobe_release(struct dyn_event *ev)
@@ -1278,8 +1278,6 @@ static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev)
seq_putc(m, 't');
else
seq_putc(m, 'f');
- if (trace_fprobe_is_return(tf) && tf->fp.nr_maxactive)
- seq_printf(m, "%d", tf->fp.nr_maxactive);
seq_printf(m, ":%s/%s", trace_probe_group_name(&tf->tp),
trace_probe_name(&tf->tp));
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 74c353164ca1..df56f9b76010 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -176,7 +176,8 @@ static void function_trace_start(struct trace_array *tr)
tracing_reset_online_cpus(&tr->array_buffer);
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/* fregs are guaranteed not to be NULL if HAVE_DYNAMIC_FTRACE_WITH_ARGS is set */
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) && defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS)
static __always_inline unsigned long
function_get_true_parent_ip(unsigned long parent_ip, struct ftrace_regs *fregs)
{
@@ -215,7 +216,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
parent_ip = function_get_true_parent_ip(parent_ip, fregs);
- trace_ctx = tracing_gen_ctx();
+ trace_ctx = tracing_gen_ctx_dec();
data = this_cpu_ptr(tr->array_buffer.data);
if (!atomic_read(&data->disabled))
@@ -320,7 +321,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
struct trace_array *tr = op->private;
struct trace_array_cpu *data;
unsigned int trace_ctx;
- unsigned long flags;
int bit;
if (unlikely(!tr->function_enabled))
@@ -346,8 +346,7 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
if (is_repeat_check(tr, last_info, ip, parent_ip))
goto out;
- local_save_flags(flags);
- trace_ctx = tracing_gen_ctx_flags(flags);
+ trace_ctx = tracing_gen_ctx_dec();
process_repeats(tr, ip, parent_ip, last_info, trace_ctx);
trace_function(tr, ip, parent_ip, trace_ctx);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 5504b5e4e7b4..136c750b0b4d 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -175,16 +175,16 @@ struct fgraph_times {
};
int trace_graph_entry(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
struct trace_array_cpu *data;
struct fgraph_times *ftimes;
- unsigned long flags;
unsigned int trace_ctx;
long disabled;
- int ret;
+ int ret = 0;
int cpu;
if (*task_var & TRACE_GRAPH_NOTRACE)
@@ -198,7 +198,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
* returning from the function.
*/
if (ftrace_graph_notrace_addr(trace->func)) {
- *task_var |= TRACE_GRAPH_NOTRACE_BIT;
+ *task_var |= TRACE_GRAPH_NOTRACE;
/*
* Need to return 1 to have the return called
* that will clear the NOTRACE bit.
@@ -235,25 +235,21 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
if (tracing_thresh)
return 1;
- local_irq_save(flags);
+ preempt_disable_notrace();
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->array_buffer.data, cpu);
- disabled = atomic_inc_return(&data->disabled);
- if (likely(disabled == 1)) {
- trace_ctx = tracing_gen_ctx_flags(flags);
- if (unlikely(IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) &&
- tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR))) {
+ disabled = atomic_read(&data->disabled);
+ if (likely(!disabled)) {
+ trace_ctx = tracing_gen_ctx();
+ if (IS_ENABLED(CONFIG_FUNCTION_GRAPH_RETADDR) &&
+ tracer_flags_is_set(TRACE_GRAPH_PRINT_RETADDR)) {
unsigned long retaddr = ftrace_graph_top_ret_addr(current);
-
ret = __trace_graph_retaddr_entry(tr, trace, trace_ctx, retaddr);
- } else
+ } else {
ret = __trace_graph_entry(tr, trace, trace_ctx);
- } else {
- ret = 0;
+ }
}
-
- atomic_dec(&data->disabled);
- local_irq_restore(flags);
+ preempt_enable_notrace();
return ret;
}
@@ -270,12 +266,10 @@ __trace_graph_function(struct trace_array *tr,
struct ftrace_graph_ret ret = {
.func = ip,
.depth = 0,
- .calltime = time,
- .rettime = time,
};
__trace_graph_entry(tr, &ent, trace_ctx);
- __trace_graph_return(tr, &ret, trace_ctx);
+ __trace_graph_return(tr, &ret, trace_ctx, time, time);
}
void
@@ -287,8 +281,9 @@ trace_graph_function(struct trace_array *tr,
}
void __trace_graph_return(struct trace_array *tr,
- struct ftrace_graph_ret *trace,
- unsigned int trace_ctx)
+ struct ftrace_graph_ret *trace,
+ unsigned int trace_ctx,
+ u64 calltime, u64 rettime)
{
struct ring_buffer_event *event;
struct trace_buffer *buffer = tr->array_buffer.buffer;
@@ -300,6 +295,8 @@ void __trace_graph_return(struct trace_array *tr,
return;
entry = ring_buffer_event_data(event);
entry->ret = *trace;
+ entry->calltime = calltime;
+ entry->rettime = rettime;
trace_buffer_unlock_commit_nostack(buffer, event);
}
@@ -314,18 +311,20 @@ static void handle_nosleeptime(struct ftrace_graph_ret *trace,
}
void trace_graph_return(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops, struct ftrace_regs *fregs)
{
unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
struct trace_array_cpu *data;
struct fgraph_times *ftimes;
- unsigned long flags;
unsigned int trace_ctx;
+ u64 calltime, rettime;
long disabled;
int size;
int cpu;
+ rettime = trace_clock_local();
+
ftrace_graph_addr_finish(gops, trace);
if (*task_var & TRACE_GRAPH_NOTRACE) {
@@ -339,22 +338,22 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
handle_nosleeptime(trace, ftimes, size);
- trace->calltime = ftimes->calltime;
+ calltime = ftimes->calltime;
- local_irq_save(flags);
+ preempt_disable_notrace();
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->array_buffer.data, cpu);
- disabled = atomic_inc_return(&data->disabled);
- if (likely(disabled == 1)) {
- trace_ctx = tracing_gen_ctx_flags(flags);
- __trace_graph_return(tr, trace, trace_ctx);
+ disabled = atomic_read(&data->disabled);
+ if (likely(!disabled)) {
+ trace_ctx = tracing_gen_ctx();
+ __trace_graph_return(tr, trace, trace_ctx, calltime, rettime);
}
- atomic_dec(&data->disabled);
- local_irq_restore(flags);
+ preempt_enable_notrace();
}
static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct fgraph_times *ftimes;
int size;
@@ -372,13 +371,11 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace,
handle_nosleeptime(trace, ftimes, size);
- trace->calltime = ftimes->calltime;
-
if (tracing_thresh &&
- (trace->rettime - ftimes->calltime < tracing_thresh))
+ (trace_clock_local() - ftimes->calltime < tracing_thresh))
return;
else
- trace_graph_return(trace, gops);
+ trace_graph_return(trace, gops, fregs);
}
static struct fgraph_ops funcgraph_ops = {
@@ -861,7 +858,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
graph_ret = &ret_entry->ret;
call = &entry->graph_ent;
- duration = graph_ret->rettime - graph_ret->calltime;
+ duration = ret_entry->rettime - ret_entry->calltime;
func = call->func + iter->tr->text_delta;
@@ -1142,11 +1139,14 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
}
static enum print_line_t
-print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
+print_graph_return(struct ftrace_graph_ret_entry *retentry, struct trace_seq *s,
struct trace_entry *ent, struct trace_iterator *iter,
u32 flags)
{
- unsigned long long duration = trace->rettime - trace->calltime;
+ struct ftrace_graph_ret *trace = &retentry->ret;
+ u64 calltime = retentry->calltime;
+ u64 rettime = retentry->rettime;
+ unsigned long long duration = rettime - calltime;
struct fgraph_data *data = iter->private;
struct trace_array *tr = iter->tr;
unsigned long func;
@@ -1347,7 +1347,7 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
case TRACE_GRAPH_RET: {
struct ftrace_graph_ret_entry *field;
trace_assign_type(field, entry);
- return print_graph_return(&field->ret, s, entry, iter, flags);
+ return print_graph_return(field, s, entry, iter, flags);
}
case TRACE_STACK:
case TRACE_FN:
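
The trace_functions_graph.c hunks above make three related changes: the graph entry/return handlers gain a struct ftrace_regs *fregs argument, the local_irq_save() plus atomic_inc_return() guard is replaced by preempt_disable_notrace() with a read-only check of the per-CPU disabled counter, and __trace_graph_return() now takes calltime/rettime as explicit arguments instead of reading them from struct ftrace_graph_ret. A minimal sketch of the resulting return-handler shape, using only helpers visible in this diff (my_graph_tracer_return() itself is hypothetical):

static void my_graph_tracer_return(struct ftrace_graph_ret *trace,
				   struct fgraph_ops *gops,
				   struct ftrace_regs *fregs)
{
	struct trace_array *tr = gops->private;
	struct trace_array_cpu *data;
	struct fgraph_times *ftimes;
	unsigned int trace_ctx;
	u64 rettime = trace_clock_local();	/* handler takes its own timestamp */
	int size;

	/* calltime was stored on the fgraph shadow stack by the entry handler */
	ftimes = fgraph_retrieve_data(gops->idx, &size);
	if (!ftimes)
		return;

	preempt_disable_notrace();		/* IRQs stay enabled now */
	data = per_cpu_ptr(tr->array_buffer.data, raw_smp_processor_id());
	if (likely(!atomic_read(&data->disabled))) {	/* read-only check */
		trace_ctx = tracing_gen_ctx();
		/* calltime/rettime are explicit arguments after this change */
		__trace_graph_return(tr, trace, trace_ctx,
				     ftimes->calltime, rettime);
	}
	preempt_enable_notrace();
}

Because interrupts are no longer disabled around the record, the context flags are generated with tracing_gen_ctx() rather than from previously saved flags.
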
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index fce064e20570..7294ad676379 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -176,12 +176,14 @@ static int irqsoff_display_graph(struct trace_array *tr, int set)
}
static int irqsoff_graph_entry(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
unsigned int trace_ctx;
+ u64 *calltime;
int ret;
if (ftrace_graph_ignore_func(gops, trace))
@@ -199,6 +201,12 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace,
if (!func_prolog_dec(tr, &data, &flags))
return 0;
+ calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
+ if (!calltime)
+ return 0;
+
+ *calltime = trace_clock_local();
+
trace_ctx = tracing_gen_ctx_flags(flags);
ret = __trace_graph_entry(tr, trace, trace_ctx);
atomic_dec(&data->disabled);
@@ -207,20 +215,29 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace,
}
static void irqsoff_graph_return(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct trace_array *tr = irqsoff_trace;
struct trace_array_cpu *data;
unsigned long flags;
unsigned int trace_ctx;
+ u64 *calltime;
+ u64 rettime;
+ int size;
ftrace_graph_addr_finish(gops, trace);
if (!func_prolog_dec(tr, &data, &flags))
return;
+ rettime = trace_clock_local();
+ calltime = fgraph_retrieve_data(gops->idx, &size);
+ if (!calltime)
+ return;
+
trace_ctx = tracing_gen_ctx_flags(flags);
- __trace_graph_return(tr, trace, trace_ctx);
+ __trace_graph_return(tr, trace, trace_ctx, *calltime, rettime);
atomic_dec(&data->disabled);
}
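
With calltime/rettime gone from struct ftrace_graph_ret, tracers such as irqsoff and wakeup keep their own per-call timestamp on the fgraph shadow stack: fgraph_reserve_data() in the entry handler allocates the slot and fgraph_retrieve_data() in the return handler reads it back. A hedged sketch of that pairing; the my_* names and the slow-call counter are invented for illustration, while the fgraph helpers are the ones used above:

static atomic64_t my_slow_calls = ATOMIC64_INIT(0);

static int my_entry(struct ftrace_graph_ent *trace,
		    struct fgraph_ops *gops, struct ftrace_regs *fregs)
{
	u64 *calltime;

	/* Reserve per-call storage tied to this gops instance */
	calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
	if (!calltime)
		return 0;		/* 0: do not trace this function's return */

	*calltime = trace_clock_local();
	return 1;			/* 1: invoke the return handler */
}

static void my_return(struct ftrace_graph_ret *trace,
		      struct fgraph_ops *gops, struct ftrace_regs *fregs)
{
	u64 rettime = trace_clock_local();
	u64 *calltime;
	int size;

	/* Read back what the matching entry handler stored */
	calltime = fgraph_retrieve_data(gops->idx, &size);
	if (!calltime)
		return;

	/* Purely illustrative consumer: count calls slower than 1us */
	if (rettime - *calltime > 1000)
		atomic64_inc(&my_slow_calls);
}

static struct fgraph_ops my_gops = {
	.entryfunc	= my_entry,
	.retfunc	= my_return,
};

gops->idx is assigned when the ops are registered with register_ftrace_graph(), so the same handler code works for multiple concurrent fgraph users.
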
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 263fac44d3ca..d8d5f18a141a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -8,6 +8,7 @@
#define pr_fmt(fmt) "trace_kprobe: " fmt
#include <linux/bpf-cgroup.h>
+#include <linux/cleanup.h>
#include <linux/security.h>
#include <linux/module.h>
#include <linux/uaccess.h>
@@ -257,6 +258,9 @@ static void free_trace_kprobe(struct trace_kprobe *tk)
}
}
+DEFINE_FREE(free_trace_kprobe, struct trace_kprobe *,
+ if (!IS_ERR_OR_NULL(_T)) free_trace_kprobe(_T))
+
/*
* Allocate new trace_probe and initialize it (including kprobes).
*/
@@ -268,7 +272,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
int maxactive,
int nargs, bool is_return)
{
- struct trace_kprobe *tk;
+ struct trace_kprobe *tk __free(free_trace_kprobe) = NULL;
int ret = -ENOMEM;
tk = kzalloc(struct_size(tk, tp.args, nargs), GFP_KERNEL);
@@ -277,12 +281,12 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
tk->nhit = alloc_percpu(unsigned long);
if (!tk->nhit)
- goto error;
+ return ERR_PTR(ret);
if (symbol) {
tk->symbol = kstrdup(symbol, GFP_KERNEL);
if (!tk->symbol)
- goto error;
+ return ERR_PTR(ret);
tk->rp.kp.symbol_name = tk->symbol;
tk->rp.kp.offset = offs;
} else
@@ -299,13 +303,10 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
ret = trace_probe_init(&tk->tp, event, group, false, nargs);
if (ret < 0)
- goto error;
+ return ERR_PTR(ret);
dyn_event_init(&tk->devent, &trace_kprobe_ops);
- return tk;
-error:
- free_trace_kprobe(tk);
- return ERR_PTR(ret);
+ return_ptr(tk);
}
static struct trace_kprobe *find_trace_kprobe(const char *event,
@@ -634,7 +635,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
struct trace_kprobe *old_tk;
int ret;
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
old_tk = find_trace_kprobe(trace_probe_name(&tk->tp),
trace_probe_group_name(&tk->tp));
@@ -642,11 +643,9 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
if (trace_kprobe_is_return(tk) != trace_kprobe_is_return(old_tk)) {
trace_probe_log_set_index(0);
trace_probe_log_err(0, DIFF_PROBE_TYPE);
- ret = -EEXIST;
- } else {
- ret = append_trace_kprobe(tk, old_tk);
+ return -EEXIST;
}
- goto end;
+ return append_trace_kprobe(tk, old_tk);
}
/* Register new event */
@@ -657,7 +656,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
trace_probe_log_err(0, EVENT_EXIST);
} else
pr_warn("Failed to register probe event(%d)\n", ret);
- goto end;
+ return ret;
}
/* Register k*probe */
@@ -672,8 +671,6 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
else
dyn_event_add(&tk->devent, trace_probe_event_call(&tk->tp));
-end:
- mutex_unlock(&event_mutex);
return ret;
}
@@ -706,7 +703,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
return NOTIFY_DONE;
/* Update probes on coming module */
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
for_each_trace_kprobe(tk, pos) {
if (trace_kprobe_within_module(tk, mod)) {
/* Don't need to check busy - this should have gone. */
@@ -718,14 +715,13 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
module_name(mod), ret);
}
}
- mutex_unlock(&event_mutex);
return NOTIFY_DONE;
}
static struct notifier_block trace_kprobe_module_nb = {
.notifier_call = trace_kprobe_module_callback,
- .priority = 1 /* Invoked after kprobe module callback */
+ .priority = 2 /* Invoked after kprobe and jump_label module callback */
};
static int trace_kprobe_register_module_notifier(void)
{
@@ -840,7 +836,8 @@ out:
static int trace_kprobe_entry_handler(struct kretprobe_instance *ri,
struct pt_regs *regs);
-static int __trace_kprobe_create(int argc, const char *argv[])
+static int trace_kprobe_create_internal(int argc, const char *argv[],
+ struct traceprobe_parse_context *ctx)
{
/*
* Argument syntax:
@@ -866,11 +863,12 @@ static int __trace_kprobe_create(int argc, const char *argv[])
* Type of args:
* FETCHARG:TYPE : use TYPE instead of unsigned long.
*/
- struct trace_kprobe *tk = NULL;
+ struct trace_kprobe *tk __free(free_trace_kprobe) = NULL;
int i, len, new_argc = 0, ret = 0;
bool is_return = false;
- char *symbol = NULL, *tmp = NULL;
- const char **new_argv = NULL;
+ char *symbol __free(kfree) = NULL;
+ char *tmp = NULL;
+ const char **new_argv __free(kfree) = NULL;
const char *event = NULL, *group = KPROBE_EVENT_SYSTEM;
enum probe_print_type ptype;
int maxactive = 0;
@@ -879,8 +877,7 @@ static int __trace_kprobe_create(int argc, const char *argv[])
char buf[MAX_EVENT_NAME_LEN];
char gbuf[MAX_EVENT_NAME_LEN];
char abuf[MAX_BTF_ARGS_LEN];
- char *dbuf = NULL;
- struct traceprobe_parse_context ctx = { .flags = TPARG_FL_KERNEL };
+ char *dbuf __free(kfree) = NULL;
switch (argv[0][0]) {
case 'r':
@@ -894,8 +891,6 @@ static int __trace_kprobe_create(int argc, const char *argv[])
if (argc < 2)
return -ECANCELED;
- trace_probe_log_init("trace_kprobe", argc, argv);
-
event = strchr(&argv[0][1], ':');
if (event)
event++;
@@ -903,7 +898,7 @@ static int __trace_kprobe_create(int argc, const char *argv[])
if (isdigit(argv[0][1])) {
if (!is_return) {
trace_probe_log_err(1, BAD_MAXACT_TYPE);
- goto parse_error;
+ return -EINVAL;
}
if (event)
len = event - &argv[0][1] - 1;
@@ -911,21 +906,21 @@ static int __trace_kprobe_create(int argc, const char *argv[])
len = strlen(&argv[0][1]);
if (len > MAX_EVENT_NAME_LEN - 1) {
trace_probe_log_err(1, BAD_MAXACT);
- goto parse_error;
+ return -EINVAL;
}
memcpy(buf, &argv[0][1], len);
buf[len] = '\0';
ret = kstrtouint(buf, 0, &maxactive);
if (ret || !maxactive) {
trace_probe_log_err(1, BAD_MAXACT);
- goto parse_error;
+ return -EINVAL;
}
/* kretprobes instances are iterated over via a list. The
* maximum should stay reasonable.
*/
if (maxactive > KRETPROBE_MAXACTIVE_MAX) {
trace_probe_log_err(1, MAXACT_TOO_BIG);
- goto parse_error;
+ return -EINVAL;
}
}
@@ -934,10 +929,9 @@ static int __trace_kprobe_create(int argc, const char *argv[])
if (kstrtoul(argv[1], 0, (unsigned long *)&addr)) {
trace_probe_log_set_index(1);
/* Check whether uprobe event specified */
- if (strchr(argv[1], '/') && strchr(argv[1], ':')) {
- ret = -ECANCELED;
- goto error;
- }
+ if (strchr(argv[1], '/') && strchr(argv[1], ':'))
+ return -ECANCELED;
+
/* a symbol specified */
symbol = kstrdup(argv[1], GFP_KERNEL);
if (!symbol)
@@ -950,7 +944,7 @@ static int __trace_kprobe_create(int argc, const char *argv[])
is_return = true;
} else {
trace_probe_log_err(tmp - symbol, BAD_ADDR_SUFFIX);
- goto parse_error;
+ return -EINVAL;
}
}
@@ -958,7 +952,7 @@ static int __trace_kprobe_create(int argc, const char *argv[])
ret = traceprobe_split_symbol_offset(symbol, &offset);
if (ret || offset < 0 || offset > UINT_MAX) {
trace_probe_log_err(0, BAD_PROBE_ADDR);
- goto parse_error;
+ return -EINVAL;
}
ret = validate_probe_symbol(symbol);
if (ret) {
@@ -966,17 +960,17 @@ static int __trace_kprobe_create(int argc, const char *argv[])
trace_probe_log_err(0, NON_UNIQ_SYMBOL);
else
trace_probe_log_err(0, BAD_PROBE_ADDR);
- goto parse_error;
+ return -EINVAL;
}
if (is_return)
- ctx.flags |= TPARG_FL_RETURN;
+ ctx->flags |= TPARG_FL_RETURN;
ret = kprobe_on_func_entry(NULL, symbol, offset);
if (ret == 0 && !is_return)
- ctx.flags |= TPARG_FL_FENTRY;
+ ctx->flags |= TPARG_FL_FENTRY;
/* Defer the ENOENT case until register kprobe */
if (ret == -EINVAL && is_return) {
trace_probe_log_err(0, BAD_RETPROBE);
- goto parse_error;
+ return -EINVAL;
}
}
@@ -985,7 +979,7 @@ static int __trace_kprobe_create(int argc, const char *argv[])
ret = traceprobe_parse_event_name(&event, &group, gbuf,
event - argv[0]);
if (ret)
- goto parse_error;
+ return ret;
}
if (!event) {
@@ -1001,26 +995,24 @@ static int __trace_kprobe_create(int argc, const char *argv[])
}
argc -= 2; argv += 2;
- ctx.funcname = symbol;
+ ctx->funcname = symbol;
new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc,
- abuf, MAX_BTF_ARGS_LEN, &ctx);
+ abuf, MAX_BTF_ARGS_LEN, ctx);
if (IS_ERR(new_argv)) {
ret = PTR_ERR(new_argv);
new_argv = NULL;
- goto out;
+ return ret;
}
if (new_argv) {
argc = new_argc;
argv = new_argv;
}
- if (argc > MAX_TRACE_ARGS) {
- ret = -E2BIG;
- goto out;
- }
+ if (argc > MAX_TRACE_ARGS)
+ return -E2BIG;
ret = traceprobe_expand_dentry_args(argc, argv, &dbuf);
if (ret)
- goto out;
+ return ret;
/* setup a probe */
tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
@@ -1029,16 +1021,16 @@ static int __trace_kprobe_create(int argc, const char *argv[])
ret = PTR_ERR(tk);
/* This must return -ENOMEM, else there is a bug */
WARN_ON_ONCE(ret != -ENOMEM);
- goto out; /* We know tk is not allocated */
+ return ret; /* We know tk is not allocated */
}
/* parse arguments */
for (i = 0; i < argc; i++) {
trace_probe_log_set_index(i + 2);
- ctx.offset = 0;
- ret = traceprobe_parse_probe_arg(&tk->tp, i, argv[i], &ctx);
+ ctx->offset = 0;
+ ret = traceprobe_parse_probe_arg(&tk->tp, i, argv[i], ctx);
if (ret)
- goto error; /* This can be -ENOMEM */
+ return ret; /* This can be -ENOMEM */
}
/* entry handler for kretprobe */
if (is_return && tk->tp.entry_arg) {
@@ -1049,7 +1041,7 @@ static int __trace_kprobe_create(int argc, const char *argv[])
ptype = is_return ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
ret = traceprobe_set_print_fmt(&tk->tp, ptype);
if (ret < 0)
- goto error;
+ return ret;
ret = register_trace_kprobe(tk);
if (ret) {
@@ -1060,27 +1052,34 @@ static int __trace_kprobe_create(int argc, const char *argv[])
trace_probe_log_err(0, BAD_PROBE_ADDR);
else if (ret != -ENOMEM && ret != -EEXIST)
trace_probe_log_err(0, FAIL_REG_PROBE);
- goto error;
+ return ret;
}
+ /*
+ * Here, 'tk' has been registered to the list successfully,
+ * so we don't need to free it.
+ */
+ tk = NULL;
+
+ return 0;
+}
+
+static int trace_kprobe_create_cb(int argc, const char *argv[])
+{
+ struct traceprobe_parse_context ctx = { .flags = TPARG_FL_KERNEL };
+ int ret;
+
+ trace_probe_log_init("trace_kprobe", argc, argv);
+
+ ret = trace_kprobe_create_internal(argc, argv, &ctx);
-out:
traceprobe_finish_parse(&ctx);
trace_probe_log_clear();
- kfree(new_argv);
- kfree(symbol);
- kfree(dbuf);
return ret;
-
-parse_error:
- ret = -EINVAL;
-error:
- free_trace_kprobe(tk);
- goto out;
}
static int trace_kprobe_create(const char *raw_command)
{
- return trace_probe_create(raw_command, __trace_kprobe_create);
+ return trace_probe_create(raw_command, trace_kprobe_create_cb);
}
static int create_or_delete_trace_kprobe(const char *raw_command)
@@ -1896,7 +1895,7 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
bool is_return)
{
enum probe_print_type ptype;
- struct trace_kprobe *tk;
+ struct trace_kprobe *tk __free(free_trace_kprobe) = NULL;
int ret;
char *event;
@@ -1927,19 +1926,14 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
ptype = trace_kprobe_is_return(tk) ?
PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL;
- if (traceprobe_set_print_fmt(&tk->tp, ptype) < 0) {
- ret = -ENOMEM;
- goto error;
- }
+ if (traceprobe_set_print_fmt(&tk->tp, ptype) < 0)
+ return ERR_PTR(-ENOMEM);
ret = __register_trace_kprobe(tk);
if (ret < 0)
- goto error;
+ return ERR_PTR(ret);
- return trace_probe_event_call(&tk->tp);
-error:
- free_trace_kprobe(tk);
- return ERR_PTR(ret);
+ return trace_probe_event_call(&(no_free_ptr(tk)->tp));
}
void destroy_local_trace_kprobe(struct trace_event_call *event_call)
@@ -1968,13 +1962,12 @@ static __init void enable_boot_kprobe_events(void)
struct trace_kprobe *tk;
struct dyn_event *pos;
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
for_each_trace_kprobe(tk, pos) {
list_for_each_entry(file, &tr->events, list)
if (file->event_call == trace_probe_event_call(&tk->tp))
trace_event_enable_disable(file, 1, 0);
}
- mutex_unlock(&event_mutex);
}
static __init void setup_boot_kprobe_events(void)
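
Most of the trace_kprobe.c churn above comes from adopting the scope-based cleanup helpers in <linux/cleanup.h>: DEFINE_FREE() binds a destructor to a pointer class, __free() runs it automatically when the variable leaves scope, return_ptr()/no_free_ptr() disarm it on the success path, and guard(mutex) drops a lock on every return. A rough illustration of the idiom; struct foo, free_foo(), alloc_foo() and register_foo() are made-up stand-ins, not part of this patch:

#include <linux/cleanup.h>
#include <linux/err.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/string.h>

struct foo {				/* hypothetical object */
	struct list_head list;
	char *name;
};

static void free_foo(struct foo *f)
{
	if (!f)
		return;
	kfree(f->name);
	kfree(f);
}

/* Auto-free a 'struct foo *' unless ownership is explicitly handed off */
DEFINE_FREE(free_foo, struct foo *, if (!IS_ERR_OR_NULL(_T)) free_foo(_T))

static DEFINE_MUTEX(foo_mutex);
static LIST_HEAD(foo_list);

static struct foo *alloc_foo(const char *name)
{
	struct foo *f __free(free_foo) = kzalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return ERR_PTR(-ENOMEM);

	f->name = kstrdup(name, GFP_KERNEL);
	if (!f->name)
		return ERR_PTR(-ENOMEM);	/* free_foo(f) runs automatically */

	return_ptr(f);				/* success: suppress the cleanup */
}

static int register_foo(struct foo *f)
{
	struct foo *old;

	guard(mutex)(&foo_mutex);		/* unlocked on every return path */

	list_for_each_entry(old, &foo_list, list) {
		if (!strcmp(old->name, f->name))
			return -EEXIST;		/* no unlock label needed */
	}

	list_add_tail(&f->list, &foo_list);
	return 0;
}

The "tk = NULL;" assignment after register_trace_kprobe() succeeds serves the same purpose as return_ptr(): once the probe is owned by the global list, the automatic free must not run.
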
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index b9f96c77527d..f3a2722ee4c0 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1229,6 +1229,8 @@ static void trace_sched_migrate_callback(void *data, struct task_struct *p, int
}
}
+static bool monitor_enabled;
+
static int register_migration_monitor(void)
{
int ret = 0;
@@ -1237,16 +1239,25 @@ static int register_migration_monitor(void)
* Timerlat thread migration check is only required when running timerlat in user-space.
* Thus, enable callback only if timerlat is set with no workload.
*/
- if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options))
+ if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options)) {
+ if (WARN_ON_ONCE(monitor_enabled))
+ return 0;
+
ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
+ if (!ret)
+ monitor_enabled = true;
+ }
return ret;
}
static void unregister_migration_monitor(void)
{
- if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options))
- unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
+ if (!monitor_enabled)
+ return;
+
+ unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
+ monitor_enabled = false;
}
#else
static int register_migration_monitor(void)
@@ -2083,26 +2094,21 @@ static void osnoise_hotplug_workfn(struct work_struct *dummy)
{
unsigned int cpu = smp_processor_id();
- mutex_lock(&trace_types_lock);
+ guard(mutex)(&trace_types_lock);
if (!osnoise_has_registered_instances())
- goto out_unlock_trace;
+ return;
- mutex_lock(&interface_lock);
- cpus_read_lock();
+ guard(mutex)(&interface_lock);
+ guard(cpus_read_lock)();
if (!cpu_online(cpu))
- goto out_unlock;
+ return;
+
if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
- goto out_unlock;
+ return;
start_kthread(cpu);
-
-out_unlock:
- cpus_read_unlock();
- mutex_unlock(&interface_lock);
-out_unlock_trace:
- mutex_unlock(&trace_types_lock);
}
static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);
@@ -2300,31 +2306,22 @@ static ssize_t
osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
loff_t *ppos)
{
- char *mask_str;
+ char *mask_str __free(kfree) = NULL;
int len;
- mutex_lock(&interface_lock);
+ guard(mutex)(&interface_lock);
len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
mask_str = kmalloc(len, GFP_KERNEL);
- if (!mask_str) {
- count = -ENOMEM;
- goto out_unlock;
- }
+ if (!mask_str)
+ return -ENOMEM;
len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
- if (len >= count) {
- count = -EINVAL;
- goto out_free;
- }
+ if (len >= count)
+ return -EINVAL;
count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
-out_free:
- kfree(mask_str);
-out_unlock:
- mutex_unlock(&interface_lock);
-
return count;
}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index da748b7cbc4d..03d56f711ad1 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -317,10 +317,14 @@ EXPORT_SYMBOL(trace_raw_output_prep);
void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...)
{
+ struct trace_seq *s = &iter->seq;
va_list ap;
+ if (ignore_event(iter))
+ return;
+
va_start(ap, fmt);
- trace_check_vprintf(iter, trace_event_format(iter, fmt), ap);
+ trace_seq_vprintf(s, trace_event_format(iter, fmt), ap);
va_end(ap);
}
EXPORT_SYMBOL(trace_event_printf);
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 16a5e368e7b7..8f58ee1e8858 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -1409,7 +1409,7 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
struct traceprobe_parse_context *ctx)
{
struct fetch_insn *code, *tmp = NULL;
- char *type, *arg;
+ char *type, *arg __free(kfree) = NULL;
int ret, len;
len = strlen(argv);
@@ -1426,22 +1426,16 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
return -ENOMEM;
parg->comm = kstrdup(arg, GFP_KERNEL);
- if (!parg->comm) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!parg->comm)
+ return -ENOMEM;
type = parse_probe_arg_type(arg, parg, ctx);
- if (IS_ERR(type)) {
- ret = PTR_ERR(type);
- goto out;
- }
+ if (IS_ERR(type))
+ return PTR_ERR(type);
code = tmp = kcalloc(FETCH_INSN_MAX, sizeof(*code), GFP_KERNEL);
- if (!code) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!code)
+ return -ENOMEM;
code[FETCH_INSN_MAX - 1].op = FETCH_OP_END;
ctx->last_type = NULL;
@@ -1497,8 +1491,6 @@ fail:
kfree(code->data);
}
kfree(tmp);
-out:
- kfree(arg);
return ret;
}
@@ -1668,7 +1660,7 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[],
{
const struct btf_param *params = NULL;
int i, j, n, used, ret, args_idx = -1;
- const char **new_argv = NULL;
+ const char **new_argv __free(kfree) = NULL;
ret = argv_has_var_arg(argc, argv, &args_idx, ctx);
if (ret < 0)
@@ -1707,7 +1699,7 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[],
ret = sprint_nth_btf_arg(n, "", buf + used,
bufsize - used, ctx);
if (ret < 0)
- goto error;
+ return ERR_PTR(ret);
new_argv[j++] = buf + used;
used += ret + 1;
@@ -1721,25 +1713,20 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[],
n = simple_strtoul(argv[i] + 4, &type, 10);
if (type && !(*type == ':' || *type == '\0')) {
trace_probe_log_err(0, BAD_VAR);
- ret = -ENOENT;
- goto error;
+ return ERR_PTR(-ENOENT);
}
/* Note: $argN starts from $arg1 */
ret = sprint_nth_btf_arg(n - 1, type, buf + used,
bufsize - used, ctx);
if (ret < 0)
- goto error;
+ return ERR_PTR(ret);
new_argv[j++] = buf + used;
used += ret + 1;
} else
new_argv[j++] = argv[i];
}
- return new_argv;
-
-error:
- kfree(new_argv);
- return ERR_PTR(ret);
+ return_ptr(new_argv);
}
/* @buf: *buf must be equal to NULL. Caller must to free *buf */
@@ -1747,14 +1734,14 @@ int traceprobe_expand_dentry_args(int argc, const char *argv[], char **buf)
{
int i, used, ret;
const int bufsize = MAX_DENTRY_ARGS_LEN;
- char *tmpbuf = NULL;
+ char *tmpbuf __free(kfree) = NULL;
if (*buf)
return -EINVAL;
used = 0;
for (i = 0; i < argc; i++) {
- char *tmp;
+ char *tmp __free(kfree) = NULL;
char *equal;
size_t arg_len;
@@ -1769,7 +1756,7 @@ int traceprobe_expand_dentry_args(int argc, const char *argv[], char **buf)
tmp = kstrdup(argv[i], GFP_KERNEL);
if (!tmp)
- goto nomem;
+ return -ENOMEM;
equal = strchr(tmp, '=');
if (equal)
@@ -1790,18 +1777,14 @@ int traceprobe_expand_dentry_args(int argc, const char *argv[], char **buf)
offsetof(struct file, f_path.dentry),
equal ? equal + 1 : tmp);
- kfree(tmp);
if (ret >= bufsize - used)
- goto nomem;
+ return -ENOMEM;
argv[i] = tmpbuf + used;
used += ret + 1;
}
- *buf = tmpbuf;
+ *buf = no_free_ptr(tmpbuf);
return 0;
-nomem:
- kfree(tmpbuf);
- return -ENOMEM;
}
void traceprobe_finish_parse(struct traceprobe_parse_context *ctx)
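
traceprobe_expand_dentry_args() above shows the other half of the ownership-transfer idiom: when the scope-managed buffer leaves the function through an out-parameter rather than a return value, no_free_ptr() is what disarms the automatic kfree. A small hypothetical sketch of that shape (build_report() and its format string are invented for illustration):

#include <linux/cleanup.h>
#include <linux/slab.h>

static int build_report(char **out)
{
	char *buf __free(kfree) = kzalloc(PAGE_SIZE, GFP_KERNEL);

	if (!buf)
		return -ENOMEM;

	if (snprintf(buf, PAGE_SIZE, "state=%d\n", 42) >= PAGE_SIZE)
		return -ENOMEM;			/* buf is kfree()d automatically */

	*out = no_free_ptr(buf);		/* ownership moves to the caller */
	return 0;
}
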
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
index 2caf0d2afb32..f39b37fcdb3b 100644
--- a/kernel/trace/trace_probe_tmpl.h
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -232,7 +232,7 @@ array:
/* Sum up total data length for dynamic arrays (strings) */
static nokprobe_inline int
-__get_data_size(struct trace_probe *tp, struct pt_regs *regs, void *edata)
+__get_data_size(struct trace_probe *tp, void *regs, void *edata)
{
struct probe_arg *arg;
int i, len, ret = 0;
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 573b5d8e8a28..cb49f7279dc8 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -442,7 +442,7 @@ int trace_alloc_tgid_map(void)
if (tgid_map)
return 0;
- tgid_map_max = pid_max;
+ tgid_map_max = init_pid_ns.pid_max;
map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
GFP_KERNEL);
if (!map)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index d6c7f18daa15..af30586f1aea 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -113,11 +113,13 @@ static int wakeup_display_graph(struct trace_array *tr, int set)
}
static int wakeup_graph_entry(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct trace_array *tr = wakeup_trace;
struct trace_array_cpu *data;
unsigned int trace_ctx;
+ u64 *calltime;
int ret = 0;
if (ftrace_graph_ignore_func(gops, trace))
@@ -135,6 +137,12 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace,
if (!func_prolog_preempt_disable(tr, &data, &trace_ctx))
return 0;
+ calltime = fgraph_reserve_data(gops->idx, sizeof(*calltime));
+ if (!calltime)
+ return 0;
+
+ *calltime = trace_clock_local();
+
ret = __trace_graph_entry(tr, trace, trace_ctx);
atomic_dec(&data->disabled);
preempt_enable_notrace();
@@ -143,18 +151,28 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace,
}
static void wakeup_graph_return(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct trace_array *tr = wakeup_trace;
struct trace_array_cpu *data;
unsigned int trace_ctx;
+ u64 *calltime;
+ u64 rettime;
+ int size;
ftrace_graph_addr_finish(gops, trace);
if (!func_prolog_preempt_disable(tr, &data, &trace_ctx))
return;
- __trace_graph_return(tr, trace, trace_ctx);
+ rettime = trace_clock_local();
+
+ calltime = fgraph_retrieve_data(gops->idx, &size);
+ if (!calltime)
+ return;
+
+ __trace_graph_return(tr, trace, trace_ctx, *calltime, rettime);
atomic_dec(&data->disabled);
preempt_enable_notrace();
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 38b5754790c9..d88c44f1dfa5 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -774,7 +774,8 @@ struct fgraph_fixture {
};
static __init int store_entry(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct fgraph_fixture *fixture = container_of(gops, struct fgraph_fixture, gops);
const char *type = fixture->store_type_name;
@@ -807,7 +808,8 @@ static __init int store_entry(struct ftrace_graph_ent *trace,
}
static __init void store_return(struct ftrace_graph_ret *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
struct fgraph_fixture *fixture = container_of(gops, struct fgraph_fixture, gops);
const char *type = fixture->store_type_name;
@@ -1025,7 +1027,8 @@ static unsigned int graph_hang_thresh;
/* Wrap the real function entry probe to avoid possible hanging */
static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops)
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs)
{
/* This is harmlessly racy, we want to approximately detect a hang */
if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) {
@@ -1039,7 +1042,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace,
return 0;
}
- return trace_graph_entry(trace, gops);
+ return trace_graph_entry(trace, gops, fregs);
}
static struct fgraph_ops fgraph_ops __initdata = {
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 7f9572a37333..14c6f272c4d8 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -520,20 +520,18 @@ stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer,
int was_enabled;
int ret;
- mutex_lock(&stack_sysctl_mutex);
+ guard(mutex)(&stack_sysctl_mutex);
was_enabled = !!stack_tracer_enabled;
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (ret || !write || (was_enabled == !!stack_tracer_enabled))
- goto out;
+ return ret;
if (stack_tracer_enabled)
register_ftrace_function(&trace_ops);
else
unregister_ftrace_function(&trace_ops);
- out:
- mutex_unlock(&stack_sysctl_mutex);
return ret;
}
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index bb247beec447..b3b5586f104d 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -128,7 +128,7 @@ static int stat_seq_init(struct stat_session *session)
int ret = 0;
int i;
- mutex_lock(&session->stat_mutex);
+ guard(mutex)(&session->stat_mutex);
__reset_stat_session(session);
if (!ts->stat_cmp)
@@ -136,11 +136,11 @@ static int stat_seq_init(struct stat_session *session)
stat = ts->stat_start(ts);
if (!stat)
- goto exit;
+ return 0;
ret = insert_stat(root, stat, ts->stat_cmp);
if (ret)
- goto exit;
+ return ret;
/*
* Iterate over the tracer stat entries and store them in an rbtree.
@@ -157,13 +157,10 @@ static int stat_seq_init(struct stat_session *session)
goto exit_free_rbtree;
}
-exit:
- mutex_unlock(&session->stat_mutex);
return ret;
exit_free_rbtree:
__reset_stat_session(session);
- mutex_unlock(&session->stat_mutex);
return ret;
}
@@ -308,7 +305,7 @@ static int init_stat_file(struct stat_session *session)
int register_stat_tracer(struct tracer_stat *trace)
{
struct stat_session *session, *node;
- int ret = -EINVAL;
+ int ret;
if (!trace)
return -EINVAL;
@@ -316,18 +313,18 @@ int register_stat_tracer(struct tracer_stat *trace)
if (!trace->stat_start || !trace->stat_next || !trace->stat_show)
return -EINVAL;
+ guard(mutex)(&all_stat_sessions_mutex);
+
/* Already registered? */
- mutex_lock(&all_stat_sessions_mutex);
list_for_each_entry(node, &all_stat_sessions, session_list) {
if (node->ts == trace)
- goto out;
+ return -EINVAL;
}
- ret = -ENOMEM;
/* Init the session */
session = kzalloc(sizeof(*session), GFP_KERNEL);
if (!session)
- goto out;
+ return -ENOMEM;
session->ts = trace;
INIT_LIST_HEAD(&session->session_list);
@@ -336,16 +333,13 @@ int register_stat_tracer(struct tracer_stat *trace)
ret = init_stat_file(session);
if (ret) {
destroy_session(session);
- goto out;
+ return ret;
}
- ret = 0;
/* Register */
list_add_tail(&session->session_list, &all_stat_sessions);
- out:
- mutex_unlock(&all_stat_sessions_mutex);
- return ret;
+ return 0;
}
void unregister_stat_tracer(struct tracer_stat *trace)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 4875e7f5de3d..ccc762fbb69c 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -498,11 +498,11 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
struct trace_uprobe *old_tu;
int ret;
- mutex_lock(&event_mutex);
+ guard(mutex)(&event_mutex);
ret = validate_ref_ctr_offset(tu);
if (ret)
- goto end;
+ return ret;
/* register as an event */
old_tu = find_probe_event(trace_probe_name(&tu->tp),
@@ -511,11 +511,9 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
if (is_ret_probe(tu) != is_ret_probe(old_tu)) {
trace_probe_log_set_index(0);
trace_probe_log_err(0, DIFF_PROBE_TYPE);
- ret = -EEXIST;
- } else {
- ret = append_trace_uprobe(tu, old_tu);
+ return -EEXIST;
}
- goto end;
+ return append_trace_uprobe(tu, old_tu);
}
ret = register_uprobe_event(tu);
@@ -525,14 +523,11 @@ static int register_trace_uprobe(struct trace_uprobe *tu)
trace_probe_log_err(0, EVENT_EXIST);
} else
pr_warn("Failed to register probe event(%d)\n", ret);
- goto end;
+ return ret;
}
dyn_event_add(&tu->devent, trace_probe_event_call(&tu->tp));
-end:
- mutex_unlock(&event_mutex);
-
return ret;
}