From 02a392a0439ffdc62b4d8f17bd18d68736b166a9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 25 Nov 2015 12:50:47 -0500 Subject: ftrace: Add new type to distinguish what kind of ftrace_bug() The ftrace function hook utility has several internal checks to make sure that whatever it modifies is exactly what it expects to be modifying. This is essential as modifying running code can be extremely dangerous to the system. When an anomaly is detected, ftrace_bug() is called which sends a splat to the console and disables function tracing. There's some extra information that is printed to help diagnose the issue. One thing that is missing though is output of what ftrace was doing at the time of the crash. Was it updating a call site or perhaps converting a call site to a nop? A new global enum variable is created to state what ftrace was doing at the time of the anomaly, and this is reported in ftrace_bug(). Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux/ftrace.h') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index eae6548efbf0..870c8eea38cd 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -296,6 +296,15 @@ int ftrace_arch_code_modify_post_process(void); struct dyn_ftrace; +enum ftrace_bug_type { + FTRACE_BUG_UNKNOWN, + FTRACE_BUG_INIT, + FTRACE_BUG_NOP, + FTRACE_BUG_CALL, + FTRACE_BUG_UPDATE, +}; +extern enum ftrace_bug_type ftrace_bug_type; + void ftrace_bug(int err, struct dyn_ftrace *rec); struct seq_file; -- cgit v1.2.3 From b05086c77a162dd8ef79606cb4723f1fc1448bb1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 25 Nov 2015 14:13:11 -0500 Subject: ftrace: Add variable ftrace_expected for archs to show expected code When an anomaly is found while modifying function code, ftrace_bug() is called which disables the function tracing infrastructure and reports information about what failed. If the code that is to be replaced does not match what is expected, then actual code is shown. Currently there is no arch generic way to show what was expected. Add a new variable pointer calld ftrace_expected that the arch code can set to point to what it expected so that ftrace_bug() can report the actual text as well as the text that was expected to be there. Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 9 +++++++++ include/linux/ftrace.h | 6 ++++++ kernel/trace/ftrace.c | 9 +++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) (limited to 'include/linux/ftrace.h') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 311bcf338f07..909da012406d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -105,6 +105,8 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, { unsigned char replaced[MCOUNT_INSN_SIZE]; + ftrace_expected = old_code; + /* * Note: Due to modules and __init, code can * disappear and change, we need to protect against faulting @@ -154,6 +156,8 @@ int ftrace_make_nop(struct module *mod, if (addr == MCOUNT_ADDR) return ftrace_modify_code_direct(rec->ip, old, new); + ftrace_expected = NULL; + /* Normal cases use add_brk_on_nop */ WARN_ONCE(1, "invalid use of ftrace_make_nop"); return -EINVAL; @@ -220,6 +224,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { WARN_ON(1); + ftrace_expected = NULL; return -EINVAL; } @@ -314,6 +319,8 @@ static int add_break(unsigned long ip, const char *old) if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; + ftrace_expected = old; + /* Make sure it is what we expect it to be */ if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) return -EINVAL; @@ -413,6 +420,8 @@ static int remove_breakpoint(struct dyn_ftrace *rec) ftrace_addr = ftrace_get_addr_curr(rec); nop = ftrace_call_replace(ip, ftrace_addr); + ftrace_expected = nop; + if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) return -EINVAL; } diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 870c8eea38cd..134f8d45b35b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -305,6 +305,12 @@ enum ftrace_bug_type { }; extern enum ftrace_bug_type ftrace_bug_type; +/* + * Archs can set this to point to a variable that holds the value that was + * expected at the call site before calling ftrace_bug(). + */ +extern const void *ftrace_expected; + void ftrace_bug(int err, struct dyn_ftrace *rec); struct seq_file; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b95efcac9dfe..7870c03b4c4d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1940,7 +1940,7 @@ static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash); } -static void print_ip_ins(const char *fmt, unsigned char *p) +static void print_ip_ins(const char *fmt, const unsigned char *p) { int i; @@ -1954,6 +1954,7 @@ static struct ftrace_ops * ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); enum ftrace_bug_type ftrace_bug_type; +const void *ftrace_expected; static void print_bug_type(void) { @@ -2001,8 +2002,12 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) FTRACE_WARN_ON_ONCE(1); pr_info("ftrace failed to modify "); print_ip_sym(ip); - print_ip_ins(" actual: ", (unsigned char *)ip); + print_ip_ins(" actual: ", (unsigned char *)ip); pr_cont("\n"); + if (ftrace_expected) { + print_ip_ins(" expected: ", ftrace_expected); + pr_cont("\n"); + } break; case -EPERM: FTRACE_WARN_ON_ONCE(1); -- cgit v1.2.3 From ba27f2bc731135a0396f3968bdddb54f3bc72e64 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 30 Nov 2015 17:23:39 -0500 Subject: ftrace: Remove use of control list and ops Currently perf has its own list function within the ftrace infrastructure that seems to be used only to allow for it to have per-cpu disabling as well as a check to make sure that it's not called while RCU is not watching. It uses something called the "control_ops" which is used to iterate over ops under it with the control_list_func(). The problem is that this control_ops and control_list_func unnecessarily complicates the code. By replacing FTRACE_OPS_FL_CONTROL with two new flags (FTRACE_OPS_FL_RCU and FTRACE_OPS_FL_PER_CPU) we can remove all the code that is special with the control ops and add the needed checks within the generic ftrace_list_func(). Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 35 +++++------ kernel/trace/ftrace.c | 126 ++++++++++++---------------------------- kernel/trace/trace.h | 2 - kernel/trace/trace_event_perf.c | 2 +- 4 files changed, 57 insertions(+), 108 deletions(-) (limited to 'include/linux/ftrace.h') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 134f8d45b35b..4736a826baf5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -76,8 +76,8 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * ENABLED - set/unset when ftrace_ops is registered/unregistered * DYNAMIC - set when ftrace_ops is registered to denote dynamically * allocated ftrace_ops which need special care - * CONTROL - set manualy by ftrace_ops user to denote the ftrace_ops - * could be controled by following calls: + * PER_CPU - set manualy by ftrace_ops user to denote the ftrace_ops + * could be controlled by following calls: * ftrace_function_local_enable * ftrace_function_local_disable * SAVE_REGS - The ftrace_ops wants regs saved at each function called @@ -121,7 +121,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); enum { FTRACE_OPS_FL_ENABLED = 1 << 0, FTRACE_OPS_FL_DYNAMIC = 1 << 1, - FTRACE_OPS_FL_CONTROL = 1 << 2, + FTRACE_OPS_FL_PER_CPU = 1 << 2, FTRACE_OPS_FL_SAVE_REGS = 1 << 3, FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 4, FTRACE_OPS_FL_RECURSION_SAFE = 1 << 5, @@ -134,6 +134,7 @@ enum { FTRACE_OPS_FL_ALLOC_TRAMP = 1 << 12, FTRACE_OPS_FL_IPMODIFY = 1 << 13, FTRACE_OPS_FL_PID = 1 << 14, + FTRACE_OPS_FL_RCU = 1 << 15, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -146,11 +147,11 @@ struct ftrace_ops_hash { #endif /* - * Note, ftrace_ops can be referenced outside of RCU protection. - * (Although, for perf, the control ops prevent that). If ftrace_ops is - * allocated and not part of kernel core data, the unregistering of it will - * perform a scheduling on all CPUs to make sure that there are no more users. - * Depending on the load of the system that may take a bit of time. + * Note, ftrace_ops can be referenced outside of RCU protection, unless + * the RCU flag is set. If ftrace_ops is allocated and not part of kernel + * core data, the unregistering of it will perform a scheduling on all CPUs + * to make sure that there are no more users. Depending on the load of the + * system that may take a bit of time. * * Any private data added must also take care not to be freed and if private * data is added to a ftrace_ops that is in core code, the user of the @@ -196,34 +197,34 @@ int unregister_ftrace_function(struct ftrace_ops *ops); void clear_ftrace_function(void); /** - * ftrace_function_local_enable - enable controlled ftrace_ops on current cpu + * ftrace_function_local_enable - enable ftrace_ops on current cpu * * This function enables tracing on current cpu by decreasing * the per cpu control variable. * It must be called with preemption disabled and only on ftrace_ops - * registered with FTRACE_OPS_FL_CONTROL. If called without preemption + * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. */ static inline void ftrace_function_local_enable(struct ftrace_ops *ops) { - if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL))) + if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) return; (*this_cpu_ptr(ops->disabled))--; } /** - * ftrace_function_local_disable - enable controlled ftrace_ops on current cpu + * ftrace_function_local_disable - disable ftrace_ops on current cpu * - * This function enables tracing on current cpu by decreasing + * This function disables tracing on current cpu by increasing * the per cpu control variable. * It must be called with preemption disabled and only on ftrace_ops - * registered with FTRACE_OPS_FL_CONTROL. If called without preemption + * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. */ static inline void ftrace_function_local_disable(struct ftrace_ops *ops) { - if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL))) + if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) return; (*this_cpu_ptr(ops->disabled))++; @@ -235,12 +236,12 @@ static inline void ftrace_function_local_disable(struct ftrace_ops *ops) * * This function returns value of ftrace_ops::disabled on current cpu. * It must be called with preemption disabled and only on ftrace_ops - * registered with FTRACE_OPS_FL_CONTROL. If called without preemption + * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. */ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) { - WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)); + WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU)); return *this_cpu_ptr(ops->disabled); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bf7bebcdad82..bc7f4eb6b4b0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -62,8 +62,6 @@ #define FTRACE_HASH_DEFAULT_BITS 10 #define FTRACE_HASH_MAX_BITS 12 -#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL) - #ifdef CONFIG_DYNAMIC_FTRACE #define INIT_OPS_HASH(opsname) \ .func_hash = &opsname.local_hash, \ @@ -113,11 +111,9 @@ static int ftrace_disabled __read_mostly; static DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; -static struct ftrace_ops control_ops; static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs); @@ -203,7 +199,7 @@ void clear_ftrace_function(void) ftrace_trace_function = ftrace_stub; } -static void control_ops_disable_all(struct ftrace_ops *ops) +static void per_cpu_ops_disable_all(struct ftrace_ops *ops) { int cpu; @@ -211,16 +207,19 @@ static void control_ops_disable_all(struct ftrace_ops *ops) *per_cpu_ptr(ops->disabled, cpu) = 1; } -static int control_ops_alloc(struct ftrace_ops *ops) +static int per_cpu_ops_alloc(struct ftrace_ops *ops) { int __percpu *disabled; + if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) + return -EINVAL; + disabled = alloc_percpu(int); if (!disabled) return -ENOMEM; ops->disabled = disabled; - control_ops_disable_all(ops); + per_cpu_ops_disable_all(ops); return 0; } @@ -256,10 +255,11 @@ static inline void update_function_graph_func(void) { } static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops) { /* - * If this is a dynamic ops or we force list func, + * If this is a dynamic, RCU, or per CPU ops, or we force list func, * then it needs to call the list anyway. */ - if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC) + if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU | + FTRACE_OPS_FL_RCU) || FTRACE_FORCE_LIST_FUNC) return ftrace_ops_list_func; return ftrace_ops_get_func(ops); @@ -383,26 +383,6 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) return 0; } -static void add_ftrace_list_ops(struct ftrace_ops **list, - struct ftrace_ops *main_ops, - struct ftrace_ops *ops) -{ - int first = *list == &ftrace_list_end; - add_ftrace_ops(list, ops); - if (first) - add_ftrace_ops(&ftrace_ops_list, main_ops); -} - -static int remove_ftrace_list_ops(struct ftrace_ops **list, - struct ftrace_ops *main_ops, - struct ftrace_ops *ops) -{ - int ret = remove_ftrace_ops(list, ops); - if (!ret && *list == &ftrace_list_end) - ret = remove_ftrace_ops(&ftrace_ops_list, main_ops); - return ret; -} - static void ftrace_update_trampoline(struct ftrace_ops *ops); static int __register_ftrace_function(struct ftrace_ops *ops) @@ -430,14 +410,12 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; - if (ops->flags & FTRACE_OPS_FL_CONTROL) { - if (control_ops_alloc(ops)) + if (ops->flags & FTRACE_OPS_FL_PER_CPU) { + if (per_cpu_ops_alloc(ops)) return -ENOMEM; - add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); - /* The control_ops needs the trampoline update */ - ops = &control_ops; - } else - add_ftrace_ops(&ftrace_ops_list, ops); + } + + add_ftrace_ops(&ftrace_ops_list, ops); /* Always save the function, and reset at unregistering */ ops->saved_func = ops->func; @@ -460,11 +438,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) return -EBUSY; - if (ops->flags & FTRACE_OPS_FL_CONTROL) { - ret = remove_ftrace_list_ops(&ftrace_control_list, - &control_ops, ops); - } else - ret = remove_ftrace_ops(&ftrace_ops_list, ops); + ret = remove_ftrace_ops(&ftrace_ops_list, ops); if (ret < 0) return ret; @@ -2630,7 +2604,7 @@ void __weak arch_ftrace_trampoline_free(struct ftrace_ops *ops) { } -static void control_ops_free(struct ftrace_ops *ops) +static void per_cpu_ops_free(struct ftrace_ops *ops) { free_percpu(ops->disabled); } @@ -2731,13 +2705,13 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) if (!command || !ftrace_enabled) { /* - * If these are control ops, they still need their + * If these are per_cpu ops, they still need their * per_cpu field freed. Since, function tracing is * not currently active, we can just free them * without synchronizing all CPUs. */ - if (ops->flags & FTRACE_OPS_FL_CONTROL) - control_ops_free(ops); + if (ops->flags & FTRACE_OPS_FL_PER_CPU) + per_cpu_ops_free(ops); return 0; } @@ -2778,7 +2752,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) /* * Dynamic ops may be freed, we must make sure that all * callers are done before leaving this function. - * The same goes for freeing the per_cpu data of the control + * The same goes for freeing the per_cpu data of the per_cpu * ops. * * Again, normal synchronize_sched() is not good enough. @@ -2789,13 +2763,13 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * infrastructure to do the synchronization, thus we must do it * ourselves. */ - if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) { + if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) { schedule_on_each_cpu(ftrace_sync); arch_ftrace_trampoline_free(ops); - if (ops->flags & FTRACE_OPS_FL_CONTROL) - control_ops_free(ops); + if (ops->flags & FTRACE_OPS_FL_PER_CPU) + per_cpu_ops_free(ops); } return 0; @@ -5185,44 +5159,6 @@ void ftrace_reset_array_ops(struct trace_array *tr) tr->ops->func = ftrace_stub; } -static void -ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct pt_regs *regs) -{ - if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT))) - return; - - /* - * Some of the ops may be dynamically allocated, - * they must be freed after a synchronize_sched(). - */ - preempt_disable_notrace(); - trace_recursion_set(TRACE_CONTROL_BIT); - - /* - * Control funcs (perf) uses RCU. Only trace if - * RCU is currently active. - */ - if (!rcu_is_watching()) - goto out; - - do_for_each_ftrace_op(op, ftrace_control_list) { - if (!(op->flags & FTRACE_OPS_FL_STUB) && - !ftrace_function_local_disabled(op) && - ftrace_ops_test(op, ip, regs)) - op->func(ip, parent_ip, op, regs); - } while_for_each_ftrace_op(op); - out: - trace_recursion_clear(TRACE_CONTROL_BIT); - preempt_enable_notrace(); -} - -static struct ftrace_ops control_ops = { - .func = ftrace_ops_control_func, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, - INIT_OPS_HASH(control_ops) -}; - static inline void __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ignored, struct pt_regs *regs) @@ -5239,8 +5175,22 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, * they must be freed after a synchronize_sched(). */ preempt_disable_notrace(); + do_for_each_ftrace_op(op, ftrace_ops_list) { - if (ftrace_ops_test(op, ip, regs)) { + /* + * Check the following for each ops before calling their func: + * if RCU flag is set, then rcu_is_watching() must be true + * if PER_CPU is set, then ftrace_function_local_disable() + * must be false + * Otherwise test if the ip matches the ops filter + * + * If any of the above fails then the op->func() is not executed. + */ + if ((!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) && + (!(op->flags & FTRACE_OPS_FL_PER_CPU) || + !ftrace_function_local_disabled(op)) && + ftrace_ops_test(op, ip, regs)) { + if (FTRACE_WARN_ON(!op->func)) { pr_warn("op=%p %pS\n", op, op); goto out; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 919d9d07686f..d3980b87bf04 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -467,8 +467,6 @@ enum { TRACE_INTERNAL_IRQ_BIT, TRACE_INTERNAL_SIRQ_BIT, - TRACE_CONTROL_BIT, - TRACE_BRANCH_BIT, /* * Abuse of the trace_recursion. diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index abfc903e741e..2649c85cd162 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -334,7 +334,7 @@ static int perf_ftrace_function_register(struct perf_event *event) { struct ftrace_ops *ops = &event->ftrace_ops; - ops->flags |= FTRACE_OPS_FL_CONTROL; + ops->flags |= FTRACE_OPS_FL_PER_CPU | FTRACE_OPS_FL_RCU; ops->func = perf_ftrace_function_call; return register_ftrace_function(ops); } -- cgit v1.2.3 From b7ffffbb46f205e7727a18bcc7a46c3c2b534f7c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 7 Jan 2016 15:40:01 -0500 Subject: ftrace: Add infrastructure for delayed enabling of module functions Qiu Peiyang pointed out that there's a race when enabling function tracing and loading a module. In order to make the modifications of converting nops in the prologue of functions into callbacks, the text needs to be converted from read-only to read-write. When enabling function tracing, the text permission is updated, the functions are modified, and then they are put back. When loading a module, the updates to convert function calls to mcount is done before the module text is set to read-only. But after it is done, the module text is visible by the function tracer. Thus we have the following race: CPU 0 CPU 1 ----- ----- start function tracing set text to read-write load_module add functions to ftrace set module text read-only update all functions to callbacks modify module functions too < Can't it's read-only > When this happens, ftrace detects the issue and disables itself till the next reboot. To fix this, a new DISABLED flag is added for ftrace records, which all module functions get when they are added. Then later, after the module code is all set, the records will have the DISABLED flag cleared, and they will be enabled if any callback wants all functions to be traced. Note, this doesn't add the delay to later. It simply changes the ftrace_module_init() to do both the setting of DISABLED records, and then immediately calls the enable code. This helps with testing this new code as it has the same behavior as previously. Another change will come after this to have the ftrace_module_enable() called after the text is set to read-only. Cc: Qiu Peiyang Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 6 +- kernel/trace/ftrace.c | 161 ++++++++++++++++++++++++++++++++----------------- 2 files changed, 110 insertions(+), 57 deletions(-) (limited to 'include/linux/ftrace.h') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4736a826baf5..660e7c698f3b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -357,6 +357,7 @@ bool is_ftrace_trampoline(unsigned long addr); * REGS - the record wants the function to save regs * REGS_EN - the function is set up to save regs. * IPMODIFY - the record allows for the IP address to be changed. + * DISABLED - the record is not ready to be touched yet * * When a new ftrace_ops is registered and wants a function to save * pt_regs, the rec->flag REGS is set. When the function has been @@ -371,10 +372,11 @@ enum { FTRACE_FL_TRAMP = (1UL << 28), FTRACE_FL_TRAMP_EN = (1UL << 27), FTRACE_FL_IPMODIFY = (1UL << 26), + FTRACE_FL_DISABLED = (1UL << 25), }; -#define FTRACE_REF_MAX_SHIFT 26 -#define FTRACE_FL_BITS 6 +#define FTRACE_REF_MAX_SHIFT 25 +#define FTRACE_FL_BITS 7 #define FTRACE_FL_MASKED_BITS ((1UL << FTRACE_FL_BITS) - 1) #define FTRACE_FL_MASK (FTRACE_FL_MASKED_BITS << FTRACE_REF_MAX_SHIFT) #define FTRACE_REF_MAX ((1UL << FTRACE_REF_MAX_SHIFT) - 1) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0f7ee341f89f..23683b06b18c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1658,6 +1658,9 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, int in_hash = 0; int match = 0; + if (rec->flags & FTRACE_FL_DISABLED) + continue; + if (all) { /* * Only the filter_hash affects all records. @@ -2023,6 +2026,9 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) ftrace_bug_type = FTRACE_BUG_UNKNOWN; + if (rec->flags & FTRACE_FL_DISABLED) + return FTRACE_UPDATE_IGNORE; + /* * If we are updating calls: * @@ -2833,9 +2839,9 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) return 0; - /* If ops traces all mods, we already accounted for it */ + /* If ops traces all then it includes this function */ if (ops_traces_mod(ops)) - return 0; + return 1; /* The function must be in the filter */ if (!ftrace_hash_empty(ops->func_hash->filter_hash) && @@ -2849,64 +2855,41 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec) return 1; } -static int referenced_filters(struct dyn_ftrace *rec) -{ - struct ftrace_ops *ops; - int cnt = 0; - - for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { - if (ops_references_rec(ops, rec)) - cnt++; - } - - return cnt; -} - static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) { struct ftrace_page *pg; struct dyn_ftrace *p; cycle_t start, stop; unsigned long update_cnt = 0; - unsigned long ref = 0; - bool test = false; + unsigned long rec_flags = 0; int i; + start = ftrace_now(raw_smp_processor_id()); + /* - * When adding a module, we need to check if tracers are - * currently enabled and if they are set to trace all functions. - * If they are, we need to enable the module functions as well - * as update the reference counts for those function records. + * When a module is loaded, this function is called to convert + * the calls to mcount in its text to nops, and also to create + * an entry in the ftrace data. Now, if ftrace is activated + * after this call, but before the module sets its text to + * read-only, the modification of enabling ftrace can fail if + * the read-only is done while ftrace is converting the calls. + * To prevent this, the module's records are set as disabled + * and will be enabled after the call to set the module's text + * to read-only. */ - if (mod) { - struct ftrace_ops *ops; - - for (ops = ftrace_ops_list; - ops != &ftrace_list_end; ops = ops->next) { - if (ops->flags & FTRACE_OPS_FL_ENABLED) { - if (ops_traces_mod(ops)) - ref++; - else - test = true; - } - } - } - - start = ftrace_now(raw_smp_processor_id()); + if (mod) + rec_flags |= FTRACE_FL_DISABLED; for (pg = new_pgs; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { - int cnt = ref; /* If something went wrong, bail without enabling anything */ if (unlikely(ftrace_disabled)) return -1; p = &pg->records[i]; - if (test) - cnt += referenced_filters(p); - p->flags = cnt; + p->flags = rec_flags; /* * Do the initial record conversion from mcount jump @@ -2916,21 +2899,6 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) break; update_cnt++; - - /* - * If the tracing is enabled, go ahead and enable the record. - * - * The reason not to enable the record immediatelly is the - * inherent check of ftrace_make_nop/ftrace_make_call for - * correct previous instructions. Making first the NOP - * conversion puts the module to the correct state, thus - * passing the ftrace_make_call check. - */ - if (ftrace_start_up && cnt) { - int failed = __ftrace_replace_code(p, 1); - if (failed) - ftrace_bug(failed, p); - } } } @@ -4938,6 +4906,19 @@ static int ftrace_process_locs(struct module *mod, #define next_to_ftrace_page(p) container_of(p, struct ftrace_page, next) +static int referenced_filters(struct dyn_ftrace *rec) +{ + struct ftrace_ops *ops; + int cnt = 0; + + for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) { + if (ops_references_rec(ops, rec)) + cnt++; + } + + return cnt; +} + void ftrace_release_mod(struct module *mod) { struct dyn_ftrace *rec; @@ -4980,6 +4961,75 @@ void ftrace_release_mod(struct module *mod) mutex_unlock(&ftrace_lock); } +static void ftrace_module_enable(struct module *mod) +{ + struct dyn_ftrace *rec; + struct ftrace_page *pg; + + mutex_lock(&ftrace_lock); + + if (ftrace_disabled) + goto out_unlock; + + /* + * If the tracing is enabled, go ahead and enable the record. + * + * The reason not to enable the record immediatelly is the + * inherent check of ftrace_make_nop/ftrace_make_call for + * correct previous instructions. Making first the NOP + * conversion puts the module to the correct state, thus + * passing the ftrace_make_call check. + * + * We also delay this to after the module code already set the + * text to read-only, as we now need to set it back to read-write + * so that we can modify the text. + */ + if (ftrace_start_up) + ftrace_arch_code_modify_prepare(); + + do_for_each_ftrace_rec(pg, rec) { + int cnt; + /* + * do_for_each_ftrace_rec() is a double loop. + * module text shares the pg. If a record is + * not part of this module, then skip this pg, + * which the "break" will do. + */ + if (!within_module_core(rec->ip, mod)) + break; + + cnt = 0; + + /* + * When adding a module, we need to check if tracers are + * currently enabled and if they are, and can trace this record, + * we need to enable the module functions as well as update the + * reference counts for those function records. + */ + if (ftrace_start_up) + cnt += referenced_filters(rec); + + /* This clears FTRACE_FL_DISABLED */ + rec->flags = cnt; + + if (ftrace_start_up && cnt) { + int failed = __ftrace_replace_code(rec, 1); + if (failed) { + ftrace_bug(failed, rec); + goto out_loop; + } + } + + } while_for_each_ftrace_rec(); + + out_loop: + if (ftrace_start_up) + ftrace_arch_code_modify_post_process(); + + out_unlock: + mutex_unlock(&ftrace_lock); +} + void ftrace_module_init(struct module *mod) { if (ftrace_disabled || !mod->num_ftrace_callsites) @@ -4987,6 +5037,7 @@ void ftrace_module_init(struct module *mod) ftrace_process_locs(mod, mod->ftrace_callsites, mod->ftrace_callsites + mod->num_ftrace_callsites); + ftrace_module_enable(mod); } static int ftrace_module_notify_exit(struct notifier_block *self, -- cgit v1.2.3