summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-01-22 02:15:28 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2025-01-22 02:15:28 +0300
commit2e04247f7cce8b8cd8381a29078701691fec684d (patch)
treeae54ba9fef156271e6cf22c447bd027542e6828a /include
parent0074adea39b64d717407b913fd405ac586ee45ca (diff)
parent31f505dc70331243fbb54af868c14bb5f44a15bc (diff)
downloadlinux-2e04247f7cce8b8cd8381a29078701691fec684d.tar.xz
Merge tag 'ftrace-v6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull ftrace updates from Steven Rostedt: - Have fprobes built on top of function graph infrastructure The fprobe logic is an optimized kprobe that uses ftrace to attach to functions when a probe is needed at the start or end of the function. The fprobe and kretprobe logic implements a similar method as the function graph tracer to trace the end of the function. That is to hijack the return address and jump to a trampoline to do the trace when the function exits. To do this, a shadow stack needs to be created to store the original return address. Fprobes and function graph do this slightly differently. Fprobes (and kretprobes) has slots per callsite that are reserved to save the return address. This is fine when just a few points are traced. But users of fprobes, such as BPF programs, are starting to add many more locations, and this method does not scale. The function graph tracer was created to trace all functions in the kernel. In order to do this, when function graph tracing is started, every task gets its own shadow stack to hold the return address that is going to be traced. The function graph tracer has been updated to allow multiple users to use its infrastructure. Now have fprobes be one of those users. This will also allow for the fprobe and kretprobe methods to trace the return address to become obsolete. With new technologies like CFI that need to know about these methods of hijacking the return address, going toward a solution that has only one method of doing this will make the kernel less complex. - Cleanup with guard() and free() helpers There were several places in the code that had a lot of "goto out" in the error paths to either unlock a lock or free some memory that was allocated. But this is error prone. Convert the code over to use the guard() and free() helpers that let the compiler unlock locks or free memory when the function exits. - Remove disabling of interrupts in the function graph tracer When function graph tracer was first introduced, it could race with interrupts and NMIs. To prevent that race, it would disable interrupts and not trace NMIs. But the code has changed to allow NMIs and also interrupts. This change was done a long time ago, but the disabling of interrupts was never removed. Remove the disabling of interrupts in the function graph tracer is it is not needed. This greatly improves its performance. - Allow the :mod: command to enable tracing module functions on the kernel command line. The function tracer already has a way to enable functions to be traced in modules by writing ":mod:<module>" into set_ftrace_filter. That will enable either all the functions for the module if it is loaded, or if it is not, it will cache that command, and when the module is loaded that matches <module>, its functions will be enabled. This also allows init functions to be traced. But currently events do not have that feature. Because enabling function tracing can be done very early at boot up (before scheduling is enabled), the commands that can be done when function tracing is started is limited. Having the ":mod:" command to trace module functions as they are loaded is very useful. Update the kernel command line function filtering to allow it. * tag 'ftrace-v6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: (26 commits) ftrace: Implement :mod: cache filtering on kernel command line tracing: Adopt __free() and guard() for trace_fprobe.c bpf: Use ftrace_get_symaddr() for kprobe_multi probes ftrace: Add ftrace_get_symaddr to convert fentry_ip to symaddr Documentation: probes: Update fprobe on function-graph tracer selftests/ftrace: Add a test case for repeating register/unregister fprobe selftests: ftrace: Remove obsolate maxactive syntax check tracing/fprobe: Remove nr_maxactive from fprobe fprobe: Add fprobe_header encoding feature fprobe: Rewrite fprobe on function-graph tracer s390/tracing: Enable HAVE_FTRACE_GRAPH_FUNC ftrace: Add CONFIG_HAVE_FTRACE_GRAPH_FUNC bpf: Enable kprobe_multi feature if CONFIG_FPROBE is enabled tracing/fprobe: Enable fprobe events with CONFIG_DYNAMIC_FTRACE_WITH_ARGS tracing: Add ftrace_fill_perf_regs() for perf event tracing: Add ftrace_partial_regs() for converting ftrace_regs to pt_regs fprobe: Use ftrace_regs in fprobe exit handler fprobe: Use ftrace_regs in fprobe entry handler fgraph: Pass ftrace_regs to retfunc fgraph: Replace fgraph_ret_regs with ftrace_regs ...
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/fprobe.h46
-rw-r--r--include/linux/fprobe.h62
-rw-r--r--include/linux/ftrace.h116
-rw-r--r--include/linux/ftrace_regs.h2
4 files changed, 200 insertions, 26 deletions
diff --git a/include/asm-generic/fprobe.h b/include/asm-generic/fprobe.h
new file mode 100644
index 000000000000..8659a4dc6eb6
--- /dev/null
+++ b/include/asm-generic/fprobe.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Generic arch dependent fprobe macros.
+ */
+#ifndef __ASM_GENERIC_FPROBE_H__
+#define __ASM_GENERIC_FPROBE_H__
+
+#include <linux/bits.h>
+
+#ifdef CONFIG_64BIT
+/*
+ * Encoding the size and the address of fprobe into one 64bit entry.
+ * The 32bit architectures should use 2 entries to store those info.
+ */
+
+#define ARCH_DEFINE_ENCODE_FPROBE_HEADER
+
+#define FPROBE_HEADER_MSB_SIZE_SHIFT (BITS_PER_LONG - FPROBE_DATA_SIZE_BITS)
+#define FPROBE_HEADER_MSB_MASK \
+ GENMASK(FPROBE_HEADER_MSB_SIZE_SHIFT - 1, 0)
+
+/*
+ * By default, this expects the MSBs in the address of kprobe is 0xf.
+ * If any arch needs another fixed pattern (e.g. s390 is zero filled),
+ * override this.
+ */
+#define FPROBE_HEADER_MSB_PATTERN \
+ GENMASK(BITS_PER_LONG - 1, FPROBE_HEADER_MSB_SIZE_SHIFT)
+
+#define arch_fprobe_header_encodable(fp) \
+ (((unsigned long)(fp) & ~FPROBE_HEADER_MSB_MASK) == \
+ FPROBE_HEADER_MSB_PATTERN)
+
+#define arch_encode_fprobe_header(fp, size) \
+ (((unsigned long)(fp) & FPROBE_HEADER_MSB_MASK) | \
+ ((unsigned long)(size) << FPROBE_HEADER_MSB_SIZE_SHIFT))
+
+#define arch_decode_fprobe_header_size(val) \
+ ((unsigned long)(val) >> FPROBE_HEADER_MSB_SIZE_SHIFT)
+
+#define arch_decode_fprobe_header_fp(val) \
+ ((struct fprobe *)(((unsigned long)(val) & FPROBE_HEADER_MSB_MASK) | \
+ FPROBE_HEADER_MSB_PATTERN))
+#endif /* CONFIG_64BIT */
+
+#endif /* __ASM_GENERIC_FPROBE_H__ */
diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index f39869588117..702099f08929 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -5,47 +5,68 @@
#include <linux/compiler.h>
#include <linux/ftrace.h>
-#include <linux/rethook.h>
+#include <linux/rcupdate.h>
+#include <linux/refcount.h>
+#include <linux/slab.h>
struct fprobe;
-
typedef int (*fprobe_entry_cb)(struct fprobe *fp, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *regs,
void *entry_data);
typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *regs,
void *entry_data);
/**
+ * struct fprobe_hlist_node - address based hash list node for fprobe.
+ *
+ * @hlist: The hlist node for address search hash table.
+ * @addr: One of the probing address of @fp.
+ * @fp: The fprobe which owns this.
+ */
+struct fprobe_hlist_node {
+ struct hlist_node hlist;
+ unsigned long addr;
+ struct fprobe *fp;
+};
+
+/**
+ * struct fprobe_hlist - hash list nodes for fprobe.
+ *
+ * @hlist: The hlist node for existence checking hash table.
+ * @rcu: rcu_head for RCU deferred release.
+ * @fp: The fprobe which owns this fprobe_hlist.
+ * @size: The size of @array.
+ * @array: The fprobe_hlist_node for each address to probe.
+ */
+struct fprobe_hlist {
+ struct hlist_node hlist;
+ struct rcu_head rcu;
+ struct fprobe *fp;
+ int size;
+ struct fprobe_hlist_node array[] __counted_by(size);
+};
+
+/**
* struct fprobe - ftrace based probe.
- * @ops: The ftrace_ops.
+ *
* @nmissed: The counter for missing events.
* @flags: The status flag.
- * @rethook: The rethook data structure. (internal data)
* @entry_data_size: The private data storage size.
- * @nr_maxactive: The max number of active functions.
* @entry_handler: The callback function for function entry.
* @exit_handler: The callback function for function exit.
+ * @hlist_array: The fprobe_hlist for fprobe search from IP hash table.
*/
struct fprobe {
-#ifdef CONFIG_FUNCTION_TRACER
- /*
- * If CONFIG_FUNCTION_TRACER is not set, CONFIG_FPROBE is disabled too.
- * But user of fprobe may keep embedding the struct fprobe on their own
- * code. To avoid build error, this will keep the fprobe data structure
- * defined here, but remove ftrace_ops data structure.
- */
- struct ftrace_ops ops;
-#endif
unsigned long nmissed;
unsigned int flags;
- struct rethook *rethook;
size_t entry_data_size;
- int nr_maxactive;
fprobe_entry_cb entry_handler;
fprobe_exit_cb exit_handler;
+
+ struct fprobe_hlist *hlist_array;
};
/* This fprobe is soft-disabled. */
@@ -121,4 +142,9 @@ static inline void enable_fprobe(struct fprobe *fp)
fp->flags &= ~FPROBE_FL_DISABLED;
}
+/* The entry data size is 4 bits (=16) * sizeof(long) in maximum */
+#define FPROBE_DATA_SIZE_BITS 4
+#define MAX_FPROBE_DATA_SIZE_WORD ((1L << FPROBE_DATA_SIZE_BITS) - 1)
+#define MAX_FPROBE_DATA_SIZE (MAX_FPROBE_DATA_SIZE_WORD * sizeof(long))
+
#endif
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index aa9ddd1e4bb6..07092dfb21a4 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -43,9 +43,8 @@ struct dyn_ftrace;
char *arch_ftrace_match_adjust(char *str, const char *search);
-#ifdef CONFIG_HAVE_FUNCTION_GRAPH_RETVAL
-struct fgraph_ret_regs;
-unsigned long ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs);
+#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FREGS
+unsigned long ftrace_return_to_handler(struct ftrace_regs *fregs);
#else
unsigned long ftrace_return_to_handler(unsigned long frame_pointer);
#endif
@@ -134,6 +133,13 @@ extern int ftrace_enabled;
* Also, architecture dependent fields can be used for internal process.
* (e.g. orig_ax on x86_64)
*
+ * Basically, ftrace_regs stores the registers related to the context.
+ * On function entry, registers for function parameters and hooking the
+ * function call are stored, and on function exit, registers for function
+ * return value and frame pointers are stored.
+ *
+ * And also, it dpends on the context that which registers are restored
+ * from the ftrace_regs.
* On the function entry, those registers will be restored except for
* the stack pointer, so that user can change the function parameters
* and instruction pointer (e.g. live patching.)
@@ -170,6 +176,12 @@ static inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs)
#define ftrace_regs_set_instruction_pointer(fregs, ip) do { } while (0)
#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
+#ifdef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS
+
+static_assert(sizeof(struct pt_regs) == ftrace_regs_size());
+
+#endif /* CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS */
+
static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs)
{
if (!fregs)
@@ -178,6 +190,54 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs
return arch_ftrace_get_regs(fregs);
}
+#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \
+ defined(CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS)
+
+static __always_inline struct pt_regs *
+ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+ /*
+ * If CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS=y, ftrace_regs memory
+ * layout is including pt_regs. So always returns that address.
+ * Since arch_ftrace_get_regs() will check some members and may return
+ * NULL, we can not use it.
+ */
+ return &arch_ftrace_regs(fregs)->regs;
+}
+
+#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS */
+
+#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+
+/*
+ * Please define arch dependent pt_regs which compatible to the
+ * perf_arch_fetch_caller_regs() but based on ftrace_regs.
+ * This requires
+ * - user_mode(_regs) returns false (always kernel mode).
+ * - able to use the _regs for stack trace.
+ */
+#ifndef arch_ftrace_fill_perf_regs
+/* As same as perf_arch_fetch_caller_regs(), do nothing by default */
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {} while (0)
+#endif
+
+static __always_inline struct pt_regs *
+ftrace_fill_perf_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+ arch_ftrace_fill_perf_regs(fregs, regs);
+ return regs;
+}
+
+#else /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
+
+static __always_inline struct pt_regs *
+ftrace_fill_perf_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+ return &arch_ftrace_regs(fregs)->regs;
+}
+
+#endif
+
/*
* When true, the ftrace_regs_{get,set}_*() functions may be used on fregs.
* Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs.
@@ -190,6 +250,23 @@ static __always_inline bool ftrace_regs_has_args(struct ftrace_regs *fregs)
return ftrace_get_regs(fregs) != NULL;
}
+#ifdef CONFIG_HAVE_REGS_AND_STACK_ACCESS_API
+static __always_inline unsigned long
+ftrace_regs_get_kernel_stack_nth(struct ftrace_regs *fregs, unsigned int nth)
+{
+ unsigned long *stackp;
+
+ stackp = (unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+ if (((unsigned long)(stackp + nth) & ~(THREAD_SIZE - 1)) ==
+ ((unsigned long)stackp & ~(THREAD_SIZE - 1)))
+ return *(stackp + nth);
+
+ return 0;
+}
+#else /* !CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+#define ftrace_regs_get_kernel_stack_nth(fregs, nth) (0L)
+#endif /* CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+
typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct ftrace_regs *fregs);
@@ -545,6 +622,19 @@ enum {
FTRACE_MAY_SLEEP = (1 << 5),
};
+/* Arches can override ftrace_get_symaddr() to convert fentry_ip to symaddr. */
+#ifndef ftrace_get_symaddr
+/**
+ * ftrace_get_symaddr - return the symbol address from fentry_ip
+ * @fentry_ip: the address of ftrace location
+ *
+ * Get the symbol address from @fentry_ip (fast path). If there is no fast
+ * search path, this returns 0.
+ * User may need to use kallsyms API to find the symbol address.
+ */
+#define ftrace_get_symaddr(fentry_ip) (0)
+#endif
+
#ifdef CONFIG_DYNAMIC_FTRACE
void ftrace_arch_code_modify_prepare(void);
@@ -1069,12 +1159,15 @@ struct fgraph_ops;
/* Type of the callback handlers for tracing function graph*/
typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *,
- struct fgraph_ops *); /* return */
+ struct fgraph_ops *,
+ struct ftrace_regs *); /* return */
typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *,
- struct fgraph_ops *); /* entry */
+ struct fgraph_ops *,
+ struct ftrace_regs *); /* entry */
extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace,
- struct fgraph_ops *gops);
+ struct fgraph_ops *gops,
+ struct ftrace_regs *fregs);
bool ftrace_pids_enabled(struct ftrace_ops *ops);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -1114,8 +1207,15 @@ struct ftrace_ret_stack {
extern void return_to_handler(void);
extern int
-function_graph_enter(unsigned long ret, unsigned long func,
- unsigned long frame_pointer, unsigned long *retp);
+function_graph_enter_regs(unsigned long ret, unsigned long func,
+ unsigned long frame_pointer, unsigned long *retp,
+ struct ftrace_regs *fregs);
+
+static inline int function_graph_enter(unsigned long ret, unsigned long func,
+ unsigned long fp, unsigned long *retp)
+{
+ return function_graph_enter_regs(ret, func, fp, retp, NULL);
+}
struct ftrace_ret_stack *
ftrace_graph_get_ret_stack(struct task_struct *task, int skip);
diff --git a/include/linux/ftrace_regs.h b/include/linux/ftrace_regs.h
index be1ed0c891d0..bbc1873ca6b8 100644
--- a/include/linux/ftrace_regs.h
+++ b/include/linux/ftrace_regs.h
@@ -30,6 +30,8 @@ struct ftrace_regs;
override_function_with_return(&arch_ftrace_regs(fregs)->regs)
#define ftrace_regs_query_register_offset(name) \
regs_query_register_offset(name)
+#define ftrace_regs_get_frame_pointer(fregs) \
+ frame_pointer(&arch_ftrace_regs(fregs)->regs)
#endif /* HAVE_ARCH_FTRACE_REGS */