From 7ead8b8313d92b3a69a1a61b0dcbc4cd66c960dc Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 17 Aug 2009 16:56:28 +0800 Subject: tracing/events: Add module tracepoints Add trace points to trace module_load, module_free, module_get, module_put and module_request, and use trace_event facility to get the trace output. Here's the sample output: TASK-PID CPU# TIMESTAMP FUNCTION | | | | | <...>-42 [000] 1.758380: module_request: fb0 wait=1 call_site=fb_open ... <...>-60 [000] 3.269403: module_load: scsi_wait_scan <...>-60 [000] 3.269432: module_put: scsi_wait_scan call_site=sys_init_module refcnt=0 <...>-61 [001] 3.273168: module_free: scsi_wait_scan ... <...>-1021 [000] 13.836081: module_load: sunrpc <...>-1021 [000] 13.840589: module_put: sunrpc call_site=sys_init_module refcnt=-1 <...>-1027 [000] 13.848098: module_get: sunrpc call_site=try_module_get refcnt=0 <...>-1027 [000] 13.848308: module_get: sunrpc call_site=get_filesystem refcnt=1 <...>-1027 [000] 13.848692: module_put: sunrpc call_site=put_filesystem refcnt=0 ... modprobe-2587 [001] 1088.437213: module_load: trace_events_sample F modprobe-2587 [001] 1088.437786: module_put: trace_events_sample call_site=sys_init_module refcnt=0 Note: - the taints flag can be 'F', 'C' and/or 'P' if mod->taints != 0 - the module refcnt is percpu, so it can be negative in a specific cpu Signed-off-by: Li Zefan Acked-by: Rusty Russell Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Rusty Russell LKML-Reference: <4A891B3C.5030608@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/module.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 098bdb7bfacf..f8f92d015efe 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,10 +17,12 @@ #include #include #include -#include +#include #include +#include + /* Not Yet Implemented */ #define MODULE_SUPPORTED_DEVICE(name) @@ -462,7 +464,10 @@ static inline local_t *__module_ref_addr(struct module *mod, int cpu) static inline void __module_get(struct module *module) { if (module) { - local_inc(__module_ref_addr(module, get_cpu())); + unsigned int cpu = get_cpu(); + local_inc(__module_ref_addr(module, cpu)); + trace_module_get(module, _THIS_IP_, + local_read(__module_ref_addr(module, cpu))); put_cpu(); } } @@ -473,8 +478,11 @@ static inline int try_module_get(struct module *module) if (module) { unsigned int cpu = get_cpu(); - if (likely(module_is_live(module))) + if (likely(module_is_live(module))) { local_inc(__module_ref_addr(module, cpu)); + trace_module_get(module, _THIS_IP_, + local_read(__module_ref_addr(module, cpu))); + } else ret = 0; put_cpu(); -- cgit v1.2.3 From fc5377668c3d808e1d53c4aee152c836f55c3490 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Sep 2009 19:35:28 +0200 Subject: tracing: Remove markers Now that the last users of markers have migrated to the event tracer we can kill off the (now orphan) support code. Signed-off-by: Christoph Hellwig Acked-by: Mathieu Desnoyers Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <20090917173527.GA1699@lst.de> Signed-off-by: Ingo Molnar --- Documentation/markers.txt | 104 ---- arch/powerpc/platforms/cell/spufs/file.c | 1 - arch/powerpc/platforms/cell/spufs/sched.c | 1 - include/linux/kvm_host.h | 1 - include/linux/marker.h | 221 ------- include/linux/module.h | 11 - init/Kconfig | 7 - kernel/Makefile | 1 - kernel/marker.c | 930 ------------------------------ kernel/module.c | 18 - kernel/trace/trace_printk.c | 1 - samples/Kconfig | 6 - samples/Makefile | 2 +- samples/markers/Makefile | 4 - samples/markers/marker-example.c | 53 -- samples/markers/probe-example.c | 92 --- scripts/Makefile.modpost | 12 - 17 files changed, 1 insertion(+), 1464 deletions(-) delete mode 100644 Documentation/markers.txt delete mode 100644 include/linux/marker.h delete mode 100644 kernel/marker.c delete mode 100644 samples/markers/Makefile delete mode 100644 samples/markers/marker-example.c delete mode 100644 samples/markers/probe-example.c (limited to 'include/linux/module.h') diff --git a/Documentation/markers.txt b/Documentation/markers.txt deleted file mode 100644 index d2b3d0e91b26..000000000000 --- a/Documentation/markers.txt +++ /dev/null @@ -1,104 +0,0 @@ - Using the Linux Kernel Markers - - Mathieu Desnoyers - - -This document introduces Linux Kernel Markers and their use. It provides -examples of how to insert markers in the kernel and connect probe functions to -them and provides some examples of probe functions. - - -* Purpose of markers - -A marker placed in code provides a hook to call a function (probe) that you can -provide at runtime. A marker can be "on" (a probe is connected to it) or "off" -(no probe is attached). When a marker is "off" it has no effect, except for -adding a tiny time penalty (checking a condition for a branch) and space -penalty (adding a few bytes for the function call at the end of the -instrumented function and adds a data structure in a separate section). When a -marker is "on", the function you provide is called each time the marker is -executed, in the execution context of the caller. When the function provided -ends its execution, it returns to the caller (continuing from the marker site). - -You can put markers at important locations in the code. Markers are -lightweight hooks that can pass an arbitrary number of parameters, -described in a printk-like format string, to the attached probe function. - -They can be used for tracing and performance accounting. - - -* Usage - -In order to use the macro trace_mark, you should include linux/marker.h. - -#include - -And, - -trace_mark(subsystem_event, "myint %d mystring %s", someint, somestring); -Where : -- subsystem_event is an identifier unique to your event - - subsystem is the name of your subsystem. - - event is the name of the event to mark. -- "myint %d mystring %s" is the formatted string for the serializer. "myint" and - "mystring" are repectively the field names associated with the first and - second parameter. -- someint is an integer. -- somestring is a char pointer. - -Connecting a function (probe) to a marker is done by providing a probe (function -to call) for the specific marker through marker_probe_register() and can be -activated by calling marker_arm(). Marker deactivation can be done by calling -marker_disarm() as many times as marker_arm() has been called. Removing a probe -is done through marker_probe_unregister(); it will disarm the probe. - -marker_synchronize_unregister() must be called between probe unregistration and -the first occurrence of -- the end of module exit function, - to make sure there is no caller left using the probe; -- the free of any resource used by the probes, - to make sure the probes wont be accessing invalid data. -This, and the fact that preemption is disabled around the probe call, make sure -that probe removal and module unload are safe. See the "Probe example" section -below for a sample probe module. - -The marker mechanism supports inserting multiple instances of the same marker. -Markers can be put in inline functions, inlined static functions, and -unrolled loops as well as regular functions. - -The naming scheme "subsystem_event" is suggested here as a convention intended -to limit collisions. Marker names are global to the kernel: they are considered -as being the same whether they are in the core kernel image or in modules. -Conflicting format strings for markers with the same name will cause the markers -to be detected to have a different format string not to be armed and will output -a printk warning which identifies the inconsistency: - -"Format mismatch for probe probe_name (format), marker (format)" - -Another way to use markers is to simply define the marker without generating any -function call to actually call into the marker. This is useful in combination -with tracepoint probes in a scheme like this : - -void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk); - -DEFINE_MARKER_TP(marker_eventname, tracepoint_name, probe_tracepoint_name, - "arg1 %u pid %d"); - -notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk) -{ - struct marker *marker = &GET_MARKER(kernel_irq_entry); - /* write data to trace buffers ... */ -} - -* Probe / marker example - -See the example provided in samples/markers/src - -Compile them with your kernel. - -Run, as root : -modprobe marker-example (insmod order is not important) -modprobe probe-example -cat /proc/marker-example (returns an expected error) -rmmod marker-example probe-example -dmesg diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index ab8aef9bb8ea..8f079b865ad0 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index bb5b77c66d05..4678078fede8 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4af56036a6bf..b7bbb5ddd7ae 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include diff --git a/include/linux/marker.h b/include/linux/marker.h deleted file mode 100644 index b85e74ca782f..000000000000 --- a/include/linux/marker.h +++ /dev/null @@ -1,221 +0,0 @@ -#ifndef _LINUX_MARKER_H -#define _LINUX_MARKER_H - -/* - * Code markup for dynamic and static tracing. - * - * See Documentation/marker.txt. - * - * (C) Copyright 2006 Mathieu Desnoyers - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include -#include - -struct module; -struct marker; - -/** - * marker_probe_func - Type of a marker probe function - * @probe_private: probe private data - * @call_private: call site private data - * @fmt: format string - * @args: variable argument list pointer. Use a pointer to overcome C's - * inability to pass this around as a pointer in a portable manner in - * the callee otherwise. - * - * Type of marker probe functions. They receive the mdata and need to parse the - * format string to recover the variable argument list. - */ -typedef void marker_probe_func(void *probe_private, void *call_private, - const char *fmt, va_list *args); - -struct marker_probe_closure { - marker_probe_func *func; /* Callback */ - void *probe_private; /* Private probe data */ -}; - -struct marker { - const char *name; /* Marker name */ - const char *format; /* Marker format string, describing the - * variable argument list. - */ - char state; /* Marker state. */ - char ptype; /* probe type : 0 : single, 1 : multi */ - /* Probe wrapper */ - void (*call)(const struct marker *mdata, void *call_private, ...); - struct marker_probe_closure single; - struct marker_probe_closure *multi; - const char *tp_name; /* Optional tracepoint name */ - void *tp_cb; /* Optional tracepoint callback */ -} __attribute__((aligned(8))); - -#ifdef CONFIG_MARKERS - -#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format) \ - static const char __mstrtab_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name "\0" format; \ - static struct marker __mark_##name \ - __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ - 0, 0, marker_probe_cb, { __mark_empty_function, NULL},\ - NULL, tp_name_str, tp_cb } - -#define DEFINE_MARKER(name, format) \ - _DEFINE_MARKER(name, NULL, NULL, format) - -#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format) \ - _DEFINE_MARKER(name, #tp_name, tp_cb, format) - -/* - * Note : the empty asm volatile with read constraint is used here instead of a - * "used" attribute to fix a gcc 4.1.x bug. - * Make sure the alignment of the structure in the __markers section will - * not add unwanted padding between the beginning of the section and the - * structure. Force alignment to the same alignment as the section start. - * - * The "generic" argument controls which marker enabling mechanism must be used. - * If generic is true, a variable read is used. - * If generic is false, immediate values are used. - */ -#define __trace_mark(generic, name, call_private, format, args...) \ - do { \ - DEFINE_MARKER(name, format); \ - __mark_check_format(format, ## args); \ - if (unlikely(__mark_##name.state)) { \ - (*__mark_##name.call) \ - (&__mark_##name, call_private, ## args);\ - } \ - } while (0) - -#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ - do { \ - void __check_tp_type(void) \ - { \ - register_trace_##tp_name(tp_cb); \ - } \ - DEFINE_MARKER_TP(name, tp_name, tp_cb, format); \ - __mark_check_format(format, ## args); \ - (*__mark_##name.call)(&__mark_##name, call_private, \ - ## args); \ - } while (0) - -extern void marker_update_probe_range(struct marker *begin, - struct marker *end); - -#define GET_MARKER(name) (__mark_##name) - -#else /* !CONFIG_MARKERS */ -#define DEFINE_MARKER(name, tp_name, tp_cb, format) -#define __trace_mark(generic, name, call_private, format, args...) \ - __mark_check_format(format, ## args) -#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ - do { \ - void __check_tp_type(void) \ - { \ - register_trace_##tp_name(tp_cb); \ - } \ - __mark_check_format(format, ## args); \ - } while (0) -static inline void marker_update_probe_range(struct marker *begin, - struct marker *end) -{ } -#define GET_MARKER(name) -#endif /* CONFIG_MARKERS */ - -/** - * trace_mark - Marker using code patching - * @name: marker name, not quoted. - * @format: format string - * @args...: variable argument list - * - * Places a marker using optimized code patching technique (imv_read()) - * to be enabled when immediate values are present. - */ -#define trace_mark(name, format, args...) \ - __trace_mark(0, name, NULL, format, ## args) - -/** - * _trace_mark - Marker using variable read - * @name: marker name, not quoted. - * @format: format string - * @args...: variable argument list - * - * Places a marker using a standard memory read (_imv_read()) to be - * enabled. Should be used for markers in code paths where instruction - * modification based enabling is not welcome. (__init and __exit functions, - * lockdep, some traps, printk). - */ -#define _trace_mark(name, format, args...) \ - __trace_mark(1, name, NULL, format, ## args) - -/** - * trace_mark_tp - Marker in a tracepoint callback - * @name: marker name, not quoted. - * @tp_name: tracepoint name, not quoted. - * @tp_cb: tracepoint callback. Should have an associated global symbol so it - * is not optimized away by the compiler (should not be static). - * @format: format string - * @args...: variable argument list - * - * Places a marker in a tracepoint callback. - */ -#define trace_mark_tp(name, tp_name, tp_cb, format, args...) \ - __trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args) - -/** - * MARK_NOARGS - Format string for a marker with no argument. - */ -#define MARK_NOARGS " " - -/* To be used for string format validity checking with gcc */ -static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...) -{ -} - -#define __mark_check_format(format, args...) \ - do { \ - if (0) \ - ___mark_check_format(format, ## args); \ - } while (0) - -extern marker_probe_func __mark_empty_function; - -extern void marker_probe_cb(const struct marker *mdata, - void *call_private, ...); - -/* - * Connect a probe to a marker. - * private data pointer must be a valid allocated memory address, or NULL. - */ -extern int marker_probe_register(const char *name, const char *format, - marker_probe_func *probe, void *probe_private); - -/* - * Returns the private data given to marker_probe_register. - */ -extern int marker_probe_unregister(const char *name, - marker_probe_func *probe, void *probe_private); -/* - * Unregister a marker by providing the registered private data. - */ -extern int marker_probe_unregister_private_data(marker_probe_func *probe, - void *probe_private); - -extern void *marker_get_private_data(const char *name, marker_probe_func *probe, - int num); - -/* - * marker_synchronize_unregister must be called between the last marker probe - * unregistration and the first one of - * - the end of module exit function - * - the free of any resource used by the probes - * to ensure the code and data are valid for any possibly running probes. - */ -#define marker_synchronize_unregister() synchronize_sched() - -#endif diff --git a/include/linux/module.h b/include/linux/module.h index f8f92d015efe..1c755b2f937d 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -327,10 +326,6 @@ struct module /* The command line arguments (may be mangled). People like keeping pointers to this stuff */ char *args; -#ifdef CONFIG_MARKERS - struct marker *markers; - unsigned int num_markers; -#endif #ifdef CONFIG_TRACEPOINTS struct tracepoint *tracepoints; unsigned int num_tracepoints; @@ -535,8 +530,6 @@ int unregister_module_notifier(struct notifier_block * nb); extern void print_modules(void); -extern void module_update_markers(void); - extern void module_update_tracepoints(void); extern int module_get_iter_tracepoints(struct tracepoint_iter *iter); @@ -651,10 +644,6 @@ static inline void print_modules(void) { } -static inline void module_update_markers(void) -{ -} - static inline void module_update_tracepoints(void) { } diff --git a/init/Kconfig b/init/Kconfig index 8e8b76d8a272..4cc0fa13d5eb 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1054,13 +1054,6 @@ config PROFILING config TRACEPOINTS bool -config MARKERS - bool "Activate markers" - select TRACEPOINTS - help - Place an empty function call at each marker site. Can be - dynamically changed for a probe function. - source "arch/Kconfig" config SLOW_WORK diff --git a/kernel/Makefile b/kernel/Makefile index 3d9c7e27e3f9..7c9b0a585502 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -87,7 +87,6 @@ obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o -obj-$(CONFIG_MARKERS) += marker.o obj-$(CONFIG_TRACEPOINTS) += tracepoint.o obj-$(CONFIG_LATENCYTOP) += latencytop.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ diff --git a/kernel/marker.c b/kernel/marker.c deleted file mode 100644 index ea54f2647868..000000000000 --- a/kernel/marker.c +++ /dev/null @@ -1,930 +0,0 @@ -/* - * Copyright (C) 2007 Mathieu Desnoyers - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern struct marker __start___markers[]; -extern struct marker __stop___markers[]; - -/* Set to 1 to enable marker debug output */ -static const int marker_debug; - -/* - * markers_mutex nests inside module_mutex. Markers mutex protects the builtin - * and module markers and the hash table. - */ -static DEFINE_MUTEX(markers_mutex); - -/* - * Marker hash table, containing the active markers. - * Protected by module_mutex. - */ -#define MARKER_HASH_BITS 6 -#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) -static struct hlist_head marker_table[MARKER_TABLE_SIZE]; - -/* - * Note about RCU : - * It is used to make sure every handler has finished using its private data - * between two consecutive operation (add or remove) on a given marker. It is - * also used to delay the free of multiple probes array until a quiescent state - * is reached. - * marker entries modifications are protected by the markers_mutex. - */ -struct marker_entry { - struct hlist_node hlist; - char *format; - /* Probe wrapper */ - void (*call)(const struct marker *mdata, void *call_private, ...); - struct marker_probe_closure single; - struct marker_probe_closure *multi; - int refcount; /* Number of times armed. 0 if disarmed. */ - struct rcu_head rcu; - void *oldptr; - int rcu_pending; - unsigned char ptype:1; - unsigned char format_allocated:1; - char name[0]; /* Contains name'\0'format'\0' */ -}; - -/** - * __mark_empty_function - Empty probe callback - * @probe_private: probe private data - * @call_private: call site private data - * @fmt: format string - * @...: variable argument list - * - * Empty callback provided as a probe to the markers. By providing this to a - * disabled marker, we make sure the execution flow is always valid even - * though the function pointer change and the marker enabling are two distinct - * operations that modifies the execution flow of preemptible code. - */ -notrace void __mark_empty_function(void *probe_private, void *call_private, - const char *fmt, va_list *args) -{ -} -EXPORT_SYMBOL_GPL(__mark_empty_function); - -/* - * marker_probe_cb Callback that prepares the variable argument list for probes. - * @mdata: pointer of type struct marker - * @call_private: caller site private data - * @...: Variable argument list. - * - * Since we do not use "typical" pointer based RCU in the 1 argument case, we - * need to put a full smp_rmb() in this branch. This is why we do not use - * rcu_dereference() for the pointer read. - */ -notrace void marker_probe_cb(const struct marker *mdata, - void *call_private, ...) -{ - va_list args; - char ptype; - - /* - * rcu_read_lock_sched does two things : disabling preemption to make - * sure the teardown of the callbacks can be done correctly when they - * are in modules and they insure RCU read coherency. - */ - rcu_read_lock_sched_notrace(); - ptype = mdata->ptype; - if (likely(!ptype)) { - marker_probe_func *func; - /* Must read the ptype before ptr. They are not data dependant, - * so we put an explicit smp_rmb() here. */ - smp_rmb(); - func = mdata->single.func; - /* Must read the ptr before private data. They are not data - * dependant, so we put an explicit smp_rmb() here. */ - smp_rmb(); - va_start(args, call_private); - func(mdata->single.probe_private, call_private, mdata->format, - &args); - va_end(args); - } else { - struct marker_probe_closure *multi; - int i; - /* - * Read mdata->ptype before mdata->multi. - */ - smp_rmb(); - multi = mdata->multi; - /* - * multi points to an array, therefore accessing the array - * depends on reading multi. However, even in this case, - * we must insure that the pointer is read _before_ the array - * data. Same as rcu_dereference, but we need a full smp_rmb() - * in the fast path, so put the explicit barrier here. - */ - smp_read_barrier_depends(); - for (i = 0; multi[i].func; i++) { - va_start(args, call_private); - multi[i].func(multi[i].probe_private, call_private, - mdata->format, &args); - va_end(args); - } - } - rcu_read_unlock_sched_notrace(); -} -EXPORT_SYMBOL_GPL(marker_probe_cb); - -/* - * marker_probe_cb Callback that does not prepare the variable argument list. - * @mdata: pointer of type struct marker - * @call_private: caller site private data - * @...: Variable argument list. - * - * Should be connected to markers "MARK_NOARGS". - */ -static notrace void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, ...) -{ - va_list args; /* not initialized */ - char ptype; - - rcu_read_lock_sched_notrace(); - ptype = mdata->ptype; - if (likely(!ptype)) { - marker_probe_func *func; - /* Must read the ptype before ptr. They are not data dependant, - * so we put an explicit smp_rmb() here. */ - smp_rmb(); - func = mdata->single.func; - /* Must read the ptr before private data. They are not data - * dependant, so we put an explicit smp_rmb() here. */ - smp_rmb(); - func(mdata->single.probe_private, call_private, mdata->format, - &args); - } else { - struct marker_probe_closure *multi; - int i; - /* - * Read mdata->ptype before mdata->multi. - */ - smp_rmb(); - multi = mdata->multi; - /* - * multi points to an array, therefore accessing the array - * depends on reading multi. However, even in this case, - * we must insure that the pointer is read _before_ the array - * data. Same as rcu_dereference, but we need a full smp_rmb() - * in the fast path, so put the explicit barrier here. - */ - smp_read_barrier_depends(); - for (i = 0; multi[i].func; i++) - multi[i].func(multi[i].probe_private, call_private, - mdata->format, &args); - } - rcu_read_unlock_sched_notrace(); -} - -static void free_old_closure(struct rcu_head *head) -{ - struct marker_entry *entry = container_of(head, - struct marker_entry, rcu); - kfree(entry->oldptr); - /* Make sure we free the data before setting the pending flag to 0 */ - smp_wmb(); - entry->rcu_pending = 0; -} - -static void debug_print_probes(struct marker_entry *entry) -{ - int i; - - if (!marker_debug) - return; - - if (!entry->ptype) { - printk(KERN_DEBUG "Single probe : %p %p\n", - entry->single.func, - entry->single.probe_private); - } else { - for (i = 0; entry->multi[i].func; i++) - printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, - entry->multi[i].func, - entry->multi[i].probe_private); - } -} - -static struct marker_probe_closure * -marker_entry_add_probe(struct marker_entry *entry, - marker_probe_func *probe, void *probe_private) -{ - int nr_probes = 0; - struct marker_probe_closure *old, *new; - - WARN_ON(!probe); - - debug_print_probes(entry); - old = entry->multi; - if (!entry->ptype) { - if (entry->single.func == probe && - entry->single.probe_private == probe_private) - return ERR_PTR(-EBUSY); - if (entry->single.func == __mark_empty_function) { - /* 0 -> 1 probes */ - entry->single.func = probe; - entry->single.probe_private = probe_private; - entry->refcount = 1; - entry->ptype = 0; - debug_print_probes(entry); - return NULL; - } else { - /* 1 -> 2 probes */ - nr_probes = 1; - old = NULL; - } - } else { - /* (N -> N+1), (N != 0, 1) probes */ - for (nr_probes = 0; old[nr_probes].func; nr_probes++) - if (old[nr_probes].func == probe - && old[nr_probes].probe_private - == probe_private) - return ERR_PTR(-EBUSY); - } - /* + 2 : one for new probe, one for NULL func */ - new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), - GFP_KERNEL); - if (new == NULL) - return ERR_PTR(-ENOMEM); - if (!old) - new[0] = entry->single; - else - memcpy(new, old, - nr_probes * sizeof(struct marker_probe_closure)); - new[nr_probes].func = probe; - new[nr_probes].probe_private = probe_private; - entry->refcount = nr_probes + 1; - entry->multi = new; - entry->ptype = 1; - debug_print_probes(entry); - return old; -} - -static struct marker_probe_closure * -marker_entry_remove_probe(struct marker_entry *entry, - marker_probe_func *probe, void *probe_private) -{ - int nr_probes = 0, nr_del = 0, i; - struct marker_probe_closure *old, *new; - - old = entry->multi; - - debug_print_probes(entry); - if (!entry->ptype) { - /* 0 -> N is an error */ - WARN_ON(entry->single.func == __mark_empty_function); - /* 1 -> 0 probes */ - WARN_ON(probe && entry->single.func != probe); - WARN_ON(entry->single.probe_private != probe_private); - entry->single.func = __mark_empty_function; - entry->refcount = 0; - entry->ptype = 0; - debug_print_probes(entry); - return NULL; - } else { - /* (N -> M), (N > 1, M >= 0) probes */ - for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - if ((!probe || old[nr_probes].func == probe) - && old[nr_probes].probe_private - == probe_private) - nr_del++; - } - } - - if (nr_probes - nr_del == 0) { - /* N -> 0, (N > 1) */ - entry->single.func = __mark_empty_function; - entry->refcount = 0; - entry->ptype = 0; - } else if (nr_probes - nr_del == 1) { - /* N -> 1, (N > 1) */ - for (i = 0; old[i].func; i++) - if ((probe && old[i].func != probe) || - old[i].probe_private != probe_private) - entry->single = old[i]; - entry->refcount = 1; - entry->ptype = 0; - } else { - int j = 0; - /* N -> M, (N > 1, M > 1) */ - /* + 1 for NULL */ - new = kzalloc((nr_probes - nr_del + 1) - * sizeof(struct marker_probe_closure), GFP_KERNEL); - if (new == NULL) - return ERR_PTR(-ENOMEM); - for (i = 0; old[i].func; i++) - if ((probe && old[i].func != probe) || - old[i].probe_private != probe_private) - new[j++] = old[i]; - entry->refcount = nr_probes - nr_del; - entry->ptype = 1; - entry->multi = new; - } - debug_print_probes(entry); - return old; -} - -/* - * Get marker if the marker is present in the marker hash table. - * Must be called with markers_mutex held. - * Returns NULL if not present. - */ -static struct marker_entry *get_marker(const char *name) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - u32 hash = jhash(name, strlen(name), 0); - - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) - return e; - } - return NULL; -} - -/* - * Add the marker to the marker hash table. Must be called with markers_mutex - * held. - */ -static struct marker_entry *add_marker(const char *name, const char *format) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - size_t name_len = strlen(name) + 1; - size_t format_len = 0; - u32 hash = jhash(name, name_len-1, 0); - - if (format) - format_len = strlen(format) + 1; - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - printk(KERN_NOTICE - "Marker %s busy\n", name); - return ERR_PTR(-EBUSY); /* Already there */ - } - } - /* - * Using kmalloc here to allocate a variable length element. Could - * cause some memory fragmentation if overused. - */ - e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, - GFP_KERNEL); - if (!e) - return ERR_PTR(-ENOMEM); - memcpy(&e->name[0], name, name_len); - if (format) { - e->format = &e->name[name_len]; - memcpy(e->format, format, format_len); - if (strcmp(e->format, MARK_NOARGS) == 0) - e->call = marker_probe_cb_noarg; - else - e->call = marker_probe_cb; - trace_mark(core_marker_format, "name %s format %s", - e->name, e->format); - } else { - e->format = NULL; - e->call = marker_probe_cb; - } - e->single.func = __mark_empty_function; - e->single.probe_private = NULL; - e->multi = NULL; - e->ptype = 0; - e->format_allocated = 0; - e->refcount = 0; - e->rcu_pending = 0; - hlist_add_head(&e->hlist, head); - return e; -} - -/* - * Remove the marker from the marker hash table. Must be called with mutex_lock - * held. - */ -static int remove_marker(const char *name) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - int found = 0; - size_t len = strlen(name) + 1; - u32 hash = jhash(name, len-1, 0); - - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - found = 1; - break; - } - } - if (!found) - return -ENOENT; - if (e->single.func != __mark_empty_function) - return -EBUSY; - hlist_del(&e->hlist); - if (e->format_allocated) - kfree(e->format); - /* Make sure the call_rcu has been executed */ - if (e->rcu_pending) - rcu_barrier_sched(); - kfree(e); - return 0; -} - -/* - * Set the mark_entry format to the format found in the element. - */ -static int marker_set_format(struct marker_entry *entry, const char *format) -{ - entry->format = kstrdup(format, GFP_KERNEL); - if (!entry->format) - return -ENOMEM; - entry->format_allocated = 1; - - trace_mark(core_marker_format, "name %s format %s", - entry->name, entry->format); - return 0; -} - -/* - * Sets the probe callback corresponding to one marker. - */ -static int set_marker(struct marker_entry *entry, struct marker *elem, - int active) -{ - int ret = 0; - WARN_ON(strcmp(entry->name, elem->name) != 0); - - if (entry->format) { - if (strcmp(entry->format, elem->format) != 0) { - printk(KERN_NOTICE - "Format mismatch for probe %s " - "(%s), marker (%s)\n", - entry->name, - entry->format, - elem->format); - return -EPERM; - } - } else { - ret = marker_set_format(entry, elem->format); - if (ret) - return ret; - } - - /* - * probe_cb setup (statically known) is done here. It is - * asynchronous with the rest of execution, therefore we only - * pass from a "safe" callback (with argument) to an "unsafe" - * callback (does not set arguments). - */ - elem->call = entry->call; - /* - * Sanity check : - * We only update the single probe private data when the ptr is - * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) - */ - WARN_ON(elem->single.func != __mark_empty_function - && elem->single.probe_private != entry->single.probe_private - && !elem->ptype); - elem->single.probe_private = entry->single.probe_private; - /* - * Make sure the private data is valid when we update the - * single probe ptr. - */ - smp_wmb(); - elem->single.func = entry->single.func; - /* - * We also make sure that the new probe callbacks array is consistent - * before setting a pointer to it. - */ - rcu_assign_pointer(elem->multi, entry->multi); - /* - * Update the function or multi probe array pointer before setting the - * ptype. - */ - smp_wmb(); - elem->ptype = entry->ptype; - - if (elem->tp_name && (active ^ elem->state)) { - WARN_ON(!elem->tp_cb); - /* - * It is ok to directly call the probe registration because type - * checking has been done in the __trace_mark_tp() macro. - */ - - if (active) { - /* - * try_module_get should always succeed because we hold - * lock_module() to get the tp_cb address. - */ - ret = try_module_get(__module_text_address( - (unsigned long)elem->tp_cb)); - BUG_ON(!ret); - ret = tracepoint_probe_register_noupdate( - elem->tp_name, - elem->tp_cb); - } else { - ret = tracepoint_probe_unregister_noupdate( - elem->tp_name, - elem->tp_cb); - /* - * tracepoint_probe_update_all() must be called - * before the module containing tp_cb is unloaded. - */ - module_put(__module_text_address( - (unsigned long)elem->tp_cb)); - } - } - elem->state = active; - - return ret; -} - -/* - * Disable a marker and its probe callback. - * Note: only waiting an RCU period after setting elem->call to the empty - * function insures that the original callback is not used anymore. This insured - * by rcu_read_lock_sched around the call site. - */ -static void disable_marker(struct marker *elem) -{ - int ret; - - /* leave "call" as is. It is known statically. */ - if (elem->tp_name && elem->state) { - WARN_ON(!elem->tp_cb); - /* - * It is ok to directly call the probe registration because type - * checking has been done in the __trace_mark_tp() macro. - */ - ret = tracepoint_probe_unregister_noupdate(elem->tp_name, - elem->tp_cb); - WARN_ON(ret); - /* - * tracepoint_probe_update_all() must be called - * before the module containing tp_cb is unloaded. - */ - module_put(__module_text_address((unsigned long)elem->tp_cb)); - } - elem->state = 0; - elem->single.func = __mark_empty_function; - /* Update the function before setting the ptype */ - smp_wmb(); - elem->ptype = 0; /* single probe */ - /* - * Leave the private data and id there, because removal is racy and - * should be done only after an RCU period. These are never used until - * the next initialization anyway. - */ -} - -/** - * marker_update_probe_range - Update a probe range - * @begin: beginning of the range - * @end: end of the range - * - * Updates the probe callback corresponding to a range of markers. - */ -void marker_update_probe_range(struct marker *begin, - struct marker *end) -{ - struct marker *iter; - struct marker_entry *mark_entry; - - mutex_lock(&markers_mutex); - for (iter = begin; iter < end; iter++) { - mark_entry = get_marker(iter->name); - if (mark_entry) { - set_marker(mark_entry, iter, !!mark_entry->refcount); - /* - * ignore error, continue - */ - } else { - disable_marker(iter); - } - } - mutex_unlock(&markers_mutex); -} - -/* - * Update probes, removing the faulty probes. - * - * Internal callback only changed before the first probe is connected to it. - * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 - * transitions. All other transitions will leave the old private data valid. - * This makes the non-atomicity of the callback/private data updates valid. - * - * "special case" updates : - * 0 -> 1 callback - * 1 -> 0 callback - * 1 -> 2 callbacks - * 2 -> 1 callbacks - * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. - * Site effect : marker_set_format may delete the marker entry (creating a - * replacement). - */ -static void marker_update_probes(void) -{ - /* Core kernel markers */ - marker_update_probe_range(__start___markers, __stop___markers); - /* Markers in modules. */ - module_update_markers(); - tracepoint_probe_update_all(); -} - -/** - * marker_probe_register - Connect a probe to a marker - * @name: marker name - * @format: format string - * @probe: probe handler - * @probe_private: probe private data - * - * private data must be a valid allocated memory address, or NULL. - * Returns 0 if ok, error value on error. - * The probe address must at least be aligned on the architecture pointer size. - */ -int marker_probe_register(const char *name, const char *format, - marker_probe_func *probe, void *probe_private) -{ - struct marker_entry *entry; - int ret = 0; - struct marker_probe_closure *old; - - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) { - entry = add_marker(name, format); - if (IS_ERR(entry)) - ret = PTR_ERR(entry); - } else if (format) { - if (!entry->format) - ret = marker_set_format(entry, format); - else if (strcmp(entry->format, format)) - ret = -EPERM; - } - if (ret) - goto end; - - /* - * If we detect that a call_rcu is pending for this marker, - * make sure it's executed now. - */ - if (entry->rcu_pending) - rcu_barrier_sched(); - old = marker_entry_add_probe(entry, probe, probe_private); - if (IS_ERR(old)) { - ret = PTR_ERR(old); - goto end; - } - mutex_unlock(&markers_mutex); - marker_update_probes(); - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); -end: - mutex_unlock(&markers_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(marker_probe_register); - -/** - * marker_probe_unregister - Disconnect a probe from a marker - * @name: marker name - * @probe: probe function pointer - * @probe_private: probe private data - * - * Returns the private data given to marker_probe_register, or an ERR_PTR(). - * We do not need to call a synchronize_sched to make sure the probes have - * finished running before doing a module unload, because the module unload - * itself uses stop_machine(), which insures that every preempt disabled section - * have finished. - */ -int marker_probe_unregister(const char *name, - marker_probe_func *probe, void *probe_private) -{ - struct marker_entry *entry; - struct marker_probe_closure *old; - int ret = -ENOENT; - - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - old = marker_entry_remove_probe(entry, probe, probe_private); - mutex_unlock(&markers_mutex); - marker_update_probes(); - mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); - remove_marker(name); /* Ignore busy error message */ - ret = 0; -end: - mutex_unlock(&markers_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(marker_probe_unregister); - -static struct marker_entry * -get_marker_from_private_data(marker_probe_func *probe, void *probe_private) -{ - struct marker_entry *entry; - unsigned int i; - struct hlist_head *head; - struct hlist_node *node; - - for (i = 0; i < MARKER_TABLE_SIZE; i++) { - head = &marker_table[i]; - hlist_for_each_entry(entry, node, head, hlist) { - if (!entry->ptype) { - if (entry->single.func == probe - && entry->single.probe_private - == probe_private) - return entry; - } else { - struct marker_probe_closure *closure; - closure = entry->multi; - for (i = 0; closure[i].func; i++) { - if (closure[i].func == probe && - closure[i].probe_private - == probe_private) - return entry; - } - } - } - } - return NULL; -} - -/** - * marker_probe_unregister_private_data - Disconnect a probe from a marker - * @probe: probe function - * @probe_private: probe private data - * - * Unregister a probe by providing the registered private data. - * Only removes the first marker found in hash table. - * Return 0 on success or error value. - * We do not need to call a synchronize_sched to make sure the probes have - * finished running before doing a module unload, because the module unload - * itself uses stop_machine(), which insures that every preempt disabled section - * have finished. - */ -int marker_probe_unregister_private_data(marker_probe_func *probe, - void *probe_private) -{ - struct marker_entry *entry; - int ret = 0; - struct marker_probe_closure *old; - - mutex_lock(&markers_mutex); - entry = get_marker_from_private_data(probe, probe_private); - if (!entry) { - ret = -ENOENT; - goto end; - } - if (entry->rcu_pending) - rcu_barrier_sched(); - old = marker_entry_remove_probe(entry, NULL, probe_private); - mutex_unlock(&markers_mutex); - marker_update_probes(); - mutex_lock(&markers_mutex); - entry = get_marker_from_private_data(probe, probe_private); - if (!entry) - goto end; - if (entry->rcu_pending) - rcu_barrier_sched(); - entry->oldptr = old; - entry->rcu_pending = 1; - /* write rcu_pending before calling the RCU callback */ - smp_wmb(); - call_rcu_sched(&entry->rcu, free_old_closure); - remove_marker(entry->name); /* Ignore busy error message */ -end: - mutex_unlock(&markers_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); - -/** - * marker_get_private_data - Get a marker's probe private data - * @name: marker name - * @probe: probe to match - * @num: get the nth matching probe's private data - * - * Returns the nth private data pointer (starting from 0) matching, or an - * ERR_PTR. - * Returns the private data pointer, or an ERR_PTR. - * The private data pointer should _only_ be dereferenced if the caller is the - * owner of the data, or its content could vanish. This is mostly used to - * confirm that a caller is the owner of a registered probe. - */ -void *marker_get_private_data(const char *name, marker_probe_func *probe, - int num) -{ - struct hlist_head *head; - struct hlist_node *node; - struct marker_entry *e; - size_t name_len = strlen(name) + 1; - u32 hash = jhash(name, name_len-1, 0); - int i; - - head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; - hlist_for_each_entry(e, node, head, hlist) { - if (!strcmp(name, e->name)) { - if (!e->ptype) { - if (num == 0 && e->single.func == probe) - return e->single.probe_private; - } else { - struct marker_probe_closure *closure; - int match = 0; - closure = e->multi; - for (i = 0; closure[i].func; i++) { - if (closure[i].func != probe) - continue; - if (match++ == num) - return closure[i].probe_private; - } - } - break; - } - } - return ERR_PTR(-ENOENT); -} -EXPORT_SYMBOL_GPL(marker_get_private_data); - -#ifdef CONFIG_MODULES - -int marker_module_notify(struct notifier_block *self, - unsigned long val, void *data) -{ - struct module *mod = data; - - switch (val) { - case MODULE_STATE_COMING: - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - break; - case MODULE_STATE_GOING: - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - break; - } - return 0; -} - -struct notifier_block marker_module_nb = { - .notifier_call = marker_module_notify, - .priority = 0, -}; - -static int init_markers(void) -{ - return register_module_notifier(&marker_module_nb); -} -__initcall(init_markers); - -#endif /* CONFIG_MODULES */ diff --git a/kernel/module.c b/kernel/module.c index 05ce49ced8f6..b6ee424245dd 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2237,10 +2237,6 @@ static noinline struct module *load_module(void __user *umod, sizeof(*mod->ctors), &mod->num_ctors); #endif -#ifdef CONFIG_MARKERS - mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers", - sizeof(*mod->markers), &mod->num_markers); -#endif #ifdef CONFIG_TRACEPOINTS mod->tracepoints = section_objs(hdr, sechdrs, secstrings, "__tracepoints", @@ -2958,20 +2954,6 @@ void module_layout(struct module *mod, EXPORT_SYMBOL(module_layout); #endif -#ifdef CONFIG_MARKERS -void module_update_markers(void) -{ - struct module *mod; - - mutex_lock(&module_mutex); - list_for_each_entry(mod, &modules, list) - if (!mod->taints) - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - mutex_unlock(&module_mutex); -} -#endif - #ifdef CONFIG_TRACEPOINTS void module_update_tracepoints(void) { diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 687699d365ae..2547d8813cf0 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/samples/Kconfig b/samples/Kconfig index 428b065ba695..b92bde3c6a89 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -7,12 +7,6 @@ menuconfig SAMPLES if SAMPLES -config SAMPLE_MARKERS - tristate "Build markers examples -- loadable modules only" - depends on MARKERS && m - help - This build markers example modules. - config SAMPLE_TRACEPOINTS tristate "Build tracepoints examples -- loadable modules only" depends on TRACEPOINTS && m diff --git a/samples/Makefile b/samples/Makefile index 13e4b470b539..43343a03b1f4 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -1,3 +1,3 @@ # Makefile for Linux samples code -obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ trace_events/ +obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ diff --git a/samples/markers/Makefile b/samples/markers/Makefile deleted file mode 100644 index 6d7231265f0f..000000000000 --- a/samples/markers/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# builds the kprobes example kernel modules; -# then to use one (as root): insmod - -obj-$(CONFIG_SAMPLE_MARKERS) += probe-example.o marker-example.o diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c deleted file mode 100644 index e9cd9c0bc84f..000000000000 --- a/samples/markers/marker-example.c +++ /dev/null @@ -1,53 +0,0 @@ -/* marker-example.c - * - * Executes a marker when /proc/marker-example is opened. - * - * (C) Copyright 2007 Mathieu Desnoyers - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include -#include -#include -#include - -struct proc_dir_entry *pentry_example; - -static int my_open(struct inode *inode, struct file *file) -{ - int i; - - trace_mark(subsystem_event, "integer %d string %s", 123, - "example string"); - for (i = 0; i < 10; i++) - trace_mark(subsystem_eventb, MARK_NOARGS); - return -EPERM; -} - -static struct file_operations mark_ops = { - .open = my_open, -}; - -static int __init example_init(void) -{ - printk(KERN_ALERT "example init\n"); - pentry_example = proc_create("marker-example", 0444, NULL, &mark_ops); - if (!pentry_example) - return -EPERM; - return 0; -} - -static void __exit example_exit(void) -{ - printk(KERN_ALERT "example exit\n"); - remove_proc_entry("marker-example", NULL); -} - -module_init(example_init) -module_exit(example_exit) - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mathieu Desnoyers"); -MODULE_DESCRIPTION("Marker example"); diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c deleted file mode 100644 index 2dfb3b32937e..000000000000 --- a/samples/markers/probe-example.c +++ /dev/null @@ -1,92 +0,0 @@ -/* probe-example.c - * - * Connects two functions to marker call sites. - * - * (C) Copyright 2007 Mathieu Desnoyers - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include -#include -#include -#include -#include - -struct probe_data { - const char *name; - const char *format; - marker_probe_func *probe_func; -}; - -void probe_subsystem_event(void *probe_data, void *call_data, - const char *format, va_list *args) -{ - /* Declare args */ - unsigned int value; - const char *mystr; - - /* Assign args */ - value = va_arg(*args, typeof(value)); - mystr = va_arg(*args, typeof(mystr)); - - /* Call printk */ - printk(KERN_INFO "Value %u, string %s\n", value, mystr); - - /* or count, check rights, serialize data in a buffer */ -} - -atomic_t eventb_count = ATOMIC_INIT(0); - -void probe_subsystem_eventb(void *probe_data, void *call_data, - const char *format, va_list *args) -{ - /* Increment counter */ - atomic_inc(&eventb_count); -} - -static struct probe_data probe_array[] = -{ - { .name = "subsystem_event", - .format = "integer %d string %s", - .probe_func = probe_subsystem_event }, - { .name = "subsystem_eventb", - .format = MARK_NOARGS, - .probe_func = probe_subsystem_eventb }, -}; - -static int __init probe_init(void) -{ - int result; - int i; - - for (i = 0; i < ARRAY_SIZE(probe_array); i++) { - result = marker_probe_register(probe_array[i].name, - probe_array[i].format, - probe_array[i].probe_func, &probe_array[i]); - if (result) - printk(KERN_INFO "Unable to register probe %s\n", - probe_array[i].name); - } - return 0; -} - -static void __exit probe_fini(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(probe_array); i++) - marker_probe_unregister(probe_array[i].name, - probe_array[i].probe_func, &probe_array[i]); - printk(KERN_INFO "Number of event b : %u\n", - atomic_read(&eventb_count)); - marker_synchronize_unregister(); -} - -module_init(probe_init); -module_exit(probe_fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Mathieu Desnoyers"); -MODULE_DESCRIPTION("SUBSYSTEM Probe"); diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index f4053dc7b5d6..8f14c81abbc7 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -13,7 +13,6 @@ # 2) modpost is then used to # 3) create one .mod.c file pr. module # 4) create one Module.symvers file with CRC for all exported symbols -# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers # 5) compile all .mod.c files # 6) final link of the module to a file @@ -59,10 +58,6 @@ include scripts/Makefile.lib kernelsymfile := $(objtree)/Module.symvers modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers -kernelmarkersfile := $(objtree)/Module.markers -modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers - -markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) # Step 1), find all modules listed in $(MODVERDIR)/ __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) @@ -85,8 +80,6 @@ modpost = scripts/mod/modpost \ $(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \ $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S) \ - $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ - $(if $(CONFIG_MARKERS),-M $(markersfile)) \ $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) \ $(if $(cross_build),-c) @@ -101,17 +94,12 @@ quiet_cmd_kernel-mod = MODPOST $@ cmd_kernel-mod = $(modpost) $@ vmlinux.o: FORCE - @rm -fr $(kernelmarkersfile) $(call cmd,kernel-mod) # Declare generated files as targets for modpost $(symverfile): __modpost ; $(modules:.ko=.mod.c): __modpost ; -ifdef CONFIG_MARKERS -$(markersfile): __modpost ; -endif - # Step 5), compile all *.mod.c files -- cgit v1.2.3 From 4a4962263f07d14660849ec134ee42b63e95ea9a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 6 Jul 2009 14:50:42 +0100 Subject: module: reduce symbol table for loaded modules (v2) Discard all symbols not interesting for kallsyms use: absolute, section, and in the common case (!KALLSYMS_ALL) data ones. Signed-off-by: Jan Beulich Signed-off-by: Rusty Russell --- include/linux/module.h | 10 ++++-- kernel/module.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 94 insertions(+), 7 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 1c755b2f937d..1d3ccb173fd6 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -308,9 +308,13 @@ struct module #endif #ifdef CONFIG_KALLSYMS - /* We keep the symbol and string tables for kallsyms. */ - Elf_Sym *symtab; - unsigned int num_symtab; + /* + * We keep the symbol and string tables for kallsyms. + * The core_* fields below are temporary, loader-only (they + * could really be discarded after module init). + */ + Elf_Sym *symtab, *core_symtab; + unsigned int num_symtab, core_num_syms; char *strtab; /* Section attributes */ diff --git a/kernel/module.c b/kernel/module.c index e6bc4b28aa62..97f4d5e15535 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1862,13 +1862,68 @@ static char elf_type(const Elf_Sym *sym, return '?'; } +static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, + unsigned int shnum) +{ + const Elf_Shdr *sec; + + if (src->st_shndx == SHN_UNDEF + || src->st_shndx >= shnum + || !src->st_name) + return false; + + sec = sechdrs + src->st_shndx; + if (!(sec->sh_flags & SHF_ALLOC) +#ifndef CONFIG_KALLSYMS_ALL + || !(sec->sh_flags & SHF_EXECINSTR) +#endif + || (sec->sh_entsize & INIT_OFFSET_MASK)) + return false; + + return true; +} + +static unsigned long layout_symtab(struct module *mod, + Elf_Shdr *sechdrs, + unsigned int symindex, + const Elf_Ehdr *hdr, + const char *secstrings) +{ + unsigned long symoffs; + Elf_Shdr *symsect = sechdrs + symindex; + const Elf_Sym *src; + unsigned int i, nsrc, ndst; + + /* Put symbol section at end of init part of module. */ + symsect->sh_flags |= SHF_ALLOC; + symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect, + symindex) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", secstrings + symsect->sh_name); + + src = (void *)hdr + symsect->sh_offset; + nsrc = symsect->sh_size / sizeof(*src); + for (ndst = i = 1; i < nsrc; ++i, ++src) + if (is_core_symbol(src, sechdrs, hdr->e_shnum)) + ++ndst; + + /* Append room for core symbols at end of core part. */ + symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); + mod->core_size = symoffs + ndst * sizeof(Elf_Sym); + + return symoffs; +} + static void add_kallsyms(struct module *mod, Elf_Shdr *sechdrs, + unsigned int shnum, unsigned int symindex, unsigned int strindex, + unsigned long symoffs, const char *secstrings) { - unsigned int i; + unsigned int i, ndst; + const Elf_Sym *src; + Elf_Sym *dst; mod->symtab = (void *)sechdrs[symindex].sh_addr; mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); @@ -1878,12 +1933,32 @@ static void add_kallsyms(struct module *mod, for (i = 0; i < mod->num_symtab; i++) mod->symtab[i].st_info = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); + + mod->core_symtab = dst = mod->module_core + symoffs; + src = mod->symtab; + *dst = *src; + for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { + if (!is_core_symbol(src, sechdrs, shnum)) + continue; + dst[ndst] = *src; + ++ndst; + } + mod->core_num_syms = ndst; } #else +static inline unsigned long layout_symtab(struct module *mod, + Elf_Shdr *sechdrs, + unsigned int symindex, + const Elf_Hdr *hdr, + const char *secstrings) +{ +} static inline void add_kallsyms(struct module *mod, Elf_Shdr *sechdrs, + unsigned int shnum, unsigned int symindex, unsigned int strindex, + unsigned long symoffs, const char *secstrings) { } @@ -1959,6 +2034,9 @@ static noinline struct module *load_module(void __user *umod, struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ +#ifdef CONFIG_KALLSYMS + unsigned long symoffs; +#endif mm_segment_t old_fs; DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", @@ -2041,8 +2119,7 @@ static noinline struct module *load_module(void __user *umod, sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC; #ifdef CONFIG_KALLSYMS - /* Keep symbol and string tables for decoding later. */ - sechdrs[symindex].sh_flags |= SHF_ALLOC; + /* Keep string table for decoding later. */ sechdrs[strindex].sh_flags |= SHF_ALLOC; #endif @@ -2109,6 +2186,7 @@ static noinline struct module *load_module(void __user *umod, this is done generically; there doesn't appear to be any special cases for the architectures. */ layout_sections(mod, hdr, sechdrs, secstrings); + symoffs = layout_symtab(mod, sechdrs, symindex, hdr, secstrings); /* Do the allocs. */ ptr = module_alloc_update_bounds(mod->core_size); @@ -2313,7 +2391,8 @@ static noinline struct module *load_module(void __user *umod, percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); - add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, + symoffs, secstrings); if (!mod->taints) { struct _ddebug *debug; @@ -2491,6 +2570,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, /* Drop initial reference. */ module_put(mod); trim_init_extable(mod); +#ifdef CONFIG_KALLSYMS + mod->num_symtab = mod->core_num_syms; + mod->symtab = mod->core_symtab; +#endif module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; -- cgit v1.2.3 From 554bdfe5acf3715e87c8d5e25a4f9a896ac9f014 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 6 Jul 2009 14:51:44 +0100 Subject: module: reduce string table for loaded modules (v2) Also remove all parts of the string table (referenced by the symbol table) that are not needed for kallsyms use (i.e. which were only referenced by symbols discarded by the previous patch, or not referenced at all for whatever reason). Signed-off-by: Jan Beulich Signed-off-by: Rusty Russell --- include/linux/module.h | 2 +- kernel/module.c | 68 +++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 13 deletions(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index 1d3ccb173fd6..aca980365956 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -315,7 +315,7 @@ struct module */ Elf_Sym *symtab, *core_symtab; unsigned int num_symtab, core_num_syms; - char *strtab; + char *strtab, *core_strtab; /* Section attributes */ struct module_sect_attrs *sect_attrs; diff --git a/kernel/module.c b/kernel/module.c index 97f4d5e15535..39827c3d9484 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1886,12 +1886,17 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, static unsigned long layout_symtab(struct module *mod, Elf_Shdr *sechdrs, unsigned int symindex, + unsigned int strindex, const Elf_Ehdr *hdr, - const char *secstrings) + const char *secstrings, + unsigned long *pstroffs, + unsigned long *strmap) { unsigned long symoffs; Elf_Shdr *symsect = sechdrs + symindex; + Elf_Shdr *strsect = sechdrs + strindex; const Elf_Sym *src; + const char *strtab; unsigned int i, nsrc, ndst; /* Put symbol section at end of init part of module. */ @@ -1902,14 +1907,31 @@ static unsigned long layout_symtab(struct module *mod, src = (void *)hdr + symsect->sh_offset; nsrc = symsect->sh_size / sizeof(*src); + strtab = (void *)hdr + strsect->sh_offset; for (ndst = i = 1; i < nsrc; ++i, ++src) - if (is_core_symbol(src, sechdrs, hdr->e_shnum)) + if (is_core_symbol(src, sechdrs, hdr->e_shnum)) { + unsigned int j = src->st_name; + + while(!__test_and_set_bit(j, strmap) && strtab[j]) + ++j; ++ndst; + } /* Append room for core symbols at end of core part. */ symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); mod->core_size = symoffs + ndst * sizeof(Elf_Sym); + /* Put string table section at end of init part of module. */ + strsect->sh_flags |= SHF_ALLOC; + strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, + strindex) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", secstrings + strsect->sh_name); + + /* Append room for core symbols' strings at end of core part. */ + *pstroffs = mod->core_size; + __set_bit(0, strmap); + mod->core_size += bitmap_weight(strmap, strsect->sh_size); + return symoffs; } @@ -1919,11 +1941,14 @@ static void add_kallsyms(struct module *mod, unsigned int symindex, unsigned int strindex, unsigned long symoffs, - const char *secstrings) + unsigned long stroffs, + const char *secstrings, + unsigned long *strmap) { unsigned int i, ndst; const Elf_Sym *src; Elf_Sym *dst; + char *s; mod->symtab = (void *)sechdrs[symindex].sh_addr; mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); @@ -1941,16 +1966,25 @@ static void add_kallsyms(struct module *mod, if (!is_core_symbol(src, sechdrs, shnum)) continue; dst[ndst] = *src; + dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name); ++ndst; } mod->core_num_syms = ndst; + + mod->core_strtab = s = mod->module_core + stroffs; + for (*s = 0, i = 1; i < sechdrs[strindex].sh_size; ++i) + if (test_bit(i, strmap)) + *++s = mod->strtab[i]; } #else static inline unsigned long layout_symtab(struct module *mod, Elf_Shdr *sechdrs, unsigned int symindex, + unsigned int strindex, const Elf_Hdr *hdr, - const char *secstrings) + const char *secstrings, + unsigned long *pstroffs, + unsigned long *strmap) { } static inline void add_kallsyms(struct module *mod, @@ -1959,7 +1993,9 @@ static inline void add_kallsyms(struct module *mod, unsigned int symindex, unsigned int strindex, unsigned long symoffs, - const char *secstrings) + unsigned long stroffs, + const char *secstrings, + const unsigned long *strmap) { } #endif /* CONFIG_KALLSYMS */ @@ -2035,7 +2071,7 @@ static noinline struct module *load_module(void __user *umod, long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ #ifdef CONFIG_KALLSYMS - unsigned long symoffs; + unsigned long symoffs, stroffs, *strmap; #endif mm_segment_t old_fs; @@ -2118,10 +2154,6 @@ static noinline struct module *load_module(void __user *umod, /* Don't keep modinfo and version sections. */ sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC; -#ifdef CONFIG_KALLSYMS - /* Keep string table for decoding later. */ - sechdrs[strindex].sh_flags |= SHF_ALLOC; -#endif /* Check module struct version now, before we try to use module. */ if (!check_modstruct_version(sechdrs, versindex, mod)) { @@ -2157,6 +2189,13 @@ static noinline struct module *load_module(void __user *umod, goto free_hdr; } + strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size) + * sizeof(long), GFP_KERNEL); + if (!strmap) { + err = -ENOMEM; + goto free_mod; + } + if (find_module(mod->name)) { err = -EEXIST; goto free_mod; @@ -2186,7 +2225,8 @@ static noinline struct module *load_module(void __user *umod, this is done generically; there doesn't appear to be any special cases for the architectures. */ layout_sections(mod, hdr, sechdrs, secstrings); - symoffs = layout_symtab(mod, sechdrs, symindex, hdr, secstrings); + symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr, + secstrings, &stroffs, strmap); /* Do the allocs. */ ptr = module_alloc_update_bounds(mod->core_size); @@ -2392,7 +2432,9 @@ static noinline struct module *load_module(void __user *umod, sechdrs[pcpuindex].sh_size); add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, - symoffs, secstrings); + symoffs, stroffs, secstrings, strmap); + kfree(strmap); + strmap = NULL; if (!mod->taints) { struct _ddebug *debug; @@ -2481,6 +2523,7 @@ static noinline struct module *load_module(void __user *umod, percpu_modfree(percpu); free_mod: kfree(args); + kfree(strmap); free_hdr: vfree(hdr); return ERR_PTR(err); @@ -2573,6 +2616,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, #ifdef CONFIG_KALLSYMS mod->num_symtab = mod->core_num_syms; mod->symtab = mod->core_symtab; + mod->strtab = mod->core_strtab; #endif module_free(mod, mod->module_init); mod->module_init = NULL; -- cgit v1.2.3 From 1d7015caa082d465faeae5d6fd1be077ee6dfa87 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Sep 2009 00:32:58 -0600 Subject: module: preferred way to use MODULE_AUTHOR For the longest time now we've been using multiple MODULE_AUTHOR() statements when a module has more than one author, but the comment here disagrees. Signed-off-by: Johannes Berg Cc: Jiri Kosina Cc: Luciano Coelho Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell --- include/linux/module.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux/module.h') diff --git a/include/linux/module.h b/include/linux/module.h index aca980365956..482efc865acf 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -128,7 +128,10 @@ extern struct module __this_module; */ #define MODULE_LICENSE(_license) MODULE_INFO(license, _license) -/* Author, ideally of form NAME[, NAME]*[ and NAME] */ +/* + * Author(s), use "Name " or just "Name", for multiple + * authors use multiple MODULE_AUTHOR() statements/lines. + */ #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author) /* What your module does. */ -- cgit v1.2.3