summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-04-16 01:59:46 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2026-04-16 01:59:46 +0300
commite4bf304f000e6fcceaf60b1455a5124b783b3a66 (patch)
tree27880cd98f6c232dbfecc6c5b6561c1f81148db2 /include
parent15218296329e489d861a3e4fd2bd299afc115b8e (diff)
parent6170922f137231b98fc568571befef63e1edff3f (diff)
downloadlinux-e4bf304f000e6fcceaf60b1455a5124b783b3a66.tar.xz
Merge tag 'trace-ringbuffer-v7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull ring-buffer updates from Steven Rostedt: - Add remote buffers for pKVM pKVM has a hypervisor component that is used to protect the guest from the host kernel. This hypervisor is a black box to the kernel as the kernel is to user space. The remote buffers are used to have a memory mapping between the hypervisor and the kernel where kernel may send commands to enable tracing within the hypervisor. Then the kernel will read this memory mapping just like user space can read the memory mapped ring buffer of the kernel tracing system. Since the hypervisor only has a single context, it doesn't need to worry about races between normal context, interrupt context and NMIs like the kernel does. The ring buffer it uses doesn't need to be as complex. The remote buffers are a simple version of the ring buffer that works in a single context. They are still per-CPU and use sub buffers. The data layout is the same as the kernel's ring buffer to share the same parsing. Currently, only ARM64 implements pKVM, but there's work to implement it also in x86. The remote buffer code is separated out from the ARM implementation so that it can be used in the future by x86. The ARM64 updates for pKVM is in the ARM/KVM tree and it merged in the remote buffers of this tree. - Make the backup instance non reusable The backup instance is a copy of the persistent ring buffer so that the persistent ring buffer could start recording again without using the data from the previous boot. The backup isn't for normal tracing. It is made read-only, and after it is consumed, it is automatically removed. - Have backup copy persistent instance before it starts recording To allow the persistent ring buffer to start recording from the kernel command line commands, move the copy of the backup instance to before the the command line options start recording. - Report header_page overwrite field as "char" and not "int' The rust parser of the header_page file was triggering a warning when it defined the overwrite variable as "int" but it was only a single byte in size. - Fix memory barriers for the trace_buffer CPU mask When a CPU comes online, the bit is set to allow readers to know that the CPU buffer is allocated. The bit is set after the allocation is done, and a smp_wmb() is performed after the allocation and before the setting of the bit. But instead of adding a smp_rmb() to all readers, since once a buffer is created for a CPU it is not deleted if that CPU goes offline, so this allocation is almost always done at boot up before any readers exist. If for the unlikely case where a CPU comes online for the first time after the system boot has finished, send an IPI to all CPUs to force the smp_rmb() for each CPU. - Show clock function being used in debugging ring buffer data When the ring buffer checks are enabled and the ring buffer detects an inconsistency in the times of the invents, print out the clock being used when the error occurred. There was a very hard to hit bug that would happen every so often and it ended up being only triggered when the jiffies clock was being used. If the bug showed the clock being used, it would have been much easier to find the problem (which was an internal function was being traced which caused the clock accounting to go off). * tag 'trace-ringbuffer-v7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: (26 commits) ring-buffer: Prevent off-by-one array access in ring_buffer_desc_page() ring-buffer: Report header_page overwrite as char tracing: Allow backup to save persistent ring buffer before it starts tracing/Documentation: Add a section about backup instance tracing: Remove the backup instance automatically after read tracing: Make the backup instance non-reusable ring-buffer: Enforce read ordering of trace_buffer cpumask and buffers ring-buffer: Show what clock function is used on timestamp errors tracing: Check for undefined symbols in simple_ring_buffer tracing: load/unload page callbacks for simple_ring_buffer Documentation: tracing: Add tracing remotes tracing: selftests: Add trace remote tests tracing: Add a trace remote module for testing tracing: Introduce simple_ring_buffer ring-buffer: Export buffer_data_page and macros tracing: Add helpers to create trace remote events tracing: Add events/ root files to trace remotes tracing: Add events to trace remotes tracing: Add init callback to trace remotes tracing: Add non-consuming read to trace remotes ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/ring_buffer.h58
-rw-r--r--include/linux/ring_buffer_types.h41
-rw-r--r--include/linux/simple_ring_buffer.h65
-rw-r--r--include/linux/trace_remote.h48
-rw-r--r--include/linux/trace_remote_event.h33
-rw-r--r--include/trace/define_remote_events.h73
-rw-r--r--include/uapi/linux/trace_mmap.h8
7 files changed, 322 insertions, 4 deletions
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index d862fa610270..994f52b34344 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -251,4 +251,62 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu,
void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu);
int ring_buffer_unmap(struct trace_buffer *buffer, int cpu);
int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu);
+
+struct ring_buffer_desc {
+ int cpu;
+ unsigned int nr_page_va; /* excludes the meta page */
+ unsigned long meta_va;
+ unsigned long page_va[] __counted_by(nr_page_va);
+};
+
+struct trace_buffer_desc {
+ int nr_cpus;
+ size_t struct_len;
+ char __data[]; /* list of ring_buffer_desc */
+};
+
+static inline struct ring_buffer_desc *__next_ring_buffer_desc(struct ring_buffer_desc *desc)
+{
+ size_t len = struct_size(desc, page_va, desc->nr_page_va);
+
+ return (struct ring_buffer_desc *)((void *)desc + len);
+}
+
+static inline struct ring_buffer_desc *__first_ring_buffer_desc(struct trace_buffer_desc *desc)
+{
+ return (struct ring_buffer_desc *)(&desc->__data[0]);
+}
+
+static inline size_t trace_buffer_desc_size(size_t buffer_size, unsigned int nr_cpus)
+{
+ unsigned int nr_pages = max(DIV_ROUND_UP(buffer_size, PAGE_SIZE), 2UL) + 1;
+ struct ring_buffer_desc *rbdesc;
+
+ return size_add(offsetof(struct trace_buffer_desc, __data),
+ size_mul(nr_cpus, struct_size(rbdesc, page_va, nr_pages)));
+}
+
+#define for_each_ring_buffer_desc(__pdesc, __cpu, __trace_pdesc) \
+ for (__pdesc = __first_ring_buffer_desc(__trace_pdesc), __cpu = 0; \
+ (__cpu) < (__trace_pdesc)->nr_cpus; \
+ (__cpu)++, __pdesc = __next_ring_buffer_desc(__pdesc))
+
+struct ring_buffer_remote {
+ struct trace_buffer_desc *desc;
+ int (*swap_reader_page)(unsigned int cpu, void *priv);
+ int (*reset)(unsigned int cpu, void *priv);
+ void *priv;
+};
+
+int ring_buffer_poll_remote(struct trace_buffer *buffer, int cpu);
+
+struct trace_buffer *
+__ring_buffer_alloc_remote(struct ring_buffer_remote *remote,
+ struct lock_class_key *key);
+
+#define ring_buffer_alloc_remote(remote) \
+({ \
+ static struct lock_class_key __key; \
+ __ring_buffer_alloc_remote(remote, &__key); \
+})
#endif /* _LINUX_RING_BUFFER_H */
diff --git a/include/linux/ring_buffer_types.h b/include/linux/ring_buffer_types.h
new file mode 100644
index 000000000000..54577021a49d
--- /dev/null
+++ b/include/linux/ring_buffer_types.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_RING_BUFFER_TYPES_H
+#define _LINUX_RING_BUFFER_TYPES_H
+
+#include <asm/local.h>
+
+#define TS_SHIFT 27
+#define TS_MASK ((1ULL << TS_SHIFT) - 1)
+#define TS_DELTA_TEST (~TS_MASK)
+
+/*
+ * We need to fit the time_stamp delta into 27 bits.
+ */
+static inline bool test_time_stamp(u64 delta)
+{
+ return !!(delta & TS_DELTA_TEST);
+}
+
+#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data)
+
+#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
+#define RB_ALIGNMENT 4U
+#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
+#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
+
+#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
+# define RB_FORCE_8BYTE_ALIGNMENT 0
+# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
+#else
+# define RB_FORCE_8BYTE_ALIGNMENT 1
+# define RB_ARCH_ALIGNMENT 8U
+#endif
+
+#define RB_ALIGN_DATA __aligned(RB_ARCH_ALIGNMENT)
+
+struct buffer_data_page {
+ u64 time_stamp; /* page time stamp */
+ local_t commit; /* write committed index */
+ unsigned char data[] RB_ALIGN_DATA; /* data of buffer page */
+};
+#endif
diff --git a/include/linux/simple_ring_buffer.h b/include/linux/simple_ring_buffer.h
new file mode 100644
index 000000000000..21aec556293e
--- /dev/null
+++ b/include/linux/simple_ring_buffer.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SIMPLE_RING_BUFFER_H
+#define _LINUX_SIMPLE_RING_BUFFER_H
+
+#include <linux/list.h>
+#include <linux/ring_buffer.h>
+#include <linux/ring_buffer_types.h>
+#include <linux/types.h>
+
+/*
+ * Ideally those struct would stay private but the caller needs to know
+ * the allocation size for simple_ring_buffer_init().
+ */
+struct simple_buffer_page {
+ struct list_head link;
+ struct buffer_data_page *page;
+ u64 entries;
+ u32 write;
+ u32 id;
+};
+
+struct simple_rb_per_cpu {
+ struct simple_buffer_page *tail_page;
+ struct simple_buffer_page *reader_page;
+ struct simple_buffer_page *head_page;
+ struct simple_buffer_page *bpages;
+ struct trace_buffer_meta *meta;
+ u32 nr_pages;
+
+#define SIMPLE_RB_UNAVAILABLE 0
+#define SIMPLE_RB_READY 1
+#define SIMPLE_RB_WRITING 2
+ u32 status;
+
+ u64 last_overrun;
+ u64 write_stamp;
+
+ struct simple_rb_cbs *cbs;
+};
+
+int simple_ring_buffer_init(struct simple_rb_per_cpu *cpu_buffer, struct simple_buffer_page *bpages,
+ const struct ring_buffer_desc *desc);
+
+void simple_ring_buffer_unload(struct simple_rb_per_cpu *cpu_buffer);
+
+void *simple_ring_buffer_reserve(struct simple_rb_per_cpu *cpu_buffer, unsigned long length,
+ u64 timestamp);
+
+void simple_ring_buffer_commit(struct simple_rb_per_cpu *cpu_buffer);
+
+int simple_ring_buffer_enable_tracing(struct simple_rb_per_cpu *cpu_buffer, bool enable);
+
+int simple_ring_buffer_reset(struct simple_rb_per_cpu *cpu_buffer);
+
+int simple_ring_buffer_swap_reader_page(struct simple_rb_per_cpu *cpu_buffer);
+
+int simple_ring_buffer_init_mm(struct simple_rb_per_cpu *cpu_buffer,
+ struct simple_buffer_page *bpages,
+ const struct ring_buffer_desc *desc,
+ void *(*load_page)(unsigned long va),
+ void (*unload_page)(void *va));
+
+void simple_ring_buffer_unload_mm(struct simple_rb_per_cpu *cpu_buffer,
+ void (*unload_page)(void *));
+#endif
diff --git a/include/linux/trace_remote.h b/include/linux/trace_remote.h
new file mode 100644
index 000000000000..fcd1d46ea466
--- /dev/null
+++ b/include/linux/trace_remote.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_TRACE_REMOTE_H
+#define _LINUX_TRACE_REMOTE_H
+
+#include <linux/dcache.h>
+#include <linux/ring_buffer.h>
+#include <linux/trace_remote_event.h>
+
+/**
+ * struct trace_remote_callbacks - Callbacks used by Tracefs to control the remote
+ * @init: Called once the remote has been registered. Allows the
+ * caller to extend the Tracefs remote directory
+ * @load_trace_buffer: Called before Tracefs accesses the trace buffer for the first
+ * time. Must return a &trace_buffer_desc
+ * (most likely filled with trace_remote_alloc_buffer())
+ * @unload_trace_buffer:
+ * Called once Tracefs has no use for the trace buffer
+ * (most likely call trace_remote_free_buffer())
+ * @enable_tracing: Called on Tracefs tracing_on. It is expected from the
+ * remote to allow writing.
+ * @swap_reader_page: Called when Tracefs consumes a new page from a
+ * ring-buffer. It is expected from the remote to isolate a
+ * @reset: Called on `echo 0 > trace`. It is expected from the
+ * remote to reset all ring-buffer pages.
+ * new reader-page from the @cpu ring-buffer.
+ * @enable_event: Called on events/event_name/enable. It is expected from
+ * the remote to allow the writing event @id.
+ */
+struct trace_remote_callbacks {
+ int (*init)(struct dentry *d, void *priv);
+ struct trace_buffer_desc *(*load_trace_buffer)(unsigned long size, void *priv);
+ void (*unload_trace_buffer)(struct trace_buffer_desc *desc, void *priv);
+ int (*enable_tracing)(bool enable, void *priv);
+ int (*swap_reader_page)(unsigned int cpu, void *priv);
+ int (*reset)(unsigned int cpu, void *priv);
+ int (*enable_event)(unsigned short id, bool enable, void *priv);
+};
+
+int trace_remote_register(const char *name, struct trace_remote_callbacks *cbs, void *priv,
+ struct remote_event *events, size_t nr_events);
+
+int trace_remote_alloc_buffer(struct trace_buffer_desc *desc, size_t desc_size, size_t buffer_size,
+ const struct cpumask *cpumask);
+
+void trace_remote_free_buffer(struct trace_buffer_desc *desc);
+
+#endif
diff --git a/include/linux/trace_remote_event.h b/include/linux/trace_remote_event.h
new file mode 100644
index 000000000000..c8ae1e1f5e72
--- /dev/null
+++ b/include/linux/trace_remote_event.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_TRACE_REMOTE_EVENTS_H
+#define _LINUX_TRACE_REMOTE_EVENTS_H
+
+struct trace_remote;
+struct trace_event_fields;
+struct trace_seq;
+
+struct remote_event_hdr {
+ unsigned short id;
+};
+
+#define REMOTE_EVENT_NAME_MAX 30
+struct remote_event {
+ char name[REMOTE_EVENT_NAME_MAX];
+ unsigned short id;
+ bool enabled;
+ struct trace_remote *remote;
+ struct trace_event_fields *fields;
+ char *print_fmt;
+ void (*print)(void *evt, struct trace_seq *seq);
+};
+
+#define RE_STRUCT(__args...) __args
+#define re_field(__type, __field) __type __field;
+
+#define REMOTE_EVENT_FORMAT(__name, __struct) \
+ struct remote_event_format_##__name { \
+ struct remote_event_hdr hdr; \
+ __struct \
+ }
+#endif
diff --git a/include/trace/define_remote_events.h b/include/trace/define_remote_events.h
new file mode 100644
index 000000000000..676e803dc144
--- /dev/null
+++ b/include/trace/define_remote_events.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/trace_events.h>
+#include <linux/trace_remote_event.h>
+#include <linux/trace_seq.h>
+#include <linux/stringify.h>
+
+#define REMOTE_EVENT_INCLUDE(__file) __stringify(../../__file)
+
+#ifdef REMOTE_EVENT_SECTION
+# define __REMOTE_EVENT_SECTION(__name) __used __section(REMOTE_EVENT_SECTION"."#__name)
+#else
+# define __REMOTE_EVENT_SECTION(__name)
+#endif
+
+#define REMOTE_PRINTK_COUNT_ARGS(__args...) \
+ __COUNT_ARGS(, ##__args, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 0)
+
+#define __remote_printk0() \
+ trace_seq_putc(seq, '\n')
+
+#define __remote_printk1(__fmt) \
+ trace_seq_puts(seq, " " __fmt "\n") \
+
+#define __remote_printk2(__fmt, __args...) \
+do { \
+ trace_seq_putc(seq, ' '); \
+ trace_seq_printf(seq, __fmt, __args); \
+ trace_seq_putc(seq, '\n'); \
+} while (0)
+
+/* Apply the appropriate trace_seq sequence according to the number of arguments */
+#define remote_printk(__args...) \
+ CONCATENATE(__remote_printk, REMOTE_PRINTK_COUNT_ARGS(__args))(__args)
+
+#define RE_PRINTK(__args...) __args
+
+#define REMOTE_EVENT(__name, __id, __struct, __printk) \
+ REMOTE_EVENT_FORMAT(__name, __struct); \
+ static void remote_event_print_##__name(void *evt, struct trace_seq *seq) \
+ { \
+ struct remote_event_format_##__name __maybe_unused *__entry = evt; \
+ trace_seq_puts(seq, #__name); \
+ remote_printk(__printk); \
+ }
+#include REMOTE_EVENT_INCLUDE(REMOTE_EVENT_INCLUDE_FILE)
+
+#undef REMOTE_EVENT
+#undef RE_PRINTK
+#undef re_field
+#define re_field(__type, __field) \
+ { \
+ .type = #__type, .name = #__field, \
+ .size = sizeof(__type), .align = __alignof__(__type), \
+ .is_signed = is_signed_type(__type), \
+ },
+#define __entry REC
+#define RE_PRINTK(__fmt, __args...) "\"" __fmt "\", " __stringify(__args)
+#define REMOTE_EVENT(__name, __id, __struct, __printk) \
+ static struct trace_event_fields remote_event_fields_##__name[] = { \
+ __struct \
+ {} \
+ }; \
+ static char remote_event_print_fmt_##__name[] = __printk; \
+ static struct remote_event __REMOTE_EVENT_SECTION(__name) \
+ remote_event_##__name = { \
+ .name = #__name, \
+ .id = __id, \
+ .fields = remote_event_fields_##__name, \
+ .print_fmt = remote_event_print_fmt_##__name, \
+ .print = remote_event_print_##__name, \
+ }
+#include REMOTE_EVENT_INCLUDE(REMOTE_EVENT_INCLUDE_FILE)
diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h
index c102ef35d11e..e8185889a1c8 100644
--- a/include/uapi/linux/trace_mmap.h
+++ b/include/uapi/linux/trace_mmap.h
@@ -17,8 +17,8 @@
* @entries: Number of entries in the ring-buffer.
* @overrun: Number of entries lost in the ring-buffer.
* @read: Number of entries that have been read.
- * @Reserved1: Internal use only.
- * @Reserved2: Internal use only.
+ * @pages_lost: Number of pages overwritten by the writer.
+ * @pages_touched: Number of pages written by the writer.
*/
struct trace_buffer_meta {
__u32 meta_page_size;
@@ -39,8 +39,8 @@ struct trace_buffer_meta {
__u64 overrun;
__u64 read;
- __u64 Reserved1;
- __u64 Reserved2;
+ __u64 pages_lost;
+ __u64 pages_touched;
};
#define TRACE_MMAP_IOCTL_GET_READER _IO('R', 0x20)