summaryrefslogtreecommitdiff
path: root/tools/testing
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2026-03-03 19:39:22 +0300
committerAlexei Starovoitov <ast@kernel.org>2026-03-03 19:39:22 +0300
commitb0cc2e069fae3fba74381609ebc523ceca85cd9a (patch)
tree721deace47cf7dcac462ea766b77e85789a2f171 /tools/testing
parent39948c2d42b5093b49f1ad6c3b75df455331ac99 (diff)
parent0c4fc6bd61054a9378bce149b3758f9b6e8fb5ab (diff)
downloadlinux-b0cc2e069fae3fba74381609ebc523ceca85cd9a.tar.xz
Merge branch 'libbpf-make-optimized-uprobes-backward-compatible'
Jiri Olsa says: ==================== libbpf: Make optimized uprobes backward compatible hi, we can currently optimize uprobes on top of nop5 instructions, so an application can define USDT_NOP to nop5 and use the USDT macro to define optimized usdt probes. This works fine on new kernels, but could have a performance penalty on older kernels that do not have the support to optimize and to emulate the nop5 instruction. This patchset adds support to work around the performance penalty on older kernels that do not support uprobe optimization; please see the detailed description in patch 2. v1: https://lore.kernel.org/bpf/20251117083551.517393-1-jolsa@kernel.org/ v2: https://lore.kernel.org/bpf/20260210133649.524292-1-jolsa@kernel.org/ v3: https://lore.kernel.org/bpf/20260211084858.750950-1-jolsa@kernel.org/T/#t v4: https://lore.kernel.org/bpf/20260220104220.634154-1-jolsa@kernel.org/ v5 changes: - keep nop_combo on stack and leave buf uninitialized in has_nop_combo function [David] v4 changes: - rebased on latest bpf-next/master - use pread for nop combo read [Andrii] - renamed usdt trigger benchmark names [Andrii] - added more ip address checks to tests [Andrii] v3 changes: - fix __x86_64 define and other typos [CI] - add missing '?' 
to usdt trigger program [CI] v2 changes: - after more investigation we realized there are some versions of bpftrace and stap that do not work with the solution suggested in version 1, so we decided to switch to the following solution: - change USDT macro [1] to emit nop,nop5 instructions combo by default - libbpf detects nop,nop5 instructions combo for USDT probe; if the combo is present and the uprobe syscall is detected, libbpf installs the usdt probe on top of the nop5 instruction to get it optimized - added usdt trigger benchmarks [Andrii] - several small fixes on uprobe syscall detection, tests and other places [Andrii] - true usdt.h source [1] updated [Andrii] - compile usdt_* objects unconditionally [Andrii] thanks, jirka [1] https://github.com/libbpf/usdt --- ==================== Link: https://patch.msgid.link/20260224103915.1369690-1-jolsa@kernel.org Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'tools/testing')
-rw-r--r--tools/testing/selftests/bpf/.gitignore2
-rw-r--r--tools/testing/selftests/bpf/Makefile5
-rw-r--r--tools/testing/selftests/bpf/bench.c4
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c60
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_uprobes.sh2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/usdt.c92
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt.c12
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c10
-rw-r--r--tools/testing/selftests/bpf/usdt.h2
-rw-r--r--tools/testing/selftests/bpf/usdt_1.c18
-rw-r--r--tools/testing/selftests/bpf/usdt_2.c16
11 files changed, 220 insertions, 3 deletions
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index a3ea98211ea6..bfdc5518ecc8 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -47,3 +47,5 @@ verification_cert.h
*.BTF
*.BTF_ids
*.BTF.base
+usdt_1
+usdt_2
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 72a9ba41f95e..49455ad51d66 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -754,7 +754,8 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \
$(VERIFY_SIG_HDR) \
flow_dissector_load.h \
ip_check_defrag_frags.h \
- bpftool_helpers.c
+ bpftool_helpers.c \
+ usdt_1.c usdt_2.c
TRUNNER_LIB_SOURCES := find_bit.c
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(OUTPUT)/liburandom_read.so \
@@ -878,6 +879,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_bpf_crypto.o \
$(OUTPUT)/bench_sockmap.o \
$(OUTPUT)/bench_lpm_trie_map.o \
+ $(OUTPUT)/usdt_1.o \
+ $(OUTPUT)/usdt_2.o \
#
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 8368bd3a0665..029b3e21f438 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -541,6 +541,8 @@ extern const struct bench bench_trig_uprobe_nop5;
extern const struct bench bench_trig_uretprobe_nop5;
extern const struct bench bench_trig_uprobe_multi_nop5;
extern const struct bench bench_trig_uretprobe_multi_nop5;
+extern const struct bench bench_trig_usdt_nop;
+extern const struct bench bench_trig_usdt_nop5;
#endif
extern const struct bench bench_rb_libbpf;
@@ -617,6 +619,8 @@ static const struct bench *benchs[] = {
&bench_trig_uretprobe_nop5,
&bench_trig_uprobe_multi_nop5,
&bench_trig_uretprobe_multi_nop5,
+ &bench_trig_usdt_nop,
+ &bench_trig_usdt_nop5,
#endif
/* ringbuf/perfbuf benchmarks */
&bench_rb_libbpf,
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index f74b313d6ae4..2f22ec61667b 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -407,6 +407,23 @@ static void *uprobe_producer_nop5(void *input)
uprobe_target_nop5();
return NULL;
}
+
+void usdt_1(void);
+void usdt_2(void);
+
+static void *uprobe_producer_usdt_nop(void *input)
+{
+ while (true)
+ usdt_1();
+ return NULL;
+}
+
+static void *uprobe_producer_usdt_nop5(void *input)
+{
+ while (true)
+ usdt_2();
+ return NULL;
+}
#endif
static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
@@ -544,6 +561,47 @@ static void uretprobe_multi_nop5_setup(void)
{
usetup(true, true /* use_multi */, &uprobe_target_nop5);
}
+
+static void usdt_setup(const char *name)
+{
+ struct bpf_link *link;
+ int err;
+
+ setup_libbpf();
+
+ ctx.skel = trigger_bench__open();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_usdt, true);
+
+ err = trigger_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to load skeleton\n");
+ exit(1);
+ }
+
+ link = bpf_program__attach_usdt(ctx.skel->progs.bench_trigger_usdt,
+ 0 /*self*/, "/proc/self/exe",
+ "optimized_attach", name, NULL);
+ if (libbpf_get_error(link)) {
+ fprintf(stderr, "failed to attach optimized_attach:%s usdt probe\n", name);
+ exit(1);
+ }
+ ctx.skel->links.bench_trigger_usdt = link;
+}
+
+static void usdt_nop_setup(void)
+{
+ usdt_setup("usdt_1");
+}
+
+static void usdt_nop5_setup(void)
+{
+ usdt_setup("usdt_2");
+}
#endif
const struct bench bench_trig_syscall_count = {
@@ -611,4 +669,6 @@ BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5");
BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5");
BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5");
BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5");
+BENCH_TRIG_USERMODE(usdt_nop, usdt_nop, "usdt-nop");
+BENCH_TRIG_USERMODE(usdt_nop5, usdt_nop5, "usdt-nop5");
#endif
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
index 03f55405484b..9ec59423b949 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -2,7 +2,7 @@
set -eufo pipefail
-for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5}
+for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5} usdt-nop usdt-nop5
do
summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
printf "%-15s: %s\n" $i "$summary"
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index f4be5269fa90..69759b27794d 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -247,6 +247,96 @@ cleanup:
#undef TRIGGER
}
+#ifdef __x86_64__
+extern void usdt_1(void);
+extern void usdt_2(void);
+
+static unsigned char nop1[1] = { 0x90 };
+static unsigned char nop1_nop5_combo[6] = { 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x00 };
+
+static void *find_instr(void *fn, unsigned char *instr, size_t cnt)
+{
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ if (!memcmp(instr, fn + i, cnt))
+ return fn + i;
+ }
+ return NULL;
+}
+
+static void subtest_optimized_attach(void)
+{
+ struct test_usdt *skel;
+ __u8 *addr_1, *addr_2;
+
+ /* usdt_1 USDT probe has single nop instruction */
+ addr_1 = find_instr(usdt_1, nop1_nop5_combo, 6);
+ if (!ASSERT_NULL(addr_1, "usdt_1_find_nop1_nop5_combo"))
+ return;
+
+ addr_1 = find_instr(usdt_1, nop1, 1);
+ if (!ASSERT_OK_PTR(addr_1, "usdt_1_find_nop1"))
+ return;
+
+ /* usdt_2 USDT probe has nop,nop5 instructions combo */
+ addr_2 = find_instr(usdt_2, nop1_nop5_combo, 6);
+ if (!ASSERT_OK_PTR(addr_2, "usdt_2_find_nop1_nop5_combo"))
+ return;
+
+ skel = test_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_usdt__open_and_load"))
+ return;
+
+ skel->bss->expected_ip = (unsigned long) addr_1;
+
+ /*
+ * Attach program on top of usdt_1 which is single nop probe,
+ * so the probe won't get optimized.
+ */
+ skel->links.usdt_executed = bpf_program__attach_usdt(skel->progs.usdt_executed,
+ 0 /*self*/, "/proc/self/exe",
+ "optimized_attach", "usdt_1", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_executed, "bpf_program__attach_usdt"))
+ goto cleanup;
+
+ usdt_1();
+ usdt_1();
+
+ /* int3 is on addr_1 address */
+ ASSERT_EQ(*addr_1, 0xcc, "int3");
+ ASSERT_EQ(skel->bss->executed, 2, "executed");
+
+ bpf_link__destroy(skel->links.usdt_executed);
+
+ /* we expect the nop5 ip */
+ skel->bss->expected_ip = (unsigned long) addr_2 + 1;
+
+ /*
+ * Attach program on top of usdt_2 which is probe defined on top
+ * of nop1,nop5 combo, so the probe gets optimized on top of nop5.
+ */
+ skel->links.usdt_executed = bpf_program__attach_usdt(skel->progs.usdt_executed,
+ 0 /*self*/, "/proc/self/exe",
+ "optimized_attach", "usdt_2", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_executed, "bpf_program__attach_usdt"))
+ goto cleanup;
+
+ usdt_2();
+ usdt_2();
+
+ /* nop stays on addr_2 address */
+ ASSERT_EQ(*addr_2, 0x90, "nop");
+
+ /* call is on addr_2 + 1 address */
+ ASSERT_EQ(*(addr_2 + 1), 0xe8, "call");
+ ASSERT_EQ(skel->bss->executed, 4, "executed");
+
+cleanup:
+ test_usdt__destroy(skel);
+}
+#endif
+
unsigned short test_usdt_100_semaphore SEC(".probes");
unsigned short test_usdt_300_semaphore SEC(".probes");
unsigned short test_usdt_400_semaphore SEC(".probes");
@@ -516,6 +606,8 @@ void test_usdt(void)
#ifdef __x86_64__
if (test__start_subtest("basic_optimized"))
subtest_basic_usdt(true);
+ if (test__start_subtest("optimized_attach"))
+ subtest_optimized_attach();
#endif
if (test__start_subtest("multispec"))
subtest_multispec_usdt();
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
index a78c87537b07..f00cb52874e0 100644
--- a/tools/testing/selftests/bpf/progs/test_usdt.c
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -138,4 +138,16 @@ int usdt_sib(struct pt_regs *ctx)
return 0;
}
+#ifdef __TARGET_ARCH_x86
+int executed;
+unsigned long expected_ip;
+
+SEC("usdt")
+int usdt_executed(struct pt_regs *ctx)
+{
+ if (expected_ip == ctx->ip)
+ executed++;
+ return 0;
+}
+#endif
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 4ea0422d1042..3225b4aee8ff 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -1,10 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Facebook
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <asm/unistd.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
+#include "bpf/usdt.bpf.h"
char _license[] SEC("license") = "GPL";
@@ -180,3 +181,10 @@ int bench_trigger_rawtp(void *ctx)
handle(ctx);
return 0;
}
+
+SEC("?usdt")
+int bench_trigger_usdt(void *ctx)
+{
+ inc_counter();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/usdt.h b/tools/testing/selftests/bpf/usdt.h
index 549d1f774810..c71e21df38b3 100644
--- a/tools/testing/selftests/bpf/usdt.h
+++ b/tools/testing/selftests/bpf/usdt.h
@@ -312,6 +312,8 @@ struct usdt_sema { volatile unsigned short active; };
#ifndef USDT_NOP
#if defined(__ia64__) || defined(__s390__) || defined(__s390x__)
#define USDT_NOP nop 0
+#elif defined(__x86_64__)
+#define USDT_NOP .byte 0x90, 0x0f, 0x1f, 0x44, 0x00, 0x0 /* nop, nop5 */
#else
#define USDT_NOP nop
#endif
diff --git a/tools/testing/selftests/bpf/usdt_1.c b/tools/testing/selftests/bpf/usdt_1.c
new file mode 100644
index 000000000000..4f06e8bcf58b
--- /dev/null
+++ b/tools/testing/selftests/bpf/usdt_1.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#if defined(__x86_64__)
+
+/*
+ * Include usdt.h with defined USDT_NOP macro to use single
+ * nop instruction.
+ */
+#define USDT_NOP .byte 0x90
+#include "usdt.h"
+
+__attribute__((aligned(16)))
+void usdt_1(void)
+{
+ USDT(optimized_attach, usdt_1);
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/usdt_2.c b/tools/testing/selftests/bpf/usdt_2.c
new file mode 100644
index 000000000000..789883aaca4c
--- /dev/null
+++ b/tools/testing/selftests/bpf/usdt_2.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#if defined(__x86_64__)
+
+/*
+ * Include usdt.h with default nop,nop5 instructions combo.
+ */
+#include "usdt.h"
+
+__attribute__((aligned(16)))
+void usdt_2(void)
+{
+ USDT(optimized_attach, usdt_2);
+}
+
+#endif