Diffstat (limited to 'tools/perf/examples')
-rw-r--r--	tools/perf/examples/bpf/augmented_raw_syscalls.c	131
1 file changed, 131 insertions, 0 deletions
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
new file mode 100644
index 000000000000..90a19336310b
--- /dev/null
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
+ *
+ * Test it with:
+ *
+ * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null
+ *
+ * This exactly matches what is marshalled into the raw_syscall:sys_enter
+ * payload expected by the 'perf trace' beautifiers.
+ *
+ * For now it just uses the existing tracepoint augmentation code in 'perf
+ * trace', in the next csets we'll hook up these with the sys_enter/sys_exit
+ * code that will combine entry/exit in a strace like way.
+ */
+
+#include <stdio.h>
+#include <linux/socket.h>
+
+/* bpf-output associated map */
+struct bpf_map SEC("maps") __augmented_syscalls__ = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(u32),
+	.max_entries = __NR_CPUS__,
+};
+
+struct syscall_enter_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	unsigned long	   args[6];
+};
+
+struct syscall_exit_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   ret;
+};
+
+struct augmented_filename {
+	unsigned int	size;
+	int		reserved;
+	char		value[256];
+};
+
+#define SYS_OPEN 2
+#define SYS_OPENAT 257
+
+SEC("raw_syscalls:sys_enter")
+int sys_enter(struct syscall_enter_args *args)
+{
+	struct {
+		struct syscall_enter_args args;
+		struct augmented_filename filename;
+	} augmented_args;
+	unsigned int len = sizeof(augmented_args);
+	const void *filename_arg = NULL;
+
+	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
+	/*
+	 * Yonghong and Edward Cree sayz:
+	 *
+	 * https://www.spinics.net/lists/netdev/msg531645.html
+	 *
+	 * >>   R0=inv(id=0) R1=inv2 R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1
+	 * >> 10: (bf) r1 = r6
+	 * >> 11: (07) r1 += 16
+	 * >> 12: (05) goto pc+2
+	 * >> 15: (79) r3 = *(u64 *)(r1 +0)
+	 * >> dereference of modified ctx ptr R1 off=16 disallowed
+	 * > Aha, we at least got a different error message this time.
+	 * > And indeed llvm has done that optimisation, rather than the more obvious
+	 * > 11: r3 = *(u64 *)(r1 +16)
+	 * > because it wants to have lots of reads share a single insn.  You may be able
+	 * > to defeat that optimisation by adding compiler barriers, idk.  Maybe someone
+	 * > with llvm knowledge can figure out how to stop it (ideally, llvm would know
+	 * > when it's generating for bpf backend and not do that).  -O0?  ¯\_(ツ)_/¯
+	 *
+	 * The optimization mostly likes below:
+	 *
+	 *	br1:
+	 * 	...
+	 *	r1 += 16
+	 *	goto merge
+	 *	br2:
+	 *	...
+	 *	r1 += 20
+	 *	goto merge
+	 *	merge:
+	 *	*(u64 *)(r1 + 0)
+	 *
+	 * The compiler tries to merge common loads. There is no easy way to
+	 * stop this compiler optimization without turning off a lot of other
+	 * optimizations. The easiest way is to add barriers:
+	 *
+	 * 	 __asm__ __volatile__("": : :"memory")
+	 *
+	 * 	 after the ctx memory access to prevent their down stream merging.
+	 */
+	switch (augmented_args.args.syscall_nr) {
+	case SYS_OPEN:	 filename_arg = (const void *)args->args[0];
+			__asm__ __volatile__("": : :"memory");
+			 break;
+	case SYS_OPENAT: filename_arg = (const void *)args->args[1];
+			 break;
+	}
+
+	if (filename_arg != NULL) {
+		augmented_args.filename.reserved = 0;
+		augmented_args.filename.size = probe_read_str(&augmented_args.filename.value,
+							      sizeof(augmented_args.filename.value),
+							      filename_arg);
+		if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {
+			len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;
+			len &= sizeof(augmented_args.filename.value) - 1;
+		}
+	} else {
+		len = sizeof(augmented_args.args);
+	}
+
+	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
+	return 0;
+}
+
+SEC("raw_syscalls:sys_exit")
+int sys_exit(struct syscall_exit_args *args)
+{
+	return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */
+}
+
+license(GPL);
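As a stand-alone illustration of the barrier trick discussed in the long comment above (my own sketch, not part of the commit; pick_filename_arg and struct ctx are hypothetical names), the asm statement with a "memory" clobber forbids the compiler from caching, merging or reordering memory accesses across it, which is what defeats LLVM's common-load merging on the BPF backend:

#include <stdio.h>

#define barrier() __asm__ __volatile__("" : : : "memory")

struct ctx {
	long args[6];
};

/* Hypothetical helper mirroring the open/openat argument selection. */
static long pick_filename_arg(struct ctx *ctx, int is_openat)
{
	long arg;

	if (is_openat)
		arg = ctx->args[1];	/* openat: pathname is the 2nd arg */
	else
		arg = ctx->args[0];	/* open: pathname is the 1st arg */

	/*
	 * Without this, the compiler may rewrite both branches as
	 * "r1 += offset; goto merge" followed by a single shared load,
	 * producing the "dereference of modified ctx ptr" verifier
	 * error quoted in the diff.
	 */
	barrier();
	return arg;
}

int main(void)
{
	struct ctx c = { .args = { 100, 101 } };

	printf("%ld %ld\n", pick_filename_arg(&c, 0), pick_filename_arg(&c, 1));
	return 0;
}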

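The length juggling before perf_event_output() is easier to follow with concrete numbers. A worked example (my own sketch, not from the commit; the struct sizes assume the layouts above on a 64-bit target, and probe_read_str() counts the trailing NUL). The mask only works because value[] is 256 bytes, a power of two:

#include <stdio.h>

int main(void)
{
	unsigned int args_sz  = 8 + 8 + 6 * 8;	    /* syscall_enter_args: 64  */
	unsigned int fname_sz = 4 + 4 + 256;	    /* augmented_filename: 264 */
	unsigned int len      = args_sz + fname_sz; /* full payload:       328 */
	unsigned int size     = 12;		    /* probe_read_str() of "/etc/passwd" */

	len -= 256 - size;  /* drop the unused tail of value[]: 328 - 244 = 84 */
	len &= 256 - 1;	    /* power-of-two mask, presumably so the BPF verifier
			       sees a bounded output size: 84 & 255 = 84 */
	printf("len for perf_event_output: %u\n", len);
	return 0;
}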