From 62aa5790cec89108a23052cc2f00fdd31f9adbac Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Mon, 31 Mar 2025 20:36:17 +0000 Subject: bpf: Fix a comment describing bpf_attr The map_fd field of the bpf_attr union is used in the BPF_MAP_FREEZE syscall. Explicitly mention this in the comments. Signed-off-by: Anton Protopopov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250331203618.1973691-2-a.s.protopopov@gmail.com --- tools/include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 28705ae67784..07ee73cdf97b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1506,7 +1506,7 @@ union bpf_attr { __s32 map_token_fd; }; - struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ + struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */ __u32 map_fd; __aligned_u64 key; union { -- cgit v1.2.3 From b412fd6bcc4c1696b0674434f56b198950c0e2bf Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 8 Apr 2025 11:00:04 +0200 Subject: bpf: Clarify role of BPF_F_RECOMPUTE_CSUM BPF_F_RECOMPUTE_CSUM doesn't update the actual L3 and L4 checksums in the packet, but simply updates skb->csum (according to skb->ip_summed). This patch clarifies that to avoid confusions. Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/ff6895d42936f03dbb82334d8bcfd50e00c79086.1744102490.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 14 +++++++++----- tools/include/uapi/linux/bpf.h | 14 +++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) (limited to 'tools/include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 07ee73cdf97b..14ef3db844fa 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1995,11 +1995,15 @@ union bpf_attr { * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet - * associated to *skb*, at *offset*. *flags* are a combination of - * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the - * checksum for the packet after storing the bytes) and - * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ - * **->swhash** and *skb*\ **->l4hash** to 0). + * associated to *skb*, at *offset*. The *flags* are a combination + * of the following values: + * + * **BPF_F_RECOMPUTE_CSUM** + * Automatically update *skb*\ **->csum** after storing the + * bytes. + * **BPF_F_INVALIDATE_HASH** + * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\ + * **->l4hash** to 0. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 07ee73cdf97b..14ef3db844fa 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1995,11 +1995,15 @@ union bpf_attr { * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet - * associated to *skb*, at *offset*. *flags* are a combination of - * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the - * checksum for the packet after storing the bytes) and - * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ - * **->swhash** and *skb*\ **->l4hash** to 0). + * associated to *skb*, at *offset*. The *flags* are a combination + * of the following values: + * + * **BPF_F_RECOMPUTE_CSUM** + * Automatically update *skb*\ **->csum** after storing the + * bytes. + * **BPF_F_INVALIDATE_HASH** + * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\ + * **->l4hash** to 0. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers -- cgit v1.2.3 From 5a15a050df714959f0d5a57ac3201bd1c6594984 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 8 Apr 2025 11:00:51 +0200 Subject: bpf: Clarify the meaning of BPF_F_PSEUDO_HDR In the bpf_l4_csum_replace helper, the BPF_F_PSEUDO_HDR flag should only be set if the modified header field is part of the pseudo-header. If you modify for example the UDP ports and pass BPF_F_PSEUDO_HDR, inet_proto_csum_replace4 will update skb->csum even though it shouldn't (the port and the UDP checksum updates null each other). Signed-off-by: Paul Chaignon Link: https://lore.kernel.org/r/5126ef84ba75425b689482cbc98bffe75e5d8ab0.1744102490.git.paul.chaignon@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- tools/include/uapi/linux/bpf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 14ef3db844fa..71d5ac83cf5d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2055,7 +2055,7 @@ union bpf_attr { * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates - * the checksum is to be computed against a pseudo-header. + * that the modified header field is part of the pseudo-header. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 14ef3db844fa..71d5ac83cf5d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2055,7 +2055,7 @@ union bpf_attr { * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates - * the checksum is to be computed against a pseudo-header. + * that the modified header field is part of the pseudo-header. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more -- cgit v1.2.3 From 9c138ac9392228835b520fd4dbb07e636b34a867 Mon Sep 17 00:00:00 2001 From: Jemmy Wong Date: Fri, 11 Apr 2025 15:36:24 +0800 Subject: tools/nolibc/types.h: fix mismatched parenthesis in minor() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix an imbalance where opening parentheses exceed closing ones. Fixes: eba6d00d38e7c ("tools/nolibc/types: move makedev to types.h and make it a macro") Signed-off-by: Jemmy Wong Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250411073624.22153-1-jemmywong512@gmail.com Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index b26a5d0c417c..32d0929c633b 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -201,7 +201,7 @@ struct stat { /* WARNING, it only deals with the 4096 first majors and 256 first minors */ #define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff))) #define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff)) -#define minor(dev) ((unsigned int)(((dev) & 0xff)) +#define minor(dev) ((unsigned int)((dev) & 0xff)) #ifndef offsetof #define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD) -- cgit v1.2.3 From 8e1930296f921d3246f331c394c0516feb36993a Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 26 Feb 2025 15:05:17 +0100 Subject: tools/nolibc: Add support for SPARC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for 32bit and 64bit SPARC to nolibc. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Tested-by: Sebastian Andrzej Siewior # UltraSparc T4 (Niagara4) Link: https://lore.kernel.org/lkml/20250322-nolibc-sparc-v2-1-89af018c6296@weissschuh.net/ --- tools/include/nolibc/arch-sparc.h | 191 ++++++++++++++++++++++++++++ tools/include/nolibc/arch.h | 2 + tools/testing/selftests/nolibc/Makefile | 11 ++ tools/testing/selftests/nolibc/run-tests.sh | 2 + 4 files changed, 206 insertions(+) create mode 100644 tools/include/nolibc/arch-sparc.h (limited to 'tools/include') diff --git a/tools/include/nolibc/arch-sparc.h b/tools/include/nolibc/arch-sparc.h new file mode 100644 index 000000000000..1435172f3dfe --- /dev/null +++ b/tools/include/nolibc/arch-sparc.h @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * SPARC (32bit and 64bit) specific definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh + */ + +#ifndef _NOLIBC_ARCH_SPARC_H +#define _NOLIBC_ARCH_SPARC_H + +#include + +#include "compiler.h" +#include "crt.h" + +/* + * Syscalls for SPARC: + * - registers are native word size + * - syscall number is passed in g1 + * - arguments are in o0-o5 + * - the system call is performed by calling a trap instruction + * - syscall return value is in o0 + * - syscall error flag is in the carry bit of the processor status register + */ + +#ifdef __arch64__ + +#define _NOLIBC_SYSCALL "t 0x6d\n" \ + "bcs,a %%xcc, 1f\n" \ + "sub %%g0, %%o0, %%o0\n" \ + "1:\n" + +#else + +#define _NOLIBC_SYSCALL "t 0x10\n" \ + "bcs,a 1f\n" \ + "sub %%g0, %%o0, %%o0\n" \ + "1:\n" + +#endif /* __arch64__ */ + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0"); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + register long _arg5 __asm__ ("o4") = (long)(arg5); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + register long _arg5 __asm__ ("o4") = (long)(arg5); \ + register long _arg6 __asm__ ("o5") = (long)(arg6); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* startup code */ +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +{ + __asm__ volatile ( + /* + * Save argc pointer to o0, as arg1 of _start_c. + * Account for the window save area, which is 16 registers wide. + */ +#ifdef __arch64__ + "add %sp, 128 + 2047, %o0\n" /* on sparc64 / v9 the stack is offset by 2047 */ +#else + "add %sp, 64, %o0\n" +#endif + "b,a _start_c\n" /* transfer to c runtime */ + ); + __nolibc_entrypoint_epilogue(); +} + +static pid_t getpid(void); + +static __attribute__((unused)) +pid_t sys_fork(void) +{ + pid_t parent, ret; + + parent = getpid(); + ret = my_syscall0(__NR_fork); + + /* The syscall returns the parent pid in the child instead of 0 */ + if (ret == parent) + return 0; + else + return ret; +} +#define sys_fork sys_fork + +#endif /* _NOLIBC_ARCH_SPARC_H */ diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h index 8a2c143c0fba..b8c1da9a88d1 100644 --- a/tools/include/nolibc/arch.h +++ b/tools/include/nolibc/arch.h @@ -33,6 +33,8 @@ #include "arch-s390.h" #elif defined(__loongarch__) #include "arch-loongarch.h" +#elif defined(__sparc__) +#include "arch-sparc.h" #else #error Unsupported Architecture #endif diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 89ee265f7467..aa05c1faac20 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -56,6 +56,8 @@ ARCH_mips32be = mips ARCH_riscv32 = riscv ARCH_riscv64 = riscv ARCH_s390x = s390 +ARCH_sparc32 = sparc +ARCH_sparc64 = sparc ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) # kernel image names by architecture @@ -76,6 +78,8 @@ IMAGE_riscv64 = arch/riscv/boot/Image IMAGE_s390x = arch/s390/boot/bzImage IMAGE_s390 = arch/s390/boot/bzImage IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi +IMAGE_sparc32 = arch/sparc/boot/image +IMAGE_sparc64 = arch/sparc/boot/image IMAGE = $(objtree)/$(IMAGE_$(XARCH)) IMAGE_NAME = $(notdir $(IMAGE)) @@ -97,6 +101,8 @@ DEFCONFIG_riscv64 = defconfig DEFCONFIG_s390x = defconfig DEFCONFIG_s390 = defconfig compat.config DEFCONFIG_loongarch = defconfig +DEFCONFIG_sparc32 = sparc32_defconfig +DEFCONFIG_sparc64 = sparc64_defconfig DEFCONFIG = $(DEFCONFIG_$(XARCH)) EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) @@ -122,6 +128,8 @@ QEMU_ARCH_riscv64 = riscv64 QEMU_ARCH_s390x = s390x QEMU_ARCH_s390 = s390x QEMU_ARCH_loongarch = loongarch64 +QEMU_ARCH_sparc32 = sparc +QEMU_ARCH_sparc64 = sparc64 QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) QEMU_ARCH_USER_ppc64le = ppc64le @@ -152,6 +160,8 @@ QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_T QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) # OUTPUT is only set when run from the main makefile, otherwise @@ -174,6 +184,7 @@ CFLAGS_s390x = -m64 CFLAGS_s390 = -m31 CFLAGS_mips32le = -EL -mabi=32 -fPIC CFLAGS_mips32be = -EB -mabi=32 +CFLAGS_sparc32 = $(call cc-option,-m32) ifeq ($(origin XARCH),command line) CFLAGS_XARCH = $(CFLAGS_$(XARCH)) endif diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 0299a0912d40..040956a9f5b8 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -25,6 +25,7 @@ all_archs=( riscv32 riscv64 s390x s390 loongarch + sparc32 sparc64 ) archs="${all_archs[@]}" @@ -111,6 +112,7 @@ crosstool_arch() { loongarch) echo loongarch64;; mips*) echo mips;; s390*) echo s390;; + sparc*) echo sparc64;; *) echo "$1";; esac } -- cgit v1.2.3 From 60ccc16f530a480abfde399bf80779e5b021b3fe Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sun, 16 Mar 2025 20:46:07 +0100 Subject: tools/nolibc: drop manual stack pointer alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stack pointer is already aligned by the kernel to a multiple of 16. See STACK_ROUND() in fs/binfmt_elf.c. The manual realignment is unnecessary, drop it. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250316-nolibc-sp-align-v1-1-1e1fb073ca1e@weissschuh.net --- tools/include/nolibc/arch-aarch64.h | 1 - tools/include/nolibc/arch-arm.h | 2 -- tools/include/nolibc/arch-i386.h | 2 -- tools/include/nolibc/arch-loongarch.h | 7 ------- tools/include/nolibc/arch-powerpc.h | 2 -- tools/include/nolibc/arch-riscv.h | 1 - tools/include/nolibc/arch-x86_64.h | 1 - 7 files changed, 16 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h index 06fdef7b291a..937a348da42e 100644 --- a/tools/include/nolibc/arch-aarch64.h +++ b/tools/include/nolibc/arch-aarch64.h @@ -146,7 +146,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s { __asm__ volatile ( "mov x0, sp\n" /* save stack pointer to x0, as arg1 of _start_c */ - "and sp, x0, -16\n" /* sp must be 16-byte aligned in the callee */ "bl _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h index 6180ff99ab43..1f66e7e5a444 100644 --- a/tools/include/nolibc/arch-arm.h +++ b/tools/include/nolibc/arch-arm.h @@ -189,8 +189,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s { __asm__ volatile ( "mov r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */ - "and ip, r0, #-8\n" /* sp must be 8-byte aligned in the callee */ - "mov sp, ip\n" "bl _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h index ff5afc35bbd8..7c9b38e96418 100644 --- a/tools/include/nolibc/arch-i386.h +++ b/tools/include/nolibc/arch-i386.h @@ -167,8 +167,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s __asm__ volatile ( "xor %ebp, %ebp\n" /* zero the stack frame */ "mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */ - "add $12, %esp\n" /* avoid over-estimating after the 'and' & 'sub' below */ - "and $-16, %esp\n" /* the %esp must be 16-byte aligned on 'call' */ "sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */ "push %eax\n" /* push arg1 on stack to support plain stack modes too */ "call _start_c\n" /* transfer to c runtime */ diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h index fb519545959e..5511705303ea 100644 --- a/tools/include/nolibc/arch-loongarch.h +++ b/tools/include/nolibc/arch-loongarch.h @@ -142,18 +142,11 @@ _arg1; \ }) -#if __loongarch_grlen == 32 -#define LONG_BSTRINS "bstrins.w" -#else /* __loongarch_grlen == 64 */ -#define LONG_BSTRINS "bstrins.d" -#endif - /* startup code */ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ - LONG_BSTRINS " $sp, $zero, 3, 0\n" /* $sp must be 16-byte aligned */ "bl _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h index ee2fdb8d601d..204564bbcd32 100644 --- a/tools/include/nolibc/arch-powerpc.h +++ b/tools/include/nolibc/arch-powerpc.h @@ -201,7 +201,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s __asm__ volatile ( "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */ - "clrrdi 1, 1, 4\n" /* align the stack to 16 bytes */ "li 0, 0\n" /* zero the frame pointer */ "stdu 1, -32(1)\n" /* the initial stack frame */ "bl _start_c\n" /* transfer to c runtime */ @@ -209,7 +208,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s #else __asm__ volatile ( "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */ - "clrrwi 1, 1, 4\n" /* align the stack to 16 bytes */ "li 0, 0\n" /* zero the frame pointer */ "stwu 1, -16(1)\n" /* the initial stack frame */ "bl _start_c\n" /* transfer to c runtime */ diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h index 8827bf936212..885383a86c38 100644 --- a/tools/include/nolibc/arch-riscv.h +++ b/tools/include/nolibc/arch-riscv.h @@ -148,7 +148,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s "lla gp, __global_pointer$\n" ".option pop\n" "mv a0, sp\n" /* save stack pointer to a0, as arg1 of _start_c */ - "andi sp, a0, -16\n" /* sp must be 16-byte aligned */ "call _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h index 1e40620a2b33..67305e24dbef 100644 --- a/tools/include/nolibc/arch-x86_64.h +++ b/tools/include/nolibc/arch-x86_64.h @@ -166,7 +166,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s __asm__ volatile ( "xor %ebp, %ebp\n" /* zero the stack frame */ "mov %rsp, %rdi\n" /* save stack pointer to %rdi, as arg1 of _start_c */ - "and $-16, %rsp\n" /* %rsp must be 16-byte aligned before call */ "call _start_c\n" /* transfer to c runtime */ "hlt\n" /* ensure it does not return */ ); -- cgit v1.2.3 From 8399f14666688be8d84cb3ccbec6ee790c020b36 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 19 Apr 2025 12:46:18 +0200 Subject: tools/nolibc: add __nolibc_has_feature() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain compiler features are signaled via the __has_feature() preprocessor builtin. Add a nolibc wrapper for it, similar to __nolibc_has_attribute(). Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250419-nolibc-ubsan-v2-1-060b8a016917@weissschuh.net --- tools/include/nolibc/compiler.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h index fa1f547e7f13..e6d6dc116e43 100644 --- a/tools/include/nolibc/compiler.h +++ b/tools/include/nolibc/compiler.h @@ -12,6 +12,12 @@ # define __nolibc_has_attribute(attr) 0 #endif +#if defined(__has_feature) +# define __nolibc_has_feature(feature) __has_feature(feature) +#else +# define __nolibc_has_feature(feature) 0 +#endif + #if __nolibc_has_attribute(naked) # define __nolibc_entrypoint __attribute__((naked)) # define __nolibc_entrypoint_epilogue() -- cgit v1.2.3 From f4152715dcd5c580dca6ea628ec03785bba44b9a Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 19 Apr 2025 12:46:19 +0200 Subject: tools/nolibc: add __nolibc_aligned() and __nolibc_aligned_as() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a convenience macro around __attribute__((aligned)). Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250419-nolibc-ubsan-v2-2-060b8a016917@weissschuh.net --- tools/include/nolibc/compiler.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h index e6d6dc116e43..369cfb5a0e78 100644 --- a/tools/include/nolibc/compiler.h +++ b/tools/include/nolibc/compiler.h @@ -18,6 +18,9 @@ # define __nolibc_has_feature(feature) 0 #endif +#define __nolibc_aligned(alignment) __attribute__((aligned(alignment))) +#define __nolibc_aligned_as(type) __nolibc_aligned(__alignof__(type)) + #if __nolibc_has_attribute(naked) # define __nolibc_entrypoint __attribute__((naked)) # define __nolibc_entrypoint_epilogue() -- cgit v1.2.3 From 9fca5554af70fd86fb1ed78362d86ebd1a4be2ef Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 19 Apr 2025 12:46:20 +0200 Subject: tools/nolibc: disable function sanitizer for _start_c() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both constructors and main() may be executed with different function signatures than they are actually using. This is intentional but trips up UBSAN. Disable the function sanitizer of UBSAN in _start_c(). Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250419-nolibc-ubsan-v2-3-060b8a016917@weissschuh.net --- tools/include/nolibc/crt.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h index c4b10103bbec..961cfe777c35 100644 --- a/tools/include/nolibc/crt.h +++ b/tools/include/nolibc/crt.h @@ -7,6 +7,8 @@ #ifndef _NOLIBC_CRT_H #define _NOLIBC_CRT_H +#include "compiler.h" + char **environ __attribute__((weak)); const unsigned long *_auxv __attribute__((weak)); @@ -25,6 +27,9 @@ extern void (*const __fini_array_end[])(void) __attribute__((weak)); void _start_c(long *sp); __attribute__((weak,used)) +#if __nolibc_has_feature(undefined_behavior_sanitizer) + __attribute__((no_sanitize("function"))) +#endif void _start_c(long *sp) { long argc; -- cgit v1.2.3 From 0e75768ba24d669dbf76530e21fd51cfe2fbd2a9 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 19 Apr 2025 12:46:21 +0200 Subject: tools/nolibc: properly align dirent buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As byte buffer is overlaid with a 'struct dirent64'. it has to satisfy the structs alignment requirements. Signed-off-by: Thomas Weißschuh Fixes: 665fa8dea90d ("tools/nolibc: add support for directory access") Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250419-nolibc-ubsan-v2-4-060b8a016917@weissschuh.net --- tools/include/nolibc/dirent.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h index c5c30d0dd680..946a697e98e4 100644 --- a/tools/include/nolibc/dirent.h +++ b/tools/include/nolibc/dirent.h @@ -7,6 +7,7 @@ #ifndef _NOLIBC_DIRENT_H #define _NOLIBC_DIRENT_H +#include "compiler.h" #include "stdint.h" #include "types.h" @@ -58,7 +59,7 @@ int closedir(DIR *dirp) static __attribute__((unused)) int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result) { - char buf[sizeof(struct linux_dirent64) + NAME_MAX + 1]; + char buf[sizeof(struct linux_dirent64) + NAME_MAX + 1] __nolibc_aligned_as(struct linux_dirent64); struct linux_dirent64 *ldir = (void *)buf; intptr_t i = (intptr_t)dirp; int fd, ret; -- cgit v1.2.3 From 4d231a7df1a85c7572b67a4666cb73adb977fbf6 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 19 Apr 2025 12:46:22 +0200 Subject: tools/nolibc: fix integer overflow in i{64,}toa_r() and MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In twos complement the most negative number can not be negated. Fixes: b1c21e7d99cd ("tools/nolibc/stdlib: add i64toa() and u64toa()") Fixes: 66c397c4d2e1 ("tools/nolibc/stdlib: replace the ltoa() function with more efficient ones") Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250419-nolibc-ubsan-v2-5-060b8a016917@weissschuh.net --- tools/include/nolibc/stdlib.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h index 86ad378ab1ea..32b3038002c1 100644 --- a/tools/include/nolibc/stdlib.h +++ b/tools/include/nolibc/stdlib.h @@ -275,7 +275,7 @@ int itoa_r(long in, char *buffer) int len = 0; if (in < 0) { - in = -in; + in = -(unsigned long)in; *(ptr++) = '-'; len++; } @@ -411,7 +411,7 @@ int i64toa_r(int64_t in, char *buffer) int len = 0; if (in < 0) { - in = -in; + in = -(uint64_t)in; *(ptr++) = '-'; len++; } -- cgit v1.2.3 From 060525302ba9ee56dfbcb4149395a1e510a4ced9 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 16 Apr 2025 14:06:16 +0200 Subject: tools/nolibc: prepare for headers in subdirectories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To support headers in subdirectories (like sys/), their subdirectory needs to be preserved during installation into the sysroot. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250416-nolibc-split-sys-v1-1-a069a3f1d145@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index f9702877ac21..f562cb53be10 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -72,7 +72,7 @@ help: headers: $(Q)mkdir -p $(OUTPUT)sysroot $(Q)mkdir -p $(OUTPUT)sysroot/include - $(Q)cp $(all_files) $(OUTPUT)sysroot/include/ + $(Q)cp --parents $(all_files) $(OUTPUT)sysroot/include/ $(Q)if [ "$(ARCH)" = "x86" ]; then \ sed -e \ 's,^#ifndef _NOLIBC_ARCH_X86_64_H,#if !defined(_NOLIBC_ARCH_X86_64_H) \&\& defined(__x86_64__),' \ -- cgit v1.2.3 From 2b45ceb915b004799e79f4ff68e63b7f88a7d195 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 16 Apr 2025 14:06:17 +0200 Subject: tools/nolibc: add elf.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The UAPI header do already provide an elf.h implementation. Reexport it under its libc name. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250416-nolibc-split-sys-v1-2-a069a3f1d145@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/elf.h | 15 +++++++++++++++ tools/include/nolibc/nolibc.h | 1 + 3 files changed, 17 insertions(+) create mode 100644 tools/include/nolibc/elf.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index f562cb53be10..fd76d267d79a 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -30,6 +30,7 @@ all_files := \ crt.h \ ctype.h \ dirent.h \ + elf.h \ errno.h \ limits.h \ nolibc.h \ diff --git a/tools/include/nolibc/elf.h b/tools/include/nolibc/elf.h new file mode 100644 index 000000000000..beb0b3a87569 --- /dev/null +++ b/tools/include/nolibc/elf.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Shim elf.h header for NOLIBC. + * Copyright (C) 2025 Thomas Weißschuh + */ + +#ifndef _NOLIBC_SYS_ELF_H +#define _NOLIBC_SYS_ELF_H + +#include + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#endif /* _NOLIBC_SYS_ELF_H */ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 70872401aca8..127f0d9068c6 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -97,6 +97,7 @@ #include "types.h" #include "sys.h" #include "ctype.h" +#include "elf.h" #include "signal.h" #include "unistd.h" #include "stdio.h" -- cgit v1.2.3 From ecc091d93a221b8541c94aeeeb741e9e1c39073f Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 16 Apr 2025 14:06:18 +0200 Subject: tools/nolibc: move open() and friends to fcntl.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects these definitions. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250416-nolibc-split-sys-v1-3-a069a3f1d145@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/dirent.h | 1 + tools/include/nolibc/fcntl.h | 69 +++++++++++++++++++++++++++++++++++++++++++ tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys.h | 52 -------------------------------- 5 files changed, 72 insertions(+), 52 deletions(-) create mode 100644 tools/include/nolibc/fcntl.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index fd76d267d79a..2132e4f4d216 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -32,6 +32,7 @@ all_files := \ dirent.h \ elf.h \ errno.h \ + fcntl.h \ limits.h \ nolibc.h \ signal.h \ diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h index 946a697e98e4..6c60ec4ba27b 100644 --- a/tools/include/nolibc/dirent.h +++ b/tools/include/nolibc/dirent.h @@ -10,6 +10,7 @@ #include "compiler.h" #include "stdint.h" #include "types.h" +#include "fcntl.h" #include diff --git a/tools/include/nolibc/fcntl.h b/tools/include/nolibc/fcntl.h new file mode 100644 index 000000000000..5feb08ad54a7 --- /dev/null +++ b/tools/include/nolibc/fcntl.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * fcntl definition for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + */ + +#ifndef _NOLIBC_FCNTL_H +#define _NOLIBC_FCNTL_H + +#include "arch.h" +#include "types.h" +#include "sys.h" + +/* + * int openat(int dirfd, const char *path, int flags[, mode_t mode]); + */ + +static __attribute__((unused)) +int sys_openat(int dirfd, const char *path, int flags, mode_t mode) +{ + return my_syscall4(__NR_openat, dirfd, path, flags, mode); +} + +static __attribute__((unused)) +int openat(int dirfd, const char *path, int flags, ...) +{ + mode_t mode = 0; + + if (flags & O_CREAT) { + va_list args; + + va_start(args, flags); + mode = va_arg(args, mode_t); + va_end(args); + } + + return __sysret(sys_openat(dirfd, path, flags, mode)); +} + +/* + * int open(const char *path, int flags[, mode_t mode]); + */ + +static __attribute__((unused)) +int sys_open(const char *path, int flags, mode_t mode) +{ + return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); +} + +static __attribute__((unused)) +int open(const char *path, int flags, ...) +{ + mode_t mode = 0; + + if (flags & O_CREAT) { + va_list args; + + va_start(args, flags); + mode = va_arg(args, mode_t); + va_end(args); + } + + return __sysret(sys_open(path, flags, mode)); +} + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#endif /* _NOLIBC_FCNTL_H */ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 127f0d9068c6..bb4183a8fdc4 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -106,6 +106,7 @@ #include "time.h" #include "stackprotector.h" #include "dirent.h" +#include "fcntl.h" /* Used by programs to avoid std includes */ #define NOLIBC diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 08c1c074bec8..5fa351e6a351 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -764,58 +764,6 @@ int mount(const char *src, const char *tgt, return __sysret(sys_mount(src, tgt, fst, flags, data)); } -/* - * int openat(int dirfd, const char *path, int flags[, mode_t mode]); - */ - -static __attribute__((unused)) -int sys_openat(int dirfd, const char *path, int flags, mode_t mode) -{ - return my_syscall4(__NR_openat, dirfd, path, flags, mode); -} - -static __attribute__((unused)) -int openat(int dirfd, const char *path, int flags, ...) -{ - mode_t mode = 0; - - if (flags & O_CREAT) { - va_list args; - - va_start(args, flags); - mode = va_arg(args, mode_t); - va_end(args); - } - - return __sysret(sys_openat(dirfd, path, flags, mode)); -} - -/* - * int open(const char *path, int flags[, mode_t mode]); - */ - -static __attribute__((unused)) -int sys_open(const char *path, int flags, mode_t mode) -{ - return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); -} - -static __attribute__((unused)) -int open(const char *path, int flags, ...) -{ - mode_t mode = 0; - - if (flags & O_CREAT) { - va_list args; - - va_start(args, flags); - mode = va_arg(args, mode_t); - va_end(args); - } - - return __sysret(sys_open(path, flags, mode)); -} - /* * int pipe2(int pipefd[2], int flags); -- cgit v1.2.3 From 9e67941dde6e7f2b0e46ca8e2c87d3fb12e5ead5 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 16 Apr 2025 14:06:19 +0200 Subject: tools/nolibc: move getauxval() to sys/auxv.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects the definition. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250416-nolibc-split-sys-v1-4-a069a3f1d145@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/stdlib.h | 26 -------------------------- tools/include/nolibc/sys/auxv.h | 41 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 26 deletions(-) create mode 100644 tools/include/nolibc/sys/auxv.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index 2132e4f4d216..a3781f396925 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -44,6 +44,7 @@ all_files := \ stdlib.h \ string.h \ sys.h \ + sys/auxv.h \ time.h \ types.h \ unistd.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index bb4183a8fdc4..0d8c49e0dddc 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -96,6 +96,7 @@ #include "arch.h" #include "types.h" #include "sys.h" +#include "sys/auxv.h" #include "ctype.h" #include "elf.h" #include "signal.h" diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h index 32b3038002c1..69cf1d4418f1 100644 --- a/tools/include/nolibc/stdlib.h +++ b/tools/include/nolibc/stdlib.h @@ -102,32 +102,6 @@ char *getenv(const char *name) return NULL; } -static __attribute__((unused)) -unsigned long getauxval(unsigned long type) -{ - const unsigned long *auxv = _auxv; - unsigned long ret; - - if (!auxv) - return 0; - - while (1) { - if (!auxv[0] && !auxv[1]) { - ret = 0; - break; - } - - if (auxv[0] == type) { - ret = auxv[1]; - break; - } - - auxv += 2; - } - - return ret; -} - static __attribute__((unused)) void *malloc(size_t len) { diff --git a/tools/include/nolibc/sys/auxv.h b/tools/include/nolibc/sys/auxv.h new file mode 100644 index 000000000000..04c2b9cbe51a --- /dev/null +++ b/tools/include/nolibc/sys/auxv.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * auxv definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + */ + +#ifndef _NOLIBC_SYS_AUXV_H +#define _NOLIBC_SYS_AUXV_H + +#include "../crt.h" + +static __attribute__((unused)) +unsigned long getauxval(unsigned long type) +{ + const unsigned long *auxv = _auxv; + unsigned long ret; + + if (!auxv) + return 0; + + while (1) { + if (!auxv[0] && !auxv[1]) { + ret = 0; + break; + } + + if (auxv[0] == type) { + ret = auxv[1]; + break; + } + + auxv += 2; + } + + return ret; +} + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#endif /* _NOLIBC_SYS_AUXV_H */ -- cgit v1.2.3 From cce273161e7895f45208066154d6726bedba35e5 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 16 Apr 2025 14:06:20 +0200 Subject: tools/nolibc: move mmap() and friends to sys/mman.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects these definitions. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250416-nolibc-split-sys-v1-5-a069a3f1d145@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys.h | 48 ------------------------------- tools/include/nolibc/sys/mman.h | 63 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 48 deletions(-) create mode 100644 tools/include/nolibc/sys/mman.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index a3781f396925..ca8817e84c3a 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -45,6 +45,7 @@ all_files := \ string.h \ sys.h \ sys/auxv.h \ + sys/mman.h \ time.h \ types.h \ unistd.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 0d8c49e0dddc..ffdf501db1b6 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -97,6 +97,7 @@ #include "types.h" #include "sys.h" #include "sys/auxv.h" +#include "sys/mman.h" #include "ctype.h" #include "elf.h" #include "signal.h" diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 5fa351e6a351..13e6b2479fbf 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -24,7 +24,6 @@ #include #include -#include "arch.h" #include "errno.h" #include "stdarg.h" #include "types.h" @@ -697,53 +696,6 @@ int mknod(const char *path, mode_t mode, dev_t dev) return __sysret(sys_mknod(path, mode, dev)); } -#ifndef sys_mmap -static __attribute__((unused)) -void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, - off_t offset) -{ - int n; - -#if defined(__NR_mmap2) - n = __NR_mmap2; - offset >>= 12; -#else - n = __NR_mmap; -#endif - - return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset); -} -#endif - -/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret() - * which returns -1 upon error and still satisfy user land that checks for - * MAP_FAILED. - */ - -static __attribute__((unused)) -void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) -{ - void *ret = sys_mmap(addr, length, prot, flags, fd, offset); - - if ((unsigned long)ret >= -4095UL) { - SET_ERRNO(-(long)ret); - ret = MAP_FAILED; - } - return ret; -} - -static __attribute__((unused)) -int sys_munmap(void *addr, size_t length) -{ - return my_syscall2(__NR_munmap, addr, length); -} - -static __attribute__((unused)) -int munmap(void *addr, size_t length) -{ - return __sysret(sys_munmap(addr, length)); -} - /* * int mount(const char *source, const char *target, * const char *fstype, unsigned long flags, diff --git a/tools/include/nolibc/sys/mman.h b/tools/include/nolibc/sys/mman.h new file mode 100644 index 000000000000..ad9d06b6b791 --- /dev/null +++ b/tools/include/nolibc/sys/mman.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * mm definition for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + */ + +#ifndef _NOLIBC_SYS_MMAN_H +#define _NOLIBC_SYS_MMAN_H + +#include "../arch.h" +#include "../sys.h" + +#ifndef sys_mmap +static __attribute__((unused)) +void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, + off_t offset) +{ + int n; + +#if defined(__NR_mmap2) + n = __NR_mmap2; + offset >>= 12; +#else + n = __NR_mmap; +#endif + + return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset); +} +#endif + +/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret() + * which returns -1 upon error and still satisfy user land that checks for + * MAP_FAILED. + */ + +static __attribute__((unused)) +void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) +{ + void *ret = sys_mmap(addr, length, prot, flags, fd, offset); + + if ((unsigned long)ret >= -4095UL) { + SET_ERRNO(-(long)ret); + ret = MAP_FAILED; + } + return ret; +} + +static __attribute__((unused)) +int sys_munmap(void *addr, size_t length) +{ + return my_syscall2(__NR_munmap, addr, length); +} + +static __attribute__((unused)) +int munmap(void *addr, size_t length) +{ + return __sysret(sys_munmap(addr, length)); +} + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#endif /* _NOLIBC_SYS_MMAN_H */ -- cgit v1.2.3 From c6e6c2c4d71035038d0ef07a465190cd35e193e8 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 16 Apr 2025 14:06:21 +0200 Subject: tools/nolibc: move stat() and friends to sys/stat.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects these definitions. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250416-nolibc-split-sys-v1-6-a069a3f1d145@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys.h | 56 ------------------------------- tools/include/nolibc/sys/stat.h | 74 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 76 insertions(+), 56 deletions(-) create mode 100644 tools/include/nolibc/sys/stat.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index ca8817e84c3a..747d73b45368 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -46,6 +46,7 @@ all_files := \ sys.h \ sys/auxv.h \ sys/mman.h \ + sys/stat.h \ time.h \ types.h \ unistd.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index ffdf501db1b6..8296cbbeebe9 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -98,6 +98,7 @@ #include "sys.h" #include "sys/auxv.h" #include "sys/mman.h" +#include "sys/stat.h" #include "ctype.h" #include "elf.h" #include "signal.h" diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 13e6b2479fbf..c76dc8014728 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -968,62 +968,6 @@ pid_t setsid(void) return __sysret(sys_setsid()); } -/* - * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf); - * int stat(const char *path, struct stat *buf); - */ - -static __attribute__((unused)) -int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) -{ -#ifdef __NR_statx - return my_syscall5(__NR_statx, fd, path, flags, mask, buf); -#else - return __nolibc_enosys(__func__, fd, path, flags, mask, buf); -#endif -} - -static __attribute__((unused)) -int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) -{ - return __sysret(sys_statx(fd, path, flags, mask, buf)); -} - - -static __attribute__((unused)) -int stat(const char *path, struct stat *buf) -{ - struct statx statx; - long ret; - - ret = __sysret(sys_statx(AT_FDCWD, path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx)); - if (ret == -1) - return ret; - - buf->st_dev = ((statx.stx_dev_minor & 0xff) - | (statx.stx_dev_major << 8) - | ((statx.stx_dev_minor & ~0xff) << 12)); - buf->st_ino = statx.stx_ino; - buf->st_mode = statx.stx_mode; - buf->st_nlink = statx.stx_nlink; - buf->st_uid = statx.stx_uid; - buf->st_gid = statx.stx_gid; - buf->st_rdev = ((statx.stx_rdev_minor & 0xff) - | (statx.stx_rdev_major << 8) - | ((statx.stx_rdev_minor & ~0xff) << 12)); - buf->st_size = statx.stx_size; - buf->st_blksize = statx.stx_blksize; - buf->st_blocks = statx.stx_blocks; - buf->st_atim.tv_sec = statx.stx_atime.tv_sec; - buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec; - buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec; - buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec; - buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec; - buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec; - - return 0; -} - /* * int symlink(const char *old, const char *new); diff --git a/tools/include/nolibc/sys/stat.h b/tools/include/nolibc/sys/stat.h new file mode 100644 index 000000000000..0eaf5496ce23 --- /dev/null +++ b/tools/include/nolibc/sys/stat.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * stat definition for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + */ + +#ifndef _NOLIBC_SYS_STAT_H +#define _NOLIBC_SYS_STAT_H + +#include "../arch.h" +#include "../types.h" +#include "../sys.h" + +/* + * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf); + * int stat(const char *path, struct stat *buf); + */ + +static __attribute__((unused)) +int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +{ +#ifdef __NR_statx + return my_syscall5(__NR_statx, fd, path, flags, mask, buf); +#else + return __nolibc_enosys(__func__, fd, path, flags, mask, buf); +#endif +} + +static __attribute__((unused)) +int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +{ + return __sysret(sys_statx(fd, path, flags, mask, buf)); +} + + +static __attribute__((unused)) +int stat(const char *path, struct stat *buf) +{ + struct statx statx; + long ret; + + ret = __sysret(sys_statx(AT_FDCWD, path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx)); + if (ret == -1) + return ret; + + buf->st_dev = ((statx.stx_dev_minor & 0xff) + | (statx.stx_dev_major << 8) + | ((statx.stx_dev_minor & ~0xff) << 12)); + buf->st_ino = statx.stx_ino; + buf->st_mode = statx.stx_mode; + buf->st_nlink = statx.stx_nlink; + buf->st_uid = statx.stx_uid; + buf->st_gid = statx.stx_gid; + buf->st_rdev = ((statx.stx_rdev_minor & 0xff) + | (statx.stx_rdev_major << 8) + | ((statx.stx_rdev_minor & ~0xff) << 12)); + buf->st_size = statx.stx_size; + buf->st_blksize = statx.stx_blksize; + buf->st_blocks = statx.stx_blocks; + buf->st_atim.tv_sec = statx.stx_atime.tv_sec; + buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec; + buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec; + buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec; + buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec; + buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec; + + return 0; +} + + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#endif /* _NOLIBC_SYS_STAT_H */ -- cgit v1.2.3 From 0fd55773f47124ee27a1d201bb07649a056c58cc Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 16 Apr 2025 14:06:22 +0200 Subject: tools/nolibc: move syscall() to sys/syscall.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects the definition. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250416-nolibc-split-sys-v1-7-a069a3f1d145@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys/syscall.h | 19 +++++++++++++++++++ tools/include/nolibc/unistd.h | 6 ------ 4 files changed, 21 insertions(+), 6 deletions(-) create mode 100644 tools/include/nolibc/sys/syscall.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index 747d73b45368..65c3b90f8ba9 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -47,6 +47,7 @@ all_files := \ sys/auxv.h \ sys/mman.h \ sys/stat.h \ + sys/syscall.h \ time.h \ types.h \ unistd.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 8296cbbeebe9..cb5705d55d80 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -99,6 +99,7 @@ #include "sys/auxv.h" #include "sys/mman.h" #include "sys/stat.h" +#include "sys/syscall.h" #include "ctype.h" #include "elf.h" #include "signal.h" diff --git a/tools/include/nolibc/sys/syscall.h b/tools/include/nolibc/sys/syscall.h new file mode 100644 index 000000000000..59efdec8fd1c --- /dev/null +++ b/tools/include/nolibc/sys/syscall.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * syscall() definition for NOLIBC + * Copyright (C) 2024 Thomas Weißschuh + */ + +#ifndef _NOLIBC_SYS_SYSCALL_H +#define _NOLIBC_SYS_SYSCALL_H + +#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N +#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0) +#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__)) +#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__) +#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__) + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#endif /* _NOLIBC_SYS_SYSCALL_H */ diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h index e38f3660c051..ac7d53d986cd 100644 --- a/tools/include/nolibc/unistd.h +++ b/tools/include/nolibc/unistd.h @@ -56,12 +56,6 @@ int tcsetpgrp(int fd, pid_t pid) return ioctl(fd, TIOCSPGRP, &pid); } -#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N -#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0) -#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__)) -#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__) -#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__) - /* make sure to include all global symbols */ #include "nolibc.h" -- cgit v1.2.3 From face777a442bfb07017adf935dbb738a3504fdd2 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 16 Apr 2025 14:06:23 +0200 Subject: tools/nolibc: move gettimeofday() to sys/time.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects this definition. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250416-nolibc-split-sys-v1-8-a069a3f1d145@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys.h | 21 --------------------- tools/include/nolibc/sys/time.h | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 21 deletions(-) create mode 100644 tools/include/nolibc/sys/time.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index 65c3b90f8ba9..fd1fc769cbbe 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -48,6 +48,7 @@ all_files := \ sys/mman.h \ sys/stat.h \ sys/syscall.h \ + sys/time.h \ time.h \ types.h \ unistd.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index cb5705d55d80..d84e8610bf10 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -100,6 +100,7 @@ #include "sys/mman.h" #include "sys/stat.h" #include "sys/syscall.h" +#include "sys/time.h" #include "ctype.h" #include "elf.h" #include "signal.h" diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index c76dc8014728..aab0685af84f 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -487,27 +487,6 @@ int getpagesize(void) } -/* - * int gettimeofday(struct timeval *tv, struct timezone *tz); - */ - -static __attribute__((unused)) -int sys_gettimeofday(struct timeval *tv, struct timezone *tz) -{ -#ifdef __NR_gettimeofday - return my_syscall2(__NR_gettimeofday, tv, tz); -#else - return __nolibc_enosys(__func__, tv, tz); -#endif -} - -static __attribute__((unused)) -int gettimeofday(struct timeval *tv, struct timezone *tz) -{ - return __sysret(sys_gettimeofday(tv, tz)); -} - - /* * uid_t getuid(void); */ diff --git a/tools/include/nolibc/sys/time.h b/tools/include/nolibc/sys/time.h new file mode 100644 index 000000000000..1d326c05ee62 --- /dev/null +++ b/tools/include/nolibc/sys/time.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * time definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + */ + +#ifndef _NOLIBC_SYS_TIME_H +#define _NOLIBC_SYS_TIME_H + +#include "../arch.h" +#include "../sys.h" + +/* + * int gettimeofday(struct timeval *tv, struct timezone *tz); + */ + +static __attribute__((unused)) +int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ +#ifdef __NR_gettimeofday + return my_syscall2(__NR_gettimeofday, tv, tz); +#else + return __nolibc_enosys(__func__, tv, tz); +#endif +} + +static __attribute__((unused)) +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return __sysret(sys_gettimeofday(tv, tz)); +} + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#endif /* _NOLIBC_SYS_TIME_H */ -- cgit v1.2.3 From ffb94910c3fff8d9528e62d45171cabbb1cc083a Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 16 Apr 2025 14:06:24 +0200 Subject: tools/nolibc: add sys/types.h shim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects the header. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250416-nolibc-split-sys-v1-9-a069a3f1d145@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/sys/types.h | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 tools/include/nolibc/sys/types.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index fd1fc769cbbe..fec0d4eb2119 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -49,6 +49,7 @@ all_files := \ sys/stat.h \ sys/syscall.h \ sys/time.h \ + sys/types.h \ time.h \ types.h \ unistd.h \ diff --git a/tools/include/nolibc/sys/types.h b/tools/include/nolibc/sys/types.h new file mode 100644 index 000000000000..8a264a13275c --- /dev/null +++ b/tools/include/nolibc/sys/types.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * sys/types.h shim for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh + */ + +#include "../types.h" -- cgit v1.2.3 From 6d1724ec864b577b50fa9cf668e80390767f4136 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 16 Apr 2025 14:06:25 +0200 Subject: tools/nolibc: move wait() and friends to sys/wait.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects these definitions. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250416-nolibc-split-sys-v1-10-a069a3f1d145@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys.h | 53 ------------------------------ tools/include/nolibc/sys/wait.h | 71 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+), 53 deletions(-) create mode 100644 tools/include/nolibc/sys/wait.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index fec0d4eb2119..b5d4479abc3b 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -50,6 +50,7 @@ all_files := \ sys/syscall.h \ sys/time.h \ sys/types.h \ + sys/wait.h \ time.h \ types.h \ unistd.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index d84e8610bf10..e8843880e4df 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -101,6 +101,7 @@ #include "sys/stat.h" #include "sys/syscall.h" #include "sys/time.h" +#include "sys/wait.h" #include "ctype.h" #include "elf.h" #include "signal.h" diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index aab0685af84f..d07456d6e572 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -1054,59 +1054,6 @@ int unlink(const char *path) } -/* - * pid_t wait(int *status); - * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage); - * pid_t waitpid(pid_t pid, int *status, int options); - */ - -static __attribute__((unused)) -pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ -#ifdef __NR_wait4 - return my_syscall4(__NR_wait4, pid, status, options, rusage); -#else - return __nolibc_enosys(__func__, pid, status, options, rusage); -#endif -} - -static __attribute__((unused)) -pid_t wait(int *status) -{ - return __sysret(sys_wait4(-1, status, 0, NULL)); -} - -static __attribute__((unused)) -pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ - return __sysret(sys_wait4(pid, status, options, rusage)); -} - - -static __attribute__((unused)) -pid_t waitpid(pid_t pid, int *status, int options) -{ - return __sysret(sys_wait4(pid, status, options, NULL)); -} - - -/* - * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); - */ - -static __attribute__((unused)) -int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage) -{ - return my_syscall5(__NR_waitid, which, pid, infop, options, rusage); -} - -static __attribute__((unused)) -int waitid(int which, pid_t pid, siginfo_t *infop, int options) -{ - return __sysret(sys_waitid(which, pid, infop, options, NULL)); -} - - /* * ssize_t write(int fd, const void *buf, size_t count); */ diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h new file mode 100644 index 000000000000..1af366a63f20 --- /dev/null +++ b/tools/include/nolibc/sys/wait.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * wait definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + */ + +#ifndef _NOLIBC_SYS_WAIT_H +#define _NOLIBC_SYS_WAIT_H + +#include "../arch.h" +#include "../std.h" +#include "../types.h" + +/* + * pid_t wait(int *status); + * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage); + * pid_t waitpid(pid_t pid, int *status, int options); + */ + +static __attribute__((unused)) +pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ +#ifdef __NR_wait4 + return my_syscall4(__NR_wait4, pid, status, options, rusage); +#else + return __nolibc_enosys(__func__, pid, status, options, rusage); +#endif +} + +static __attribute__((unused)) +pid_t wait(int *status) +{ + return __sysret(sys_wait4(-1, status, 0, NULL)); +} + +static __attribute__((unused)) +pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ + return __sysret(sys_wait4(pid, status, options, rusage)); +} + + +static __attribute__((unused)) +pid_t waitpid(pid_t pid, int *status, int options) +{ + return __sysret(sys_wait4(pid, status, options, NULL)); +} + + +/* + * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); + */ + +static __attribute__((unused)) +int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage) +{ + return my_syscall5(__NR_waitid, which, pid, infop, options, rusage); +} + +static __attribute__((unused)) +int waitid(int which, pid_t pid, siginfo_t *infop, int options) +{ + return __sysret(sys_waitid(which, pid, infop, options, NULL)); +} + + + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#endif /* _NOLIBC_SYS_WAIT_H */ -- cgit v1.2.3 From 4c99fbc6a06f56e266f60f5f24d0cfd8311b2c09 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 11 Apr 2025 11:00:38 +0200 Subject: tools/nolibc: handle intmax_t/uintmax_t in printf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In nolibc intmax_t and uintmax_t are always the same as (unsigned) long long/uint64_t as 128bit numbers are not supported. Even libcs that do support 128bit numbers often fix intmax_t to 64bit as it is used in ABIs and any change would break those. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/stdio.h | 2 ++ tools/testing/selftests/nolibc/nolibc-test.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index a403351dbf60..b32b8b794015 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -286,6 +286,8 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) if (c == 'l') { /* long format prefix, maintain the escape */ lpref++; + } else if (c == 'j') { + lpref = 2; } escape = 1; goto do_escape; diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index b176a706609b..143dddfcd01a 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1429,6 +1429,8 @@ static int run_vfprintf(int min, int max) CASE_TEST(char); EXPECT_VFPRINTF(1, "c", "%c", 'c'); break; CASE_TEST(hex); EXPECT_VFPRINTF(1, "f", "%x", 0xf); break; CASE_TEST(pointer); EXPECT_VFPRINTF(3, "0x1", "%p", (void *) 0x1); break; + CASE_TEST(uintmax_t); EXPECT_VFPRINTF(20, "18446744073709551615", "%ju", 0xffffffffffffffffULL); break; + CASE_TEST(intmax_t); EXPECT_VFPRINTF(20, "-9223372036854775807", "%jd", 0x8000000000000001LL); break; CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break; case __LINE__: return ret; /* must be last */ -- cgit v1.2.3 From e5407c0820ea5fa7117b85ed32b724af73156d63 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 11 Apr 2025 11:00:39 +0200 Subject: tools/nolibc: use intmax definitions from compiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The printf format checking in the compiler uses the intmax types from the compiler, not libc. This can lead to compiler errors. Instead use the types already provided by the compiler. Example issue with clang 19 for arm64: nolibc-test.c:30:2: error: format specifies type 'uintmax_t' (aka 'unsigned long') but the argument has type 'uintmax_t' (aka 'unsigned long long') [-Werror,-Wformat] Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/stdint.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h index cd79ddd6170e..b052ad6303c3 100644 --- a/tools/include/nolibc/stdint.h +++ b/tools/include/nolibc/stdint.h @@ -39,8 +39,8 @@ typedef size_t uint_fast32_t; typedef int64_t int_fast64_t; typedef uint64_t uint_fast64_t; -typedef int64_t intmax_t; -typedef uint64_t uintmax_t; +typedef __INTMAX_TYPE__ intmax_t; +typedef __UINTMAX_TYPE__ uintmax_t; /* limits of integral types */ -- cgit v1.2.3 From 248ddc80b145515286bfb75d08034ad4c0fdb08e Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 11 Apr 2025 11:00:40 +0200 Subject: tools/nolibc: use pselect6_time64 if available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit riscv32 does not have any of the older select systemcalls. Use pselect6_time64 instead. poll() is also used to implement sleep(). Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/sys.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index d07456d6e572..2f33efc3c5ee 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -902,6 +902,14 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva t.tv_nsec = timeout->tv_usec * 1000; } return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); +#elif defined(__NR_pselect6_time64) + struct __kernel_timespec t; + + if (timeout) { + t.tv_sec = timeout->tv_sec; + t.tv_nsec = timeout->tv_usec * 1000; + } + return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); #else return __nolibc_enosys(__func__, nfds, rfds, wfds, efds, timeout); #endif -- cgit v1.2.3 From 4de88a88bcbe4cf89477d8c6a9145fdfd929bc96 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 11 Apr 2025 11:00:41 +0200 Subject: tools/nolibc: use ppoll_time64 if available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit riscv32 does not have any of the older poll systemcalls. Use ppoll_time64 instead. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/sys.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 2f33efc3c5ee..aa0d0871b251 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -772,6 +772,14 @@ int sys_poll(struct pollfd *fds, int nfds, int timeout) t.tv_nsec = (timeout % 1000) * 1000000; } return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); +#elif defined(__NR_ppoll_time64) + struct __kernel_timespec t; + + if (timeout >= 0) { + t.tv_sec = timeout / 1000; + t.tv_nsec = (timeout % 1000) * 1000000; + } + return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); #elif defined(__NR_poll) return my_syscall3(__NR_poll, fds, nfds, timeout); #else -- cgit v1.2.3 From 9b070d97d9e52195c3a2ee084fdd7c45b6b35adf Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 11 Apr 2025 11:00:42 +0200 Subject: tools/nolibc: add tolower() and toupper() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kselftest harness uses these functions. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/string.h | 17 +++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 5 +++++ 2 files changed, 22 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h index ba84ab700e30..f0d335f0e467 100644 --- a/tools/include/nolibc/string.h +++ b/tools/include/nolibc/string.h @@ -289,6 +289,23 @@ char *strrchr(const char *s, int c) return (char *)ret; } +static __attribute__((unused)) +int tolower(int c) +{ + if (c >= 'A' && c <= 'Z') + return c - 'A' + 'a'; + return c; +} + +static __attribute__((unused)) +int toupper(int c) +{ + if (c >= 'a' && c <= 'z') + return c - 'a' + 'A'; + return c; +} + + /* make sure to include all global symbols */ #include "nolibc.h" diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 143dddfcd01a..b6f736fdeadf 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -39,6 +39,7 @@ #include #include #include +#include #endif #endif @@ -1280,6 +1281,10 @@ int run_stdlib(int min, int max) CASE_TEST(strerror_EINVAL); EXPECT_STREQ(is_nolibc, strerror(EINVAL), "errno=22"); break; CASE_TEST(strerror_int_max); EXPECT_STREQ(is_nolibc, strerror(INT_MAX), "errno=2147483647"); break; CASE_TEST(strerror_int_min); EXPECT_STREQ(is_nolibc, strerror(INT_MIN), "errno=-2147483648"); break; + CASE_TEST(tolower); EXPECT_EQ(1, tolower('A'), 'a'); break; + CASE_TEST(tolower_noop); EXPECT_EQ(1, tolower('a'), 'a'); break; + CASE_TEST(toupper); EXPECT_EQ(1, toupper('a'), 'A'); break; + CASE_TEST(toupper_noop); EXPECT_EQ(1, toupper('A'), 'A'); break; case __LINE__: return ret; /* must be last */ -- cgit v1.2.3 From 7b11531ed172cc8cc2465e99c295af5ac2fd55e3 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 11 Apr 2025 11:00:43 +0200 Subject: tools/nolibc: add _exit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _exit() is the faster variant of exit(), skipping all cleanup actions. As nolibc does not perform any cleanup anyways, the implementation is trivial. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/sys.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index aa0d0871b251..6fe288237020 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -300,11 +300,17 @@ void sys_exit(int status) } static __attribute__((noreturn,unused)) -void exit(int status) +void _exit(int status) { sys_exit(status); } +static __attribute__((noreturn,unused)) +void exit(int status) +{ + _exit(status); +} + /* * pid_t fork(void); -- cgit v1.2.3 From 67fe525e3401d48a9fb1bf437555a9d82800f3d7 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 11 Apr 2025 11:00:44 +0200 Subject: tools/nolibc: add setpgrp() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit setpgrp() is defined to be identical to setpgid(0, 0). Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/sys.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 6fe288237020..bc47007f0442 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -952,6 +952,16 @@ int setpgid(pid_t pid, pid_t pgid) return __sysret(sys_setpgid(pid, pgid)); } +/* + * pid_t setpgrp(void) + */ + +static __attribute__((unused)) +pid_t setpgrp(void) +{ + return setpgid(0, 0); +} + /* * pid_t setsid(void); -- cgit v1.2.3 From 0c89abf5ab3fba1e8f73cdaf0385ed9786c1c307 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 11 Apr 2025 11:00:45 +0200 Subject: tools/nolibc: implement waitpid() in terms of waitid() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old wait4() syscall used by waitpid() before is not available everywhere. Switch to the waitid() syscall which is the new replacement. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/sys/wait.h | 70 ++++++++++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 12 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h index 1af366a63f20..9a68e6a6b1df 100644 --- a/tools/include/nolibc/sys/wait.h +++ b/tools/include/nolibc/sys/wait.h @@ -15,6 +15,7 @@ * pid_t wait(int *status); * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage); * pid_t waitpid(pid_t pid, int *status, int options); + * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); */ static __attribute__((unused)) @@ -39,18 +40,6 @@ pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) return __sysret(sys_wait4(pid, status, options, rusage)); } - -static __attribute__((unused)) -pid_t waitpid(pid_t pid, int *status, int options) -{ - return __sysret(sys_wait4(pid, status, options, NULL)); -} - - -/* - * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); - */ - static __attribute__((unused)) int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage) { @@ -64,6 +53,63 @@ int waitid(int which, pid_t pid, siginfo_t *infop, int options) } +static __attribute__((unused)) +pid_t waitpid(pid_t pid, int *status, int options) +{ + int idtype, ret; + siginfo_t info; + pid_t id; + + if (pid == INT_MIN) { + SET_ERRNO(ESRCH); + return -1; + } else if (pid < -1) { + idtype = P_PGID; + id = -pid; + } else if (pid == -1) { + idtype = P_ALL; + id = 0; + } else if (pid == 0) { + idtype = P_PGID; + id = 0; + } else { + idtype = P_PID; + id = pid; + } + + options |= WEXITED; + + ret = waitid(idtype, id, &info, options); + if (ret) + return ret; + + switch (info.si_code) { + case 0: + *status = 0; + break; + case CLD_EXITED: + *status = (info.si_status & 0xff) << 8; + break; + case CLD_KILLED: + *status = info.si_status & 0x7f; + break; + case CLD_DUMPED: + *status = (info.si_status & 0x7f) | 0x80; + break; + case CLD_STOPPED: + case CLD_TRAPPED: + *status = (info.si_status << 8) + 0x7f; + break; + case CLD_CONTINUED: + *status = 0xffff; + break; + default: + return -1; + } + + return info.si_pid; +} + /* make sure to include all global symbols */ #include "../nolibc.h" -- cgit v1.2.3 From 5197b7b87cbfeed61b69ff54532bb58a0d55cb0b Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 11 Apr 2025 11:00:47 +0200 Subject: tools/nolibc: add dprintf() and vdprintf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dprintf() and vdprintf() are printf() variants printing directly into a filedescriptor. As FILE in nolibc is based directly on filedescriptors, the implementation is trivial. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/stdio.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index b32b8b794015..262d0da4da90 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -351,6 +351,30 @@ int printf(const char *fmt, ...) return ret; } +static __attribute__((unused, format(printf, 2, 0))) +int vdprintf(int fd, const char *fmt, va_list args) +{ + FILE *stream; + + stream = fdopen(fd, NULL); + if (!stream) + return -1; + /* Technically 'stream' is leaked, but as it's only a wrapper around 'fd' that is fine */ + return vfprintf(stream, fmt, args); +} + +static __attribute__((unused, format(printf, 2, 3))) +int dprintf(int fd, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vdprintf(fd, fmt, args); + va_end(args); + return ret; +} + static __attribute__((unused)) int vsscanf(const char *str, const char *format, va_list args) { -- cgit v1.2.3 From bae3cd708e8adef27ee7657cd877d9ba9aa4f2ae Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 11 Apr 2025 11:00:48 +0200 Subject: tools/nolibc: add getopt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a getopt() implementation based on the one from musl. The only deviations are adaption to the kernel coding style and nolibc infrastructure and removal of multi-byte support. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/getopt.h | 101 ++++++++++++++++++++++++++++++++++++++++++ tools/include/nolibc/nolibc.h | 1 + 3 files changed, 103 insertions(+) create mode 100644 tools/include/nolibc/getopt.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index b5d4479abc3b..e05862cd0805 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -33,6 +33,7 @@ all_files := \ elf.h \ errno.h \ fcntl.h \ + getopt.h \ limits.h \ nolibc.h \ signal.h \ diff --git a/tools/include/nolibc/getopt.h b/tools/include/nolibc/getopt.h new file mode 100644 index 000000000000..5fd06c9702e9 --- /dev/null +++ b/tools/include/nolibc/getopt.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * getopt function definitions for NOLIBC, adapted from musl libc + * Copyright (C) 2005-2020 Rich Felker, et al. + * Copyright (C) 2025 Thomas Weißschuh + */ + +#ifndef _NOLIBC_GETOPT_H +#define _NOLIBC_GETOPT_H + +struct FILE; +static struct FILE *const stderr; +static int fprintf(struct FILE *stream, const char *fmt, ...); + +__attribute__((weak,unused,section(".data.nolibc_getopt"))) +char *optarg; + +__attribute__((weak,unused,section(".data.nolibc_getopt"))) +int optind = 1, opterr = 1, optopt; + +static __attribute__((unused)) +int getopt(int argc, char * const argv[], const char *optstring) +{ + static int __optpos; + int i; + char c, d; + char *optchar; + + if (!optind) { + __optpos = 0; + optind = 1; + } + + if (optind >= argc || !argv[optind]) + return -1; + + if (argv[optind][0] != '-') { + if (optstring[0] == '-') { + optarg = argv[optind++]; + return 1; + } + return -1; + } + + if (!argv[optind][1]) + return -1; + + if (argv[optind][1] == '-' && !argv[optind][2]) + return optind++, -1; + + if (!__optpos) + __optpos++; + c = argv[optind][__optpos]; + optchar = argv[optind] + __optpos; + __optpos++; + + if (!argv[optind][__optpos]) { + optind++; + __optpos = 0; + } + + if (optstring[0] == '-' || optstring[0] == '+') + optstring++; + + i = 0; + d = 0; + do { + d = optstring[i++]; + } while (d && d != c); + + if (d != c || c == ':') { + optopt = c; + if (optstring[0] != ':' && opterr) + fprintf(stderr, "%s: unrecognized option: %c\n", argv[0], *optchar); + return '?'; + } + if (optstring[i] == ':') { + optarg = 0; + if (optstring[i + 1] != ':' || __optpos) { + optarg = argv[optind++]; + if (__optpos) + optarg += __optpos; + __optpos = 0; + } + if (optind > argc) { + optopt = c; + if (optstring[0] == ':') + return ':'; + if (opterr) + fprintf(stderr, "%s: option requires argument: %c\n", + argv[0], *optchar); + return '?'; + } + } + return c; +} + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#endif /* _NOLIBC_GETOPT_H */ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index e8843880e4df..d1b949e094ee 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -113,6 +113,7 @@ #include "stackprotector.h" #include "dirent.h" #include "fcntl.h" +#include "getopt.h" /* Used by programs to avoid std includes */ #define NOLIBC -- cgit v1.2.3 From f7b3eeffd402c5eec32be9c3bfdeb3ec03e87cf3 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 11 Apr 2025 11:00:49 +0200 Subject: tools/nolibc: allow different write callbacks in printf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decouple the formatting logic from the writing logic to later enable writing straight to a buffer in sprintf(). Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/stdio.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index 262d0da4da90..5c893b4903a3 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -208,13 +208,15 @@ char *fgets(char *s, int size, FILE *stream) } -/* minimal vfprintf(). It supports the following formats: +/* minimal printf(). It supports the following formats: * - %[l*]{d,u,c,x,p} * - %s * - unknown modifiers are ignored. */ -static __attribute__((unused, format(printf, 2, 0))) -int vfprintf(FILE *stream, const char *fmt, va_list args) +typedef int (*__nolibc_printf_cb)(intptr_t state, const char *buf, size_t size); + +static __attribute__((unused, format(printf, 3, 0))) +int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, const char *fmt, va_list args) { char escape, lpref, c; unsigned long long v; @@ -304,7 +306,7 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) outstr = fmt; len = ofs - 1; flush_str: - if (_fwrite(outstr, len, stream) != 0) + if (cb(state, outstr, len) != 0) break; written += len; @@ -321,6 +323,17 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) return written; } +static int __nolibc_fprintf_cb(intptr_t state, const char *buf, size_t size) +{ + return _fwrite(buf, size, (FILE *)state); +} + +static __attribute__((unused, format(printf, 2, 0))) +int vfprintf(FILE *stream, const char *fmt, va_list args) +{ + return __nolibc_printf(__nolibc_fprintf_cb, (intptr_t)stream, fmt, args); +} + static __attribute__((unused, format(printf, 1, 0))) int vprintf(const char *fmt, va_list args) { -- cgit v1.2.3 From 9f4a2e28bc4714487d9a0c85af2a87c07155c552 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 11 Apr 2025 11:00:50 +0200 Subject: tools/nolibc: allow limiting of printf destination size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit snprintf() allows limiting the output buffer, while still returning the number of all bytes that would have been written. Implement the limitation logic in preparation for snprintf(). Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/stdio.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index 5c893b4903a3..b17b473bd875 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -215,13 +215,13 @@ char *fgets(char *s, int size, FILE *stream) */ typedef int (*__nolibc_printf_cb)(intptr_t state, const char *buf, size_t size); -static __attribute__((unused, format(printf, 3, 0))) -int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, const char *fmt, va_list args) +static __attribute__((unused, format(printf, 4, 0))) +int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char *fmt, va_list args) { char escape, lpref, c; unsigned long long v; unsigned int written; - size_t len, ofs; + size_t len, ofs, w; char tmpbuf[21]; const char *outstr; @@ -306,8 +306,12 @@ int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, const char *fmt, va_l outstr = fmt; len = ofs - 1; flush_str: - if (cb(state, outstr, len) != 0) - break; + if (n) { + w = len < n ? len : n; + n -= w; + if (cb(state, outstr, w) != 0) + break; + } written += len; do_escape: @@ -331,7 +335,7 @@ static int __nolibc_fprintf_cb(intptr_t state, const char *buf, size_t size) static __attribute__((unused, format(printf, 2, 0))) int vfprintf(FILE *stream, const char *fmt, va_list args) { - return __nolibc_printf(__nolibc_fprintf_cb, (intptr_t)stream, fmt, args); + return __nolibc_printf(__nolibc_fprintf_cb, (intptr_t)stream, SIZE_MAX, fmt, args); } static __attribute__((unused, format(printf, 1, 0))) -- cgit v1.2.3 From ed45d24cf23521b12271d95f8e87b839e79dd65e Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 11 Apr 2025 11:00:51 +0200 Subject: tools/nolibc: add snprintf() and friends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add more of the printf() functions. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/stdio.h | 55 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index b17b473bd875..46bd90f96d65 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -389,6 +389,61 @@ int dprintf(int fd, const char *fmt, ...) va_start(args, fmt); ret = vdprintf(fd, fmt, args); va_end(args); + + return ret; +} + +static int __nolibc_sprintf_cb(intptr_t _state, const char *buf, size_t size) +{ + char **state = (char **)_state; + + memcpy(*state, buf, size); + *state += size; + return 0; +} + +static __attribute__((unused, format(printf, 3, 0))) +int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + char *state = buf; + int ret; + + ret = __nolibc_printf(__nolibc_sprintf_cb, (intptr_t)&state, size, fmt, args); + if (ret < 0) + return ret; + buf[(size_t)ret < size ? (size_t)ret : size - 1] = '\0'; + return ret; +} + +static __attribute__((unused, format(printf, 3, 4))) +int snprintf(char *buf, size_t size, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vsnprintf(buf, size, fmt, args); + va_end(args); + + return ret; +} + +static __attribute__((unused, format(printf, 2, 0))) +int vsprintf(char *buf, const char *fmt, va_list args) +{ + return vsnprintf(buf, SIZE_MAX, fmt, args); +} + +static __attribute__((unused, format(printf, 2, 3))) +int sprintf(char *buf, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vsprintf(buf, fmt, args); + va_end(args); + return ret; } -- cgit v1.2.3 From e90ce42e81381665dbcedc5fa12e74759ee89639 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Fri, 11 Apr 2025 11:00:55 +0200 Subject: tools/nolibc: implement width padding in printf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit printf can pad each argument to a certain width. Implement this for compatibility with the kselftest harness. Currently only padding with spaces is supported. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau --- tools/include/nolibc/stdio.h | 17 ++++++++++++++++- tools/testing/selftests/nolibc/nolibc-test.c | 3 +++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index 46bd90f96d65..fb0417477759 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -220,7 +220,7 @@ int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char { char escape, lpref, c; unsigned long long v; - unsigned int written; + unsigned int written, width; size_t len, ofs, w; char tmpbuf[21]; const char *outstr; @@ -228,10 +228,20 @@ int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char written = ofs = escape = lpref = 0; while (1) { c = fmt[ofs++]; + width = 0; if (escape) { /* we're in an escape sequence, ofs == 1 */ escape = 0; + + /* width */ + while (c >= '0' && c <= '9') { + width *= 10; + width += c - '0'; + + c = fmt[ofs++]; + } + if (c == 'c' || c == 'd' || c == 'u' || c == 'x' || c == 'p') { char *out = tmpbuf; @@ -309,6 +319,11 @@ int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char if (n) { w = len < n ? len : n; n -= w; + while (width-- > w) { + if (cb(state, " ", 1) != 0) + break; + written += 1; + } if (cb(state, outstr, w) != 0) break; } diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 33cd64a1b768..1ad0db92f0ed 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1414,6 +1414,9 @@ static int run_printf(int min, int max) CASE_TEST(uintmax_t); EXPECT_VFPRINTF(20, "18446744073709551615", "%ju", 0xffffffffffffffffULL); break; CASE_TEST(intmax_t); EXPECT_VFPRINTF(20, "-9223372036854775807", "%jd", 0x8000000000000001LL); break; CASE_TEST(truncation); EXPECT_VFPRINTF(25, "01234567890123456789", "%s", "0123456789012345678901234"); break; + CASE_TEST(string_width); EXPECT_VFPRINTF(10, " 1", "%10s", "1"); break; + CASE_TEST(number_width); EXPECT_VFPRINTF(10, " 1", "%10d", 1); break; + CASE_TEST(width_trunc); EXPECT_VFPRINTF(25, " ", "%25d", 1); break; CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break; case __LINE__: return ret; /* must be last */ -- cgit v1.2.3 From f25051dce97cfd7a945add0c9e273e624e060624 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 16 Apr 2025 18:29:18 +0200 Subject: tools headers: Synchronize prctl.h ABI header Synchronize prctl.h with current uapi version after adding PR_FUTEX_HASH. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250416162921.513656-19-bigeasy@linutronix.de --- tools/include/uapi/linux/prctl.h | 44 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 35791791a879..21f30b3ded74 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -230,7 +230,7 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) -/* Tagged user address controls for arm64 */ +/* Tagged user address controls for arm64 and RISC-V */ #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) @@ -244,6 +244,9 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* RISC-V pointer masking tag length */ +# define PR_PMLEN_SHIFT 24 +# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 @@ -328,4 +331,43 @@ struct prctl_mm_map { # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */ # define PR_PPC_DEXCR_CTRL_MASK 0x1f +/* + * Get the current shadow stack configuration for the current thread, + * this will be the value configured via PR_SET_SHADOW_STACK_STATUS. + */ +#define PR_GET_SHADOW_STACK_STATUS 74 + +/* + * Set the current shadow stack configuration. Enabling the shadow + * stack will cause a shadow stack to be allocated for the thread. + */ +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +/* + * Prevent further changes to the specified shadow stack + * configuration. All bits may be locked via this call, including + * undefined bits. + */ +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +/* + * Controls the mode of timer_create() for CRIU restore operations. + * Enabling this allows CRIU to restore timers with explicit IDs. + * + * Don't use for normal operations as the result might be undefined. + */ +#define PR_TIMER_CREATE_RESTORE_IDS 77 +# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0 +# define PR_TIMER_CREATE_RESTORE_IDS_ON 1 +# define PR_TIMER_CREATE_RESTORE_IDS_GET 2 + +/* FUTEX hash management */ +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define PR_FUTEX_HASH_GET_SLOTS 2 +# define PR_FUTEX_HASH_GET_IMMUTABLE 3 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 823153334042746604fdb416ea358a90940c1d83 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 9 May 2025 17:35:37 +0200 Subject: bpf: Add support to retrieve ref_ctr_offset for uprobe perf link Adding support to retrieve ref_ctr_offset for uprobe perf link, which got somehow omitted from the initial uprobe link info changes. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Acked-by: Yafang Shao Link: https://lore.kernel.org/bpf/20250509153539.779599-2-jolsa@kernel.org --- include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 5 +++-- kernel/trace/trace_uprobe.c | 2 +- tools/include/uapi/linux/bpf.h | 1 + 4 files changed, 6 insertions(+), 3 deletions(-) (limited to 'tools/include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 71d5ac83cf5d..16e95398c91c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6724,6 +6724,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from file_name */ __u64 cookie; + __u64 ref_ctr_offset; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index df33d19c5c3b..4b5f29168618 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3800,14 +3800,14 @@ static int bpf_perf_link_fill_kprobe(const struct perf_event *event, static int bpf_perf_link_fill_uprobe(const struct perf_event *event, struct bpf_link_info *info) { + u64 ref_ctr_offset, offset; char __user *uname; - u64 addr, offset; u32 ulen, type; int err; uname = u64_to_user_ptr(info->perf_event.uprobe.file_name); ulen = info->perf_event.uprobe.name_len; - err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &addr, + err = bpf_perf_link_fill_common(event, uname, &ulen, &offset, &ref_ctr_offset, &type, NULL); if (err) return err; @@ -3819,6 +3819,7 @@ static int bpf_perf_link_fill_uprobe(const struct perf_event *event, info->perf_event.uprobe.name_len = ulen; info->perf_event.uprobe.offset = offset; info->perf_event.uprobe.cookie = event->bpf_cookie; + info->perf_event.uprobe.ref_ctr_offset = ref_ctr_offset; return 0; } #endif diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 3386439ec9f6..d9cf6ed2c106 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1489,7 +1489,7 @@ int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, : BPF_FD_TYPE_UPROBE; *filename = tu->filename; *probe_offset = tu->offset; - *probe_addr = 0; + *probe_addr = tu->ref_ctr_offset; return 0; } #endif /* CONFIG_PERF_EVENTS */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 71d5ac83cf5d..16e95398c91c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6724,6 +6724,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from file_name */ __u64 cookie; + __u64 ref_ctr_offset; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ -- cgit v1.2.3 From 267bee0cd87a98832fd9da1976f0f53788b6a2b2 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 24 Mar 2025 06:53:27 +0000 Subject: tools headers UAPI: sync linux/fs.h with the kernel sources Required for a new PAGEMAP_SCAN test to verify guard region reporting. Link: https://lkml.kernel.org/r/20250324065328.107678-3-avagin@google.com Signed-off-by: Andrei Vagin Reviewed-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Jonathan Corbet Cc: Shuah Khan Signed-off-by: Andrew Morton --- tools/include/uapi/linux/fs.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index 8a27bc5c7a7f..24ddf7bc4f25 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -40,6 +40,15 @@ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1< Date: Fri, 9 May 2025 15:32:34 +0200 Subject: selftests/fs/statmount: build with tools include dir Copy the required headers files (mount.h, nsfs.h) to the tools include dir and define the statmount/listmount syscall numbers to decouple dependency with headers_install for the common cases. Reviewed-by: John Hubbard Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/20250509133240.529330-3-amir73il@gmail.com Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- tools/include/uapi/linux/mount.h | 235 +++++++++++++++++++++ tools/include/uapi/linux/nsfs.h | 45 ++++ .../selftests/filesystems/statmount/Makefile | 3 +- .../selftests/filesystems/statmount/statmount.h | 36 ++++ 4 files changed, 318 insertions(+), 1 deletion(-) create mode 100644 tools/include/uapi/linux/mount.h create mode 100644 tools/include/uapi/linux/nsfs.h (limited to 'tools/include') diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h new file mode 100644 index 000000000000..7fa67c2031a5 --- /dev/null +++ b/tools/include/uapi/linux/mount.h @@ -0,0 +1,235 @@ +#ifndef _UAPI_LINUX_MOUNT_H +#define _UAPI_LINUX_MOUNT_H + +#include + +/* + * These are the fs-independent mount-flags: up to 32 flags are supported + * + * Usage of these is restricted within the kernel to core mount(2) code and + * callers of sys_mount() only. Filesystems should be using the SB_* + * equivalent instead. + */ +#define MS_RDONLY 1 /* Mount read-only */ +#define MS_NOSUID 2 /* Ignore suid and sgid bits */ +#define MS_NODEV 4 /* Disallow access to device special files */ +#define MS_NOEXEC 8 /* Disallow program execution */ +#define MS_SYNCHRONOUS 16 /* Writes are synced at once */ +#define MS_REMOUNT 32 /* Alter flags of a mounted FS */ +#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ +#define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ +#define MS_NOATIME 1024 /* Do not update access times. */ +#define MS_NODIRATIME 2048 /* Do not update directory access times */ +#define MS_BIND 4096 +#define MS_MOVE 8192 +#define MS_REC 16384 +#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. + MS_VERBOSE is deprecated. */ +#define MS_SILENT 32768 +#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define MS_UNBINDABLE (1<<17) /* change to unbindable */ +#define MS_PRIVATE (1<<18) /* change to private */ +#define MS_SLAVE (1<<19) /* change to slave */ +#define MS_SHARED (1<<20) /* change to shared */ +#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ +#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ +#define MS_I_VERSION (1<<23) /* Update inode I_version field */ +#define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ + +/* These sb flags are internal to the kernel */ +#define MS_SUBMOUNT (1<<26) +#define MS_NOREMOTELOCK (1<<27) +#define MS_NOSEC (1<<28) +#define MS_BORN (1<<29) +#define MS_ACTIVE (1<<30) +#define MS_NOUSER (1<<31) + +/* + * Superblock flags that can be altered by MS_REMOUNT + */ +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\ + MS_LAZYTIME) + +/* + * Old magic mount flag and mask + */ +#define MS_MGC_VAL 0xC0ED0000 +#define MS_MGC_MSK 0xffff0000 + +/* + * open_tree() flags. + */ +#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ +#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ + +/* + * move_mount() flags. + */ +#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */ +#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */ +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ +#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */ +#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */ +#define MOVE_MOUNT__MASK 0x00000377 + +/* + * fsopen() flags. + */ +#define FSOPEN_CLOEXEC 0x00000001 + +/* + * fspick() flags. + */ +#define FSPICK_CLOEXEC 0x00000001 +#define FSPICK_SYMLINK_NOFOLLOW 0x00000002 +#define FSPICK_NO_AUTOMOUNT 0x00000004 +#define FSPICK_EMPTY_PATH 0x00000008 + +/* + * The type of fsconfig() call made. + */ +enum fsconfig_command { + FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ + FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */ + FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ + FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */ +}; + +/* + * fsmount() flags. + */ +#define FSMOUNT_CLOEXEC 0x00000001 + +/* + * Mount attributes. + */ +#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ +#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ +#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ +#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ +#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ +#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ +#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ +#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ +#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ +#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */ + +/* + * mount_setattr() + */ +struct mount_attr { + __u64 attr_set; + __u64 attr_clr; + __u64 propagation; + __u64 userns_fd; +}; + +/* List of all mount_attr versions. */ +#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ + + +/* + * Structure for getting mount/superblock/filesystem info with statmount(2). + * + * The interface is similar to statx(2): individual fields or groups can be + * selected with the @mask argument of statmount(). Kernel will set the @mask + * field according to the supported fields. + * + * If string fields are selected, then the caller needs to pass a buffer that + * has space after the fixed part of the structure. Nul terminated strings are + * copied there and offsets relative to @str are stored in the relevant fields. + * If the buffer is too small, then EOVERFLOW is returned. The actually used + * size is returned in @size. + */ +struct statmount { + __u32 size; /* Total size, including strings */ + __u32 mnt_opts; /* [str] Options (comma separated, escaped) */ + __u64 mask; /* What results were written */ + __u32 sb_dev_major; /* Device ID */ + __u32 sb_dev_minor; + __u64 sb_magic; /* ..._SUPER_MAGIC */ + __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */ + __u32 fs_type; /* [str] Filesystem type */ + __u64 mnt_id; /* Unique ID of mount */ + __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */ + __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */ + __u32 mnt_parent_id_old; + __u64 mnt_attr; /* MOUNT_ATTR_... */ + __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */ + __u64 mnt_peer_group; /* ID of shared peer group */ + __u64 mnt_master; /* Mount receives propagation from this ID */ + __u64 propagate_from; /* Propagation from in current namespace */ + __u32 mnt_root; /* [str] Root of mount relative to root of fs */ + __u32 mnt_point; /* [str] Mountpoint relative to current root */ + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ + __u32 sb_source; /* [str] Source string of the mount */ + __u32 opt_num; /* Number of fs options */ + __u32 opt_array; /* [str] Array of nul terminated fs options */ + __u32 opt_sec_num; /* Number of security options */ + __u32 opt_sec_array; /* [str] Array of nul terminated security options */ + __u64 supported_mask; /* Mask flags that this kernel supports */ + __u32 mnt_uidmap_num; /* Number of uid mappings */ + __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */ + __u32 mnt_gidmap_num; /* Number of gid mappings */ + __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */ + __u64 __spare2[43]; + char str[]; /* Variable size part containing strings */ +}; + +/* + * Structure for passing mount ID and miscellaneous parameters to statmount(2) + * and listmount(2). + * + * For statmount(2) @param represents the request mask. + * For listmount(2) @param represents the last listed mount id (or zero). + */ +struct mnt_id_req { + __u32 size; + __u32 spare; + __u64 mnt_id; + __u64 param; + __u64 mnt_ns_id; +}; + +/* List of all mnt_id_req versions. */ +#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ + +/* + * @mask bits for statmount(2) + */ +#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */ +#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */ +#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */ +#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ +#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ +#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ +#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ +#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ +#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ +#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ +#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */ +#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */ +#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */ + +/* + * Special @mnt_id values that can be passed to listmount + */ +#define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ + +#endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h new file mode 100644 index 000000000000..34127653fd00 --- /dev/null +++ b/tools/include/uapi/linux/nsfs.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_NSFS_H +#define __LINUX_NSFS_H + +#include +#include + +#define NSIO 0xb7 + +/* Returns a file descriptor that refers to an owning user namespace */ +#define NS_GET_USERNS _IO(NSIO, 0x1) +/* Returns a file descriptor that refers to a parent namespace */ +#define NS_GET_PARENT _IO(NSIO, 0x2) +/* Returns the type of namespace (CLONE_NEW* value) referred to by + file descriptor */ +#define NS_GET_NSTYPE _IO(NSIO, 0x3) +/* Get owner UID (in the caller's user namespace) for a user namespace */ +#define NS_GET_OWNER_UID _IO(NSIO, 0x4) +/* Get the id for a mount namespace */ +#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64) +/* Translate pid from target pid namespace into the caller's pid namespace. */ +#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) +/* Return thread-group leader id of pid in the callers pid namespace. */ +#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int) +/* Translate pid from caller's pid namespace into a target pid namespace. */ +#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int) +/* Return thread-group leader id of pid in the target pid namespace. */ +#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int) + +struct mnt_ns_info { + __u32 size; + __u32 nr_mounts; + __u64 mnt_ns_id; +}; + +#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */ + +/* Get information about namespace. */ +#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info) +/* Get next namespace. */ +#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info) +/* Get previous namespace. */ +#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info) + +#endif /* __LINUX_NSFS_H */ diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile index 14ee91a41650..19adebfc2620 100644 --- a/tools/testing/selftests/filesystems/statmount/Makefile +++ b/tools/testing/selftests/filesystems/statmount/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) + TEST_GEN_PROGS := statmount_test statmount_test_ns listmount_test include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h index a7a5289ddae9..99e5ad082fb1 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount.h +++ b/tools/testing/selftests/filesystems/statmount/statmount.h @@ -7,6 +7,42 @@ #include #include +#ifndef __NR_statmount + #if defined __alpha__ + #define __NR_statmount 567 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_statmount 4457 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_statmount 6457 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_statmount 5457 + #endif + #else + #define __NR_statmount 457 + #endif +#endif + +#ifndef __NR_listmount + #if defined __alpha__ + #define __NR_listmount 568 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_listmount 4458 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_listmount 6458 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_listmount 5458 + #endif + #else + #define __NR_listmount 458 + #endif +#endif + static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask, struct statmount *buf, size_t bufsize, unsigned int flags) -- cgit v1.2.3 From c6d9775c2066a37385e784ee2e0ce83bd6644610 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 9 May 2025 15:32:37 +0200 Subject: selftests/fs/mount-notify: build with tools include dir Copy the fanotify uapi header files to the tools include dir and define __kernel_fsid_t to decouple dependency with headers_install and then remove the redundant re-definitions of fanotify macros. Reviewed-by: John Hubbard Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/20250509133240.529330-6-amir73il@gmail.com Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- tools/include/uapi/linux/fanotify.h | 274 +++++++++++++++++++++ .../selftests/filesystems/mount-notify/Makefile | 3 +- .../filesystems/mount-notify/mount-notify_test.c | 25 +- 3 files changed, 282 insertions(+), 20 deletions(-) create mode 100644 tools/include/uapi/linux/fanotify.h (limited to 'tools/include') diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h new file mode 100644 index 000000000000..e710967c7c26 --- /dev/null +++ b/tools/include/uapi/linux/fanotify.h @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_FANOTIFY_H +#define _UAPI_LINUX_FANOTIFY_H + +#include + +/* the following events that user-space can register for */ +#define FAN_ACCESS 0x00000001 /* File was accessed */ +#define FAN_MODIFY 0x00000002 /* File was modified */ +#define FAN_ATTRIB 0x00000004 /* Metadata changed */ +#define FAN_CLOSE_WRITE 0x00000008 /* Writable file closed */ +#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ +#define FAN_OPEN 0x00000020 /* File was opened */ +#define FAN_MOVED_FROM 0x00000040 /* File was moved from X */ +#define FAN_MOVED_TO 0x00000080 /* File was moved to Y */ +#define FAN_CREATE 0x00000100 /* Subfile was created */ +#define FAN_DELETE 0x00000200 /* Subfile was deleted */ +#define FAN_DELETE_SELF 0x00000400 /* Self was deleted */ +#define FAN_MOVE_SELF 0x00000800 /* Self was moved */ +#define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ + +#define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ +#define FAN_FS_ERROR 0x00008000 /* Filesystem error */ + +#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ +#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ +#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ +/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ + +#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */ +#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ +#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ + +#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ + +#define FAN_RENAME 0x10000000 /* File was renamed */ + +#define FAN_ONDIR 0x40000000 /* Event occurred against dir */ + +/* helper events */ +#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ +#define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO) /* moves */ + +/* flags used for fanotify_init() */ +#define FAN_CLOEXEC 0x00000001 +#define FAN_NONBLOCK 0x00000002 + +/* These are NOT bitwise flags. Both bits are used together. */ +#define FAN_CLASS_NOTIF 0x00000000 +#define FAN_CLASS_CONTENT 0x00000004 +#define FAN_CLASS_PRE_CONTENT 0x00000008 + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ + FAN_CLASS_PRE_CONTENT) + +#define FAN_UNLIMITED_QUEUE 0x00000010 +#define FAN_UNLIMITED_MARKS 0x00000020 +#define FAN_ENABLE_AUDIT 0x00000040 + +/* Flags to determine fanotify event format */ +#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */ +#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ +#define FAN_REPORT_FID 0x00000200 /* Report unique file id */ +#define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ +#define FAN_REPORT_NAME 0x00000800 /* Report events with name */ +#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ +#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */ +#define FAN_REPORT_MNT 0x00004000 /* Report mount events */ + +/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ +#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) +/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */ +#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \ + FAN_REPORT_FID | FAN_REPORT_TARGET_FID) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ + FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\ + FAN_UNLIMITED_MARKS) + +/* flags used for fanotify_modify_mark() */ +#define FAN_MARK_ADD 0x00000001 +#define FAN_MARK_REMOVE 0x00000002 +#define FAN_MARK_DONT_FOLLOW 0x00000004 +#define FAN_MARK_ONLYDIR 0x00000008 +/* FAN_MARK_MOUNT is 0x00000010 */ +#define FAN_MARK_IGNORED_MASK 0x00000020 +#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 +#define FAN_MARK_FLUSH 0x00000080 +/* FAN_MARK_FILESYSTEM is 0x00000100 */ +#define FAN_MARK_EVICTABLE 0x00000200 +/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */ +#define FAN_MARK_IGNORE 0x00000400 + +/* These are NOT bitwise flags. Both bits can be used togther. */ +#define FAN_MARK_INODE 0x00000000 +#define FAN_MARK_MOUNT 0x00000010 +#define FAN_MARK_FILESYSTEM 0x00000100 +#define FAN_MARK_MNTNS 0x00000110 + +/* + * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY + * for non-inode mark types. + */ +#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ + FAN_MARK_REMOVE |\ + FAN_MARK_DONT_FOLLOW |\ + FAN_MARK_ONLYDIR |\ + FAN_MARK_MOUNT |\ + FAN_MARK_IGNORED_MASK |\ + FAN_MARK_IGNORED_SURV_MODIFY |\ + FAN_MARK_FLUSH) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_EVENTS (FAN_ACCESS |\ + FAN_MODIFY |\ + FAN_CLOSE |\ + FAN_OPEN) + +/* + * All events which require a permission response from userspace + */ +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\ + FAN_ACCESS_PERM) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\ + FAN_ALL_PERM_EVENTS |\ + FAN_Q_OVERFLOW) + +#define FANOTIFY_METADATA_VERSION 3 + +struct fanotify_event_metadata { + __u32 event_len; + __u8 vers; + __u8 reserved; + __u16 metadata_len; + __aligned_u64 mask; + __s32 fd; + __s32 pid; +}; + +#define FAN_EVENT_INFO_TYPE_FID 1 +#define FAN_EVENT_INFO_TYPE_DFID_NAME 2 +#define FAN_EVENT_INFO_TYPE_DFID 3 +#define FAN_EVENT_INFO_TYPE_PIDFD 4 +#define FAN_EVENT_INFO_TYPE_ERROR 5 +#define FAN_EVENT_INFO_TYPE_RANGE 6 +#define FAN_EVENT_INFO_TYPE_MNT 7 + +/* Special info types for FAN_RENAME */ +#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 +/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */ +#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12 +/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */ + +/* Variable length info record following event metadata */ +struct fanotify_event_info_header { + __u8 info_type; + __u8 pad; + __u16 len; +}; + +/* + * Unique file identifier info record. + * This structure is used for records of types FAN_EVENT_INFO_TYPE_FID, + * FAN_EVENT_INFO_TYPE_DFID and FAN_EVENT_INFO_TYPE_DFID_NAME. + * For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null terminated + * name immediately after the file handle. + */ +struct fanotify_event_info_fid { + struct fanotify_event_info_header hdr; + __kernel_fsid_t fsid; + /* + * Following is an opaque struct file_handle that can be passed as + * an argument to open_by_handle_at(2). + */ + unsigned char handle[]; +}; + +/* + * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD. + * It holds a pidfd for the pid that was responsible for generating an event. + */ +struct fanotify_event_info_pidfd { + struct fanotify_event_info_header hdr; + __s32 pidfd; +}; + +struct fanotify_event_info_error { + struct fanotify_event_info_header hdr; + __s32 error; + __u32 error_count; +}; + +struct fanotify_event_info_range { + struct fanotify_event_info_header hdr; + __u32 pad; + __u64 offset; + __u64 count; +}; + +struct fanotify_event_info_mnt { + struct fanotify_event_info_header hdr; + __u64 mnt_id; +}; + +/* + * User space may need to record additional information about its decision. + * The extra information type records what kind of information is included. + * The default is none. We also define an extra information buffer whose + * size is determined by the extra information type. + * + * If the information type is Audit Rule, then the information following + * is the rule number that triggered the user space decision that + * requires auditing. + */ + +#define FAN_RESPONSE_INFO_NONE 0 +#define FAN_RESPONSE_INFO_AUDIT_RULE 1 + +struct fanotify_response { + __s32 fd; + __u32 response; +}; + +struct fanotify_response_info_header { + __u8 type; + __u8 pad; + __u16 len; +}; + +struct fanotify_response_info_audit_rule { + struct fanotify_response_info_header hdr; + __u32 rule_number; + __u32 subj_trust; + __u32 obj_trust; +}; + +/* Legit userspace responses to a _PERM event */ +#define FAN_ALLOW 0x01 +#define FAN_DENY 0x02 +/* errno other than EPERM can specified in upper byte of deny response */ +#define FAN_ERRNO_BITS 8 +#define FAN_ERRNO_SHIFT (32 - FAN_ERRNO_BITS) +#define FAN_ERRNO_MASK ((1 << FAN_ERRNO_BITS) - 1) +#define FAN_DENY_ERRNO(err) \ + (FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT)) + +#define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */ +#define FAN_INFO 0x20 /* Bitmask to indicate additional information */ + +/* No fd set in event */ +#define FAN_NOFD -1 +#define FAN_NOPIDFD FAN_NOFD +#define FAN_EPIDFD -2 + +/* Helper functions to deal with fanotify_event_metadata buffers */ +#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) + +#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \ + (struct fanotify_event_metadata*)(((char *)(meta)) + \ + (meta)->event_len)) + +#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len <= (long)(len)) + +#endif /* _UAPI_LINUX_FANOTIFY_H */ diff --git a/tools/testing/selftests/filesystems/mount-notify/Makefile b/tools/testing/selftests/filesystems/mount-notify/Makefile index 10be0227b5ae..41ebfe558a0a 100644 --- a/tools/testing/selftests/filesystems/mount-notify/Makefile +++ b/tools/testing/selftests/filesystems/mount-notify/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) + TEST_GEN_PROGS := mount-notify_test include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c index 59a71f22fb11..4f0f325379b5 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c @@ -8,33 +8,20 @@ #include #include #include -#include #include -#include #include #include "../../kselftest_harness.h" #include "../statmount/statmount.h" -#ifndef FAN_MNT_ATTACH -struct fanotify_event_info_mnt { - struct fanotify_event_info_header hdr; - __u64 mnt_id; -}; -#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ -#endif - -#ifndef FAN_MNT_DETACH -#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ +// Needed for linux/fanotify.h +#ifndef __kernel_fsid_t +typedef struct { + int val[2]; +} __kernel_fsid_t; #endif -#ifndef FAN_REPORT_MNT -#define FAN_REPORT_MNT 0x00004000 /* Report mount events */ -#endif - -#ifndef FAN_MARK_MNTNS -#define FAN_MARK_MNTNS 0x00000110 -#endif +#include static uint64_t get_mnt_id(struct __test_metadata *const _metadata, const char *path) -- cgit v1.2.3 From 8802087d20c0e1c26c4b4fe30e22264bf8285e51 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 8 May 2025 00:48:23 +0000 Subject: net: devmem: TCP tx netlink api Add bind-tx netlink call to attach dmabuf for TX; queue is not required, only ifindex and dmabuf fd for attachment. Signed-off-by: Stanislav Fomichev Signed-off-by: Mina Almasry Link: https://patch.msgid.link/20250508004830.4100853-4-almasrymina@google.com Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/netdev.yaml | 12 ++++++++++++ include/uapi/linux/netdev.h | 1 + net/core/netdev-genl-gen.c | 13 +++++++++++++ net/core/netdev-genl-gen.h | 1 + net/core/netdev-genl.c | 6 ++++++ tools/include/uapi/linux/netdev.h | 1 + 6 files changed, 34 insertions(+) (limited to 'tools/include') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index f5e0750ab71d..c0ef6d0d7786 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -743,6 +743,18 @@ operations: - defer-hard-irqs - gro-flush-timeout - irq-suspend-timeout + - + name: bind-tx + doc: Bind dmabuf to netdev for TX + attribute-set: dmabuf + do: + request: + attributes: + - ifindex + - fd + reply: + attributes: + - id kernel-family: headers: [ "net/netdev_netlink.h"] diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 7600bf62dbdf..7eb9571786b8 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -219,6 +219,7 @@ enum { NETDEV_CMD_QSTATS_GET, NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, + NETDEV_CMD_BIND_TX, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 739f7b6506a6..4fc44587f493 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -99,6 +99,12 @@ static const struct nla_policy netdev_napi_set_nl_policy[NETDEV_A_NAPI_IRQ_SUSPE [NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT] = { .type = NLA_UINT, }, }; +/* NETDEV_CMD_BIND_TX - do */ +static const struct nla_policy netdev_bind_tx_nl_policy[NETDEV_A_DMABUF_FD + 1] = { + [NETDEV_A_DMABUF_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), + [NETDEV_A_DMABUF_FD] = { .type = NLA_U32, }, +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -190,6 +196,13 @@ static const struct genl_split_ops netdev_nl_ops[] = { .maxattr = NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, + { + .cmd = NETDEV_CMD_BIND_TX, + .doit = netdev_nl_bind_tx_doit, + .policy = netdev_bind_tx_nl_policy, + .maxattr = NETDEV_A_DMABUF_FD, + .flags = GENL_CMD_CAP_DO, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 17d39fd64c94..cf3fad74511f 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -34,6 +34,7 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 17aa9e42e8d5..3beef87235d8 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -968,6 +968,12 @@ err_genlmsg_free: return err; } +/* stub */ +int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info) +{ + return 0; +} + void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv) { INIT_LIST_HEAD(&priv->bindings); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 7600bf62dbdf..7eb9571786b8 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -219,6 +219,7 @@ enum { NETDEV_CMD_QSTATS_GET, NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, + NETDEV_CMD_BIND_TX, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3 From 83dcc12fa54ed05dd18af80defbde4205dfdc137 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 May 2025 11:33:58 -0300 Subject: tools headers: Sync the linux/unaligned.h copy with the kernel sources To pick up the changes in: acea9943271b6290 ("vdso: Address variable shadowing in macros") Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/linux/unaligned.h include/linux/unaligned.h Please see tools/include/uapi/README for further details. Cc: Adrian Hunter Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peng Jiang Cc: Thomas Gleixner Link: https://lore.kernel.org/r/20250519214126.1652491-5-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/vdso/unaligned.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tools/include') diff --git a/tools/include/vdso/unaligned.h b/tools/include/vdso/unaligned.h index eee3d2a4dbe4..ff0c06b6513e 100644 --- a/tools/include/vdso/unaligned.h +++ b/tools/include/vdso/unaligned.h @@ -2,14 +2,14 @@ #ifndef __VDSO_UNALIGNED_H #define __VDSO_UNALIGNED_H -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed * __get_pptr = (typeof(__get_pptr))(ptr); \ + __get_pptr->x; \ }) -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed * __put_pptr = (typeof(__put_pptr))(ptr); \ + __put_pptr->x = (val); \ } while (0) #endif /* __VDSO_UNALIGNED_H */ -- cgit v1.2.3 From fa7a1e8d2d65a17bc5d6a9978d1352e0b0ef59d2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 May 2025 11:37:20 -0300 Subject: tools headers: Synchronize uapi/linux/bits.h with the kernel sources To pick up the changes in this cset: 1e7933a575ed8af4 ("uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"") This addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/bits.h include/uapi/linux/bits.h Please see tools/include/uapi/README for further details. Acked-by: Vincent Mailhol Acked-by: Yury Norov [NVIDIA] Cc: Adrian Hunter Cc: I Hsin Cheng Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: https://lore.kernel.org/r/20250519214126.1652491-6-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/bits.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h index 5ee30f882736..682b406e1067 100644 --- a/tools/include/uapi/linux/bits.h +++ b/tools/include/uapi/linux/bits.h @@ -4,13 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) \ - (((~_UL(0)) - (_UL(1) << (l)) + 1) & \ - (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) \ - (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ - (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) -- cgit v1.2.3 From e6428c3492a34ed728b6152cf073efb3250a6d94 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 May 2025 11:48:19 -0300 Subject: tools headers compiler: Pick the const_true() define from the kernel sources The sync of include/linux/bits.h with the kernel sources will make use of this define, so add it to the tools/include/linux/compiler. variant used to build tools/ living code. Acked-by: Vincent Mailhol Acked-by: Yury Norov [NVIDIA] Cc: Adrian Hunter Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: https://lore.kernel.org/r/20250519214126.1652491-7-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'tools/include') diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 9c05a59f0184..d627e66a04a6 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -81,6 +81,28 @@ #define __is_constexpr(x) \ (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) +/* + * Similar to statically_true() but produces a constant expression + * + * To be used in conjunction with macros, such as BUILD_BUG_ON_ZERO(), + * which require their input to be a constant expression and for which + * statically_true() would otherwise fail. + * + * This is a trade-off: const_true() requires all its operands to be + * compile time constants. Else, it would always returns false even on + * the most trivial cases like: + * + * true || non_const_var + * + * On the opposite, statically_true() is able to fold more complex + * tautologies and will return true on expressions such as: + * + * !(non_const_var * 8 % 4) + * + * For the general case, statically_true() is better. + */ +#define const_true(x) __builtin_choose_expr(__is_constexpr(x), x, false) + #ifdef __ANDROID__ /* * FIXME: Big hammer to get rid of tons of: -- cgit v1.2.3 From e48b92f9e140ff632ad426c0a94e3630a61e45d8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 May 2025 11:51:16 -0300 Subject: tools headers: Synchronize linux/bits.h with the kernel sources To pick up the changes in this cset: 0312e94abe484b9e ("treewide: fix typo 'unsigned __init128' -> 'unsigned __int128'") This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/linux/bits.h include/linux/bits.h This required picking the const_true() define in linux/compiler.h as a prep patch as that macro is used in the new linux/bits.h Please see tools/include/uapi/README for further details. Acked-by: Vincent Mailhol Acked-by: Yury Norov [NVIDIA] Cc: Adrian Hunter Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: https://lore.kernel.org/r/20250519214126.1652491-8-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/bits.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools/include') diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 8de2914e6510..14fd0ca9a6cd 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -20,9 +20,8 @@ */ #if !defined(__ASSEMBLY__) #include -#define GENMASK_INPUT_CHECK(h, l) \ - (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __is_constexpr((l) > (h)), (l) > (h), 0))) +#include +#define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) #else /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, -- cgit v1.2.3 From 4140e2b31bedd87bfc53362441165979aa4fc5d8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sat, 17 May 2025 17:14:54 +0200 Subject: tools headers: Synchronize prctl.h ABI header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prctl.h ABI header was slightly updated during the development of the interface. In particular the "immutable" parameter became a bit in the option argument. Synchronize prctl.h ABI header again and make use of the definition in the testsuite and "perf bench futex". Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: André Almeida Link: https://lore.kernel.org/r/20250517151455.1065363-5-bigeasy@linutronix.de --- tools/include/uapi/linux/prctl.h | 1 + tools/perf/bench/futex.c | 4 +++- .../selftests/futex/functional/futex_priv_hash.c | 21 +++++++++++---------- 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 21f30b3ded74..43dec6eed559 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -367,6 +367,7 @@ struct prctl_mm_map { /* FUTEX hash management */ #define PR_FUTEX_HASH 78 # define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) # define PR_FUTEX_HASH_GET_SLOTS 2 # define PR_FUTEX_HASH_GET_IMMUTABLE 3 diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c index 02ae6c52ba88..26382e4d8d4c 100644 --- a/tools/perf/bench/futex.c +++ b/tools/perf/bench/futex.c @@ -9,12 +9,14 @@ void futex_set_nbuckets_param(struct bench_futex_parameters *params) { + unsigned long flags; int ret; if (params->nbuckets < 0) return; - ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, params->buckets_immutable); + flags = params->buckets_immutable ? FH_FLAG_IMMUTABLE : 0; + ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, flags); if (ret) { printf("Requesting %d hash buckets failed: %d/%m\n", params->nbuckets, ret); diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c index 72a621d9313f..2dca18fefedc 100644 --- a/tools/testing/selftests/futex/functional/futex_priv_hash.c +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -26,13 +26,14 @@ static int counter; #ifndef PR_FUTEX_HASH #define PR_FUTEX_HASH 78 # define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) # define PR_FUTEX_HASH_GET_SLOTS 2 # define PR_FUTEX_HASH_GET_IMMUTABLE 3 #endif -static int futex_hash_slots_set(unsigned int slots, int immutable) +static int futex_hash_slots_set(unsigned int slots, int flags) { - return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, immutable); + return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, flags); } static int futex_hash_slots_get(void) @@ -63,13 +64,13 @@ static void futex_hash_slots_set_verify(int slots) ksft_test_result_pass("SET and GET slots %d passed\n", slots); } -static void futex_hash_slots_set_must_fail(int slots, int immutable) +static void futex_hash_slots_set_must_fail(int slots, int flags) { int ret; - ret = futex_hash_slots_set(slots, immutable); + ret = futex_hash_slots_set(slots, flags); ksft_test_result(ret < 0, "futex_hash_slots_set(%d, %d)\n", - slots, immutable); + slots, flags); } static void *thread_return_fn(void *arg) @@ -254,18 +255,18 @@ int main(int argc, char *argv[]) ret = futex_hash_slots_set(0, 0); ksft_test_result(ret == 0, "Global hash request\n"); } else { - ret = futex_hash_slots_set(4, 1); + ret = futex_hash_slots_set(4, FH_FLAG_IMMUTABLE); ksft_test_result(ret == 0, "Immutable resize to 4\n"); } if (ret != 0) goto out; futex_hash_slots_set_must_fail(4, 0); - futex_hash_slots_set_must_fail(4, 1); + futex_hash_slots_set_must_fail(4, FH_FLAG_IMMUTABLE); futex_hash_slots_set_must_fail(8, 0); - futex_hash_slots_set_must_fail(8, 1); - futex_hash_slots_set_must_fail(0, 1); - futex_hash_slots_set_must_fail(6, 1); + futex_hash_slots_set_must_fail(8, FH_FLAG_IMMUTABLE); + futex_hash_slots_set_must_fail(0, FH_FLAG_IMMUTABLE); + futex_hash_slots_set_must_fail(6, FH_FLAG_IMMUTABLE); ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); if (ret != 0) { -- cgit v1.2.3 From a6a054c8ad32b04c2e5d3cc5592fa737a2bb771f Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 24 Apr 2025 13:48:11 +0200 Subject: tools/nolibc: add target to check header usability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each nolibc header should be valid for inclusion irrespective of any special ordering requirements. Add a new make target, based on the old kbuild "make header_check" target to validate this requirement. For now the check fails, but the following commits will fix the issues. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250424-nolibc-header-check-v1-1-011576b6ed6f@linutronix.de --- tools/include/nolibc/Makefile | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index e05862cd0805..aa2c81d78ead 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -101,5 +101,14 @@ headers_standalone: headers $(Q)$(MAKE) -C $(srctree) headers $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)sysroot +# GCC uses "s390", clang "systemz" +CLANG_CROSS_FLAGS := $(subst --target=s390-linux,--target=systemz-linux,$(CLANG_CROSS_FLAGS)) + +headers_check: headers_standalone + for header in $(filter-out crt.h std.h,$(all_files)); do \ + $(CC) $(CLANG_CROSS_FLAGS) -Wall -Werror -nostdinc -fsyntax-only -x c /dev/null \ + -I$(or $(objtree),$(srctree))/usr/include -include $$header -include $$header || exit 1; \ + done + clean: $(call QUIET_CLEAN, nolibc) rm -rf "$(OUTPUT)sysroot" -- cgit v1.2.3 From 3785289f97e2118b157332ffaae9fd2ec71237c8 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 24 Apr 2025 13:48:12 +0200 Subject: tools/nolibc: include nolibc.h early from all header files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Inclusion of any nolibc header file should also bring all other headers. On the other hand it should also be possible to include any nolibc header files in any order. Currently this is implemented by including the catch-all nolibc.h after the headers own definitions. This is problematic if one nolibc header depends on another one. The first header has to include the other one before defining any symbols. That in turn will include the rest of nolibc while the current header has not defined anything yet. If any other part of nolibc depends on definitions from the current header, errors are encountered. This is already the case today. Effectively nolibc can only be included in the order of nolibc.h. Restructure the way "nolibc.h" is included. Move it to the beginning of the header files and before the include guards. Now any header will behave exactly like "nolibc.h" while the include guards prevent any duplicate definitions. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250424-nolibc-header-check-v1-2-011576b6ed6f@linutronix.de --- tools/include/nolibc/ctype.h | 6 +++--- tools/include/nolibc/dirent.h | 6 +++--- tools/include/nolibc/elf.h | 6 +++--- tools/include/nolibc/errno.h | 6 +++--- tools/include/nolibc/fcntl.h | 6 +++--- tools/include/nolibc/getopt.h | 6 +++--- tools/include/nolibc/signal.h | 6 +++--- tools/include/nolibc/stdio.h | 6 +++--- tools/include/nolibc/stdlib.h | 6 +++--- tools/include/nolibc/string.h | 7 +++---- tools/include/nolibc/sys.h | 6 +++--- tools/include/nolibc/sys/auxv.h | 6 +++--- tools/include/nolibc/sys/mman.h | 6 +++--- tools/include/nolibc/sys/stat.h | 7 +++---- tools/include/nolibc/sys/syscall.h | 6 +++--- tools/include/nolibc/sys/time.h | 6 +++--- tools/include/nolibc/sys/wait.h | 7 +++---- tools/include/nolibc/time.h | 6 +++--- tools/include/nolibc/types.h | 6 +++--- tools/include/nolibc/unistd.h | 6 +++--- 20 files changed, 60 insertions(+), 63 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h index 6f90706d0644..470fdf34394a 100644 --- a/tools/include/nolibc/ctype.h +++ b/tools/include/nolibc/ctype.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_CTYPE_H #define _NOLIBC_CTYPE_H @@ -96,7 +99,4 @@ int ispunct(int c) return isgraph(c) && !isalnum(c); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_CTYPE_H */ diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h index 6c60ec4ba27b..758b95c48e7a 100644 --- a/tools/include/nolibc/dirent.h +++ b/tools/include/nolibc/dirent.h @@ -4,6 +4,9 @@ * Copyright (C) 2025 Thomas Weißschuh */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_DIRENT_H #define _NOLIBC_DIRENT_H @@ -94,7 +97,4 @@ int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result) return 0; } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_DIRENT_H */ diff --git a/tools/include/nolibc/elf.h b/tools/include/nolibc/elf.h index beb0b3a87569..3e2c5228bf3d 100644 --- a/tools/include/nolibc/elf.h +++ b/tools/include/nolibc/elf.h @@ -4,12 +4,12 @@ * Copyright (C) 2025 Thomas Weißschuh */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_SYS_ELF_H #define _NOLIBC_SYS_ELF_H #include -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_SYS_ELF_H */ diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h index 1d8d8033e8ff..08a33c40ec0c 100644 --- a/tools/include/nolibc/errno.h +++ b/tools/include/nolibc/errno.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_ERRNO_H #define _NOLIBC_ERRNO_H @@ -22,7 +25,4 @@ int errno __attribute__((weak)); */ #define MAX_ERRNO 4095 -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_ERRNO_H */ diff --git a/tools/include/nolibc/fcntl.h b/tools/include/nolibc/fcntl.h index 5feb08ad54a7..bff2e542f20f 100644 --- a/tools/include/nolibc/fcntl.h +++ b/tools/include/nolibc/fcntl.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_FCNTL_H #define _NOLIBC_FCNTL_H @@ -63,7 +66,4 @@ int open(const char *path, int flags, ...) return __sysret(sys_open(path, flags, mode)); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_FCNTL_H */ diff --git a/tools/include/nolibc/getopt.h b/tools/include/nolibc/getopt.h index 5fd06c9702e9..217abb95264b 100644 --- a/tools/include/nolibc/getopt.h +++ b/tools/include/nolibc/getopt.h @@ -5,6 +5,9 @@ * Copyright (C) 2025 Thomas Weißschuh */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_GETOPT_H #define _NOLIBC_GETOPT_H @@ -95,7 +98,4 @@ int getopt(int argc, char * const argv[], const char *optstring) return c; } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_GETOPT_H */ diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h index cdcc5904c51e..ac13e53ac31d 100644 --- a/tools/include/nolibc/signal.h +++ b/tools/include/nolibc/signal.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_SIGNAL_H #define _NOLIBC_SIGNAL_H @@ -20,7 +23,4 @@ int raise(int signal) return sys_kill(sys_getpid(), signal); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_SIGNAL_H */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index fb0417477759..8fa98abab212 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_STDIO_H #define _NOLIBC_STDIO_H @@ -598,7 +601,4 @@ const char *strerror(int errno) return buf; } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_STDIO_H */ diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h index 69cf1d4418f1..4790298f985c 100644 --- a/tools/include/nolibc/stdlib.h +++ b/tools/include/nolibc/stdlib.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_STDLIB_H #define _NOLIBC_STDLIB_H @@ -522,7 +525,4 @@ uintmax_t strtoumax(const char *nptr, char **endptr, int base) return __strtox(nptr, endptr, base, 0, UINTMAX_MAX); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_STDLIB_H */ diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h index f0d335f0e467..febfd6978966 100644 --- a/tools/include/nolibc/string.h +++ b/tools/include/nolibc/string.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_STRING_H #define _NOLIBC_STRING_H @@ -305,8 +308,4 @@ int toupper(int c) return c; } - -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_STRING_H */ diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index bc47007f0442..68e60e674211 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_SYS_H #define _NOLIBC_SYS_H @@ -1119,7 +1122,4 @@ int memfd_create(const char *name, unsigned int flags) return __sysret(sys_memfd_create(name, flags)); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_SYS_H */ diff --git a/tools/include/nolibc/sys/auxv.h b/tools/include/nolibc/sys/auxv.h index 04c2b9cbe51a..c52463d6c18d 100644 --- a/tools/include/nolibc/sys/auxv.h +++ b/tools/include/nolibc/sys/auxv.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "../nolibc.h" + #ifndef _NOLIBC_SYS_AUXV_H #define _NOLIBC_SYS_AUXV_H @@ -35,7 +38,4 @@ unsigned long getauxval(unsigned long type) return ret; } -/* make sure to include all global symbols */ -#include "../nolibc.h" - #endif /* _NOLIBC_SYS_AUXV_H */ diff --git a/tools/include/nolibc/sys/mman.h b/tools/include/nolibc/sys/mman.h index ad9d06b6b791..41c7bf45e427 100644 --- a/tools/include/nolibc/sys/mman.h +++ b/tools/include/nolibc/sys/mman.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "../nolibc.h" + #ifndef _NOLIBC_SYS_MMAN_H #define _NOLIBC_SYS_MMAN_H @@ -57,7 +60,4 @@ int munmap(void *addr, size_t length) return __sysret(sys_munmap(addr, length)); } -/* make sure to include all global symbols */ -#include "../nolibc.h" - #endif /* _NOLIBC_SYS_MMAN_H */ diff --git a/tools/include/nolibc/sys/stat.h b/tools/include/nolibc/sys/stat.h index 0eaf5496ce23..987c8bb52502 100644 --- a/tools/include/nolibc/sys/stat.h +++ b/tools/include/nolibc/sys/stat.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "../nolibc.h" + #ifndef _NOLIBC_SYS_STAT_H #define _NOLIBC_SYS_STAT_H @@ -67,8 +70,4 @@ int stat(const char *path, struct stat *buf) return 0; } - -/* make sure to include all global symbols */ -#include "../nolibc.h" - #endif /* _NOLIBC_SYS_STAT_H */ diff --git a/tools/include/nolibc/sys/syscall.h b/tools/include/nolibc/sys/syscall.h index 59efdec8fd1c..4bf97f1386a0 100644 --- a/tools/include/nolibc/sys/syscall.h +++ b/tools/include/nolibc/sys/syscall.h @@ -4,6 +4,9 @@ * Copyright (C) 2024 Thomas Weißschuh */ +/* make sure to include all global symbols */ +#include "../nolibc.h" + #ifndef _NOLIBC_SYS_SYSCALL_H #define _NOLIBC_SYS_SYSCALL_H @@ -13,7 +16,4 @@ #define _syscall_n(N, ...) _syscall(N, __VA_ARGS__) #define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__) -/* make sure to include all global symbols */ -#include "../nolibc.h" - #endif /* _NOLIBC_SYS_SYSCALL_H */ diff --git a/tools/include/nolibc/sys/time.h b/tools/include/nolibc/sys/time.h index 1d326c05ee62..785961c52fa3 100644 --- a/tools/include/nolibc/sys/time.h +++ b/tools/include/nolibc/sys/time.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "../nolibc.h" + #ifndef _NOLIBC_SYS_TIME_H #define _NOLIBC_SYS_TIME_H @@ -30,7 +33,4 @@ int gettimeofday(struct timeval *tv, struct timezone *tz) return __sysret(sys_gettimeofday(tv, tz)); } -/* make sure to include all global symbols */ -#include "../nolibc.h" - #endif /* _NOLIBC_SYS_TIME_H */ diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h index 9a68e6a6b1df..f27be86ad5e4 100644 --- a/tools/include/nolibc/sys/wait.h +++ b/tools/include/nolibc/sys/wait.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "../nolibc.h" + #ifndef _NOLIBC_SYS_WAIT_H #define _NOLIBC_SYS_WAIT_H @@ -110,8 +113,4 @@ pid_t waitpid(pid_t pid, int *status, int options) return info.si_pid; } - -/* make sure to include all global symbols */ -#include "../nolibc.h" - #endif /* _NOLIBC_SYS_WAIT_H */ diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index 84655361b9ad..9502f9aaf621 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_TIME_H #define _NOLIBC_TIME_H @@ -25,7 +28,4 @@ time_t time(time_t *tptr) return tv.tv_sec; } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_TIME_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index 32d0929c633b..fe97953d1657 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_TYPES_H #define _NOLIBC_TYPES_H @@ -214,7 +217,4 @@ struct stat { }) #endif -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_TYPES_H */ diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h index ac7d53d986cd..ed253305fdba 100644 --- a/tools/include/nolibc/unistd.h +++ b/tools/include/nolibc/unistd.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_UNISTD_H #define _NOLIBC_UNISTD_H @@ -56,7 +59,4 @@ int tcsetpgrp(int fd, pid_t pid) return ioctl(fd, TIOCSPGRP, &pid); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_UNISTD_H */ -- cgit v1.2.3 From 66a4f9bb1e895aa64f0e786f4cd9864929242e4f Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Sun, 27 Apr 2025 07:47:38 +0900 Subject: tools/nolibc: Add m68k support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add nolibc support for m68k. Should be helpful for nommu where linking libc can bloat even hello world to the point where you get an OOM just trying to load it. Signed-off-by: Daniel Palmer Link: https://lore.kernel.org/r/20250426224738.284874-1-daniel@0x0f.com Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/arch-m68k.h | 141 ++++++++++++++++++++++++++++ tools/include/nolibc/arch.h | 2 + tools/testing/selftests/nolibc/Makefile | 5 + tools/testing/selftests/nolibc/run-tests.sh | 5 + 4 files changed, 153 insertions(+) create mode 100644 tools/include/nolibc/arch-m68k.h (limited to 'tools/include') diff --git a/tools/include/nolibc/arch-m68k.h b/tools/include/nolibc/arch-m68k.h new file mode 100644 index 000000000000..6dac1845f298 --- /dev/null +++ b/tools/include/nolibc/arch-m68k.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * m68k specific definitions for NOLIBC + * Copyright (C) 2025 Daniel Palmer + * + * Roughly based on one or more of the other arch files. + * + */ + +#ifndef _NOLIBC_ARCH_M68K_H +#define _NOLIBC_ARCH_M68K_H + +#include "compiler.h" +#include "crt.h" + +#define _NOLIBC_SYSCALL_CLOBBERLIST "memory" + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_arg1) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_arg1), "r"(_arg2) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + register long _arg4 __asm__ ("d4") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + register long _arg4 __asm__ ("d4") = (long)(arg4); \ + register long _arg5 __asm__ ("d5") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + register long _arg4 __asm__ ("d4") = (long)(arg4); \ + register long _arg5 __asm__ ("d5") = (long)(arg5); \ + register long _arg6 __asm__ ("a0") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +void _start(void); +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +{ + __asm__ volatile ( + "movel %sp, %sp@-\n" + "jsr _start_c\n" + ); + __nolibc_entrypoint_epilogue(); +} + +#endif /* _NOLIBC_ARCH_M68K_H */ diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h index b8c1da9a88d1..d20b2304aac2 100644 --- a/tools/include/nolibc/arch.h +++ b/tools/include/nolibc/arch.h @@ -35,6 +35,8 @@ #include "arch-loongarch.h" #elif defined(__sparc__) #include "arch-sparc.h" +#elif defined(__m68k__) +#include "arch-m68k.h" #else #error Unsupported Architecture #endif diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index d17750761d9f..2671383045db 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -80,6 +80,7 @@ IMAGE_s390 = arch/s390/boot/bzImage IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi IMAGE_sparc32 = arch/sparc/boot/image IMAGE_sparc64 = arch/sparc/boot/image +IMAGE_m68k = vmlinux IMAGE = $(objtree)/$(IMAGE_$(XARCH)) IMAGE_NAME = $(notdir $(IMAGE)) @@ -103,8 +104,10 @@ DEFCONFIG_s390 = defconfig compat.config DEFCONFIG_loongarch = defconfig DEFCONFIG_sparc32 = sparc32_defconfig DEFCONFIG_sparc64 = sparc64_defconfig +DEFCONFIG_m68k = virt_defconfig DEFCONFIG = $(DEFCONFIG_$(XARCH)) +EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) # optional tests to run (default = all) @@ -130,6 +133,7 @@ QEMU_ARCH_s390 = s390x QEMU_ARCH_loongarch = loongarch64 QEMU_ARCH_sparc32 = sparc QEMU_ARCH_sparc64 = sparc64 +QEMU_ARCH_m68k = m68k QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) QEMU_ARCH_USER_ppc64le = ppc64le @@ -162,6 +166,7 @@ QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_m68k = -M virt -append "console=ttyGF0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) # OUTPUT is only set when run from the main makefile, otherwise diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 040956a9f5b8..8277599e6441 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -26,6 +26,7 @@ all_archs=( s390x s390 loongarch sparc32 sparc64 + m68k ) archs="${all_archs[@]}" @@ -186,6 +187,10 @@ test_arch() { echo "Unsupported configuration" return fi + if [ "$arch" = "m68k" ] && [ "$llvm" = "1" ]; then + echo "Unsupported configuration" + return + fi mkdir -p "$build_dir" swallow_output "${MAKE[@]}" defconfig -- cgit v1.2.3 From dc2c656e1f687d9f8decc4ee10092ee7258722c1 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 30 Apr 2025 11:35:32 +0200 Subject: tools/nolibc: move poll() to poll.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects the definition. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250430-poll-v1-1-44b5ceabdeee@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/poll.h | 55 +++++++++++++++++++++++++++++++++++++++++++ tools/include/nolibc/sys.h | 37 ----------------------------- 4 files changed, 57 insertions(+), 37 deletions(-) create mode 100644 tools/include/nolibc/poll.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index aa2c81d78ead..dda881d934d6 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -36,6 +36,7 @@ all_files := \ getopt.h \ limits.h \ nolibc.h \ + poll.h \ signal.h \ stackprotector.h \ std.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index d1b949e094ee..05a4bd5fba8b 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -114,6 +114,7 @@ #include "dirent.h" #include "fcntl.h" #include "getopt.h" +#include "poll.h" /* Used by programs to avoid std includes */ #define NOLIBC diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h new file mode 100644 index 000000000000..be6e44fe022d --- /dev/null +++ b/tools/include/nolibc/poll.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * poll definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_POLL_H +#define _NOLIBC_POLL_H + +#include "arch.h" +#include "types.h" +#include "sys.h" + +#include + +/* + * int poll(struct pollfd *fds, int nfds, int timeout); + */ + +static __attribute__((unused)) +int sys_poll(struct pollfd *fds, int nfds, int timeout) +{ +#if defined(__NR_ppoll) + struct timespec t; + + if (timeout >= 0) { + t.tv_sec = timeout / 1000; + t.tv_nsec = (timeout % 1000) * 1000000; + } + return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); +#elif defined(__NR_ppoll_time64) + struct __kernel_timespec t; + + if (timeout >= 0) { + t.tv_sec = timeout / 1000; + t.tv_nsec = (timeout % 1000) * 1000000; + } + return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); +#elif defined(__NR_poll) + return my_syscall3(__NR_poll, fds, nfds, timeout); +#else + return __nolibc_enosys(__func__, fds, nfds, timeout); +#endif +} + +static __attribute__((unused)) +int poll(struct pollfd *fds, int nfds, int timeout) +{ + return __sysret(sys_poll(fds, nfds, timeout)); +} + +#endif /* _NOLIBC_POLL_H */ diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 68e60e674211..5733fe54911d 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -766,43 +766,6 @@ int pivot_root(const char *new, const char *old) } -/* - * int poll(struct pollfd *fds, int nfds, int timeout); - */ - -static __attribute__((unused)) -int sys_poll(struct pollfd *fds, int nfds, int timeout) -{ -#if defined(__NR_ppoll) - struct timespec t; - - if (timeout >= 0) { - t.tv_sec = timeout / 1000; - t.tv_nsec = (timeout % 1000) * 1000000; - } - return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); -#elif defined(__NR_ppoll_time64) - struct __kernel_timespec t; - - if (timeout >= 0) { - t.tv_sec = timeout / 1000; - t.tv_nsec = (timeout % 1000) * 1000000; - } - return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); -#elif defined(__NR_poll) - return my_syscall3(__NR_poll, fds, nfds, timeout); -#else - return __nolibc_enosys(__func__, fds, nfds, timeout); -#endif -} - -static __attribute__((unused)) -int poll(struct pollfd *fds, int nfds, int timeout) -{ - return __sysret(sys_poll(fds, nfds, timeout)); -} - - /* * ssize_t read(int fd, void *buf, size_t count); */ -- cgit v1.2.3 From 1f421ddf494d3263842bcf430a683aaf33c3e31c Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 30 Apr 2025 11:35:33 +0200 Subject: tools/nolibc: use poll-related definitions from UAPI headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The UAPI headers already provide definitions for these symbols. Using them makes the code shorter, more robust and compatible with applications using linux/poll.h directly. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250430-poll-v1-2-44b5ceabdeee@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/poll.h | 2 +- tools/include/nolibc/types.h | 14 -------------- 2 files changed, 1 insertion(+), 15 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h index be6e44fe022d..1765acb17ea0 100644 --- a/tools/include/nolibc/poll.h +++ b/tools/include/nolibc/poll.h @@ -11,9 +11,9 @@ #define _NOLIBC_POLL_H #include "arch.h" -#include "types.h" #include "sys.h" +#include #include /* diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index fe97953d1657..70f20519ebf9 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -159,20 +159,6 @@ typedef struct { __set->fds[__idx] = 0; \ } while (0) -/* for poll() */ -#define POLLIN 0x0001 -#define POLLPRI 0x0002 -#define POLLOUT 0x0004 -#define POLLERR 0x0008 -#define POLLHUP 0x0010 -#define POLLNVAL 0x0020 - -struct pollfd { - int fd; - short int events; - short int revents; -}; - /* for getdents64() */ struct linux_dirent64 { uint64_t d_ino; -- cgit v1.2.3 From 05b6b2a9efa4b40b7bc1668f887b7d98f719efb1 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 28 Apr 2025 14:40:02 +0200 Subject: tools/nolibc: add strstr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used in various selftests and will be handy when integrating those with nolibc. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250428-nolibc-misc-v2-1-3c043eeab06c@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/string.h | 20 ++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 3 +++ 2 files changed, 23 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h index febfd6978966..163a17e7dd38 100644 --- a/tools/include/nolibc/string.h +++ b/tools/include/nolibc/string.h @@ -292,6 +292,26 @@ char *strrchr(const char *s, int c) return (char *)ret; } +static __attribute__((unused)) +char *strstr(const char *haystack, const char *needle) +{ + size_t len_haystack, len_needle; + + len_needle = strlen(needle); + if (!len_needle) + return NULL; + + len_haystack = strlen(haystack); + while (len_haystack >= len_needle) { + if (!memcmp(haystack, needle, len_needle)) + return (char *)haystack; + haystack++; + len_haystack--; + } + + return NULL; +} + static __attribute__((unused)) int tolower(int c) { diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 1ad0db92f0ed..3e15a25ccddf 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1211,6 +1211,9 @@ int run_stdlib(int min, int max) CASE_TEST(strlcpy_2); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 2), buf, 3, "b"); break; CASE_TEST(strlcpy_3); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 3), buf, 3, "ba"); break; CASE_TEST(strlcpy_4); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 4), buf, 3, "bar"); break; + CASE_TEST(strstr_foobar_foo); EXPECT_STREQ(1, strstr("foobar", "foo"), "foobar"); break; + CASE_TEST(strstr_foobar_bar); EXPECT_STREQ(1, strstr("foobar", "bar"), "bar"); break; + CASE_TEST(strstr_foobar_baz); EXPECT_PTREQ(1, strstr("foobar", "baz"), NULL); break; CASE_TEST(memcmp_20_20); EXPECT_EQ(1, memcmp("aaa\x20", "aaa\x20", 4), 0); break; CASE_TEST(memcmp_20_60); EXPECT_LT(1, memcmp("aaa\x20", "aaa\x60", 4), 0); break; CASE_TEST(memcmp_60_20); EXPECT_GT(1, memcmp("aaa\x60", "aaa\x20", 4), 0); break; -- cgit v1.2.3 From 7a7cd445d9275be8e4650d390156595685c3ac03 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 28 Apr 2025 14:40:03 +0200 Subject: tools/nolibc: add %m printf format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The %m format can be used to format the current errno. It is non-standard but supported by other commonly used libcs like glibc and musl, so applications do rely on them. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250428-nolibc-misc-v2-2-3c043eeab06c@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/stdio.h | 7 +++++++ tools/testing/selftests/nolibc/nolibc-test.c | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index 8fa98abab212..df5717d59182 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -20,6 +20,8 @@ #include "string.h" #include "compiler.h" +static const char *strerror(int errnum); + #ifndef EOF #define EOF (-1) #endif @@ -292,6 +294,11 @@ int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char if (!outstr) outstr="(null)"; } +#ifndef NOLIBC_IGNORE_ERRNO + else if (c == 'm') { + outstr = strerror(errno); + } +#endif /* NOLIBC_IGNORE_ERRNO */ else if (c == '%') { /* queue it verbatim */ continue; diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 3e15a25ccddf..b7440a667db6 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1393,6 +1393,23 @@ static int test_scanf(void) return 0; } +int test_strerror(void) +{ + char buf[100]; + ssize_t ret; + + memset(buf, 'A', sizeof(buf)); + + errno = EINVAL; + ret = snprintf(buf, sizeof(buf), "%m"); + if (is_nolibc) { + if (ret < 6 || memcmp(buf, "errno=", 6)) + return 1; + } + + return 0; +} + static int run_printf(int min, int max) { int test; @@ -1421,6 +1438,7 @@ static int run_printf(int min, int max) CASE_TEST(number_width); EXPECT_VFPRINTF(10, " 1", "%10d", 1); break; CASE_TEST(width_trunc); EXPECT_VFPRINTF(25, " ", "%25d", 1); break; CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break; + CASE_TEST(strerror); EXPECT_ZR(1, test_strerror()); break; case __LINE__: return ret; /* must be last */ /* note: do not set any defaults so as to permit holes above */ -- cgit v1.2.3 From 2337d39f7233fc3fb9d90489f0d48904eb129a2e Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 28 Apr 2025 14:40:04 +0200 Subject: tools/nolibc: add more stat() variants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add fstat(), fstatat() and lstat(). All of them use the existing implementation based on statx(). Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250428-nolibc-misc-v2-3-3c043eeab06c@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/sys/stat.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys/stat.h b/tools/include/nolibc/sys/stat.h index 987c8bb52502..8b4d80e3ea03 100644 --- a/tools/include/nolibc/sys/stat.h +++ b/tools/include/nolibc/sys/stat.h @@ -17,6 +17,9 @@ /* * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf); * int stat(const char *path, struct stat *buf); + * int fstatat(int fd, const char *path, struct stat *buf, int flag); + * int fstat(int fildes, struct stat *buf); + * int lstat(const char *path, struct stat *buf); */ static __attribute__((unused)) @@ -37,12 +40,12 @@ int statx(int fd, const char *path, int flags, unsigned int mask, struct statx * static __attribute__((unused)) -int stat(const char *path, struct stat *buf) +int fstatat(int fd, const char *path, struct stat *buf, int flag) { struct statx statx; long ret; - ret = __sysret(sys_statx(AT_FDCWD, path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx)); + ret = __sysret(sys_statx(fd, path, flag | AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx)); if (ret == -1) return ret; @@ -70,4 +73,22 @@ int stat(const char *path, struct stat *buf) return 0; } +static __attribute__((unused)) +int stat(const char *path, struct stat *buf) +{ + return fstatat(AT_FDCWD, path, buf, 0); +} + +static __attribute__((unused)) +int fstat(int fildes, struct stat *buf) +{ + return fstatat(fildes, "", buf, AT_EMPTY_PATH); +} + +static __attribute__((unused)) +int lstat(const char *path, struct stat *buf) +{ + return fstatat(AT_FDCWD, path, buf, AT_SYMLINK_NOFOLLOW); +} + #endif /* _NOLIBC_SYS_STAT_H */ -- cgit v1.2.3 From 55175d8659d22bfde30d4a1277328f090d8c0c2f Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 28 Apr 2025 14:40:05 +0200 Subject: tools/nolibc: add mremap() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used in various selftests and will be handy when integrating those with nolibc. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250428-nolibc-misc-v2-4-3c043eeab06c@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/sys/mman.h | 19 +++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 14 +++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys/mman.h b/tools/include/nolibc/sys/mman.h index 41c7bf45e427..5228751b458c 100644 --- a/tools/include/nolibc/sys/mman.h +++ b/tools/include/nolibc/sys/mman.h @@ -48,6 +48,25 @@ void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) return ret; } +static __attribute__((unused)) +void *sys_mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) +{ + return (void *)my_syscall5(__NR_mremap, old_address, old_size, + new_size, flags, new_address); +} + +static __attribute__((unused)) +void *mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) +{ + void *ret = sys_mremap(old_address, old_size, new_size, flags, new_address); + + if ((unsigned long)ret >= -4095UL) { + SET_ERRNO(-(long)ret); + ret = MAP_FAILED; + } + return ret; +} + static __attribute__((unused)) int sys_munmap(void *addr, size_t length) { diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index b7440a667db6..abe0ae794208 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -926,7 +926,7 @@ int test_mmap_munmap(void) { int ret, fd, i, page_size; void *mem; - size_t file_size, length; + size_t file_size, length, mem_length; off_t offset, pa_offset; struct stat stat_buf; const char * const files[] = { @@ -966,14 +966,22 @@ int test_mmap_munmap(void) offset = 0; length = file_size - offset; pa_offset = offset & ~(page_size - 1); + mem_length = length + offset - pa_offset; - mem = mmap(NULL, length + offset - pa_offset, PROT_READ, MAP_SHARED, fd, pa_offset); + mem = mmap(NULL, mem_length, PROT_READ, MAP_SHARED, fd, pa_offset); if (mem == MAP_FAILED) { ret = 1; goto end; } - ret = munmap(mem, length + offset - pa_offset); + mem = mremap(mem, mem_length, mem_length * 2, MREMAP_MAYMOVE, 0); + if (mem == MAP_FAILED) { + munmap(mem, mem_length); + ret = 1; + goto end; + } + + ret = munmap(mem, mem_length * 2); end: close(fd); -- cgit v1.2.3 From 801f020b5f3d6159826ca38f63cd55e5536b35dd Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 28 Apr 2025 14:40:06 +0200 Subject: tools/nolibc: add getrandom() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used in various selftests and will be handy when integrating those with nolibc. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250428-nolibc-misc-v2-5-3c043eeab06c@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys/random.h | 34 ++++++++++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 22 ++++++++++++++++++ 4 files changed, 58 insertions(+) create mode 100644 tools/include/nolibc/sys/random.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index dda881d934d6..45beeecad52d 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -48,6 +48,7 @@ all_files := \ sys.h \ sys/auxv.h \ sys/mman.h \ + sys/random.h \ sys/stat.h \ sys/syscall.h \ sys/time.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 05a4bd5fba8b..1d2f391ca4d0 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -98,6 +98,7 @@ #include "sys.h" #include "sys/auxv.h" #include "sys/mman.h" +#include "sys/random.h" #include "sys/stat.h" #include "sys/syscall.h" #include "sys/time.h" diff --git a/tools/include/nolibc/sys/random.h b/tools/include/nolibc/sys/random.h new file mode 100644 index 000000000000..8d9749f1c845 --- /dev/null +++ b/tools/include/nolibc/sys/random.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * random definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_RANDOM_H +#define _NOLIBC_SYS_RANDOM_H + +#include "../arch.h" +#include "../sys.h" + +#include + +/* + * ssize_t getrandom(void *buf, size_t buflen, unsigned int flags); + */ + +static __attribute__((unused)) +ssize_t sys_getrandom(void *buf, size_t buflen, unsigned int flags) +{ + return my_syscall3(__NR_getrandom, buf, buflen, flags); +} + +static __attribute__((unused)) +ssize_t getrandom(void *buf, size_t buflen, unsigned int flags) +{ + return __sysret(sys_getrandom(buf, buflen, flags)); +} + +#endif /* _NOLIBC_SYS_RANDOM_H */ diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index abe0ae794208..df1cb3e62564 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -807,6 +808,26 @@ static int test_dirent(void) return 0; } +int test_getrandom(void) +{ + uint64_t rng = 0; + ssize_t ret; + + ret = getrandom(&rng, sizeof(rng), GRND_NONBLOCK); + if (ret == -1 && errno == EAGAIN) + return 0; /* No entropy available yet */ + + if (ret != sizeof(rng)) + return ret; + + if (!rng) { + errno = EINVAL; + return -1; + } + + return 0; +} + int test_getpagesize(void) { int x = getpagesize(); @@ -1124,6 +1145,7 @@ int run_syscall(int min, int max) CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break; CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break; CASE_TEST(directories); EXPECT_SYSZR(proc, test_dirent()); break; + CASE_TEST(getrandom); EXPECT_SYSZR(1, test_getrandom()); break; CASE_TEST(gettimeofday_tv); EXPECT_SYSZR(1, gettimeofday(&tv, NULL)); break; CASE_TEST(gettimeofday_tv_tz);EXPECT_SYSZR(1, gettimeofday(&tv, &tz)); break; CASE_TEST(getpagesize); EXPECT_SYSZR(1, test_getpagesize()); break; -- cgit v1.2.3 From bf5e8a78beded3503bbcfe86b3094a42ce492556 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 28 Apr 2025 14:40:07 +0200 Subject: tools/nolibc: add abs() and friends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used in various selftests and will be handy when integrating those with nolibc. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250428-nolibc-misc-v2-6-3c043eeab06c@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/math.h | 31 ++++++++++++++++++++++++++++ tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/stdlib.h | 18 ++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 2 ++ 5 files changed, 53 insertions(+) create mode 100644 tools/include/nolibc/math.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index 45beeecad52d..da4771ef8984 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -35,6 +35,7 @@ all_files := \ fcntl.h \ getopt.h \ limits.h \ + math.h \ nolibc.h \ poll.h \ signal.h \ diff --git a/tools/include/nolibc/math.h b/tools/include/nolibc/math.h new file mode 100644 index 000000000000..9df823ddd412 --- /dev/null +++ b/tools/include/nolibc/math.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * math definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_SYS_MATH_H +#define _NOLIBC_SYS_MATH_H + +static __inline__ +double fabs(double x) +{ + return x >= 0 ? x : -x; +} + +static __inline__ +float fabsf(float x) +{ + return x >= 0 ? x : -x; +} + +static __inline__ +long double fabsl(long double x) +{ + return x >= 0 ? x : -x; +} + +#endif /* _NOLIBC_SYS_MATH_H */ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 1d2f391ca4d0..966302c9e8e1 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -116,6 +116,7 @@ #include "fcntl.h" #include "getopt.h" #include "poll.h" +#include "math.h" /* Used by programs to avoid std includes */ #define NOLIBC diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h index 4790298f985c..5fd99a480f82 100644 --- a/tools/include/nolibc/stdlib.h +++ b/tools/include/nolibc/stdlib.h @@ -32,6 +32,24 @@ static __attribute__((unused)) char itoa_buffer[21]; * As much as possible, please keep functions alphabetically sorted. */ +static __inline__ +int abs(int j) +{ + return j >= 0 ? j : -j; +} + +static __inline__ +long labs(long j) +{ + return j >= 0 ? j : -j; +} + +static __inline__ +long long llabs(long long j) +{ + return j >= 0 ? j : -j; +} + /* must be exported, as it's used by libgcc for various divide functions */ void abort(void); __attribute__((weak,unused,noreturn,section(".text.nolibc_abort"))) diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index df1cb3e62564..14a27bc6c83e 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1318,6 +1318,8 @@ int run_stdlib(int min, int max) CASE_TEST(tolower_noop); EXPECT_EQ(1, tolower('a'), 'a'); break; CASE_TEST(toupper); EXPECT_EQ(1, toupper('a'), 'A'); break; CASE_TEST(toupper_noop); EXPECT_EQ(1, toupper('A'), 'A'); break; + CASE_TEST(abs); EXPECT_EQ(1, abs(-10), 10); break; + CASE_TEST(abs_noop); EXPECT_EQ(1, abs(10), 10); break; case __LINE__: return ret; /* must be last */ -- cgit v1.2.3 From 1e10b8534f5af42b7da528c254595f2a0cf4997d Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 28 Apr 2025 14:40:08 +0200 Subject: tools/nolibc: add support for access() and faccessat() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used in various selftests and will be handy when integrating those with nolibc. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250428-nolibc-misc-v2-7-3c043eeab06c@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/unistd.h | 28 ++++++++++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 2 ++ 2 files changed, 30 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h index ed253305fdba..25bfc7732ec7 100644 --- a/tools/include/nolibc/unistd.h +++ b/tools/include/nolibc/unistd.h @@ -20,6 +20,34 @@ #define STDOUT_FILENO 1 #define STDERR_FILENO 2 +#define F_OK 0 +#define X_OK 1 +#define W_OK 2 +#define R_OK 4 + +/* + * int access(const char *path, int amode); + * int faccessat(int fd, const char *path, int amode, int flag); + */ + +static __attribute__((unused)) +int sys_faccessat(int fd, const char *path, int amode, int flag) +{ + return my_syscall4(__NR_faccessat, fd, path, amode, flag); +} + +static __attribute__((unused)) +int faccessat(int fd, const char *path, int amode, int flag) +{ + return __sysret(sys_faccessat(fd, path, amode, flag)); +} + +static __attribute__((unused)) +int access(const char *path, int amode) +{ + return faccessat(AT_FDCWD, path, amode, 0); +} + static __attribute__((unused)) int msleep(unsigned int msecs) diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 14a27bc6c83e..10db118b8b11 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1112,6 +1112,8 @@ int run_syscall(int min, int max) * test numbers. */ switch (test + __LINE__ + 1) { + CASE_TEST(access); EXPECT_SYSZR(proc, access("/proc/self", R_OK)); break; + CASE_TEST(access_bad); EXPECT_SYSER(proc, access("/proc/self", W_OK), -1, EPERM); break; CASE_TEST(getpid); EXPECT_SYSNE(1, getpid(), -1); break; CASE_TEST(getppid); EXPECT_SYSNE(1, getppid(), -1); break; CASE_TEST(gettid); EXPECT_SYSNE(has_gettid, gettid(), -1); break; -- cgit v1.2.3 From 50647213e115cbf5eb92c56a076af34dcd984174 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 28 Apr 2025 14:40:09 +0200 Subject: tools/nolibc: add clock_getres(), clock_gettime() and clock_settime() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used in various selftests and will be handy when integrating those with nolibc. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250428-nolibc-misc-v2-8-3c043eeab06c@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/time.h | 92 ++++++++++++++++++++++++++++ tools/include/nolibc/types.h | 2 + tools/testing/selftests/nolibc/nolibc-test.c | 5 ++ 3 files changed, 99 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index 9502f9aaf621..7e0b7eac0b7c 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -15,6 +15,98 @@ #include "types.h" #include "sys.h" +#include + +static __inline__ +void __nolibc_timespec_user_to_kernel(const struct timespec *ts, struct __kernel_timespec *kts) +{ + kts->tv_sec = ts->tv_sec; + kts->tv_nsec = ts->tv_nsec; +} + +static __inline__ +void __nolibc_timespec_kernel_to_user(const struct __kernel_timespec *kts, struct timespec *ts) +{ + ts->tv_sec = kts->tv_sec; + ts->tv_nsec = kts->tv_nsec; +} + +/* + * int clock_getres(clockid_t clockid, struct timespec *res); + * int clock_gettime(clockid_t clockid, struct timespec *tp); + * int clock_settime(clockid_t clockid, const struct timespec *tp); + */ + +static __attribute__((unused)) +int sys_clock_getres(clockid_t clockid, struct timespec *res) +{ +#if defined(__NR_clock_getres) + return my_syscall2(__NR_clock_getres, clockid, res); +#elif defined(__NR_clock_getres_time64) + struct __kernel_timespec kres; + int ret; + + ret = my_syscall2(__NR_clock_getres_time64, clockid, &kres); + if (res) + __nolibc_timespec_kernel_to_user(&kres, res); + return ret; +#else + return __nolibc_enosys(__func__, clockid, res); +#endif +} + +static __attribute__((unused)) +int clock_getres(clockid_t clockid, struct timespec *res) +{ + return __sysret(sys_clock_getres(clockid, res)); +} + +static __attribute__((unused)) +int sys_clock_gettime(clockid_t clockid, struct timespec *tp) +{ +#if defined(__NR_clock_gettime) + return my_syscall2(__NR_clock_gettime, clockid, tp); +#elif defined(__NR_clock_gettime64) + struct __kernel_timespec ktp; + int ret; + + ret = my_syscall2(__NR_clock_gettime64, clockid, &ktp); + if (tp) + __nolibc_timespec_kernel_to_user(&ktp, tp); + return ret; +#else + return __nolibc_enosys(__func__, clockid, tp); +#endif +} + +static __attribute__((unused)) +int clock_gettime(clockid_t clockid, struct timespec *tp) +{ + return __sysret(sys_clock_gettime(clockid, tp)); +} + +static __attribute__((unused)) +int sys_clock_settime(clockid_t clockid, struct timespec *tp) +{ +#if defined(__NR_clock_settime) + return my_syscall2(__NR_clock_settime, clockid, tp); +#elif defined(__NR_clock_settime64) + struct __kernel_timespec ktp; + + __nolibc_timespec_user_to_kernel(tp, &ktp); + return my_syscall2(__NR_clock_settime64, clockid, &ktp); +#else + return __nolibc_enosys(__func__, clockid, tp); +#endif +} + +static __attribute__((unused)) +int clock_settime(clockid_t clockid, struct timespec *tp) +{ + return __sysret(sys_clock_settime(clockid, tp)); +} + + static __attribute__((unused)) time_t time(time_t *tptr) { diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index 70f20519ebf9..ba6867aeeb91 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -187,6 +187,8 @@ struct stat { union { time_t st_ctime; struct timespec st_ctim; }; /* time of last status change */ }; +typedef __kernel_clockid_t clockid_t; + /* WARNING, it only deals with the 4096 first majors and 256 first minors */ #define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff))) #define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff)) diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 10db118b8b11..9b8aa41a547b 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -1081,6 +1082,7 @@ int run_syscall(int min, int max) { struct timeval tv; struct timezone tz; + struct timespec ts; struct stat stat_buf; int euid0; int proc; @@ -1114,6 +1116,9 @@ int run_syscall(int min, int max) switch (test + __LINE__ + 1) { CASE_TEST(access); EXPECT_SYSZR(proc, access("/proc/self", R_OK)); break; CASE_TEST(access_bad); EXPECT_SYSER(proc, access("/proc/self", W_OK), -1, EPERM); break; + CASE_TEST(clock_getres); EXPECT_SYSZR(1, clock_getres(CLOCK_MONOTONIC, &ts)); break; + CASE_TEST(clock_gettime); EXPECT_SYSZR(1, clock_gettime(CLOCK_MONOTONIC, &ts)); break; + CASE_TEST(clock_settime); EXPECT_SYSER(1, clock_settime(CLOCK_MONOTONIC, &ts), -1, EINVAL); break; CASE_TEST(getpid); EXPECT_SYSNE(1, getpid(), -1); break; CASE_TEST(getppid); EXPECT_SYSNE(1, getppid(), -1); break; CASE_TEST(gettid); EXPECT_SYSNE(has_gettid, gettid(), -1); break; -- cgit v1.2.3 From fa7bf84486e48c65c5e450248b5d6922352203d1 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 28 Apr 2025 14:40:10 +0200 Subject: tools/nolibc: add timer functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used in various selftests and will be handy when integrating those with nolibc. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250428-nolibc-misc-v2-9-3c043eeab06c@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/time.h | 86 ++++++++++++++++++++++++++++ tools/include/nolibc/types.h | 1 + tools/testing/selftests/nolibc/nolibc-test.c | 51 +++++++++++++++++ 3 files changed, 138 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index 7e0b7eac0b7c..48f602b661b3 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -15,6 +15,7 @@ #include "types.h" #include "sys.h" +#include #include static __inline__ @@ -120,4 +121,89 @@ time_t time(time_t *tptr) return tv.tv_sec; } + +/* + * int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid); + * int timer_gettime(timer_t timerid, struct itimerspec *curr_value); + * int timer_settime(timer_t timerid, int flags, const struct itimerspec *new_value, struct itimerspec *old_value); + */ + +static __attribute__((unused)) +int sys_timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid) +{ + return my_syscall3(__NR_timer_create, clockid, evp, timerid); +} + +static __attribute__((unused)) +int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid) +{ + return __sysret(sys_timer_create(clockid, evp, timerid)); +} + +static __attribute__((unused)) +int sys_timer_delete(timer_t timerid) +{ + return my_syscall1(__NR_timer_delete, timerid); +} + +static __attribute__((unused)) +int timer_delete(timer_t timerid) +{ + return __sysret(sys_timer_delete(timerid)); +} + +static __attribute__((unused)) +int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value) +{ +#if defined(__NR_timer_gettime) + return my_syscall2(__NR_timer_gettime, timerid, curr_value); +#elif defined(__NR_timer_gettime64) + struct __kernel_itimerspec kcurr_value; + int ret; + + ret = my_syscall2(__NR_timer_gettime64, timerid, &kcurr_value); + __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval); + __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value); + return ret; +#else + return __nolibc_enosys(__func__, timerid, curr_value); +#endif +} + +static __attribute__((unused)) +int timer_gettime(timer_t timerid, struct itimerspec *curr_value) +{ + return __sysret(sys_timer_gettime(timerid, curr_value)); +} + +static __attribute__((unused)) +int sys_timer_settime(timer_t timerid, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ +#if defined(__NR_timer_settime) + return my_syscall4(__NR_timer_settime, timerid, flags, new_value, old_value); +#elif defined(__NR_timer_settime64) + struct __kernel_itimerspec knew_value, kold_value; + int ret; + + __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value); + __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval); + ret = my_syscall4(__NR_timer_settime64, timerid, flags, &knew_value, &kold_value); + if (old_value) { + __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval); + __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value); + } + return ret; +#else + return __nolibc_enosys(__func__, timerid, flags, new_value, old_value); +#endif +} + +static __attribute__((unused)) +int timer_settime(timer_t timerid, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ + return __sysret(sys_timer_settime(timerid, flags, new_value, old_value)); +} + #endif /* _NOLIBC_TIME_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index ba6867aeeb91..93da29fe7719 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -188,6 +188,7 @@ struct stat { }; typedef __kernel_clockid_t clockid_t; +typedef int timer_t; /* WARNING, it only deals with the 4096 first majors and 256 first minors */ #define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff))) diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 9b8aa41a547b..7530b442941f 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -905,6 +905,56 @@ int test_stat_timestamps(void) return 0; } +int test_timer(void) +{ + struct itimerspec timerspec; + struct sigevent evp; + timer_t timer; + int ret; + + evp.sigev_notify = SIGEV_NONE; + + ret = timer_create(CLOCK_MONOTONIC, &evp, &timer); + if (ret) + return ret; + + timerspec = (struct itimerspec) { + .it_value.tv_sec = 1000000, + }; + ret = timer_settime(timer, 0, &timerspec, NULL); + if (ret) + goto err; + + timerspec = (struct itimerspec) { + .it_value.tv_sec = -1, + .it_value.tv_nsec = -1, + .it_interval.tv_sec = -1, + .it_interval.tv_nsec = -1, + }; + ret = timer_gettime(timer, &timerspec); + if (ret) + goto err; + + errno = EINVAL; + ret = -1; + + if (timerspec.it_interval.tv_sec || timerspec.it_interval.tv_nsec) + goto err; + + if (timerspec.it_value.tv_sec > 1000000) + goto err; + + ret = timer_delete(timer); + if (ret) + return ret; + + return 0; + +err: + timer_delete(timer); + return ret; +} + int test_uname(void) { struct utsname buf; @@ -1186,6 +1236,7 @@ int run_syscall(int min, int max) CASE_TEST(stat_fault); EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break; CASE_TEST(stat_timestamps); EXPECT_SYSZR(1, test_stat_timestamps()); break; CASE_TEST(symlink_root); EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break; + CASE_TEST(timer); EXPECT_SYSZR(1, test_timer()); break; CASE_TEST(uname); EXPECT_SYSZR(proc, test_uname()); break; CASE_TEST(uname_fault); EXPECT_SYSER(1, uname(NULL), -1, EFAULT); break; CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break; -- cgit v1.2.3 From da69cfb17b2afbb8692b74eefb844f85febb6674 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 28 Apr 2025 14:40:11 +0200 Subject: tools/nolibc: add timerfd functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used in various selftests and will be handy when integrating those with nolibc. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250428-nolibc-misc-v2-10-3c043eeab06c@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys/timerfd.h | 87 ++++++++++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 48 +++++++++++++++ 4 files changed, 137 insertions(+) create mode 100644 tools/include/nolibc/sys/timerfd.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index da4771ef8984..58c4ede9c294 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -53,6 +53,7 @@ all_files := \ sys/stat.h \ sys/syscall.h \ sys/time.h \ + sys/timerfd.h \ sys/types.h \ sys/wait.h \ time.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 966302c9e8e1..37a55f703dfc 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -102,6 +102,7 @@ #include "sys/stat.h" #include "sys/syscall.h" #include "sys/time.h" +#include "sys/timerfd.h" #include "sys/wait.h" #include "ctype.h" #include "elf.h" diff --git a/tools/include/nolibc/sys/timerfd.h b/tools/include/nolibc/sys/timerfd.h new file mode 100644 index 000000000000..4375d546ba58 --- /dev/null +++ b/tools/include/nolibc/sys/timerfd.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * timerfd definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_TIMERFD_H +#define _NOLIBC_SYS_TIMERFD_H + +#include "../sys.h" +#include "../time.h" + +#include + + +static __attribute__((unused)) +int sys_timerfd_create(int clockid, int flags) +{ + return my_syscall2(__NR_timerfd_create, clockid, flags); +} + +static __attribute__((unused)) +int timerfd_create(int clockid, int flags) +{ + return __sysret(sys_timerfd_create(clockid, flags)); +} + + +static __attribute__((unused)) +int sys_timerfd_gettime(int fd, struct itimerspec *curr_value) +{ +#if defined(__NR_timerfd_gettime) + return my_syscall2(__NR_timerfd_gettime, fd, curr_value); +#elif defined(__NR_timerfd_gettime64) + struct __kernel_itimerspec kcurr_value; + int ret; + + ret = my_syscall2(__NR_timerfd_gettime64, fd, &kcurr_value); + __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval); + __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value); + return ret; +#else + return __nolibc_enosys(__func__, fd, curr_value); +#endif +} + +static __attribute__((unused)) +int timerfd_gettime(int fd, struct itimerspec *curr_value) +{ + return __sysret(sys_timerfd_gettime(fd, curr_value)); +} + + +static __attribute__((unused)) +int sys_timerfd_settime(int fd, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ +#if defined(__NR_timerfd_settime) + return my_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value); +#elif defined(__NR_timerfd_settime64) + struct __kernel_itimerspec knew_value, kold_value; + int ret; + + __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value); + __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval); + ret = my_syscall4(__NR_timerfd_settime64, fd, flags, &knew_value, &kold_value); + if (old_value) { + __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval); + __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value); + } + return ret; +#else + return __nolibc_enosys(__func__, fd, flags, new_value, old_value); +#endif +} + +static __attribute__((unused)) +int timerfd_settime(int fd, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ + return __sysret(sys_timerfd_settime(fd, flags, new_value, old_value)); +} + +#endif /* _NOLIBC_SYS_TIMERFD_H */ diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 7530b442941f..d73125c41096 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -955,6 +956,52 @@ err: return ret; } +int test_timerfd(void) +{ + struct itimerspec timerspec; + int timer, ret; + + timer = timerfd_create(CLOCK_MONOTONIC, 0); + if (timer == -1) + return -1; + + timerspec = (struct itimerspec) { + .it_value.tv_sec = 1000000, + }; + ret = timerfd_settime(timer, 0, &timerspec, NULL); + if (ret) + goto err; + + timerspec = (struct itimerspec) { + .it_value.tv_sec = -1, + .it_value.tv_nsec = -1, + .it_interval.tv_sec = -1, + .it_interval.tv_nsec = -1, + }; + ret = timerfd_gettime(timer, &timerspec); + if (ret) + goto err; + + errno = EINVAL; + ret = -1; + + if (timerspec.it_interval.tv_sec || timerspec.it_interval.tv_nsec) + goto err; + + if (timerspec.it_value.tv_sec > 1000000) + goto err; + + ret = close(timer); + if (ret) + return ret; + + return 0; + +err: + close(timer); + return ret; +} + int test_uname(void) { struct utsname buf; @@ -1237,6 +1284,7 @@ int run_syscall(int min, int max) CASE_TEST(stat_timestamps); EXPECT_SYSZR(1, test_stat_timestamps()); break; CASE_TEST(symlink_root); EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break; CASE_TEST(timer); EXPECT_SYSZR(1, test_timer()); break; + CASE_TEST(timerfd); EXPECT_SYSZR(1, test_timerfd()); break; CASE_TEST(uname); EXPECT_SYSZR(proc, test_uname()); break; CASE_TEST(uname_fault); EXPECT_SYSER(1, uname(NULL), -1, EFAULT); break; CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break; -- cgit v1.2.3 From 7ff3c71a4795f524b4dc643db4d00d4467be26a0 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 28 Apr 2025 14:40:12 +0200 Subject: tools/nolibc: add difftime() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used in various selftests and will be handy when integrating those with nolibc. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250428-nolibc-misc-v2-11-3c043eeab06c@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/time.h | 7 +++++++ tools/testing/selftests/nolibc/nolibc-test.c | 12 ++++++++++++ 2 files changed, 19 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index 48f602b661b3..fc387940d51f 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -108,6 +108,13 @@ int clock_settime(clockid_t clockid, struct timespec *tp) } +static __inline__ +double difftime(time_t time1, time_t time2) +{ + return time1 - time2; +} + + static __attribute__((unused)) time_t time(time_t *tptr) { diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index d73125c41096..665d7631fbc6 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1304,6 +1304,17 @@ int run_syscall(int min, int max) return ret; } +int test_difftime(void) +{ + if (difftime(200., 100.) != 100.) + return 1; + + if (difftime(100., 200.) != -100.) + return 1; + + return 0; +} + int run_stdlib(int min, int max) { int test; @@ -1426,6 +1437,7 @@ int run_stdlib(int min, int max) CASE_TEST(toupper_noop); EXPECT_EQ(1, toupper('A'), 'A'); break; CASE_TEST(abs); EXPECT_EQ(1, abs(-10), 10); break; CASE_TEST(abs_noop); EXPECT_EQ(1, abs(10), 10); break; + CASE_TEST(difftime); EXPECT_ZR(1, test_difftime()); break; case __LINE__: return ret; /* must be last */ -- cgit v1.2.3 From 256dc7339d466f4d928be48ced6d590a99d447cc Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 28 Apr 2025 14:40:13 +0200 Subject: tools/nolibc: add namespace functionality MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used in various selftests and will be handy when integrating those with nolibc. Not all configurations support namespaces, so skip the tests where necessary. Also if the tests are running without privileges. Enable the namespace configuration for those architectures where it is not enabled by default. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250428-nolibc-misc-v2-12-3c043eeab06c@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sched.h | 50 +++++++++++++++++++++ tools/testing/selftests/nolibc/Makefile | 2 + tools/testing/selftests/nolibc/nolibc-test.c | 67 ++++++++++++++++++++++++++++ 5 files changed, 121 insertions(+) create mode 100644 tools/include/nolibc/sched.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index 58c4ede9c294..9ff78ae21522 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -38,6 +38,7 @@ all_files := \ math.h \ nolibc.h \ poll.h \ + sched.h \ signal.h \ stackprotector.h \ std.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 37a55f703dfc..51c423a36b59 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -106,6 +106,7 @@ #include "sys/wait.h" #include "ctype.h" #include "elf.h" +#include "sched.h" #include "signal.h" #include "unistd.h" #include "stdio.h" diff --git a/tools/include/nolibc/sched.h b/tools/include/nolibc/sched.h new file mode 100644 index 000000000000..32221562c166 --- /dev/null +++ b/tools/include/nolibc/sched.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * sched function definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_SCHED_H +#define _NOLIBC_SCHED_H + +#include "sys.h" + +#include + +/* + * int setns(int fd, int nstype); + */ + +static __attribute__((unused)) +int sys_setns(int fd, int nstype) +{ + return my_syscall2(__NR_setns, fd, nstype); +} + +static __attribute__((unused)) +int setns(int fd, int nstype) +{ + return __sysret(sys_setns(fd, nstype)); +} + + +/* + * int unshare(int flags); + */ + +static __attribute__((unused)) +int sys_unshare(int flags) +{ + return my_syscall1(__NR_unshare, flags); +} + +static __attribute__((unused)) +int unshare(int flags) +{ + return __sysret(sys_unshare(flags)); +} + +#endif /* _NOLIBC_SCHED_H */ diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 2671383045db..94176ffe4646 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -109,6 +109,8 @@ DEFCONFIG = $(DEFCONFIG_$(XARCH)) EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) +EXTRACONFIG_arm = -e CONFIG_NAMESPACES +EXTRACONFIG_armthumb = -e CONFIG_NAMESPACES # optional tests to run (default = all) TEST = diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 665d7631fbc6..d1157319b5d7 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1172,6 +1172,72 @@ int test_openat(void) return 0; } +int test_namespace(void) +{ + int original_ns, new_ns, ret; + ino_t original_ns_ino; + struct stat stat_buf; + + original_ns = open("/proc/self/ns/uts", O_RDONLY); + if (original_ns == -1) + return -1; + + ret = fstat(original_ns, &stat_buf); + if (ret) + goto out; + + original_ns_ino = stat_buf.st_ino; + + ret = unshare(CLONE_NEWUTS); + if (ret) + goto out; + + new_ns = open("/proc/self/ns/uts", O_RDONLY); + if (new_ns == -1) { + ret = new_ns; + goto out; + } + + ret = fstat(new_ns, &stat_buf); + close(new_ns); + if (ret) + goto out; + + if (stat_buf.st_ino == original_ns_ino) { + errno = EINVAL; + ret = -1; + goto out; + } + + ret = setns(original_ns, CLONE_NEWUTS); + if (ret) + goto out; + + new_ns = open("/proc/self/ns/uts", O_RDONLY); + if (new_ns == -1) { + ret = new_ns; + goto out; + } + + ret = fstat(new_ns, &stat_buf); + if (ret) + goto out; + + close(new_ns); + + if (stat_buf.st_ino != original_ns_ino) { + errno = EINVAL; + ret = -1; + goto out; + } + + ret = 0; + +out: + close(original_ns); + return ret; +} + /* Run syscall tests between IDs and . * Return 0 on success, non-zero on failure. */ @@ -1296,6 +1362,7 @@ int run_syscall(int min, int max) CASE_TEST(write_zero); EXPECT_SYSZR(1, write(1, &tmp, 0)); break; CASE_TEST(syscall_noargs); EXPECT_SYSEQ(1, syscall(__NR_getpid), getpid()); break; CASE_TEST(syscall_args); EXPECT_SYSER(1, syscall(__NR_statx, 0, NULL, 0, 0, NULL), -1, EFAULT); break; + CASE_TEST(namespace); EXPECT_SYSZR(euid0 && proc, test_namespace()); break; case __LINE__: return ret; /* must be last */ /* note: do not set any defaults so as to permit holes above */ -- cgit v1.2.3 From a009a0c6faa9e7ffac13d55ef6cdac1f9a68efbe Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 28 Apr 2025 14:40:14 +0200 Subject: tools/nolibc: add fopen() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used in various selftests and will be handy when integrating those with nolibc. Only the standard POSIX modes are supported. No extensions nor the (noop) "b" from ISO C are accepted. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250428-nolibc-misc-v2-13-3c043eeab06c@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/stdio.h | 27 +++++++++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 24 ++++++++++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index df5717d59182..c470d334ef3f 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -13,6 +13,7 @@ #include "std.h" #include "arch.h" #include "errno.h" +#include "fcntl.h" #include "types.h" #include "sys.h" #include "stdarg.h" @@ -55,6 +56,32 @@ FILE *fdopen(int fd, const char *mode __attribute__((unused))) return (FILE*)(intptr_t)~fd; } +static __attribute__((unused)) +FILE *fopen(const char *pathname, const char *mode) +{ + int flags, fd; + + switch (*mode) { + case 'r': + flags = O_RDONLY; + break; + case 'w': + flags = O_WRONLY | O_CREAT | O_TRUNC; + break; + case 'a': + flags = O_WRONLY | O_CREAT | O_APPEND; + break; + default: + SET_ERRNO(EINVAL); return NULL; + } + + if (mode[1] == '+') + flags = (flags & ~(O_RDONLY | O_WRONLY)) | O_RDWR; + + fd = open(pathname, flags, 0666); + return fdopen(fd, mode); +} + /* provides the fd of stream. */ static __attribute__((unused)) int fileno(FILE *stream) diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index d1157319b5d7..0391c7d01380 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -859,6 +859,29 @@ int test_getpagesize(void) return !c; } +int test_file_stream(void) +{ + FILE *f; + int r; + + f = fopen("/dev/null", "r"); + if (!f) + return -1; + + errno = 0; + r = fwrite("foo", 1, 3, f); + if (r != 0 || errno != EBADF) { + fclose(f); + return -1; + } + + r = fclose(f); + if (r == EOF) + return -1; + + return 0; +} + int test_fork(void) { int status; @@ -1311,6 +1334,7 @@ int run_syscall(int min, int max) CASE_TEST(dup3_0); tmp = dup3(0, 100, 0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break; CASE_TEST(dup3_m1); tmp = dup3(-1, 100, 0); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break; CASE_TEST(execve_root); EXPECT_SYSER(1, execve("/", (char*[]){ [0] = "/", [1] = NULL }, NULL), -1, EACCES); break; + CASE_TEST(file_stream); EXPECT_SYSZR(1, test_file_stream()); break; CASE_TEST(fork); EXPECT_SYSZR(1, test_fork()); break; CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break; CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break; -- cgit v1.2.3 From 5e7392dc82ed7bf3e734cbca77a1c6fd044ec871 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 28 Apr 2025 14:40:15 +0200 Subject: tools/nolibc: fall back to sys_clock_gettime() in gettimeofday() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer architectures (like riscv32) do not implement sys_gettimeofday(). In those cases fall back to sys_clock_gettime(). While that does not support the timezone argument of sys_gettimeofday(), specifying this argument invokes undefined behaviour, so it's safe to ignore. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250428-nolibc-misc-v2-14-3c043eeab06c@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/sys/time.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys/time.h b/tools/include/nolibc/sys/time.h index 785961c52fa3..33782a19aae9 100644 --- a/tools/include/nolibc/sys/time.h +++ b/tools/include/nolibc/sys/time.h @@ -13,6 +13,8 @@ #include "../arch.h" #include "../sys.h" +static int sys_clock_gettime(clockid_t clockid, struct timespec *tp); + /* * int gettimeofday(struct timeval *tv, struct timezone *tz); */ @@ -23,7 +25,18 @@ int sys_gettimeofday(struct timeval *tv, struct timezone *tz) #ifdef __NR_gettimeofday return my_syscall2(__NR_gettimeofday, tv, tz); #else - return __nolibc_enosys(__func__, tv, tz); + (void) tz; /* Non-NULL tz is undefined behaviour */ + + struct timespec tp; + int ret; + + ret = sys_clock_gettime(CLOCK_REALTIME, &tp); + if (!ret && tv) { + tv->tv_sec = tp.tv_sec; + tv->tv_usec = tp.tv_nsec / 1000; + } + + return ret; #endif } -- cgit v1.2.3 From 59303930326ac00bec5ec61321d662a165350939 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 28 Apr 2025 14:40:16 +0200 Subject: tools/nolibc: implement wait() in terms of waitpid() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer architectures like riscv 32-bit are missing sys_wait4(). Make use of the fact that wait(&status) is defined to be equivalent to waitpid(-1, status, 0) to implement it on all architectures. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250428-nolibc-misc-v2-15-3c043eeab06c@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/sys/wait.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h index f27be86ad5e4..4d44e3da0ba8 100644 --- a/tools/include/nolibc/sys/wait.h +++ b/tools/include/nolibc/sys/wait.h @@ -31,12 +31,6 @@ pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) #endif } -static __attribute__((unused)) -pid_t wait(int *status) -{ - return __sysret(sys_wait4(-1, status, 0, NULL)); -} - static __attribute__((unused)) pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) { @@ -113,4 +107,10 @@ pid_t waitpid(pid_t pid, int *status, int options) return info.si_pid; } +static __attribute__((unused)) +pid_t wait(int *status) +{ + return waitpid(-1, status, 0); +} + #endif /* _NOLIBC_SYS_WAIT_H */ -- cgit v1.2.3 From 7281be583117d7ff12c52684ffc732e6ffca8f58 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 15 May 2025 21:57:47 +0200 Subject: tools/nolibc: move ioctl() to sys/ioctl.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects this definition. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250515-nolibc-sys-v1-1-74f82eea3b59@weissschuh.net --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys.h | 12 ------------ tools/include/nolibc/sys/ioctl.h | 29 +++++++++++++++++++++++++++++ 4 files changed, 31 insertions(+), 12 deletions(-) create mode 100644 tools/include/nolibc/sys/ioctl.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index 9ff78ae21522..54dccb23f1b3 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -49,6 +49,7 @@ all_files := \ string.h \ sys.h \ sys/auxv.h \ + sys/ioctl.h \ sys/mman.h \ sys/random.h \ sys/stat.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 51c423a36b59..d6048d1e9ea5 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -97,6 +97,7 @@ #include "types.h" #include "sys.h" #include "sys/auxv.h" +#include "sys/ioctl.h" #include "sys/mman.h" #include "sys/random.h" #include "sys/stat.h" diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 5733fe54911d..313c210173c8 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -517,18 +517,6 @@ uid_t getuid(void) } -/* - * int ioctl(int fd, unsigned long cmd, ... arg); - */ - -static __attribute__((unused)) -long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - return my_syscall3(__NR_ioctl, fd, cmd, arg); -} - -#define ioctl(fd, cmd, arg) __sysret(sys_ioctl(fd, cmd, (unsigned long)(arg))) - /* * int kill(pid_t pid, int signal); */ diff --git a/tools/include/nolibc/sys/ioctl.h b/tools/include/nolibc/sys/ioctl.h new file mode 100644 index 000000000000..fc880687e02a --- /dev/null +++ b/tools/include/nolibc/sys/ioctl.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Ioctl definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_IOCTL_H +#define _NOLIBC_SYS_IOCTL_H + +#include "../sys.h" + +#include + +/* + * int ioctl(int fd, unsigned long cmd, ... arg); + */ + +static __attribute__((unused)) +long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + return my_syscall3(__NR_ioctl, fd, cmd, arg); +} + +#define ioctl(fd, cmd, arg) __sysret(sys_ioctl(fd, cmd, (unsigned long)(arg))) + +#endif /* _NOLIBC_SYS_IOCTL_H */ -- cgit v1.2.3 From 6e7c805a93a07ca9b1de49e8a020ab6c05e93f4e Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 15 May 2025 21:57:48 +0200 Subject: tools/nolibc: move mount() to sys/mount.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects this definition. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250515-nolibc-sys-v1-2-74f82eea3b59@weissschuh.net --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys.h | 20 -------------------- tools/include/nolibc/sys/mount.h | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 20 deletions(-) create mode 100644 tools/include/nolibc/sys/mount.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index 54dccb23f1b3..6129761f423a 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -51,6 +51,7 @@ all_files := \ sys/auxv.h \ sys/ioctl.h \ sys/mman.h \ + sys/mount.h \ sys/random.h \ sys/stat.h \ sys/syscall.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index d6048d1e9ea5..690368f8e46c 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -99,6 +99,7 @@ #include "sys/auxv.h" #include "sys/ioctl.h" #include "sys/mman.h" +#include "sys/mount.h" #include "sys/random.h" #include "sys/stat.h" #include "sys/syscall.h" diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 313c210173c8..e66dd6e76055 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -672,26 +672,6 @@ int mknod(const char *path, mode_t mode, dev_t dev) return __sysret(sys_mknod(path, mode, dev)); } -/* - * int mount(const char *source, const char *target, - * const char *fstype, unsigned long flags, - * const void *data); - */ -static __attribute__((unused)) -int sys_mount(const char *src, const char *tgt, const char *fst, - unsigned long flags, const void *data) -{ - return my_syscall5(__NR_mount, src, tgt, fst, flags, data); -} - -static __attribute__((unused)) -int mount(const char *src, const char *tgt, - const char *fst, unsigned long flags, - const void *data) -{ - return __sysret(sys_mount(src, tgt, fst, flags, data)); -} - /* * int pipe2(int pipefd[2], int flags); diff --git a/tools/include/nolibc/sys/mount.h b/tools/include/nolibc/sys/mount.h new file mode 100644 index 000000000000..e39ec02ea24c --- /dev/null +++ b/tools/include/nolibc/sys/mount.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Mount definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_MOUNT_H +#define _NOLIBC_SYS_MOUNT_H + +#include "../sys.h" + +#include + +/* + * int mount(const char *source, const char *target, + * const char *fstype, unsigned long flags, + * const void *data); + */ +static __attribute__((unused)) +int sys_mount(const char *src, const char *tgt, const char *fst, + unsigned long flags, const void *data) +{ + return my_syscall5(__NR_mount, src, tgt, fst, flags, data); +} + +static __attribute__((unused)) +int mount(const char *src, const char *tgt, + const char *fst, unsigned long flags, + const void *data) +{ + return __sysret(sys_mount(src, tgt, fst, flags, data)); +} + +#endif /* _NOLIBC_SYS_MOUNT_H */ -- cgit v1.2.3 From 3edd5365f99eeff38c5d31d9809afa6231e6de2d Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 15 May 2025 21:57:49 +0200 Subject: tools/nolibc: move prctl() to sys/prctl.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects this definition. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250515-nolibc-sys-v1-3-74f82eea3b59@weissschuh.net --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys.h | 21 --------------------- tools/include/nolibc/sys/prctl.h | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 21 deletions(-) create mode 100644 tools/include/nolibc/sys/prctl.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index 6129761f423a..08874fee3312 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -52,6 +52,7 @@ all_files := \ sys/ioctl.h \ sys/mman.h \ sys/mount.h \ + sys/prctl.h \ sys/random.h \ sys/stat.h \ sys/syscall.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 690368f8e46c..1c159e32a248 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -100,6 +100,7 @@ #include "sys/ioctl.h" #include "sys/mman.h" #include "sys/mount.h" +#include "sys/prctl.h" #include "sys/random.h" #include "sys/stat.h" #include "sys/syscall.h" diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index e66dd6e76055..a17fe98968a2 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -23,7 +23,6 @@ #include #include /* for O_* and AT_* */ #include /* for statx() */ -#include #include #include @@ -697,26 +696,6 @@ int pipe(int pipefd[2]) } -/* - * int prctl(int option, unsigned long arg2, unsigned long arg3, - * unsigned long arg4, unsigned long arg5); - */ - -static __attribute__((unused)) -int sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) -{ - return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5); -} - -static __attribute__((unused)) -int prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) -{ - return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5)); -} - - /* * int pivot_root(const char *new, const char *old); */ diff --git a/tools/include/nolibc/sys/prctl.h b/tools/include/nolibc/sys/prctl.h new file mode 100644 index 000000000000..0205907b6ac8 --- /dev/null +++ b/tools/include/nolibc/sys/prctl.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Prctl definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_PRCTL_H +#define _NOLIBC_SYS_PRCTL_H + +#include "../sys.h" + +#include + +/* + * int prctl(int option, unsigned long arg2, unsigned long arg3, + * unsigned long arg4, unsigned long arg5); + */ + +static __attribute__((unused)) +int sys_prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5); +} + +static __attribute__((unused)) +int prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5)); +} + +#endif /* _NOLIBC_SYS_PRCTL_H */ -- cgit v1.2.3 From 2efb9050909f1fc0db39b2600bada8341ebc56c3 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 15 May 2025 21:57:50 +0200 Subject: tools/nolibc: move reboot() to sys/reboot.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects this definition. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250515-nolibc-sys-v1-4-74f82eea3b59@weissschuh.net --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys.h | 18 ------------------ tools/include/nolibc/sys/reboot.h | 34 ++++++++++++++++++++++++++++++++++ tools/include/nolibc/types.h | 1 - 5 files changed, 36 insertions(+), 19 deletions(-) create mode 100644 tools/include/nolibc/sys/reboot.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index 08874fee3312..99fc7930430a 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -54,6 +54,7 @@ all_files := \ sys/mount.h \ sys/prctl.h \ sys/random.h \ + sys/reboot.h \ sys/stat.h \ sys/syscall.h \ sys/time.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 1c159e32a248..36ea7a02c743 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -102,6 +102,7 @@ #include "sys/mount.h" #include "sys/prctl.h" #include "sys/random.h" +#include "sys/reboot.h" #include "sys/stat.h" #include "sys/syscall.h" #include "sys/time.h" diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index a17fe98968a2..6c89dd0316dd 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -730,24 +730,6 @@ ssize_t read(int fd, void *buf, size_t count) } -/* - * int reboot(int cmd); - * is among LINUX_REBOOT_CMD_* - */ - -static __attribute__((unused)) -ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) -{ - return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); -} - -static __attribute__((unused)) -int reboot(int cmd) -{ - return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0)); -} - - /* * int getrlimit(int resource, struct rlimit *rlim); * int setrlimit(int resource, const struct rlimit *rlim); diff --git a/tools/include/nolibc/sys/reboot.h b/tools/include/nolibc/sys/reboot.h new file mode 100644 index 000000000000..4a1e435be669 --- /dev/null +++ b/tools/include/nolibc/sys/reboot.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Reboot definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_REBOOT_H +#define _NOLIBC_SYS_REBOOT_H + +#include "../sys.h" + +#include + +/* + * int reboot(int cmd); + * is among LINUX_REBOOT_CMD_* + */ + +static __attribute__((unused)) +ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) +{ + return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); +} + +static __attribute__((unused)) +int reboot(int cmd) +{ + return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0)); +} + +#endif /* _NOLIBC_SYS_REBOOT_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index 93da29fe7719..74c7694b2d5e 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -12,7 +12,6 @@ #include "std.h" #include -#include /* for LINUX_REBOOT_* */ #include #include #include -- cgit v1.2.3 From 9089524753b480b7a169004c46296d1e45103a31 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 15 May 2025 21:57:51 +0200 Subject: tools/nolibc: move getrlimit() and friends to sys/resource.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects these definitions. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250515-nolibc-sys-v1-5-74f82eea3b59@weissschuh.net --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys.h | 38 -------------------------- tools/include/nolibc/sys/resource.h | 53 +++++++++++++++++++++++++++++++++++++ tools/include/nolibc/types.h | 1 - 5 files changed, 55 insertions(+), 39 deletions(-) create mode 100644 tools/include/nolibc/sys/resource.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index 99fc7930430a..c2bb65b1f309 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -55,6 +55,7 @@ all_files := \ sys/prctl.h \ sys/random.h \ sys/reboot.h \ + sys/resource.h \ sys/stat.h \ sys/syscall.h \ sys/time.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 36ea7a02c743..7d151776e47a 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -103,6 +103,7 @@ #include "sys/prctl.h" #include "sys/random.h" #include "sys/reboot.h" +#include "sys/resource.h" #include "sys/stat.h" #include "sys/syscall.h" #include "sys/time.h" diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 6c89dd0316dd..282909b1992d 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -23,7 +23,6 @@ #include #include /* for O_* and AT_* */ #include /* for statx() */ -#include #include #include "errno.h" @@ -730,43 +729,6 @@ ssize_t read(int fd, void *buf, size_t count) } -/* - * int getrlimit(int resource, struct rlimit *rlim); - * int setrlimit(int resource, const struct rlimit *rlim); - */ - -static __attribute__((unused)) -int sys_prlimit64(pid_t pid, int resource, - const struct rlimit64 *new_limit, struct rlimit64 *old_limit) -{ - return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit); -} - -static __attribute__((unused)) -int getrlimit(int resource, struct rlimit *rlim) -{ - struct rlimit64 rlim64; - int ret; - - ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64)); - rlim->rlim_cur = rlim64.rlim_cur; - rlim->rlim_max = rlim64.rlim_max; - - return ret; -} - -static __attribute__((unused)) -int setrlimit(int resource, const struct rlimit *rlim) -{ - struct rlimit64 rlim64 = { - .rlim_cur = rlim->rlim_cur, - .rlim_max = rlim->rlim_max, - }; - - return __sysret(sys_prlimit64(0, resource, &rlim64, NULL)); -} - - /* * int sched_yield(void); */ diff --git a/tools/include/nolibc/sys/resource.h b/tools/include/nolibc/sys/resource.h new file mode 100644 index 000000000000..b990f914dc56 --- /dev/null +++ b/tools/include/nolibc/sys/resource.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Resource definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_RESOURCE_H +#define _NOLIBC_SYS_RESOURCE_H + +#include "../sys.h" + +#include + +/* + * int getrlimit(int resource, struct rlimit *rlim); + * int setrlimit(int resource, const struct rlimit *rlim); + */ + +static __attribute__((unused)) +int sys_prlimit64(pid_t pid, int resource, + const struct rlimit64 *new_limit, struct rlimit64 *old_limit) +{ + return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit); +} + +static __attribute__((unused)) +int getrlimit(int resource, struct rlimit *rlim) +{ + struct rlimit64 rlim64; + int ret; + + ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64)); + rlim->rlim_cur = rlim64.rlim_cur; + rlim->rlim_max = rlim64.rlim_max; + + return ret; +} + +static __attribute__((unused)) +int setrlimit(int resource, const struct rlimit *rlim) +{ + struct rlimit64 rlim64 = { + .rlim_cur = rlim->rlim_cur, + .rlim_max = rlim->rlim_max, + }; + + return __sysret(sys_prlimit64(0, resource, &rlim64, NULL)); +} + +#endif /* _NOLIBC_SYS_RESOURCE_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index 74c7694b2d5e..2225c9388a46 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -15,7 +15,6 @@ #include #include #include -#include /* Only the generic macros and types may be defined here. The arch-specific -- cgit v1.2.3 From e1211e2206356785365f065ece4965a997903f69 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 15 May 2025 21:57:52 +0200 Subject: tools/nolibc: move makedev() and friends to sys/sysmacros.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects these definitions. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250515-nolibc-sys-v1-6-74f82eea3b59@weissschuh.net --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys/sysmacros.h | 20 ++++++++++++++++++++ tools/include/nolibc/types.h | 5 ----- 4 files changed, 22 insertions(+), 5 deletions(-) create mode 100644 tools/include/nolibc/sys/sysmacros.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index c2bb65b1f309..2438b0331af3 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -58,6 +58,7 @@ all_files := \ sys/resource.h \ sys/stat.h \ sys/syscall.h \ + sys/sysmacros.h \ sys/time.h \ sys/timerfd.h \ sys/types.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 7d151776e47a..182dcfce1266 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -106,6 +106,7 @@ #include "sys/resource.h" #include "sys/stat.h" #include "sys/syscall.h" +#include "sys/sysmacros.h" #include "sys/time.h" #include "sys/timerfd.h" #include "sys/wait.h" diff --git a/tools/include/nolibc/sys/sysmacros.h b/tools/include/nolibc/sys/sysmacros.h new file mode 100644 index 000000000000..37c33f030f02 --- /dev/null +++ b/tools/include/nolibc/sys/sysmacros.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Sysmacro definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_SYSMACROS_H +#define _NOLIBC_SYS_SYSMACROS_H + +#include "../std.h" + +/* WARNING, it only deals with the 4096 first majors and 256 first minors */ +#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff))) +#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff)) +#define minor(dev) ((unsigned int)((dev) & 0xff)) + +#endif /* _NOLIBC_SYS_SYSMACROS_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index 2225c9388a46..0071bfbc2315 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -188,11 +188,6 @@ struct stat { typedef __kernel_clockid_t clockid_t; typedef int timer_t; -/* WARNING, it only deals with the 4096 first majors and 256 first minors */ -#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff))) -#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff)) -#define minor(dev) ((unsigned int)((dev) & 0xff)) - #ifndef offsetof #define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD) #endif -- cgit v1.2.3 From 0f971358dcf34ca4e430e828582cb1c70cfe1f70 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 15 May 2025 21:57:53 +0200 Subject: tools/nolibc: move uname() and friends to sys/utsname.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects these definitions. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250515-nolibc-sys-v1-7-74f82eea3b59@weissschuh.net --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys.h | 27 ------------------------ tools/include/nolibc/sys/utsname.h | 42 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 27 deletions(-) create mode 100644 tools/include/nolibc/sys/utsname.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index 2438b0331af3..7546c359e282 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -62,6 +62,7 @@ all_files := \ sys/time.h \ sys/timerfd.h \ sys/types.h \ + sys/utsname.h \ sys/wait.h \ time.h \ types.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 182dcfce1266..c199ade200c2 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -109,6 +109,7 @@ #include "sys/sysmacros.h" #include "sys/time.h" #include "sys/timerfd.h" +#include "sys/utsname.h" #include "sys/wait.h" #include "ctype.h" #include "elf.h" diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 282909b1992d..9556c69a6ae1 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -23,7 +23,6 @@ #include #include /* for O_* and AT_* */ #include /* for statx() */ -#include #include "errno.h" #include "stdarg.h" @@ -894,32 +893,6 @@ int umount2(const char *path, int flags) } -/* - * int uname(struct utsname *buf); - */ - -struct utsname { - char sysname[65]; - char nodename[65]; - char release[65]; - char version[65]; - char machine[65]; - char domainname[65]; -}; - -static __attribute__((unused)) -int sys_uname(struct utsname *buf) -{ - return my_syscall1(__NR_uname, buf); -} - -static __attribute__((unused)) -int uname(struct utsname *buf) -{ - return __sysret(sys_uname(buf)); -} - - /* * int unlink(const char *path); */ diff --git a/tools/include/nolibc/sys/utsname.h b/tools/include/nolibc/sys/utsname.h new file mode 100644 index 000000000000..01023e1bb439 --- /dev/null +++ b/tools/include/nolibc/sys/utsname.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Utsname definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_UTSNAME_H +#define _NOLIBC_SYS_UTSNAME_H + +#include "../sys.h" + +#include + +/* + * int uname(struct utsname *buf); + */ + +struct utsname { + char sysname[65]; + char nodename[65]; + char release[65]; + char version[65]; + char machine[65]; + char domainname[65]; +}; + +static __attribute__((unused)) +int sys_uname(struct utsname *buf) +{ + return my_syscall1(__NR_uname, buf); +} + +static __attribute__((unused)) +int uname(struct utsname *buf) +{ + return __sysret(sys_uname(buf)); +} + +#endif /* _NOLIBC_SYS_UTSNAME_H */ -- cgit v1.2.3 From 2217abe09ce4e0dcbe17ed83b44cb48de11fae1d Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 15 May 2025 21:57:54 +0200 Subject: tools/nolibc: move NULL and offsetof() to sys/stddef.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the location regular userspace expects these definitions. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250515-nolibc-sys-v1-8-74f82eea3b59@weissschuh.net --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/std.h | 6 +----- tools/include/nolibc/stddef.h | 24 ++++++++++++++++++++++++ tools/include/nolibc/types.h | 4 ---- 4 files changed, 26 insertions(+), 9 deletions(-) create mode 100644 tools/include/nolibc/stddef.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index 7546c359e282..c335ce0bd195 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -44,6 +44,7 @@ all_files := \ std.h \ stdarg.h \ stdbool.h \ + stddef.h \ stdint.h \ stdlib.h \ string.h \ diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h index 933bc0be7e1c..adda7333d12e 100644 --- a/tools/include/nolibc/std.h +++ b/tools/include/nolibc/std.h @@ -13,12 +13,8 @@ * syscall-specific stuff, as this file is expected to be included very early. */ -/* note: may already be defined */ -#ifndef NULL -#define NULL ((void *)0) -#endif - #include "stdint.h" +#include "stddef.h" /* those are commonly provided by sys/types.h */ typedef unsigned int dev_t; diff --git a/tools/include/nolibc/stddef.h b/tools/include/nolibc/stddef.h new file mode 100644 index 000000000000..ecbd13eab1f5 --- /dev/null +++ b/tools/include/nolibc/stddef.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Stddef definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_STDDEF_H +#define _NOLIBC_STDDEF_H + +#include "stdint.h" + +/* note: may already be defined */ +#ifndef NULL +#define NULL ((void *)0) +#endif + +#ifndef offsetof +#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD) +#endif + +#endif /* _NOLIBC_STDDEF_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index 0071bfbc2315..30904be544ed 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -188,10 +188,6 @@ struct stat { typedef __kernel_clockid_t clockid_t; typedef int timer_t; -#ifndef offsetof -#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD) -#endif - #ifndef container_of #define container_of(PTR, TYPE, FIELD) ({ \ __typeof__(((TYPE *)0)->FIELD) *__FIELD_PTR = (PTR); \ -- cgit v1.2.3 From f4b18ff2c147d3b56384fcc8adb30bf733bf2300 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 21 May 2025 15:15:28 -0700 Subject: perf/uapi: Fix PERF_RECORD_SAMPLE comments in AAUX data for PERF_SAMPLE_AUX appears last. PERF_SAMPLE_CGROUP is missing from the comment. This makes the comment match that in the perf_event_open man page. Signed-off-by: Ian Rogers Signed-off-by: Ingo Molnar Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: linux-perf-users@vger.kernel.org Link: https://lore.kernel.org/r/20250521221529.2547099-1-irogers@google.com --- include/uapi/linux/perf_event.h | 5 +++-- tools/include/uapi/linux/perf_event.h | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'tools/include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 5fc753c23734..b2722dae6f1e 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1035,10 +1035,11 @@ enum perf_event_type { * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR - * { u64 size; - * char data[size]; } && PERF_SAMPLE_AUX + * { u64 cgroup;} && PERF_SAMPLE_CGROUP * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE + * { u64 size; + * char data[size]; } && PERF_SAMPLE_AUX * }; */ PERF_RECORD_SAMPLE = 9, diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 5fc753c23734..b2722dae6f1e 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1035,10 +1035,11 @@ enum perf_event_type { * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR - * { u64 size; - * char data[size]; } && PERF_SAMPLE_AUX + * { u64 cgroup;} && PERF_SAMPLE_CGROUP * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE + * { u64 size; + * char data[size]; } && PERF_SAMPLE_AUX * }; */ PERF_RECORD_SAMPLE = 9, -- cgit v1.2.3 From 44889ff67cee7b9ee2d305690ce7a5488b137a66 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 22 May 2025 09:51:22 +0200 Subject: perf/uapi: Clean up a bit When applying a recent commit to the header I noticed that we have accumulated quite a bit of historic noise in this header, so do a bit of spring cleaning: - Define bitfields in a vertically aligned fashion, like perf_event_mmap_page::capabilities already does. This makes it easier to see the distribution and sizing of bits within a word, at a glance. The following is much more readable: __u64 cap_bit0 : 1, cap_bit0_is_deprecated : 1, cap_user_rdpmc : 1, cap_user_time : 1, cap_user_time_zero : 1, cap_user_time_short : 1, cap_____res : 58; Than: __u64 cap_bit0:1, cap_bit0_is_deprecated:1, cap_user_rdpmc:1, cap_user_time:1, cap_user_time_zero:1, cap_user_time_short:1, cap_____res:58; So convert all bitfield definitions from the latter style to the former style. - Fix typos and grammar - Fix capitalization - Remove whitespace noise - Harmonize the definitions of various generations and groups of PERF_MEM_ ABI values. - Vertically align all definitions and assignments to the same column (48), as the first definition (enum perf_type_id), throughout the entire header. - And in general make the code and comments to be more in sync with each other and to be more readable overall. No change in functionality. Copy the changes over to tools/include/uapi/linux/perf_event.h. Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Ian Rogers Link: https://lore.kernel.org/r/20250521221529.2547099-1-irogers@google.com --- include/uapi/linux/perf_event.h | 652 +++++++++++++++++----------------- tools/include/uapi/linux/perf_event.h | 652 +++++++++++++++++----------------- 2 files changed, 662 insertions(+), 642 deletions(-) (limited to 'tools/include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b2722dae6f1e..78a362b80027 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -39,18 +39,21 @@ enum perf_type_id { /* * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE + * * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA * AA: hardware event ID * EEEEEEEE: PMU type ID + * * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB * BB: hardware cache ID * CC: hardware cache op ID * DD: hardware cache op result ID * EEEEEEEE: PMU type ID - * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. + * + * If the PMU type ID is 0, PERF_TYPE_RAW will be applied. */ -#define PERF_PMU_TYPE_SHIFT 32 -#define PERF_HW_EVENT_MASK 0xffffffff +#define PERF_PMU_TYPE_SHIFT 32 +#define PERF_HW_EVENT_MASK 0xffffffff /* * Generalized performance event event_id types, used by the @@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id { /* * Special "software" events provided by the kernel, even if the hardware * does not support performance events. These events measure various - * physical and sw events of the kernel (and allow the profiling of them as + * physical and SW events of the kernel (and allow the profiling of them as * well): */ enum perf_sw_ids { @@ -167,8 +170,9 @@ enum perf_event_sample_format { }; #define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) + /* - * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set + * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set. * * If the user does not pass priv level information via branch_sample_type, * the kernel uses the event's priv level. Branch and event priv levels do @@ -178,20 +182,20 @@ enum perf_event_sample_format { * of branches and therefore it supersedes all the other types. */ enum perf_branch_sample_type_shift { - PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ - PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ - PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ - - PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ - PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ - PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ - PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ - PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ - PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ - PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ - PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */ PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */ @@ -210,96 +214,95 @@ enum perf_branch_sample_type_shift { }; enum perf_branch_sample_type { - PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, - PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, - PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, + PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, + PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, + PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, - PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, - PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, - PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, - PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, - PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, - PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, + PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, + PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, - PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, - PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, - PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, + PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, + PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, - PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, - PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, + PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, + PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, - PERF_SAMPLE_BRANCH_TYPE_SAVE = - 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, - PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, - PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; /* - * Common flow change classification + * Common control flow change classifications: */ enum { - PERF_BR_UNKNOWN = 0, /* unknown */ - PERF_BR_COND = 1, /* conditional */ - PERF_BR_UNCOND = 2, /* unconditional */ - PERF_BR_IND = 3, /* indirect */ - PERF_BR_CALL = 4, /* function call */ - PERF_BR_IND_CALL = 5, /* indirect function call */ - PERF_BR_RET = 6, /* function return */ - PERF_BR_SYSCALL = 7, /* syscall */ - PERF_BR_SYSRET = 8, /* syscall return */ - PERF_BR_COND_CALL = 9, /* conditional function call */ - PERF_BR_COND_RET = 10, /* conditional function return */ - PERF_BR_ERET = 11, /* exception return */ - PERF_BR_IRQ = 12, /* irq */ - PERF_BR_SERROR = 13, /* system error */ - PERF_BR_NO_TX = 14, /* not in transaction */ - PERF_BR_EXTEND_ABI = 15, /* extend ABI */ + PERF_BR_UNKNOWN = 0, /* Unknown */ + PERF_BR_COND = 1, /* Conditional */ + PERF_BR_UNCOND = 2, /* Unconditional */ + PERF_BR_IND = 3, /* Indirect */ + PERF_BR_CALL = 4, /* Function call */ + PERF_BR_IND_CALL = 5, /* Indirect function call */ + PERF_BR_RET = 6, /* Function return */ + PERF_BR_SYSCALL = 7, /* Syscall */ + PERF_BR_SYSRET = 8, /* Syscall return */ + PERF_BR_COND_CALL = 9, /* Conditional function call */ + PERF_BR_COND_RET = 10, /* Conditional function return */ + PERF_BR_ERET = 11, /* Exception return */ + PERF_BR_IRQ = 12, /* IRQ */ + PERF_BR_SERROR = 13, /* System error */ + PERF_BR_NO_TX = 14, /* Not in transaction */ + PERF_BR_EXTEND_ABI = 15, /* Extend ABI */ PERF_BR_MAX, }; /* - * Common branch speculation outcome classification + * Common branch speculation outcome classifications: */ enum { - PERF_BR_SPEC_NA = 0, /* Not available */ - PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ - PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ - PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ + PERF_BR_SPEC_NA = 0, /* Not available */ + PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ + PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ + PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ PERF_BR_SPEC_MAX, }; enum { - PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ - PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ - PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ - PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ - PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ - PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ - PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ - PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ + PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ + PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ + PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ + PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ + PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ + PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ + PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ + PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ PERF_BR_NEW_MAX, }; enum { - PERF_BR_PRIV_UNKNOWN = 0, - PERF_BR_PRIV_USER = 1, - PERF_BR_PRIV_KERNEL = 2, - PERF_BR_PRIV_HV = 3, + PERF_BR_PRIV_UNKNOWN = 0, + PERF_BR_PRIV_USER = 1, + PERF_BR_PRIV_KERNEL = 2, + PERF_BR_PRIV_HV = 3, }; -#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 -#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 -#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 -#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 -#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 +#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 +#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 +#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 +#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 +#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ @@ -310,9 +313,9 @@ enum { * Values to determine ABI of the registers dump. */ enum perf_sample_regs_abi { - PERF_SAMPLE_REGS_ABI_NONE = 0, - PERF_SAMPLE_REGS_ABI_32 = 1, - PERF_SAMPLE_REGS_ABI_64 = 2, + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, }; /* @@ -320,21 +323,21 @@ enum perf_sample_regs_abi { * abort events. Multiple bits can be set. */ enum { - PERF_TXN_ELISION = (1 << 0), /* From elision */ - PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ - PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ - PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ - PERF_TXN_RETRY = (1 << 4), /* Retry possible */ - PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ - PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ - PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ + PERF_TXN_ELISION = (1 << 0), /* From elision */ + PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ + PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ + PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */ + PERF_TXN_RETRY = (1 << 4), /* Retry possible */ + PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ + PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ + PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ - PERF_TXN_MAX = (1 << 8), /* non-ABI */ + PERF_TXN_MAX = (1 << 8), /* non-ABI */ - /* bits 32..63 are reserved for the abort code */ + /* Bits 32..63 are reserved for the abort code */ - PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), - PERF_TXN_ABORT_SHIFT = 32, + PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), + PERF_TXN_ABORT_SHIFT = 32, }; /* @@ -369,24 +372,22 @@ enum perf_event_read_format { PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; -#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ -#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ -#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ -#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ - /* add: sample_stack_user */ -#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ -#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ -#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ -#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ -#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */ +#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */ +#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */ +#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */ + /* Add: sample_stack_user */ +#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */ +#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */ +#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */ +#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */ /* - * Hardware event_id to monitor via a performance monitoring event: - * - * @sample_max_stack: Max number of frame pointers in a callchain, - * should be < /proc/sys/kernel/perf_event_max_stack - * Max number of entries of branch stack - * should be < hardware limit + * 'struct perf_event_attr' contains various attributes that define + * a performance event - most of them hardware related configuration + * details, but also a lot of behavioral switches and values implemented + * by the kernel. */ struct perf_event_attr { @@ -396,7 +397,7 @@ struct perf_event_attr { __u32 type; /* - * Size of the attr structure, for fwd/bwd compat. + * Size of the attr structure, for forward/backwards compatibility. */ __u32 size; @@ -451,21 +452,21 @@ struct perf_event_attr { comm_exec : 1, /* flag comm events that are due to an exec */ use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ - write_backward : 1, /* Write ring buffer from end to beginning */ + write_backward : 1, /* write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ ksymbol : 1, /* include ksymbol events */ - bpf_event : 1, /* include bpf events */ + bpf_event : 1, /* include BPF events */ aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ - build_id : 1, /* use build id in mmap2 events */ + build_id : 1, /* use build ID in mmap2 events */ inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ __reserved_1 : 26; union { - __u32 wakeup_events; /* wakeup every n events */ + __u32 wakeup_events; /* wake up every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; @@ -474,13 +475,13 @@ struct perf_event_attr { __u64 bp_addr; __u64 kprobe_func; /* for perf_kprobe */ __u64 uprobe_path; /* for perf_uprobe */ - __u64 config1; /* extension of config */ + __u64 config1; /* extension of config */ }; union { __u64 bp_len; - __u64 kprobe_addr; /* when kprobe_func == NULL */ + __u64 kprobe_addr; /* when kprobe_func == NULL */ __u64 probe_offset; /* for perf_[k,u]probe */ - __u64 config2; /* extension of config1 */ + __u64 config2; /* extension of config1 */ }; __u64 branch_sample_type; /* enum perf_branch_sample_type */ @@ -510,7 +511,16 @@ struct perf_event_attr { * Wakeup watermark for AUX area */ __u32 aux_watermark; + + /* + * Max number of frame pointers in a callchain, should be + * lower than /proc/sys/kernel/perf_event_max_stack. + * + * Max number of entries of branch stack should be lower + * than the hardware limit. + */ __u16 sample_max_stack; + __u16 __reserved_2; __u32 aux_sample_size; @@ -537,7 +547,7 @@ struct perf_event_attr { /* * Structure used by below PERF_EVENT_IOC_QUERY_BPF command - * to query bpf programs attached to the same perf tracepoint + * to query BPF programs attached to the same perf tracepoint * as the given perf event. */ struct perf_event_query_bpf { @@ -559,21 +569,21 @@ struct perf_event_query_bpf { /* * Ioctls that can be done on a perf event fd: */ -#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) -#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) -#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) -#define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) -#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) -#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) -#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) -#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *) +#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32) +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32) #define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) -#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *) +#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *) enum perf_event_ioc_flags { - PERF_IOC_FLAG_GROUP = 1U << 0, + PERF_IOC_FLAG_GROUP = 1U << 0, }; /* @@ -584,7 +594,7 @@ struct perf_event_mmap_page { __u32 compat_version; /* lowest version this is compat with */ /* - * Bits needed to read the hw events in user-space. + * Bits needed to read the HW events in user-space. * * u32 seq, time_mult, time_shift, index, width; * u64 count, enabled, running; @@ -622,7 +632,7 @@ struct perf_event_mmap_page { __u32 index; /* hardware event identifier */ __s64 offset; /* add to hardware event value */ __u64 time_enabled; /* time event active */ - __u64 time_running; /* time event on cpu */ + __u64 time_running; /* time event on CPU */ union { __u64 capabilities; struct { @@ -650,7 +660,7 @@ struct perf_event_mmap_page { /* * If cap_usr_time the below fields can be used to compute the time - * delta since time_enabled (in ns) using rdtsc or similar. + * delta since time_enabled (in ns) using RDTSC or similar. * * u64 quot, rem; * u64 delta; @@ -723,7 +733,7 @@ struct perf_event_mmap_page { * after reading this value. * * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data, after issueing + * written by user-space to reflect the last read data, after issuing * an smp_mb() to separate the data read from the ->data_tail store. * In this case the kernel will not over-write unread data. * @@ -739,7 +749,7 @@ struct perf_event_mmap_page { /* * AUX area is defined by aux_{offset,size} fields that should be set - * by the userspace, so that + * by the user-space, so that * * aux_offset >= data_offset + data_size * @@ -813,7 +823,7 @@ struct perf_event_mmap_page { * Indicates that thread was preempted in TASK_RUNNING state. * * PERF_RECORD_MISC_MMAP_BUILD_ID: - * Indicates that mmap2 event carries build id data. + * Indicates that mmap2 event carries build ID data. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) @@ -824,26 +834,26 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; + __u32 type; + __u16 misc; + __u16 size; }; struct perf_ns_link_info { - __u64 dev; - __u64 ino; + __u64 dev; + __u64 ino; }; enum { - NET_NS_INDEX = 0, - UTS_NS_INDEX = 1, - IPC_NS_INDEX = 2, - PID_NS_INDEX = 3, - USER_NS_INDEX = 4, - MNT_NS_INDEX = 5, - CGROUP_NS_INDEX = 6, - - NR_NAMESPACES, /* number of available namespaces */ + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ }; enum perf_event_type { @@ -859,11 +869,11 @@ enum perf_event_type { * optional fields being ignored. * * struct sample_id { - * { u32 pid, tid; } && PERF_SAMPLE_TID - * { u64 time; } && PERF_SAMPLE_TIME - * { u64 id; } && PERF_SAMPLE_ID - * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID - * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 id; } && PERF_SAMPLE_IDENTIFIER * } && perf_event_attr::sample_id_all * @@ -874,7 +884,7 @@ enum perf_event_type { /* * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: + * correlate user-space IPs to code. They have the following structure: * * struct { * struct perf_event_header header; @@ -884,7 +894,7 @@ enum perf_event_type { * u64 len; * u64 pgoff; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP = 1, @@ -894,7 +904,7 @@ enum perf_event_type { * struct perf_event_header header; * u64 id; * u64 lost; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_LOST = 2, @@ -905,7 +915,7 @@ enum perf_event_type { * * u32 pid, tid; * char comm[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_COMM = 3, @@ -916,7 +926,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_EXIT = 4, @@ -927,7 +937,7 @@ enum perf_event_type { * u64 time; * u64 id; * u64 stream_id; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_THROTTLE = 5, @@ -939,7 +949,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_FORK = 7, @@ -950,7 +960,7 @@ enum perf_event_type { * u32 pid, tid; * * struct read_format values; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_READ = 8, @@ -1005,12 +1015,12 @@ enum perf_event_type { * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * - * { u64 abi; # enum perf_sample_regs_abi - * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER * - * { u64 size; - * char data[size]; - * u64 dyn_size; } && PERF_SAMPLE_STACK_USER + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * * { union perf_sample_weight * { @@ -1071,7 +1081,7 @@ enum perf_event_type { * }; * u32 prot, flags; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP2 = 10, @@ -1080,12 +1090,12 @@ enum perf_event_type { * Records that new data landed in the AUX buffer part. * * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * u64 aux_offset; - * u64 aux_size; + * u64 aux_offset; + * u64 aux_size; * u64 flags; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_AUX = 11, @@ -1168,7 +1178,7 @@ enum perf_event_type { PERF_RECORD_KSYMBOL = 17, /* - * Record bpf events: + * Record BPF events: * enum perf_bpf_event_type { * PERF_BPF_EVENT_UNKNOWN = 0, * PERF_BPF_EVENT_PROG_LOAD = 1, @@ -1246,181 +1256,181 @@ enum perf_record_ksymbol_type { #define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) enum perf_bpf_event_type { - PERF_BPF_EVENT_UNKNOWN = 0, - PERF_BPF_EVENT_PROG_LOAD = 1, - PERF_BPF_EVENT_PROG_UNLOAD = 2, - PERF_BPF_EVENT_MAX, /* non-ABI */ + PERF_BPF_EVENT_UNKNOWN = 0, + PERF_BPF_EVENT_PROG_LOAD = 1, + PERF_BPF_EVENT_PROG_UNLOAD = 2, + PERF_BPF_EVENT_MAX, /* non-ABI */ }; -#define PERF_MAX_STACK_DEPTH 127 -#define PERF_MAX_CONTEXTS_PER_STACK 8 +#define PERF_MAX_STACK_DEPTH 127 +#define PERF_MAX_CONTEXTS_PER_STACK 8 enum perf_callchain_context { - PERF_CONTEXT_HV = (__u64)-32, - PERF_CONTEXT_KERNEL = (__u64)-128, - PERF_CONTEXT_USER = (__u64)-512, + PERF_CONTEXT_HV = (__u64)-32, + PERF_CONTEXT_KERNEL = (__u64)-128, + PERF_CONTEXT_USER = (__u64)-512, - PERF_CONTEXT_GUEST = (__u64)-2048, - PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, - PERF_CONTEXT_GUEST_USER = (__u64)-2560, + PERF_CONTEXT_GUEST = (__u64)-2048, + PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, + PERF_CONTEXT_GUEST_USER = (__u64)-2560, - PERF_CONTEXT_MAX = (__u64)-4095, + PERF_CONTEXT_MAX = (__u64)-4095, }; /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */ #define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ /* CoreSight PMU AUX buffer formats */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ -#define PERF_FLAG_FD_NO_GROUP (1UL << 0) -#define PERF_FLAG_FD_OUTPUT (1UL << 1) -#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ -#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#define PERF_FLAG_FD_NO_GROUP (1UL << 0) +#define PERF_FLAG_FD_OUTPUT (1UL << 1) +#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */ +#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ #if defined(__LITTLE_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_op:5, /* type of opcode */ - mem_lvl:14, /* memory hierarchy level */ - mem_snoop:5, /* snoop mode */ - mem_lock:2, /* lock instr */ - mem_dtlb:7, /* tlb access */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_remote:1, /* remote */ - mem_snoopx:2, /* snoop mode, ext */ - mem_blk:3, /* access blocked */ - mem_hops:3, /* hop level */ - mem_rsvd:18; + __u64 mem_op : 5, /* Type of opcode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_snoop : 5, /* Snoop mode */ + mem_lock : 2, /* Lock instr */ + mem_dtlb : 7, /* TLB access */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_remote : 1, /* Remote */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_blk : 3, /* Access blocked */ + mem_hops : 3, /* Hop level */ + mem_rsvd : 18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:18, - mem_hops:3, /* hop level */ - mem_blk:3, /* access blocked */ - mem_snoopx:2, /* snoop mode, ext */ - mem_remote:1, /* remote */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_dtlb:7, /* tlb access */ - mem_lock:2, /* lock instr */ - mem_snoop:5, /* snoop mode */ - mem_lvl:14, /* memory hierarchy level */ - mem_op:5; /* type of opcode */ + __u64 mem_rsvd : 18, + mem_hops : 3, /* Hop level */ + mem_blk : 3, /* Access blocked */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_remote : 1, /* Remote */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_dtlb : 7, /* TLB access */ + mem_lock : 2, /* Lock instr */ + mem_snoop : 5, /* Snoop mode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_op : 5; /* Type of opcode */ }; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif -/* type of opcode (load/store/prefetch,code) */ -#define PERF_MEM_OP_NA 0x01 /* not available */ -#define PERF_MEM_OP_LOAD 0x02 /* load instruction */ -#define PERF_MEM_OP_STORE 0x04 /* store instruction */ -#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */ -#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ -#define PERF_MEM_OP_SHIFT 0 +/* Type of memory opcode: */ +#define PERF_MEM_OP_NA 0x0001 /* Not available */ +#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */ +#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */ +#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */ +#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */ +#define PERF_MEM_OP_SHIFT 0 /* - * PERF_MEM_LVL_* namespace being depricated to some extent in the + * The PERF_MEM_LVL_* namespace is being deprecated to some extent in * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. - * Supporting this namespace inorder to not break defined ABIs. + * We support this namespace in order to not break defined ABIs. * - * memory hierarchy (memory level, hit or miss) + * Memory hierarchy (memory level, hit or miss) */ -#define PERF_MEM_LVL_NA 0x01 /* not available */ -#define PERF_MEM_LVL_HIT 0x02 /* hit level */ -#define PERF_MEM_LVL_MISS 0x04 /* miss level */ -#define PERF_MEM_LVL_L1 0x08 /* L1 */ -#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */ -#define PERF_MEM_LVL_L2 0x20 /* L2 */ -#define PERF_MEM_LVL_L3 0x40 /* L3 */ -#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */ -#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */ -#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */ -#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */ -#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */ -#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ -#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ -#define PERF_MEM_LVL_SHIFT 5 - -#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */ -#define PERF_MEM_REMOTE_SHIFT 37 - -#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */ -#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ -#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ -#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ -/* 0x7 available */ -#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ -#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ -#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ -#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ -#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ -#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ - -#define PERF_MEM_LVLNUM_SHIFT 33 - -/* snoop mode */ -#define PERF_MEM_SNOOP_NA 0x01 /* not available */ -#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ -#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ -#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ -#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ -#define PERF_MEM_SNOOP_SHIFT 19 - -#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ -#define PERF_MEM_SNOOPX_SHIFT 38 - -/* locked instruction */ -#define PERF_MEM_LOCK_NA 0x01 /* not available */ -#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ -#define PERF_MEM_LOCK_SHIFT 24 +#define PERF_MEM_LVL_NA 0x0001 /* Not available */ +#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */ +#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */ +#define PERF_MEM_LVL_L1 0x0008 /* L1 */ +#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */ +#define PERF_MEM_LVL_L2 0x0020 /* L2 */ +#define PERF_MEM_LVL_L3 0x0040 /* L3 */ +#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */ +#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */ +#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */ +#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */ +#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */ +#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ +#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ +#define PERF_MEM_LVL_SHIFT 5 + +#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */ +#define PERF_MEM_REMOTE_SHIFT 37 + +#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */ +#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */ +#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */ +#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */ +#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */ +/* 0x007 available */ +#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */ +#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */ +#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */ +#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */ +#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */ +#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */ +#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */ + +#define PERF_MEM_LVLNUM_SHIFT 33 + +/* Snoop mode */ +#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */ +#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */ +#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */ +#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */ +#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */ +#define PERF_MEM_SNOOP_SHIFT 19 + +#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */ +#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */ +#define PERF_MEM_SNOOPX_SHIFT 38 + +/* Locked instruction */ +#define PERF_MEM_LOCK_NA 0x0001 /* Not available */ +#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */ +#define PERF_MEM_LOCK_SHIFT 24 /* TLB access */ -#define PERF_MEM_TLB_NA 0x01 /* not available */ -#define PERF_MEM_TLB_HIT 0x02 /* hit level */ -#define PERF_MEM_TLB_MISS 0x04 /* miss level */ -#define PERF_MEM_TLB_L1 0x08 /* L1 */ -#define PERF_MEM_TLB_L2 0x10 /* L2 */ -#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/ -#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ -#define PERF_MEM_TLB_SHIFT 26 +#define PERF_MEM_TLB_NA 0x0001 /* Not available */ +#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */ +#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */ +#define PERF_MEM_TLB_L1 0x0008 /* L1 */ +#define PERF_MEM_TLB_L2 0x0010 /* L2 */ +#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/ +#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */ +#define PERF_MEM_TLB_SHIFT 26 /* Access blocked */ -#define PERF_MEM_BLK_NA 0x01 /* not available */ -#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ -#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ -#define PERF_MEM_BLK_SHIFT 40 - -/* hop level */ -#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ -#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ -#define PERF_MEM_HOPS_3 0x04 /* remote board */ +#define PERF_MEM_BLK_NA 0x0001 /* Not available */ +#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + +/* Hop level */ +#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */ +#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */ +#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */ +#define PERF_MEM_HOPS_3 0x0004 /* Remote board */ /* 5-7 available */ -#define PERF_MEM_HOPS_SHIFT 43 +#define PERF_MEM_HOPS_SHIFT 43 #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) /* - * single taken branch record layout: + * Layout of single taken branch records: * * from: source instruction (may not always be a branch insn) * to: branch target @@ -1439,37 +1449,37 @@ union perf_mem_data_src { struct perf_branch_entry { __u64 from; __u64 to; - __u64 mispred:1, /* target mispredicted */ - predicted:1,/* target predicted */ - in_tx:1, /* in transaction */ - abort:1, /* transaction abort */ - cycles:16, /* cycle count to last branch */ - type:4, /* branch type */ - spec:2, /* branch speculation info */ - new_type:4, /* additional branch type */ - priv:3, /* privilege level */ - reserved:31; + __u64 mispred : 1, /* target mispredicted */ + predicted : 1, /* target predicted */ + in_tx : 1, /* in transaction */ + abort : 1, /* transaction abort */ + cycles : 16, /* cycle count to last branch */ + type : 4, /* branch type */ + spec : 2, /* branch speculation info */ + new_type : 4, /* additional branch type */ + priv : 3, /* privilege level */ + reserved : 31; }; /* Size of used info bits in struct perf_branch_entry */ #define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 union perf_sample_weight { - __u64 full; + __u64 full; #if defined(__LITTLE_ENDIAN_BITFIELD) struct { - __u32 var1_dw; - __u16 var2_w; - __u16 var3_w; + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; }; #elif defined(__BIG_ENDIAN_BITFIELD) struct { - __u16 var3_w; - __u16 var2_w; - __u32 var1_dw; + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif }; diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b2722dae6f1e..78a362b80027 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -39,18 +39,21 @@ enum perf_type_id { /* * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE + * * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA * AA: hardware event ID * EEEEEEEE: PMU type ID + * * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB * BB: hardware cache ID * CC: hardware cache op ID * DD: hardware cache op result ID * EEEEEEEE: PMU type ID - * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. + * + * If the PMU type ID is 0, PERF_TYPE_RAW will be applied. */ -#define PERF_PMU_TYPE_SHIFT 32 -#define PERF_HW_EVENT_MASK 0xffffffff +#define PERF_PMU_TYPE_SHIFT 32 +#define PERF_HW_EVENT_MASK 0xffffffff /* * Generalized performance event event_id types, used by the @@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id { /* * Special "software" events provided by the kernel, even if the hardware * does not support performance events. These events measure various - * physical and sw events of the kernel (and allow the profiling of them as + * physical and SW events of the kernel (and allow the profiling of them as * well): */ enum perf_sw_ids { @@ -167,8 +170,9 @@ enum perf_event_sample_format { }; #define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) + /* - * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set + * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set. * * If the user does not pass priv level information via branch_sample_type, * the kernel uses the event's priv level. Branch and event priv levels do @@ -178,20 +182,20 @@ enum perf_event_sample_format { * of branches and therefore it supersedes all the other types. */ enum perf_branch_sample_type_shift { - PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ - PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ - PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ - - PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ - PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ - PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ - PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ - PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ - PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ - PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ - PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */ PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */ @@ -210,96 +214,95 @@ enum perf_branch_sample_type_shift { }; enum perf_branch_sample_type { - PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, - PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, - PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, + PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, + PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, + PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, - PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, - PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, - PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, - PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, - PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, - PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, + PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, + PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, - PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, - PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, - PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, + PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, + PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, - PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, - PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, + PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, + PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, - PERF_SAMPLE_BRANCH_TYPE_SAVE = - 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, - PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, - PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; /* - * Common flow change classification + * Common control flow change classifications: */ enum { - PERF_BR_UNKNOWN = 0, /* unknown */ - PERF_BR_COND = 1, /* conditional */ - PERF_BR_UNCOND = 2, /* unconditional */ - PERF_BR_IND = 3, /* indirect */ - PERF_BR_CALL = 4, /* function call */ - PERF_BR_IND_CALL = 5, /* indirect function call */ - PERF_BR_RET = 6, /* function return */ - PERF_BR_SYSCALL = 7, /* syscall */ - PERF_BR_SYSRET = 8, /* syscall return */ - PERF_BR_COND_CALL = 9, /* conditional function call */ - PERF_BR_COND_RET = 10, /* conditional function return */ - PERF_BR_ERET = 11, /* exception return */ - PERF_BR_IRQ = 12, /* irq */ - PERF_BR_SERROR = 13, /* system error */ - PERF_BR_NO_TX = 14, /* not in transaction */ - PERF_BR_EXTEND_ABI = 15, /* extend ABI */ + PERF_BR_UNKNOWN = 0, /* Unknown */ + PERF_BR_COND = 1, /* Conditional */ + PERF_BR_UNCOND = 2, /* Unconditional */ + PERF_BR_IND = 3, /* Indirect */ + PERF_BR_CALL = 4, /* Function call */ + PERF_BR_IND_CALL = 5, /* Indirect function call */ + PERF_BR_RET = 6, /* Function return */ + PERF_BR_SYSCALL = 7, /* Syscall */ + PERF_BR_SYSRET = 8, /* Syscall return */ + PERF_BR_COND_CALL = 9, /* Conditional function call */ + PERF_BR_COND_RET = 10, /* Conditional function return */ + PERF_BR_ERET = 11, /* Exception return */ + PERF_BR_IRQ = 12, /* IRQ */ + PERF_BR_SERROR = 13, /* System error */ + PERF_BR_NO_TX = 14, /* Not in transaction */ + PERF_BR_EXTEND_ABI = 15, /* Extend ABI */ PERF_BR_MAX, }; /* - * Common branch speculation outcome classification + * Common branch speculation outcome classifications: */ enum { - PERF_BR_SPEC_NA = 0, /* Not available */ - PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ - PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ - PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ + PERF_BR_SPEC_NA = 0, /* Not available */ + PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ + PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ + PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ PERF_BR_SPEC_MAX, }; enum { - PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ - PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ - PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ - PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ - PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ - PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ - PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ - PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ + PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ + PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ + PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ + PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ + PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ + PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ + PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ + PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ PERF_BR_NEW_MAX, }; enum { - PERF_BR_PRIV_UNKNOWN = 0, - PERF_BR_PRIV_USER = 1, - PERF_BR_PRIV_KERNEL = 2, - PERF_BR_PRIV_HV = 3, + PERF_BR_PRIV_UNKNOWN = 0, + PERF_BR_PRIV_USER = 1, + PERF_BR_PRIV_KERNEL = 2, + PERF_BR_PRIV_HV = 3, }; -#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 -#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 -#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 -#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 -#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 +#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 +#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 +#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 +#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 +#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ @@ -310,9 +313,9 @@ enum { * Values to determine ABI of the registers dump. */ enum perf_sample_regs_abi { - PERF_SAMPLE_REGS_ABI_NONE = 0, - PERF_SAMPLE_REGS_ABI_32 = 1, - PERF_SAMPLE_REGS_ABI_64 = 2, + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, }; /* @@ -320,21 +323,21 @@ enum perf_sample_regs_abi { * abort events. Multiple bits can be set. */ enum { - PERF_TXN_ELISION = (1 << 0), /* From elision */ - PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ - PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ - PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ - PERF_TXN_RETRY = (1 << 4), /* Retry possible */ - PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ - PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ - PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ + PERF_TXN_ELISION = (1 << 0), /* From elision */ + PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ + PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ + PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */ + PERF_TXN_RETRY = (1 << 4), /* Retry possible */ + PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ + PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ + PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ - PERF_TXN_MAX = (1 << 8), /* non-ABI */ + PERF_TXN_MAX = (1 << 8), /* non-ABI */ - /* bits 32..63 are reserved for the abort code */ + /* Bits 32..63 are reserved for the abort code */ - PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), - PERF_TXN_ABORT_SHIFT = 32, + PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), + PERF_TXN_ABORT_SHIFT = 32, }; /* @@ -369,24 +372,22 @@ enum perf_event_read_format { PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; -#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ -#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ -#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ -#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ - /* add: sample_stack_user */ -#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ -#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ -#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ -#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ -#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */ +#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */ +#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */ +#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */ + /* Add: sample_stack_user */ +#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */ +#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */ +#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */ +#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */ /* - * Hardware event_id to monitor via a performance monitoring event: - * - * @sample_max_stack: Max number of frame pointers in a callchain, - * should be < /proc/sys/kernel/perf_event_max_stack - * Max number of entries of branch stack - * should be < hardware limit + * 'struct perf_event_attr' contains various attributes that define + * a performance event - most of them hardware related configuration + * details, but also a lot of behavioral switches and values implemented + * by the kernel. */ struct perf_event_attr { @@ -396,7 +397,7 @@ struct perf_event_attr { __u32 type; /* - * Size of the attr structure, for fwd/bwd compat. + * Size of the attr structure, for forward/backwards compatibility. */ __u32 size; @@ -451,21 +452,21 @@ struct perf_event_attr { comm_exec : 1, /* flag comm events that are due to an exec */ use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ - write_backward : 1, /* Write ring buffer from end to beginning */ + write_backward : 1, /* write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ ksymbol : 1, /* include ksymbol events */ - bpf_event : 1, /* include bpf events */ + bpf_event : 1, /* include BPF events */ aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ - build_id : 1, /* use build id in mmap2 events */ + build_id : 1, /* use build ID in mmap2 events */ inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ __reserved_1 : 26; union { - __u32 wakeup_events; /* wakeup every n events */ + __u32 wakeup_events; /* wake up every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; @@ -474,13 +475,13 @@ struct perf_event_attr { __u64 bp_addr; __u64 kprobe_func; /* for perf_kprobe */ __u64 uprobe_path; /* for perf_uprobe */ - __u64 config1; /* extension of config */ + __u64 config1; /* extension of config */ }; union { __u64 bp_len; - __u64 kprobe_addr; /* when kprobe_func == NULL */ + __u64 kprobe_addr; /* when kprobe_func == NULL */ __u64 probe_offset; /* for perf_[k,u]probe */ - __u64 config2; /* extension of config1 */ + __u64 config2; /* extension of config1 */ }; __u64 branch_sample_type; /* enum perf_branch_sample_type */ @@ -510,7 +511,16 @@ struct perf_event_attr { * Wakeup watermark for AUX area */ __u32 aux_watermark; + + /* + * Max number of frame pointers in a callchain, should be + * lower than /proc/sys/kernel/perf_event_max_stack. + * + * Max number of entries of branch stack should be lower + * than the hardware limit. + */ __u16 sample_max_stack; + __u16 __reserved_2; __u32 aux_sample_size; @@ -537,7 +547,7 @@ struct perf_event_attr { /* * Structure used by below PERF_EVENT_IOC_QUERY_BPF command - * to query bpf programs attached to the same perf tracepoint + * to query BPF programs attached to the same perf tracepoint * as the given perf event. */ struct perf_event_query_bpf { @@ -559,21 +569,21 @@ struct perf_event_query_bpf { /* * Ioctls that can be done on a perf event fd: */ -#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) -#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) -#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) -#define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) -#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) -#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) -#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) -#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *) +#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32) +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32) #define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) -#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *) +#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *) enum perf_event_ioc_flags { - PERF_IOC_FLAG_GROUP = 1U << 0, + PERF_IOC_FLAG_GROUP = 1U << 0, }; /* @@ -584,7 +594,7 @@ struct perf_event_mmap_page { __u32 compat_version; /* lowest version this is compat with */ /* - * Bits needed to read the hw events in user-space. + * Bits needed to read the HW events in user-space. * * u32 seq, time_mult, time_shift, index, width; * u64 count, enabled, running; @@ -622,7 +632,7 @@ struct perf_event_mmap_page { __u32 index; /* hardware event identifier */ __s64 offset; /* add to hardware event value */ __u64 time_enabled; /* time event active */ - __u64 time_running; /* time event on cpu */ + __u64 time_running; /* time event on CPU */ union { __u64 capabilities; struct { @@ -650,7 +660,7 @@ struct perf_event_mmap_page { /* * If cap_usr_time the below fields can be used to compute the time - * delta since time_enabled (in ns) using rdtsc or similar. + * delta since time_enabled (in ns) using RDTSC or similar. * * u64 quot, rem; * u64 delta; @@ -723,7 +733,7 @@ struct perf_event_mmap_page { * after reading this value. * * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data, after issueing + * written by user-space to reflect the last read data, after issuing * an smp_mb() to separate the data read from the ->data_tail store. * In this case the kernel will not over-write unread data. * @@ -739,7 +749,7 @@ struct perf_event_mmap_page { /* * AUX area is defined by aux_{offset,size} fields that should be set - * by the userspace, so that + * by the user-space, so that * * aux_offset >= data_offset + data_size * @@ -813,7 +823,7 @@ struct perf_event_mmap_page { * Indicates that thread was preempted in TASK_RUNNING state. * * PERF_RECORD_MISC_MMAP_BUILD_ID: - * Indicates that mmap2 event carries build id data. + * Indicates that mmap2 event carries build ID data. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) @@ -824,26 +834,26 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; + __u32 type; + __u16 misc; + __u16 size; }; struct perf_ns_link_info { - __u64 dev; - __u64 ino; + __u64 dev; + __u64 ino; }; enum { - NET_NS_INDEX = 0, - UTS_NS_INDEX = 1, - IPC_NS_INDEX = 2, - PID_NS_INDEX = 3, - USER_NS_INDEX = 4, - MNT_NS_INDEX = 5, - CGROUP_NS_INDEX = 6, - - NR_NAMESPACES, /* number of available namespaces */ + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ }; enum perf_event_type { @@ -859,11 +869,11 @@ enum perf_event_type { * optional fields being ignored. * * struct sample_id { - * { u32 pid, tid; } && PERF_SAMPLE_TID - * { u64 time; } && PERF_SAMPLE_TIME - * { u64 id; } && PERF_SAMPLE_ID - * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID - * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 id; } && PERF_SAMPLE_IDENTIFIER * } && perf_event_attr::sample_id_all * @@ -874,7 +884,7 @@ enum perf_event_type { /* * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: + * correlate user-space IPs to code. They have the following structure: * * struct { * struct perf_event_header header; @@ -884,7 +894,7 @@ enum perf_event_type { * u64 len; * u64 pgoff; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP = 1, @@ -894,7 +904,7 @@ enum perf_event_type { * struct perf_event_header header; * u64 id; * u64 lost; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_LOST = 2, @@ -905,7 +915,7 @@ enum perf_event_type { * * u32 pid, tid; * char comm[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_COMM = 3, @@ -916,7 +926,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_EXIT = 4, @@ -927,7 +937,7 @@ enum perf_event_type { * u64 time; * u64 id; * u64 stream_id; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_THROTTLE = 5, @@ -939,7 +949,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_FORK = 7, @@ -950,7 +960,7 @@ enum perf_event_type { * u32 pid, tid; * * struct read_format values; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_READ = 8, @@ -1005,12 +1015,12 @@ enum perf_event_type { * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * - * { u64 abi; # enum perf_sample_regs_abi - * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER * - * { u64 size; - * char data[size]; - * u64 dyn_size; } && PERF_SAMPLE_STACK_USER + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * * { union perf_sample_weight * { @@ -1071,7 +1081,7 @@ enum perf_event_type { * }; * u32 prot, flags; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP2 = 10, @@ -1080,12 +1090,12 @@ enum perf_event_type { * Records that new data landed in the AUX buffer part. * * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * u64 aux_offset; - * u64 aux_size; + * u64 aux_offset; + * u64 aux_size; * u64 flags; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_AUX = 11, @@ -1168,7 +1178,7 @@ enum perf_event_type { PERF_RECORD_KSYMBOL = 17, /* - * Record bpf events: + * Record BPF events: * enum perf_bpf_event_type { * PERF_BPF_EVENT_UNKNOWN = 0, * PERF_BPF_EVENT_PROG_LOAD = 1, @@ -1246,181 +1256,181 @@ enum perf_record_ksymbol_type { #define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) enum perf_bpf_event_type { - PERF_BPF_EVENT_UNKNOWN = 0, - PERF_BPF_EVENT_PROG_LOAD = 1, - PERF_BPF_EVENT_PROG_UNLOAD = 2, - PERF_BPF_EVENT_MAX, /* non-ABI */ + PERF_BPF_EVENT_UNKNOWN = 0, + PERF_BPF_EVENT_PROG_LOAD = 1, + PERF_BPF_EVENT_PROG_UNLOAD = 2, + PERF_BPF_EVENT_MAX, /* non-ABI */ }; -#define PERF_MAX_STACK_DEPTH 127 -#define PERF_MAX_CONTEXTS_PER_STACK 8 +#define PERF_MAX_STACK_DEPTH 127 +#define PERF_MAX_CONTEXTS_PER_STACK 8 enum perf_callchain_context { - PERF_CONTEXT_HV = (__u64)-32, - PERF_CONTEXT_KERNEL = (__u64)-128, - PERF_CONTEXT_USER = (__u64)-512, + PERF_CONTEXT_HV = (__u64)-32, + PERF_CONTEXT_KERNEL = (__u64)-128, + PERF_CONTEXT_USER = (__u64)-512, - PERF_CONTEXT_GUEST = (__u64)-2048, - PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, - PERF_CONTEXT_GUEST_USER = (__u64)-2560, + PERF_CONTEXT_GUEST = (__u64)-2048, + PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, + PERF_CONTEXT_GUEST_USER = (__u64)-2560, - PERF_CONTEXT_MAX = (__u64)-4095, + PERF_CONTEXT_MAX = (__u64)-4095, }; /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */ #define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ /* CoreSight PMU AUX buffer formats */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ -#define PERF_FLAG_FD_NO_GROUP (1UL << 0) -#define PERF_FLAG_FD_OUTPUT (1UL << 1) -#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ -#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#define PERF_FLAG_FD_NO_GROUP (1UL << 0) +#define PERF_FLAG_FD_OUTPUT (1UL << 1) +#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */ +#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ #if defined(__LITTLE_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_op:5, /* type of opcode */ - mem_lvl:14, /* memory hierarchy level */ - mem_snoop:5, /* snoop mode */ - mem_lock:2, /* lock instr */ - mem_dtlb:7, /* tlb access */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_remote:1, /* remote */ - mem_snoopx:2, /* snoop mode, ext */ - mem_blk:3, /* access blocked */ - mem_hops:3, /* hop level */ - mem_rsvd:18; + __u64 mem_op : 5, /* Type of opcode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_snoop : 5, /* Snoop mode */ + mem_lock : 2, /* Lock instr */ + mem_dtlb : 7, /* TLB access */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_remote : 1, /* Remote */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_blk : 3, /* Access blocked */ + mem_hops : 3, /* Hop level */ + mem_rsvd : 18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:18, - mem_hops:3, /* hop level */ - mem_blk:3, /* access blocked */ - mem_snoopx:2, /* snoop mode, ext */ - mem_remote:1, /* remote */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_dtlb:7, /* tlb access */ - mem_lock:2, /* lock instr */ - mem_snoop:5, /* snoop mode */ - mem_lvl:14, /* memory hierarchy level */ - mem_op:5; /* type of opcode */ + __u64 mem_rsvd : 18, + mem_hops : 3, /* Hop level */ + mem_blk : 3, /* Access blocked */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_remote : 1, /* Remote */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_dtlb : 7, /* TLB access */ + mem_lock : 2, /* Lock instr */ + mem_snoop : 5, /* Snoop mode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_op : 5; /* Type of opcode */ }; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif -/* type of opcode (load/store/prefetch,code) */ -#define PERF_MEM_OP_NA 0x01 /* not available */ -#define PERF_MEM_OP_LOAD 0x02 /* load instruction */ -#define PERF_MEM_OP_STORE 0x04 /* store instruction */ -#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */ -#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ -#define PERF_MEM_OP_SHIFT 0 +/* Type of memory opcode: */ +#define PERF_MEM_OP_NA 0x0001 /* Not available */ +#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */ +#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */ +#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */ +#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */ +#define PERF_MEM_OP_SHIFT 0 /* - * PERF_MEM_LVL_* namespace being depricated to some extent in the + * The PERF_MEM_LVL_* namespace is being deprecated to some extent in * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. - * Supporting this namespace inorder to not break defined ABIs. + * We support this namespace in order to not break defined ABIs. * - * memory hierarchy (memory level, hit or miss) + * Memory hierarchy (memory level, hit or miss) */ -#define PERF_MEM_LVL_NA 0x01 /* not available */ -#define PERF_MEM_LVL_HIT 0x02 /* hit level */ -#define PERF_MEM_LVL_MISS 0x04 /* miss level */ -#define PERF_MEM_LVL_L1 0x08 /* L1 */ -#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */ -#define PERF_MEM_LVL_L2 0x20 /* L2 */ -#define PERF_MEM_LVL_L3 0x40 /* L3 */ -#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */ -#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */ -#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */ -#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */ -#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */ -#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ -#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ -#define PERF_MEM_LVL_SHIFT 5 - -#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */ -#define PERF_MEM_REMOTE_SHIFT 37 - -#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */ -#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ -#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ -#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ -/* 0x7 available */ -#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ -#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ -#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ -#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ -#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ -#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ - -#define PERF_MEM_LVLNUM_SHIFT 33 - -/* snoop mode */ -#define PERF_MEM_SNOOP_NA 0x01 /* not available */ -#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ -#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ -#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ -#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ -#define PERF_MEM_SNOOP_SHIFT 19 - -#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ -#define PERF_MEM_SNOOPX_SHIFT 38 - -/* locked instruction */ -#define PERF_MEM_LOCK_NA 0x01 /* not available */ -#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ -#define PERF_MEM_LOCK_SHIFT 24 +#define PERF_MEM_LVL_NA 0x0001 /* Not available */ +#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */ +#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */ +#define PERF_MEM_LVL_L1 0x0008 /* L1 */ +#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */ +#define PERF_MEM_LVL_L2 0x0020 /* L2 */ +#define PERF_MEM_LVL_L3 0x0040 /* L3 */ +#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */ +#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */ +#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */ +#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */ +#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */ +#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ +#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ +#define PERF_MEM_LVL_SHIFT 5 + +#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */ +#define PERF_MEM_REMOTE_SHIFT 37 + +#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */ +#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */ +#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */ +#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */ +#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */ +/* 0x007 available */ +#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */ +#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */ +#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */ +#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */ +#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */ +#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */ +#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */ + +#define PERF_MEM_LVLNUM_SHIFT 33 + +/* Snoop mode */ +#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */ +#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */ +#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */ +#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */ +#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */ +#define PERF_MEM_SNOOP_SHIFT 19 + +#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */ +#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */ +#define PERF_MEM_SNOOPX_SHIFT 38 + +/* Locked instruction */ +#define PERF_MEM_LOCK_NA 0x0001 /* Not available */ +#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */ +#define PERF_MEM_LOCK_SHIFT 24 /* TLB access */ -#define PERF_MEM_TLB_NA 0x01 /* not available */ -#define PERF_MEM_TLB_HIT 0x02 /* hit level */ -#define PERF_MEM_TLB_MISS 0x04 /* miss level */ -#define PERF_MEM_TLB_L1 0x08 /* L1 */ -#define PERF_MEM_TLB_L2 0x10 /* L2 */ -#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/ -#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ -#define PERF_MEM_TLB_SHIFT 26 +#define PERF_MEM_TLB_NA 0x0001 /* Not available */ +#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */ +#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */ +#define PERF_MEM_TLB_L1 0x0008 /* L1 */ +#define PERF_MEM_TLB_L2 0x0010 /* L2 */ +#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/ +#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */ +#define PERF_MEM_TLB_SHIFT 26 /* Access blocked */ -#define PERF_MEM_BLK_NA 0x01 /* not available */ -#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ -#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ -#define PERF_MEM_BLK_SHIFT 40 - -/* hop level */ -#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ -#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ -#define PERF_MEM_HOPS_3 0x04 /* remote board */ +#define PERF_MEM_BLK_NA 0x0001 /* Not available */ +#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + +/* Hop level */ +#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */ +#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */ +#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */ +#define PERF_MEM_HOPS_3 0x0004 /* Remote board */ /* 5-7 available */ -#define PERF_MEM_HOPS_SHIFT 43 +#define PERF_MEM_HOPS_SHIFT 43 #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) /* - * single taken branch record layout: + * Layout of single taken branch records: * * from: source instruction (may not always be a branch insn) * to: branch target @@ -1439,37 +1449,37 @@ union perf_mem_data_src { struct perf_branch_entry { __u64 from; __u64 to; - __u64 mispred:1, /* target mispredicted */ - predicted:1,/* target predicted */ - in_tx:1, /* in transaction */ - abort:1, /* transaction abort */ - cycles:16, /* cycle count to last branch */ - type:4, /* branch type */ - spec:2, /* branch speculation info */ - new_type:4, /* additional branch type */ - priv:3, /* privilege level */ - reserved:31; + __u64 mispred : 1, /* target mispredicted */ + predicted : 1, /* target predicted */ + in_tx : 1, /* in transaction */ + abort : 1, /* transaction abort */ + cycles : 16, /* cycle count to last branch */ + type : 4, /* branch type */ + spec : 2, /* branch speculation info */ + new_type : 4, /* additional branch type */ + priv : 3, /* privilege level */ + reserved : 31; }; /* Size of used info bits in struct perf_branch_entry */ #define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 union perf_sample_weight { - __u64 full; + __u64 full; #if defined(__LITTLE_ENDIAN_BITFIELD) struct { - __u32 var1_dw; - __u16 var2_w; - __u16 var3_w; + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; }; #elif defined(__BIG_ENDIAN_BITFIELD) struct { - __u16 var3_w; - __u16 var2_w; - __u32 var1_dw; + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif }; -- cgit v1.2.3 From 77cbe1a6d8730a07f99f9263c2d5f2304cf5e830 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 19 May 2025 13:57:59 -0700 Subject: af_unix: Introduce SO_PASSRIGHTS. As long as recvmsg() or recvmmsg() is used with cmsg, it is not possible to avoid receiving file descriptors via SCM_RIGHTS. This behaviour has occasionally been flagged as problematic, as it can be (ab)used to trigger DoS during close(), for example, by passing a FUSE-controlled fd or a hung NFS fd. For instance, as noted on the uAPI Group page [0], an untrusted peer could send a file descriptor pointing to a hung NFS mount and then close it. Once the receiver calls recvmsg() with msg_control, the descriptor is automatically installed, and then the responsibility for the final close() now falls on the receiver, which may result in blocking the process for a long time. Regarding this, systemd calls cmsg_close_all() [1] after each recvmsg() to close() unwanted file descriptors sent via SCM_RIGHTS. However, this cannot work around the issue at all, because the final fput() may still occur on the receiver's side once sendmsg() with SCM_RIGHTS succeeds. Also, even filtering by LSM at recvmsg() does not work for the same reason. Thus, we need a better way to refuse SCM_RIGHTS at sendmsg(). Let's introduce SO_PASSRIGHTS to disable SCM_RIGHTS. Note that this option is enabled by default for backward compatibility. Link: https://uapi-group.org/kernel-features/#disabling-reception-of-scm_rights-for-af_unix-sockets #[0] Link: https://github.com/systemd/systemd/blob/v257.5/src/basic/fd-util.c#L612-L628 #[1] Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ include/net/sock.h | 4 +++- include/uapi/asm-generic/socket.h | 2 ++ net/core/sock.c | 14 ++++++++++++++ net/unix/af_unix.c | 22 ++++++++++++++++++++-- tools/include/uapi/asm-generic/socket.h | 2 ++ 9 files changed, 49 insertions(+), 3 deletions(-) (limited to 'tools/include') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 3df5f2dd4c0f..8f1f18adcdb5 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -150,6 +150,8 @@ #define SO_RCVPRIORITY 82 +#define SO_PASSRIGHTS 83 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index 22fa8f19924a..31ac655b7837 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -161,6 +161,8 @@ #define SO_RCVPRIORITY 82 +#define SO_PASSRIGHTS 83 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 96831c988606..1f2d5b7a7f5d 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -142,6 +142,8 @@ #define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF #define SO_DEVMEM_DONTNEED 0x4050 +#define SO_PASSRIGHTS 0x4051 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 5b464a568664..adcba7329386 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -143,6 +143,8 @@ #define SO_RCVPRIORITY 0x005b +#define SO_PASSRIGHTS 0x005c + #if !defined(__KERNEL__) diff --git a/include/net/sock.h b/include/net/sock.h index d90a71f66ab8..92e7c1aae3cc 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -341,6 +341,7 @@ struct sk_filter; * @sk_scm_credentials: flagged by SO_PASSCRED to recv SCM_CREDENTIALS * @sk_scm_security: flagged by SO_PASSSEC to recv SCM_SECURITY * @sk_scm_pidfd: flagged by SO_PASSPIDFD to recv SCM_PIDFD + * @sk_scm_rights: flagged by SO_PASSRIGHTS to recv SCM_RIGHTS * @sk_scm_unused: unused flags for scm_recv() * @ns_tracker: tracker for netns reference * @sk_user_frags: xarray of pages the user is holding a reference on. @@ -535,7 +536,8 @@ struct sock { u8 sk_scm_credentials : 1, sk_scm_security : 1, sk_scm_pidfd : 1, - sk_scm_unused : 5; + sk_scm_rights : 1, + sk_scm_unused : 4; }; }; u8 sk_clockid; diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index aa5016ff3d91..f333a0ac4ee4 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -145,6 +145,8 @@ #define SO_RCVPRIORITY 82 +#define SO_PASSRIGHTS 83 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/net/core/sock.c b/net/core/sock.c index 381abf8f25b7..0cb52e590094 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1571,6 +1571,13 @@ set_sndbuf: ret = -EOPNOTSUPP; break; + case SO_PASSRIGHTS: + if (sk_is_unix(sk)) + sk->sk_scm_rights = valbool; + else + ret = -EOPNOTSUPP; + break; + case SO_INCOMING_CPU: reuseport_update_incoming_cpu(sk, val); break; @@ -1879,6 +1886,13 @@ int sk_getsockopt(struct sock *sk, int level, int optname, v.val = sk->sk_scm_pidfd; break; + case SO_PASSRIGHTS: + if (!sk_is_unix(sk)) + return -EOPNOTSUPP; + + v.val = sk->sk_scm_rights; + break; + case SO_PEERCRED: { struct ucred peercred; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 900bad88fbd2..bd507f74e35e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1015,6 +1015,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sock_init_data(sock, sk); + sk->sk_scm_rights = 1; sk->sk_hash = unix_unbound_hash(sk); sk->sk_allocation = GFP_KERNEL_ACCOUNT; sk->sk_write_space = unix_write_space; @@ -2073,6 +2074,11 @@ restart_locked: goto out_unlock; } + if (UNIXCB(skb).fp && !other->sk_scm_rights) { + err = -EPERM; + goto out_unlock; + } + if (sk->sk_type != SOCK_SEQPACKET) { err = security_unix_may_send(sk->sk_socket, other->sk_socket); if (err) @@ -2174,9 +2180,13 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other, if (sock_flag(other, SOCK_DEAD) || (other->sk_shutdown & RCV_SHUTDOWN)) { - unix_state_unlock(other); err = -EPIPE; - goto out; + goto out_unlock; + } + + if (UNIXCB(skb).fp && !other->sk_scm_rights) { + err = -EPERM; + goto out_unlock; } unix_maybe_add_creds(skb, sk, other); @@ -2192,6 +2202,8 @@ static int queue_oob(struct sock *sk, struct msghdr *msg, struct sock *other, other->sk_data_ready(other); return 0; +out_unlock: + unix_state_unlock(other); out: consume_skb(skb); return err; @@ -2295,6 +2307,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, (other->sk_shutdown & RCV_SHUTDOWN)) goto out_pipe_unlock; + if (UNIXCB(skb).fp && !other->sk_scm_rights) { + unix_state_unlock(other); + err = -EPERM; + goto out_free; + } + unix_maybe_add_creds(skb, sk, other); scm_stat_add(other, skb); skb_queue_tail(&other->sk_receive_queue, skb); diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index aa5016ff3d91..f333a0ac4ee4 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -145,6 +145,8 @@ #define SO_RCVPRIORITY 82 +#define SO_PASSRIGHTS 83 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) -- cgit v1.2.3 From acea6b132d813a12ac414f6b1efb05921623afc0 Mon Sep 17 00:00:00 2001 From: Saket Kumar Bhaskar Date: Tue, 27 May 2025 11:11:38 +0530 Subject: selftests/bpf: Fix bpf selftest build warning On linux-next, build for bpf selftest displays a warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'. Commit 8066e388be48 ("net: add UAPI to the header guard in various network headers") changed the header guard from _LINUX_IF_XDP_H to _UAPI_LINUX_IF_XDP_H in include/uapi/linux/if_xdp.h. To resolve the warning, update tools/include/uapi/linux/if_xdp.h to align with the changes in include/uapi/linux/if_xdp.h Fixes: 8066e388be48 ("net: add UAPI to the header guard in various network headers") Reported-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/all/c2bc466d-dff2-4d0d-a797-9af7f676c065@linux.ibm.com/ Tested-by: Venkat Rao Bagalkote Signed-off-by: Saket Kumar Bhaskar Acked-by: Daniel Borkmann Link: https://patch.msgid.link/20250527054138.1086006-1-skb99@linux.ibm.com Signed-off-by: Paolo Abeni --- tools/include/uapi/linux/if_xdp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 42869770776e..44f2bb93e7e6 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -7,8 +7,8 @@ * Magnus Karlsson */ -#ifndef _LINUX_IF_XDP_H -#define _LINUX_IF_XDP_H +#ifndef _UAPI_LINUX_IF_XDP_H +#define _UAPI_LINUX_IF_XDP_H #include @@ -180,4 +180,4 @@ struct xdp_desc { /* TX packet carries valid metadata. */ #define XDP_TX_METADATA (1 << 1) -#endif /* _LINUX_IF_XDP_H */ +#endif /* _UAPI_LINUX_IF_XDP_H */ -- cgit v1.2.3 From ead7f9b8de65632ef8060b84b0c55049a33cfea1 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Thu, 29 May 2025 12:28:35 +0200 Subject: bpf: Fix L4 csum update on IPv6 in CHECKSUM_COMPLETE In Cilium, we use bpf_csum_diff + bpf_l4_csum_replace to, among other things, update the L4 checksum after reverse SNATing IPv6 packets. That use case is however not currently supported and leads to invalid skb->csum values in some cases. This patch adds support for IPv6 address changes in bpf_l4_csum_update via a new flag. When calling bpf_l4_csum_replace in Cilium, it ends up calling inet_proto_csum_replace_by_diff: 1: void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, 2: __wsum diff, bool pseudohdr) 3: { 4: if (skb->ip_summed != CHECKSUM_PARTIAL) { 5: csum_replace_by_diff(sum, diff); 6: if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) 7: skb->csum = ~csum_sub(diff, skb->csum); 8: } else if (pseudohdr) { 9: *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum))); 10: } 11: } The bug happens when we're in the CHECKSUM_COMPLETE state. We've just updated one of the IPv6 addresses. The helper now updates the L4 header checksum on line 5. Next, it updates skb->csum on line 7. It shouldn't. For an IPv6 packet, the updates of the IPv6 address and of the L4 checksum will cancel each other. The checksums are set such that computing a checksum over the packet including its checksum will result in a sum of 0. So the same is true here when we update the L4 checksum on line 5. We'll update it as to cancel the previous IPv6 address update. Hence skb->csum should remain untouched in this case. The same bug doesn't affect IPv4 packets because, in that case, three fields are updated: the IPv4 address, the IP checksum, and the L4 checksum. The change to the IPv4 address and one of the checksums still cancel each other in skb->csum, but we're left with one checksum update and should therefore update skb->csum accordingly. That's exactly what inet_proto_csum_replace_by_diff does. This special case for IPv6 L4 checksums is also described atop inet_proto_csum_replace16, the function we should be using in this case. This patch introduces a new bpf_l4_csum_replace flag, BPF_F_IPV6, to indicate that we're updating the L4 checksum of an IPv6 packet. When the flag is set, inet_proto_csum_replace_by_diff will skip the skb->csum update. Fixes: 7d672345ed295 ("bpf: add generic bpf_csum_diff helper") Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Link: https://patch.msgid.link/96a6bc3a443e6f0b21ff7b7834000e17fb549e05.1748509484.git.paul.chaignon@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/bpf.h | 2 ++ net/core/filter.c | 5 +++-- tools/include/uapi/linux/bpf.h | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 85180e4aaa5a..0b4a2f124d11 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2056,6 +2056,7 @@ union bpf_attr { * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates * that the modified header field is part of the pseudo-header. + * Flag **BPF_F_IPV6** should be set for IPv6 packets. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more @@ -6072,6 +6073,7 @@ enum { BPF_F_PSEUDO_HDR = (1ULL << 4), BPF_F_MARK_MANGLED_0 = (1ULL << 5), BPF_F_MARK_ENFORCE = (1ULL << 6), + BPF_F_IPV6 = (1ULL << 7), }; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ diff --git a/net/core/filter.c b/net/core/filter.c index f1de7bd8b547..327ca73f9cd7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1968,10 +1968,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, bool is_pseudo = flags & BPF_F_PSEUDO_HDR; bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; bool do_mforce = flags & BPF_F_MARK_ENFORCE; + bool is_ipv6 = flags & BPF_F_IPV6; __sum16 *ptr; if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE | - BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) + BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK | BPF_F_IPV6))) return -EINVAL; if (unlikely(offset > 0xffff || offset & 1)) return -EFAULT; @@ -1987,7 +1988,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, if (unlikely(from != 0)) return -EINVAL; - inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, false); + inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, is_ipv6); break; case 2: inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 85180e4aaa5a..0b4a2f124d11 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2056,6 +2056,7 @@ union bpf_attr { * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates * that the modified header field is part of the pseudo-header. + * Flag **BPF_F_IPV6** should be set for IPv6 packets. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more @@ -6072,6 +6073,7 @@ enum { BPF_F_PSEUDO_HDR = (1ULL << 4), BPF_F_MARK_MANGLED_0 = (1ULL << 5), BPF_F_MARK_ENFORCE = (1ULL << 6), + BPF_F_IPV6 = (1ULL << 7), }; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ -- cgit v1.2.3