From 1201f6fb5bfdbd10985ac3c8f49ef8f4f88b5c94 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 31 Jul 2025 10:00:31 +0200 Subject: tools/nolibc: fix error return value of clock_nanosleep() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clock_nanosleep() returns a positive error value. Unlike other libc functions it *does not* return -1 nor set errno. Fix the return value and also adapt nanosleep(). Fixes: 7c02bc4088af ("tools/nolibc: add support for clock_nanosleep() and nanosleep()") Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250731-nolibc-clock_nanosleep-ret-v1-1-9e4af7855e61@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/time.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index d02bc44d2643..e9c1b976791a 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -133,7 +133,8 @@ static __attribute__((unused)) int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, struct timespec *rmtp) { - return __sysret(sys_clock_nanosleep(clockid, flags, rqtp, rmtp)); + /* Directly return a positive error number */ + return -sys_clock_nanosleep(clockid, flags, rqtp, rmtp); } static __inline__ @@ -145,7 +146,7 @@ double difftime(time_t time1, time_t time2) static __inline__ int nanosleep(const struct timespec *rqtp, struct timespec *rmtp) { - return clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp); + return __sysret(sys_clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp)); } -- cgit v1.2.3 From 894af4a1cde61c3401f237184fb770f72ff12df8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 12 Apr 2025 13:56:01 +0200 Subject: objtool: Validate kCFI calls Validate that all indirect calls adhere to kCFI rules. Notably doing nocfi indirect call to a cfi function is broken. Apparently some Rust 'core' code violates this and explodes when ran with FineIBT. All the ANNOTATE_NOCFI_SYM sites are prime targets for attackers. - runtime EFI is especially henous because it also needs to disable IBT. Basically calling unknown code without CFI protection at runtime is a massice security issue. - Kexec image handover; if you can exploit this, you get to keep it :-) Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Acked-by: Sean Christopherson Link: https://lkml.kernel.org/r/20250714103441.496787279@infradead.org --- arch/x86/kernel/machine_kexec_64.c | 4 ++++ arch/x86/kvm/vmx/vmenter.S | 4 ++++ arch/x86/platform/efi/efi_stub_64.S | 4 ++++ drivers/misc/lkdtm/perms.c | 5 +++++ include/linux/objtool.h | 10 +++++++++ include/linux/objtool_types.h | 1 + tools/include/linux/objtool_types.h | 1 + tools/objtool/check.c | 42 +++++++++++++++++++++++++++++++++++++ tools/objtool/include/objtool/elf.h | 1 + 9 files changed, 72 insertions(+) (limited to 'tools/include') diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 697fb99406e6..8593760c255a 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -453,6 +453,10 @@ void __nocfi machine_kexec(struct kimage *image) __ftrace_enabled_restore(save_ftrace_enabled); } +/* + * Handover to the next kernel, no CFI concern. + */ +ANNOTATE_NOCFI_SYM(machine_kexec); /* arch-dependent functionality related to kexec file-based syscall */ diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 0a6cf5bff2aa..bc255d709d8a 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -361,6 +361,10 @@ SYM_FUNC_END(vmread_error_trampoline) .section .text, "ax" +#ifndef CONFIG_X86_FRED + SYM_FUNC_START(vmx_do_interrupt_irqoff) VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1 SYM_FUNC_END(vmx_do_interrupt_irqoff) + +#endif diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 2206b8bc47b8..f0a5fba0717e 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -11,6 +11,10 @@ #include SYM_FUNC_START(__efi_call) + /* + * The EFI code doesn't have any CFI, annotate away the CFI violation. + */ + ANNOTATE_NOCFI_SYM pushq %rbp movq %rsp, %rbp and $~0xf, %rsp diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c index 6c24426104ba..e1f5e9abb301 100644 --- a/drivers/misc/lkdtm/perms.c +++ b/drivers/misc/lkdtm/perms.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -86,6 +87,10 @@ static noinline __nocfi void execute_location(void *dst, bool write) func(); pr_err("FAIL: func returned\n"); } +/* + * Explicitly doing the wrong thing for testing. + */ +ANNOTATE_NOCFI_SYM(execute_location); static void execute_user_location(void *dst) { diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 366ad004d794..46ebaa46e6c5 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -184,6 +184,15 @@ * WARN using UD2. */ #define ANNOTATE_REACHABLE(label) __ASM_ANNOTATE(label, ANNOTYPE_REACHABLE) +/* + * This should not be used; it annotates away CFI violations. There are a few + * valid use cases like kexec handover to the next kernel image, and there is + * no security concern there. + * + * There are also a few real issues annotated away, like EFI because we can't + * control the EFI code. + */ +#define ANNOTATE_NOCFI_SYM(sym) asm(__ASM_ANNOTATE(sym, ANNOTYPE_NOCFI)) #else #define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR @@ -194,6 +203,7 @@ #define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL #define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN #define ANNOTATE_REACHABLE ANNOTATE type=ANNOTYPE_REACHABLE +#define ANNOTATE_NOCFI_SYM ANNOTATE type=ANNOTYPE_NOCFI #endif #if defined(CONFIG_NOINSTR_VALIDATION) && \ diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index df5d9fa84dba..aceac94632c8 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -65,5 +65,6 @@ struct unwind_hint { #define ANNOTYPE_IGNORE_ALTS 6 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 #define ANNOTYPE_REACHABLE 8 +#define ANNOTYPE_NOCFI 9 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index df5d9fa84dba..aceac94632c8 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -65,5 +65,6 @@ struct unwind_hint { #define ANNOTYPE_IGNORE_ALTS 6 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 #define ANNOTYPE_REACHABLE 8 +#define ANNOTYPE_NOCFI 9 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index d14f20ef1db1..79eab61cd944 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2392,6 +2392,8 @@ static int __annotate_ifc(struct objtool_file *file, int type, struct instructio static int __annotate_late(struct objtool_file *file, int type, struct instruction *insn) { + struct symbol *sym; + switch (type) { case ANNOTYPE_NOENDBR: /* early */ @@ -2433,6 +2435,15 @@ static int __annotate_late(struct objtool_file *file, int type, struct instructi insn->dead_end = false; break; + case ANNOTYPE_NOCFI: + sym = insn->sym; + if (!sym) { + ERROR_INSN(insn, "dodgy NOCFI annotation"); + return -1; + } + insn->sym->nocfi = 1; + break; + default: ERROR_INSN(insn, "Unknown annotation type: %d", type); return -1; @@ -4002,6 +4013,37 @@ static int validate_retpoline(struct objtool_file *file) warnings++; } + if (!opts.cfi) + return warnings; + + /* + * kCFI call sites look like: + * + * movl $(-0x12345678), %r10d + * addl -4(%r11), %r10d + * jz 1f + * ud2 + * 1: cs call __x86_indirect_thunk_r11 + * + * Verify all indirect calls are kCFI adorned by checking for the + * UD2. Notably, doing __nocfi calls to regular (cfi) functions is + * broken. + */ + list_for_each_entry(insn, &file->retpoline_call_list, call_node) { + struct symbol *sym = insn->sym; + + if (sym && (sym->type == STT_NOTYPE || + sym->type == STT_FUNC) && !sym->nocfi) { + struct instruction *prev = + prev_insn_same_sym(file, insn); + + if (!prev || prev->type != INSN_BUG) { + WARN_INSN(insn, "no-cfi indirect call!"); + warnings++; + } + } + } + return warnings; } diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 0a2fa3ac0079..df8434d3b744 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -70,6 +70,7 @@ struct symbol { u8 local_label : 1; u8 frame_pointer : 1; u8 ignore : 1; + u8 nocfi : 1; struct list_head pv_target; struct reloc *relocs; struct section *group_sec; -- cgit v1.2.3 From bd842ff41543af424c2473dc16c678ac8ba2b43f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: tools headers: Sync KVM headers with the kernel source To pick up the changes in this cset: f55ce5a6cd33211c KVM: arm64: Expose new KVM cap for cacheable PFNMAP 28224ef02b56fcee KVM: TDX: Report supported optional TDVMCALLs in TDX capabilities 4580dbef5ce0f95a KVM: TDX: Exit to userspace for SetupEventNotifyInterrupt 25e8b1dd4883e6c2 KVM: TDX: Exit to userspace for GetTdVmCallInfo cf207eac06f661fb KVM: TDX: Handle TDG.VP.VMCALL This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Please see tools/include/uapi/README for further details. Cc: Paolo Bonzini Cc: kvm@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/arch/x86/include/uapi/asm/kvm.h | 8 +++++++- tools/include/uapi/linux/kvm.h | 27 +++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 6f3499507c5e..0f15d683817d 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -965,7 +965,13 @@ struct kvm_tdx_cmd { struct kvm_tdx_capabilities { __u64 supported_attrs; __u64 supported_xfam; - __u64 reserved[254]; + + __u64 kernel_tdvmcallinfo_1_r11; + __u64 user_tdvmcallinfo_1_r11; + __u64 kernel_tdvmcallinfo_1_r12; + __u64 user_tdvmcallinfo_1_r12; + + __u64 reserved[250]; /* Configurable CPUID bits for userspace */ struct kvm_cpuid2 cpuid; diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 7415a3863891..f0f0d49d2544 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -178,6 +178,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_TDX 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -447,6 +448,31 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + __u64 ret; + __u64 data[5]; + } unknown; + struct { + __u64 ret; + __u64 gpa; + __u64 size; + } get_quote; + struct { + __u64 ret; + __u64 leaf; + __u64 r11, r12, r13, r14; + } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; + }; + } tdx; /* Fix the size of the union. */ char padding[256]; }; @@ -935,6 +961,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_EL2 240 #define KVM_CAP_ARM_EL2_E2H0 241 #define KVM_CAP_RISCV_MP_STATE_RESET 242 +#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3 From 6cb8607934d937f4ad24ec9ad26aeb669e266937 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: tools headers: Sync linux/bits.h with the kernel source To pick up the changes in this cset: 104ea1c84b91c9f4 bits: unify the non-asm GENMASK*() 6d4471252ccc1722 bits: split the definition of the asm and non-asm GENMASK*() This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/linux/bits.h include/linux/bits.h Please see tools/include/uapi/README for further details. Cc: Yury Norov Signed-off-by: Namhyung Kim --- tools/include/linux/bits.h | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) (limited to 'tools/include') diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 7ad056219115..a40cc861b3a7 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -2,10 +2,8 @@ #ifndef __LINUX_BITS_H #define __LINUX_BITS_H -#include #include #include -#include #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) @@ -50,10 +48,14 @@ (type_max(t) << (l) & \ type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) +#define GENMASK(h, l) GENMASK_TYPE(unsigned long, h, l) +#define GENMASK_ULL(h, l) GENMASK_TYPE(unsigned long long, h, l) + #define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) #define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) #define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) #define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) +#define GENMASK_U128(h, l) GENMASK_TYPE(u128, h, l) /* * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The @@ -79,28 +81,9 @@ * BUILD_BUG_ON_ZERO is not available in h files included from asm files, * disable the input check if that is the case. */ -#define GENMASK_INPUT_CHECK(h, l) 0 +#define GENMASK(h, l) __GENMASK(h, l) +#define GENMASK_ULL(h, l) __GENMASK_ULL(h, l) #endif /* !defined(__ASSEMBLY__) */ -#define GENMASK(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) - -#if !defined(__ASSEMBLY__) -/* - * Missing asm support - * - * __GENMASK_U128() depends on _BIT128() which would not work - * in the asm code, as it shifts an 'unsigned __int128' data - * type instead of direct representation of 128 bit constants - * such as long and unsigned long. The fundamental problem is - * that a 128 bit constant will get silently truncated by the - * gcc compiler. - */ -#define GENMASK_U128(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) -#endif - #endif /* __LINUX_BITS_H */ -- cgit v1.2.3 From aa34642f6fc36a436de5ae5b30d414578b3622f5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: tools headers: Sync linux/cfi_types.h with the kernel source To pick up the changes in this cset: 5ccaeedb489b41ce cfi: add C CFI type macro This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/linux/cfi_types.h include/linux/cfi_types.h Please see tools/include/uapi/README for further details. Cc: Mark Rutland Signed-off-by: Namhyung Kim --- tools/include/linux/cfi_types.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'tools/include') diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h index 6b8713675765..685f7181780f 100644 --- a/tools/include/linux/cfi_types.h +++ b/tools/include/linux/cfi_types.h @@ -41,5 +41,28 @@ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_CFI_CLANG +#define DEFINE_CFI_TYPE(name, func) \ + /* \ + * Force a reference to the function so the compiler generates \ + * __kcfi_typeid_. \ + */ \ + __ADDRESSABLE(func); \ + /* u32 name __ro_after_init = __kcfi_typeid_ */ \ + extern u32 name; \ + asm ( \ + " .pushsection .data..ro_after_init,\"aw\",\%progbits \n" \ + " .type " #name ",\%object \n" \ + " .globl " #name " \n" \ + " .p2align 2, 0x0 \n" \ + #name ": \n" \ + " .4byte __kcfi_typeid_" #func " \n" \ + " .size " #name ", 4 \n" \ + " .popsection \n" \ + ); +#endif + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_CFI_TYPES_H */ -- cgit v1.2.3 From 52174e0eb13876654f56701c26a672890aa5e7e3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 18 Aug 2025 10:32:18 -0700 Subject: tools headers: Sync syscall tables with the kernel source To pick up the changes in this cset: be7efb2d20d67f33 fs: introduce file_getattr and file_setattr syscalls This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h diff -u tools/scripts/syscall.tbl scripts/syscall.tbl diff -u tools/perf/arch/x86/entry/syscalls/syscall_32.tbl arch/x86/entry/syscalls/syscall_32.tbl diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl diff -u tools/perf/arch/arm/entry/syscalls/syscall.tbl arch/arm/tools/syscall.tbl diff -u tools/perf/arch/sh/entry/syscalls/syscall.tbl arch/sh/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/sparc/entry/syscalls/syscall.tbl arch/sparc/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/xtensa/entry/syscalls/syscall.tbl arch/xtensa/kernel/syscalls/syscall.tbl Please see tools/include/uapi/README for further details. Cc: Arnd Bergmann CC: linux-api@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/asm-generic/unistd.h | 8 +++++++- tools/perf/arch/arm/entry/syscalls/syscall.tbl | 2 ++ tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 2 ++ tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 2 ++ tools/perf/arch/s390/entry/syscalls/syscall.tbl | 2 ++ tools/perf/arch/sh/entry/syscalls/syscall.tbl | 2 ++ tools/perf/arch/sparc/entry/syscalls/syscall.tbl | 2 ++ tools/perf/arch/x86/entry/syscalls/syscall_32.tbl | 2 ++ tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 2 ++ tools/perf/arch/xtensa/entry/syscalls/syscall.tbl | 2 ++ tools/scripts/syscall.tbl | 2 ++ 11 files changed, 27 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 2892a45023af..04e0077fb4c9 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -852,8 +852,14 @@ __SYSCALL(__NR_removexattrat, sys_removexattrat) #define __NR_open_tree_attr 467 __SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) +/* fs/inode.c */ +#define __NR_file_getattr 468 +__SYSCALL(__NR_file_getattr, sys_file_getattr) +#define __NR_file_setattr 469 +__SYSCALL(__NR_file_setattr, sys_file_setattr) + #undef __NR_syscalls -#define __NR_syscalls 468 +#define __NR_syscalls 470 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl index 27c1d5ebcd91..b07e699aaa3c 100644 --- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl @@ -482,3 +482,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 1e8c44c7b614..7a7049c2c307 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -382,3 +382,5 @@ 465 n64 listxattrat sys_listxattrat 466 n64 removexattrat sys_removexattrat 467 n64 open_tree_attr sys_open_tree_attr +468 n64 file_getattr sys_file_getattr +469 n64 file_setattr sys_file_setattr diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 9a084bdb8926..b453e80dfc00 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -558,3 +558,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index a4569b96ef06..8a6744d658db 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -470,3 +470,5 @@ 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr sys_file_setattr diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl index 52a7652fcff6..5e9c9eff5539 100644 --- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl @@ -471,3 +471,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl index 83e45eb6c095..ebb7d06d1044 100644 --- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl @@ -513,3 +513,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl index ac007ea00979..4877e16da69a 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl @@ -473,3 +473,5 @@ 465 i386 listxattrat sys_listxattrat 466 i386 removexattrat sys_removexattrat 467 i386 open_tree_attr sys_open_tree_attr +468 i386 file_getattr sys_file_getattr +469 i386 file_setattr sys_file_setattr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index cfb5ca41e30d..92cf0fe2291e 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -391,6 +391,8 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl index f657a77314f8..374e4cb788d8 100644 --- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl @@ -438,3 +438,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl index 580b4e246aec..d1ae5e92c615 100644 --- a/tools/scripts/syscall.tbl +++ b/tools/scripts/syscall.tbl @@ -408,3 +408,5 @@ 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat 467 common open_tree_attr sys_open_tree_attr +468 common file_getattr sys_file_getattr +469 common file_setattr sys_file_setattr -- cgit v1.2.3 From 9a6a6a3191574a01dcf7a7d9385246d7bc8736bc Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 11 Aug 2025 06:26:54 +0100 Subject: tools/testing: add linux/args.h header and fix radix, VMA tests Commit 857d18f23ab1 ("cleanup: Introduce ACQUIRE() and ACQUIRE_ERR() for conditional locks") accidentally broke the radix tree, VMA userland tests by including linux/args.h which is not present in the tools/include directory. This patch copies this over and adds an #ifdef block to avoid duplicate __CONCAT declaration in conflict with system headers when we ultimately include this. Link: https://lkml.kernel.org/r/20250811052654.33286-1-lorenzo.stoakes@oracle.com Fixes: 857d18f23ab1 ("cleanup: Introduce ACQUIRE() and ACQUIRE_ERR() for conditional locks") Signed-off-by: Lorenzo Stoakes Cc: Jann Horn Cc: John Hubbard Cc: Liam Howlett Cc: Sidhartha Kumar Cc: Vlastimil Babka Cc: Dan Williams Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- tools/include/linux/args.h | 28 ++++++++++++++++++++++++++++ tools/testing/shared/linux/idr.h | 4 ++++ 2 files changed, 32 insertions(+) create mode 100644 tools/include/linux/args.h (limited to 'tools/include') diff --git a/tools/include/linux/args.h b/tools/include/linux/args.h new file mode 100644 index 000000000000..2e8e65d975c7 --- /dev/null +++ b/tools/include/linux/args.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_ARGS_H +#define _LINUX_ARGS_H + +/* + * How do these macros work? + * + * In __COUNT_ARGS() _0 to _12 are just placeholders from the start + * in order to make sure _n is positioned over the correct number + * from 12 to 0 (depending on X, which is a variadic argument list). + * They serve no purpose other than occupying a position. Since each + * macro parameter must have a distinct identifier, those identifiers + * are as good as any. + * + * In COUNT_ARGS() we use actual integers, so __COUNT_ARGS() returns + * that as _n. + */ + +/* This counts to 15. Any more, it will return 16th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +/* Concatenate two parameters, but allow them to be expanded beforehand. */ +#define __CONCAT(a, b) a ## b +#define CONCATENATE(a, b) __CONCAT(a, b) + +#endif /* _LINUX_ARGS_H */ diff --git a/tools/testing/shared/linux/idr.h b/tools/testing/shared/linux/idr.h index 4e342f2e37cf..676c5564e33f 100644 --- a/tools/testing/shared/linux/idr.h +++ b/tools/testing/shared/linux/idr.h @@ -1 +1,5 @@ +/* Avoid duplicate definitions due to system headers. */ +#ifdef __CONCAT +#undef __CONCAT +#endif #include "../../../../include/linux/idr.h" -- cgit v1.2.3 From 9bf9b185e3ce76c9fddb4c6edb0ec3334b7649df Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 22 Aug 2025 21:25:01 +0000 Subject: tools headers: Add stub definition for __iomem Add an empty definition for __iomem so that kernel headers that use __iomem can be imported into tools/include/ with less modifications. Acked-by: Shuah Khan Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20250822212518.4156428-15-dmatlack@google.com Signed-off-by: Alex Williamson --- tools/include/linux/compiler.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/include') diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 33411ca0cc90..f40bd2b04c29 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -138,6 +138,10 @@ # define __force #endif +#ifndef __iomem +# define __iomem +#endif + #ifndef __weak # define __weak __attribute__((weak)) #endif -- cgit v1.2.3 From 1f9c8edd6a7e9b0fd914cfeef8ce075307e8e702 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 22 Aug 2025 21:25:02 +0000 Subject: tools headers: Import asm-generic MMIO helpers Import the asm-generic MMIO helper functions from the kernel headers into tools/include/. The top-level include is which then includes the arch-specific , which then includes . This layout is chosen to match the kernel header layout and to appease checkpatch.pl (which warns against including or directly). Changes made when importing: - Add missing includes at the top. - Stub out mmiowb_set_pending(). - Stub out _THIS_IP_. - Stub out log_*_mmio() calls. - Drop the CONFIG_64BIT checks, since tools/include/linux/types.h always defines u64. Acked-by: Shuah Khan Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20250822212518.4156428-16-dmatlack@google.com Signed-off-by: Alex Williamson --- tools/include/asm-generic/io.h | 482 +++++++++++++++++++++++++++++++++++++++++ tools/include/asm/io.h | 7 + tools/include/linux/io.h | 4 +- 3 files changed, 492 insertions(+), 1 deletion(-) create mode 100644 tools/include/asm-generic/io.h create mode 100644 tools/include/asm/io.h (limited to 'tools/include') diff --git a/tools/include/asm-generic/io.h b/tools/include/asm-generic/io.h new file mode 100644 index 000000000000..e5a0b07ad452 --- /dev/null +++ b/tools/include/asm-generic/io.h @@ -0,0 +1,482 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_ASM_GENERIC_IO_H +#define _TOOLS_ASM_GENERIC_IO_H + +#include +#include + +#include +#include +#include + +#ifndef mmiowb_set_pending +#define mmiowb_set_pending() do { } while (0) +#endif + +#ifndef __io_br +#define __io_br() barrier() +#endif + +/* prevent prefetching of coherent DMA data ahead of a dma-complete */ +#ifndef __io_ar +#ifdef rmb +#define __io_ar(v) rmb() +#else +#define __io_ar(v) barrier() +#endif +#endif + +/* flush writes to coherent DMA data before possibly triggering a DMA read */ +#ifndef __io_bw +#ifdef wmb +#define __io_bw() wmb() +#else +#define __io_bw() barrier() +#endif +#endif + +/* serialize device access against a spin_unlock, usually handled there. */ +#ifndef __io_aw +#define __io_aw() mmiowb_set_pending() +#endif + +#ifndef __io_pbw +#define __io_pbw() __io_bw() +#endif + +#ifndef __io_paw +#define __io_paw() __io_aw() +#endif + +#ifndef __io_pbr +#define __io_pbr() __io_br() +#endif + +#ifndef __io_par +#define __io_par(v) __io_ar(v) +#endif + +#ifndef _THIS_IP_ +#define _THIS_IP_ 0 +#endif + +static inline void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_read_mmio(u8 width, const volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} + +/* + * __raw_{read,write}{b,w,l,q}() access memory in native endianness. + * + * On some architectures memory mapped IO needs to be accessed differently. + * On the simple architectures, we just read/write the memory location + * directly. + */ + +#ifndef __raw_readb +#define __raw_readb __raw_readb +static inline u8 __raw_readb(const volatile void __iomem *addr) +{ + return *(const volatile u8 __force *)addr; +} +#endif + +#ifndef __raw_readw +#define __raw_readw __raw_readw +static inline u16 __raw_readw(const volatile void __iomem *addr) +{ + return *(const volatile u16 __force *)addr; +} +#endif + +#ifndef __raw_readl +#define __raw_readl __raw_readl +static inline u32 __raw_readl(const volatile void __iomem *addr) +{ + return *(const volatile u32 __force *)addr; +} +#endif + +#ifndef __raw_readq +#define __raw_readq __raw_readq +static inline u64 __raw_readq(const volatile void __iomem *addr) +{ + return *(const volatile u64 __force *)addr; +} +#endif + +#ifndef __raw_writeb +#define __raw_writeb __raw_writeb +static inline void __raw_writeb(u8 value, volatile void __iomem *addr) +{ + *(volatile u8 __force *)addr = value; +} +#endif + +#ifndef __raw_writew +#define __raw_writew __raw_writew +static inline void __raw_writew(u16 value, volatile void __iomem *addr) +{ + *(volatile u16 __force *)addr = value; +} +#endif + +#ifndef __raw_writel +#define __raw_writel __raw_writel +static inline void __raw_writel(u32 value, volatile void __iomem *addr) +{ + *(volatile u32 __force *)addr = value; +} +#endif + +#ifndef __raw_writeq +#define __raw_writeq __raw_writeq +static inline void __raw_writeq(u64 value, volatile void __iomem *addr) +{ + *(volatile u64 __force *)addr = value; +} +#endif + +/* + * {read,write}{b,w,l,q}() access little endian memory and return result in + * native endianness. + */ + +#ifndef readb +#define readb readb +static inline u8 readb(const volatile void __iomem *addr) +{ + u8 val; + + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __raw_readb(addr); + __io_ar(val); + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readw +#define readw readw +static inline u16 readw(const volatile void __iomem *addr) +{ + u16 val; + + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); + __io_ar(val); + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readl +#define readl readl +static inline u32 readl(const volatile void __iomem *addr) +{ + u32 val; + + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); + __io_ar(val); + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readq +#define readq readq +static inline u64 readq(const volatile void __iomem *addr) +{ + u64 val; + + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); + __io_ar(val); + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef writeb +#define writeb writeb +static inline void writeb(u8 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writeb(value, addr); + __io_aw(); + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writew +#define writew writew +static inline void writew(u16 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writew((u16 __force)cpu_to_le16(value), addr); + __io_aw(); + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writel +#define writel writel +static inline void writel(u32 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); + __io_aw(); + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writeq +#define writeq writeq +static inline void writeq(u64 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); + __io_aw(); + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); +} +#endif + +/* + * {read,write}{b,w,l,q}_relaxed() are like the regular version, but + * are not guaranteed to provide ordering against spinlocks or memory + * accesses. + */ +#ifndef readb_relaxed +#define readb_relaxed readb_relaxed +static inline u8 readb_relaxed(const volatile void __iomem *addr) +{ + u8 val; + + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); + val = __raw_readb(addr); + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readw_relaxed +#define readw_relaxed readw_relaxed +static inline u16 readw_relaxed(const volatile void __iomem *addr) +{ + u16 val; + + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readl_relaxed +#define readl_relaxed readl_relaxed +static inline u32 readl_relaxed(const volatile void __iomem *addr) +{ + u32 val; + + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#if defined(readq) && !defined(readq_relaxed) +#define readq_relaxed readq_relaxed +static inline u64 readq_relaxed(const volatile void __iomem *addr) +{ + u64 val; + + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef writeb_relaxed +#define writeb_relaxed writeb_relaxed +static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + __raw_writeb(value, addr); + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writew_relaxed +#define writew_relaxed writew_relaxed +static inline void writew_relaxed(u16 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + __raw_writew((u16 __force)cpu_to_le16(value), addr); + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writel_relaxed +#define writel_relaxed writel_relaxed +static inline void writel_relaxed(u32 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#if defined(writeq) && !defined(writeq_relaxed) +#define writeq_relaxed writeq_relaxed +static inline void writeq_relaxed(u64 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); +} +#endif + +/* + * {read,write}s{b,w,l,q}() repeatedly access the same memory address in + * native endianness in 8-, 16-, 32- or 64-bit chunks (@count times). + */ +#ifndef readsb +#define readsb readsb +static inline void readsb(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u8 *buf = buffer; + + do { + u8 x = __raw_readb(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef readsw +#define readsw readsw +static inline void readsw(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u16 *buf = buffer; + + do { + u16 x = __raw_readw(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef readsl +#define readsl readsl +static inline void readsl(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u32 *buf = buffer; + + do { + u32 x = __raw_readl(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef readsq +#define readsq readsq +static inline void readsq(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u64 *buf = buffer; + + do { + u64 x = __raw_readq(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef writesb +#define writesb writesb +static inline void writesb(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u8 *buf = buffer; + + do { + __raw_writeb(*buf++, addr); + } while (--count); + } +} +#endif + +#ifndef writesw +#define writesw writesw +static inline void writesw(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u16 *buf = buffer; + + do { + __raw_writew(*buf++, addr); + } while (--count); + } +} +#endif + +#ifndef writesl +#define writesl writesl +static inline void writesl(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u32 *buf = buffer; + + do { + __raw_writel(*buf++, addr); + } while (--count); + } +} +#endif + +#ifndef writesq +#define writesq writesq +static inline void writesq(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u64 *buf = buffer; + + do { + __raw_writeq(*buf++, addr); + } while (--count); + } +} +#endif + +#endif /* _TOOLS_ASM_GENERIC_IO_H */ diff --git a/tools/include/asm/io.h b/tools/include/asm/io.h new file mode 100644 index 000000000000..9ae219b12604 --- /dev/null +++ b/tools/include/asm/io.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_ASM_IO_H +#define _TOOLS_ASM_IO_H + +#include + +#endif /* _TOOLS_ASM_IO_H */ diff --git a/tools/include/linux/io.h b/tools/include/linux/io.h index e129871fe661..4b94b84160b8 100644 --- a/tools/include/linux/io.h +++ b/tools/include/linux/io.h @@ -2,4 +2,6 @@ #ifndef _TOOLS_IO_H #define _TOOLS_IO_H -#endif +#include + +#endif /* _TOOLS_IO_H */ -- cgit v1.2.3 From ce5dc9aa72d9c3d6cb14b3a6aab900124999d8d0 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 22 Aug 2025 21:25:03 +0000 Subject: tools headers: Import x86 MMIO helper overrides Import the x86-specific overrides for from the kernel headers into tools/include/. Changes made when importing: - Replace CONFIG_X86_64 with __x86_64__. Acked-by: Shuah Khan Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20250822212518.4156428-17-dmatlack@google.com Signed-off-by: Alex Williamson --- tools/arch/x86/include/asm/io.h | 75 +++++++++++++++++++++++++++++++++++++++++ tools/include/asm/io.h | 4 +++ 2 files changed, 79 insertions(+) create mode 100644 tools/arch/x86/include/asm/io.h (limited to 'tools/include') diff --git a/tools/arch/x86/include/asm/io.h b/tools/arch/x86/include/asm/io.h new file mode 100644 index 000000000000..4c787a2363de --- /dev/null +++ b/tools/arch/x86/include/asm/io.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_ASM_X86_IO_H +#define _TOOLS_ASM_X86_IO_H + +#include +#include + +#define build_mmio_read(name, size, type, reg, barrier) \ +static inline type name(const volatile void __iomem *addr) \ +{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \ +:"m" (*(volatile type __force *)addr) barrier); return ret; } + +#define build_mmio_write(name, size, type, reg, barrier) \ +static inline void name(type val, volatile void __iomem *addr) \ +{ asm volatile("mov" size " %0,%1": :reg (val), \ +"m" (*(volatile type __force *)addr) barrier); } + +build_mmio_read(readb, "b", unsigned char, "=q", :"memory") +build_mmio_read(readw, "w", unsigned short, "=r", :"memory") +build_mmio_read(readl, "l", unsigned int, "=r", :"memory") + +build_mmio_read(__readb, "b", unsigned char, "=q", ) +build_mmio_read(__readw, "w", unsigned short, "=r", ) +build_mmio_read(__readl, "l", unsigned int, "=r", ) + +build_mmio_write(writeb, "b", unsigned char, "q", :"memory") +build_mmio_write(writew, "w", unsigned short, "r", :"memory") +build_mmio_write(writel, "l", unsigned int, "r", :"memory") + +build_mmio_write(__writeb, "b", unsigned char, "q", ) +build_mmio_write(__writew, "w", unsigned short, "r", ) +build_mmio_write(__writel, "l", unsigned int, "r", ) + +#define readb readb +#define readw readw +#define readl readl +#define readb_relaxed(a) __readb(a) +#define readw_relaxed(a) __readw(a) +#define readl_relaxed(a) __readl(a) +#define __raw_readb __readb +#define __raw_readw __readw +#define __raw_readl __readl + +#define writeb writeb +#define writew writew +#define writel writel +#define writeb_relaxed(v, a) __writeb(v, a) +#define writew_relaxed(v, a) __writew(v, a) +#define writel_relaxed(v, a) __writel(v, a) +#define __raw_writeb __writeb +#define __raw_writew __writew +#define __raw_writel __writel + +#ifdef __x86_64__ + +build_mmio_read(readq, "q", u64, "=r", :"memory") +build_mmio_read(__readq, "q", u64, "=r", ) +build_mmio_write(writeq, "q", u64, "r", :"memory") +build_mmio_write(__writeq, "q", u64, "r", ) + +#define readq_relaxed(a) __readq(a) +#define writeq_relaxed(v, a) __writeq(v, a) + +#define __raw_readq __readq +#define __raw_writeq __writeq + +/* Let people know that we have them */ +#define readq readq +#define writeq writeq + +#endif /* __x86_64__ */ + +#include + +#endif /* _TOOLS_ASM_X86_IO_H */ diff --git a/tools/include/asm/io.h b/tools/include/asm/io.h index 9ae219b12604..eed5066f25c4 100644 --- a/tools/include/asm/io.h +++ b/tools/include/asm/io.h @@ -2,6 +2,10 @@ #ifndef _TOOLS_ASM_IO_H #define _TOOLS_ASM_IO_H +#if defined(__i386__) || defined(__x86_64__) +#include "../../arch/x86/include/asm/io.h" +#else #include +#endif #endif /* _TOOLS_ASM_IO_H */ -- cgit v1.2.3 From dc0e216cf00b74cf61fdc882f8373beb8cdbec5e Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 22 Aug 2025 21:25:04 +0000 Subject: tools headers: Add symlink to linux/pci_ids.h Add a symlink to include/linux/pci_ids.h to tools/include/. This will be used by VFIO selftests in subsequent commits to match device and vendor IDs. Acked-by: Shuah Khan Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20250822212518.4156428-18-dmatlack@google.com Signed-off-by: Alex Williamson --- tools/include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) create mode 120000 tools/include/linux/pci_ids.h (limited to 'tools/include') diff --git a/tools/include/linux/pci_ids.h b/tools/include/linux/pci_ids.h new file mode 120000 index 000000000000..1c9e88f41261 --- /dev/null +++ b/tools/include/linux/pci_ids.h @@ -0,0 +1 @@ +../../../include/linux/pci_ids.h \ No newline at end of file -- cgit v1.2.3 From d1ff0e2d13d6ac3a15be7870e15216726b0a809a Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 20 Aug 2025 10:29:27 +0200 Subject: tools/nolibc: avoid error in dup2() if old fd equals new fd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dup2() allows both 'old' and 'new' to have the same value, which dup3() does not. If libc dup2() is implemented through the dup3() system call, then it would incorrectly fail in this case. Avoid the error by handling old == new explicitly. Fixes: 30ca20517ac1 ("tools headers: Move the nolibc header from rcutorture to tools/include/nolibc/") Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250820-nolibc-dup2-einval-v2-1-807185a45c56@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/sys.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 295e71d34aba..90aadad31f6c 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -238,6 +238,19 @@ static __attribute__((unused)) int sys_dup2(int old, int new) { #if defined(__NR_dup3) + int ret, nr_fcntl; + +#ifdef __NR_fcntl64 + nr_fcntl = __NR_fcntl64; +#else + nr_fcntl = __NR_fcntl; +#endif + + if (old == new) { + ret = my_syscall2(nr_fcntl, old, F_GETFD); + return ret < 0 ? ret : old; + } + return my_syscall3(__NR_dup3, old, new, 0); #elif defined(__NR_dup2) return my_syscall2(__NR_dup2, old, new); -- cgit v1.2.3 From b22d81ed319cbe2d5b45f605cab18aaf82a66a50 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sun, 24 Aug 2025 08:58:46 +0200 Subject: tools/nolibc: use tabs instead of spaces for indentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some lines are using spaces for indentation instead of the standard tabs. Fix them. Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/sys/random.h | 4 ++-- tools/include/nolibc/unistd.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys/random.h b/tools/include/nolibc/sys/random.h index 8d9749f1c845..cd5d25c571a8 100644 --- a/tools/include/nolibc/sys/random.h +++ b/tools/include/nolibc/sys/random.h @@ -22,13 +22,13 @@ static __attribute__((unused)) ssize_t sys_getrandom(void *buf, size_t buflen, unsigned int flags) { - return my_syscall3(__NR_getrandom, buf, buflen, flags); + return my_syscall3(__NR_getrandom, buf, buflen, flags); } static __attribute__((unused)) ssize_t getrandom(void *buf, size_t buflen, unsigned int flags) { - return __sysret(sys_getrandom(buf, buflen, flags)); + return __sysret(sys_getrandom(buf, buflen, flags)); } #endif /* _NOLIBC_SYS_RANDOM_H */ diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h index 25bfc7732ec7..7405fa2b89ba 100644 --- a/tools/include/nolibc/unistd.h +++ b/tools/include/nolibc/unistd.h @@ -33,7 +33,7 @@ static __attribute__((unused)) int sys_faccessat(int fd, const char *path, int amode, int flag) { - return my_syscall4(__NR_faccessat, fd, path, amode, flag); + return my_syscall4(__NR_faccessat, fd, path, amode, flag); } static __attribute__((unused)) -- cgit v1.2.3 From e6366101ce1fab9e42ae66ff0fed5360fce65bb5 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 21 Aug 2025 17:40:32 +0200 Subject: tools/nolibc: remove __nolibc_enosys() fallback from time64-related functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These fallbacks where added when no explicit fallbacks for time64 was implemented. Now that these fallbacks are in place, the additional fallback to __nolibc_enosys() is superfluous. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250821-nolibc-enosys-v1-1-4b63f2caaa89@weissschuh.net --- tools/include/nolibc/poll.h | 4 +--- tools/include/nolibc/sys.h | 4 +--- tools/include/nolibc/sys/timerfd.h | 8 ++------ tools/include/nolibc/time.h | 8 ++------ 4 files changed, 6 insertions(+), 18 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h index 1765acb17ea0..0d053f93ea99 100644 --- a/tools/include/nolibc/poll.h +++ b/tools/include/nolibc/poll.h @@ -39,10 +39,8 @@ int sys_poll(struct pollfd *fds, int nfds, int timeout) t.tv_nsec = (timeout % 1000) * 1000000; } return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); -#elif defined(__NR_poll) - return my_syscall3(__NR_poll, fds, nfds, timeout); #else - return __nolibc_enosys(__func__, fds, nfds, timeout); + return my_syscall3(__NR_poll, fds, nfds, timeout); #endif } diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 90aadad31f6c..80a5cef3bdf4 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -814,7 +814,7 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva t.tv_nsec = timeout->tv_usec * 1000; } return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); -#elif defined(__NR_pselect6_time64) +#else struct __kernel_timespec t; if (timeout) { @@ -822,8 +822,6 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva t.tv_nsec = timeout->tv_usec * 1000; } return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); -#else - return __nolibc_enosys(__func__, nfds, rfds, wfds, efds, timeout); #endif } diff --git a/tools/include/nolibc/sys/timerfd.h b/tools/include/nolibc/sys/timerfd.h index 4375d546ba58..5dd61030c991 100644 --- a/tools/include/nolibc/sys/timerfd.h +++ b/tools/include/nolibc/sys/timerfd.h @@ -34,7 +34,7 @@ int sys_timerfd_gettime(int fd, struct itimerspec *curr_value) { #if defined(__NR_timerfd_gettime) return my_syscall2(__NR_timerfd_gettime, fd, curr_value); -#elif defined(__NR_timerfd_gettime64) +#else struct __kernel_itimerspec kcurr_value; int ret; @@ -42,8 +42,6 @@ int sys_timerfd_gettime(int fd, struct itimerspec *curr_value) __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval); __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value); return ret; -#else - return __nolibc_enosys(__func__, fd, curr_value); #endif } @@ -60,7 +58,7 @@ int sys_timerfd_settime(int fd, int flags, { #if defined(__NR_timerfd_settime) return my_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value); -#elif defined(__NR_timerfd_settime64) +#else struct __kernel_itimerspec knew_value, kold_value; int ret; @@ -72,8 +70,6 @@ int sys_timerfd_settime(int fd, int flags, __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value); } return ret; -#else - return __nolibc_enosys(__func__, fd, flags, new_value, old_value); #endif } diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index e9c1b976791a..6c276b8d646a 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -45,7 +45,7 @@ int sys_clock_getres(clockid_t clockid, struct timespec *res) { #if defined(__NR_clock_getres) return my_syscall2(__NR_clock_getres, clockid, res); -#elif defined(__NR_clock_getres_time64) +#else struct __kernel_timespec kres; int ret; @@ -53,8 +53,6 @@ int sys_clock_getres(clockid_t clockid, struct timespec *res) if (res) __nolibc_timespec_kernel_to_user(&kres, res); return ret; -#else - return __nolibc_enosys(__func__, clockid, res); #endif } @@ -69,7 +67,7 @@ int sys_clock_gettime(clockid_t clockid, struct timespec *tp) { #if defined(__NR_clock_gettime) return my_syscall2(__NR_clock_gettime, clockid, tp); -#elif defined(__NR_clock_gettime64) +#else struct __kernel_timespec ktp; int ret; @@ -77,8 +75,6 @@ int sys_clock_gettime(clockid_t clockid, struct timespec *tp) if (tp) __nolibc_timespec_kernel_to_user(&ktp, tp); return ret; -#else - return __nolibc_enosys(__func__, clockid, tp); #endif } -- cgit v1.2.3 From 4b6ffb2d87a76b02f63d029533cb3e8c1482f358 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 21 Aug 2025 17:40:33 +0200 Subject: tools/nolibc: remove __nolibc_enosys() fallback from *at() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All architectures have had one of the real functions available since Linux 2.6.12. The additional fallback to __nolibc_enosys() is superfluous. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250821-nolibc-enosys-v1-2-4b63f2caaa89@weissschuh.net --- tools/include/nolibc/sys.h | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 80a5cef3bdf4..12030c803173 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -142,10 +142,8 @@ int sys_chmod(const char *path, mode_t mode) { #if defined(__NR_fchmodat) return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); -#elif defined(__NR_chmod) - return my_syscall2(__NR_chmod, path, mode); #else - return __nolibc_enosys(__func__, path, mode); + return my_syscall2(__NR_chmod, path, mode); #endif } @@ -165,10 +163,8 @@ int sys_chown(const char *path, uid_t owner, gid_t group) { #if defined(__NR_fchownat) return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); -#elif defined(__NR_chown) - return my_syscall3(__NR_chown, path, owner, group); #else - return __nolibc_enosys(__func__, path, owner, group); + return my_syscall3(__NR_chown, path, owner, group); #endif } @@ -582,10 +578,8 @@ int sys_link(const char *old, const char *new) { #if defined(__NR_linkat) return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); -#elif defined(__NR_link) - return my_syscall2(__NR_link, old, new); #else - return __nolibc_enosys(__func__, old, new); + return my_syscall2(__NR_link, old, new); #endif } @@ -653,10 +647,8 @@ int sys_mkdir(const char *path, mode_t mode) { #if defined(__NR_mkdirat) return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); -#elif defined(__NR_mkdir) - return my_syscall2(__NR_mkdir, path, mode); #else - return __nolibc_enosys(__func__, path, mode); + return my_syscall2(__NR_mkdir, path, mode); #endif } @@ -675,10 +667,8 @@ int sys_rmdir(const char *path) { #if defined(__NR_rmdir) return my_syscall1(__NR_rmdir, path); -#elif defined(__NR_unlinkat) - return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR); #else - return __nolibc_enosys(__func__, path); + return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR); #endif } @@ -698,10 +688,8 @@ long sys_mknod(const char *path, mode_t mode, dev_t dev) { #if defined(__NR_mknodat) return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); -#elif defined(__NR_mknod) - return my_syscall3(__NR_mknod, path, mode, dev); #else - return __nolibc_enosys(__func__, path, mode, dev); + return my_syscall3(__NR_mknod, path, mode, dev); #endif } @@ -885,10 +873,8 @@ int sys_symlink(const char *old, const char *new) { #if defined(__NR_symlinkat) return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); -#elif defined(__NR_symlink) - return my_syscall2(__NR_symlink, old, new); #else - return __nolibc_enosys(__func__, old, new); + return my_syscall2(__NR_symlink, old, new); #endif } @@ -942,10 +928,8 @@ int sys_unlink(const char *path) { #if defined(__NR_unlinkat) return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); -#elif defined(__NR_unlink) - return my_syscall1(__NR_unlink, path); #else - return __nolibc_enosys(__func__, path); + return my_syscall1(__NR_unlink, path); #endif } -- cgit v1.2.3 From 09adec1f4b446e8788c70022fae41356ee916260 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 21 Aug 2025 17:40:34 +0200 Subject: tools/nolibc: remove __nolibc_enosys() fallback from dup2() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All architectures have one of the real functions available. The additional fallback to __nolibc_enosys() is superfluous. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250821-nolibc-enosys-v1-3-4b63f2caaa89@weissschuh.net --- tools/include/nolibc/sys.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 12030c803173..32cc741f2f7d 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -248,10 +248,8 @@ int sys_dup2(int old, int new) } return my_syscall3(__NR_dup3, old, new, 0); -#elif defined(__NR_dup2) - return my_syscall2(__NR_dup2, old, new); #else - return __nolibc_enosys(__func__, old, new); + return my_syscall2(__NR_dup2, old, new); #endif } -- cgit v1.2.3 From fbd47de75502c63933016912325582309d72cd03 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 21 Aug 2025 17:40:35 +0200 Subject: tools/nolibc: remove __nolibc_enosys() fallback from fork functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All architectures have one of the real functions available. The additional fallback to __nolibc_enosys() is superfluous. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250821-nolibc-enosys-v1-4-4b63f2caaa89@weissschuh.net --- tools/include/nolibc/sys.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 32cc741f2f7d..34f20182792a 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -334,10 +334,8 @@ pid_t sys_fork(void) * will not use the rest with no other flag. */ return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0); -#elif defined(__NR_fork) - return my_syscall0(__NR_fork); #else - return __nolibc_enosys(__func__); + return my_syscall0(__NR_fork); #endif } #endif @@ -354,7 +352,7 @@ pid_t sys_vfork(void) { #if defined(__NR_vfork) return my_syscall0(__NR_vfork); -#elif defined(__NR_clone3) +#else /* * clone() could be used but has different argument orders per * architecture. @@ -365,8 +363,6 @@ pid_t sys_vfork(void) }; return my_syscall2(__NR_clone3, &args, sizeof(args)); -#else - return __nolibc_enosys(__func__); #endif } #endif -- cgit v1.2.3 From f11e156e0f1a582744fc04b614599fc8bcfc1da6 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 21 Aug 2025 17:40:36 +0200 Subject: tools/nolibc: fold llseek fallback into lseek() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align the implementation of the fallback handling inside sys_lseek() with the rest of nolibc. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250821-nolibc-enosys-v1-5-4b63f2caaa89@weissschuh.net --- tools/include/nolibc/sys.h | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 34f20182792a..c5564f57deec 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -594,41 +594,27 @@ off_t sys_lseek(int fd, off_t offset, int whence) #if defined(__NR_lseek) return my_syscall3(__NR_lseek, fd, offset, whence); #else - return __nolibc_enosys(__func__, fd, offset, whence); -#endif -} + __kernel_loff_t loff = 0; + off_t result; + int ret; -static __attribute__((unused)) -int sys_llseek(int fd, unsigned long offset_high, unsigned long offset_low, - __kernel_loff_t *result, int whence) -{ -#if defined(__NR_llseek) - return my_syscall5(__NR_llseek, fd, offset_high, offset_low, result, whence); -#else - return __nolibc_enosys(__func__, fd, offset_high, offset_low, result, whence); + /* Only exists on 32bit where nolibc off_t is also 32bit */ + ret = my_syscall5(__NR_llseek, fd, 0, offset, &loff, whence); + if (ret < 0) + result = ret; + else if (loff != (off_t)loff) + result = -EOVERFLOW; + else + result = loff; + + return result; #endif } static __attribute__((unused)) off_t lseek(int fd, off_t offset, int whence) { - __kernel_loff_t loff = 0; - off_t result; - int ret; - - result = sys_lseek(fd, offset, whence); - if (result == -ENOSYS) { - /* Only exists on 32bit where nolibc off_t is also 32bit */ - ret = sys_llseek(fd, 0, offset, &loff, whence); - if (ret < 0) - result = ret; - else if (loff != (off_t)loff) - result = -EOVERFLOW; - else - result = loff; - } - - return __sysret(result); + return __sysret(sys_lseek(fd, offset, whence)); } -- cgit v1.2.3 From 4c2ef951cfe68bcd823d59a9e7ef9e3c16aa3974 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 21 Aug 2025 17:40:38 +0200 Subject: tools/nolibc: drop wait4() support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not all architectures implement the wait4() syscall. It can be implemented in terms of the waitid() syscall, but that would require some rework of the other wait-related functions in wait.h. As wait4() is non-standard and deprecated, remove it. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250821-nolibc-enosys-v1-7-4b63f2caaa89@weissschuh.net --- tools/include/nolibc/sys/wait.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h index 56ddb806da7f..4e66e1f7a03e 100644 --- a/tools/include/nolibc/sys/wait.h +++ b/tools/include/nolibc/sys/wait.h @@ -16,27 +16,10 @@ /* * pid_t wait(int *status); - * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage); * pid_t waitpid(pid_t pid, int *status, int options); * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); */ -static __attribute__((unused)) -pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ -#ifdef __NR_wait4 - return my_syscall4(__NR_wait4, pid, status, options, rusage); -#else - return __nolibc_enosys(__func__, pid, status, options, rusage); -#endif -} - -static __attribute__((unused)) -pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ - return __sysret(sys_wait4(pid, status, options, rusage)); -} - static __attribute__((unused)) int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage) { -- cgit v1.2.3 From ff0db419b278256a45db9bf6bd3f9a9a2c22b762 Mon Sep 17 00:00:00 2001 From: Brendan Jackman Date: Thu, 28 Aug 2025 12:27:58 +0000 Subject: tools/include: implement a couple of atomic_t ops Patch series "tools: testing: Use existing atomic.h for vma/maple tests", v2. De-duplicating this lets us delete a bit of code. Ulterior motive: I'm working on a new set of the userspace-based unit tests, which will need the atomics API too. That would involve even more duplication, so while the win in this patchset alone is very minimal, it looks a lot more significant with my other WIP patchset. I've tested these commands: make -C tools/testing/vma -j tools/testing/vma/vma make -C tools/testing/radix-tree -j tools/testing/radix-tree/maple Note the EXTRA_CFLAGS patch is actually orthogonal, let me know if you'd prefer I send it separately. This patch (of 4): The VMA tests need an operation equivalent to atomic_inc_unless_negative() to implement a fake mapping_map_writable(). Adding it will enable them to switch to the shared atomic headers and simplify that fake implementation. In order to add that, also add atomic_try_cmpxchg() which can be used to implement it. This is copied from Documentation/atomic_t.txt. Then, implement atomic_inc_unless_negative() itself based on the raw_atomic_dec_unless_positive() in include/linux/atomic/atomic-arch-fallback.h. There's no present need for a highly-optimised version of this (nor any reason to think this implementation is sub-optimal on x86) so just implement this with generic C, no x86-specifics. Link: https://lkml.kernel.org/r/20250828-b4-vma-no-atomic-h-v2-0-02d146a58ed2@google.com Link: https://lkml.kernel.org/r/20250828-b4-vma-no-atomic-h-v2-1-02d146a58ed2@google.com Signed-off-by: Brendan Jackman Reviewed-by: Pedro Falcato Cc: Jann Horn Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/include/linux/atomic.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'tools/include') diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h index 01907b33537e..50c66ba9ada5 100644 --- a/tools/include/linux/atomic.h +++ b/tools/include/linux/atomic.h @@ -12,4 +12,26 @@ void atomic_long_set(atomic_long_t *v, long i); #define atomic_cmpxchg_release atomic_cmpxchg #endif /* atomic_cmpxchg_relaxed */ +static inline bool atomic_try_cmpxchg(atomic_t *ptr, int *oldp, int new) +{ + int ret, old = *oldp; + + ret = atomic_cmpxchg(ptr, old, new); + if (ret != old) + *oldp = ret; + return ret == old; +} + +static inline bool atomic_inc_unless_negative(atomic_t *v) +{ + int c = atomic_read(v); + + do { + if (unlikely(c < 0)) + return false; + } while (!atomic_try_cmpxchg(v, &c, c + 1)); + + return true; +} + #endif /* __TOOLS_LINUX_ATOMIC_H */ -- cgit v1.2.3 From baefdbdf6812e120c9fba9cfb101d3656f478026 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 14 Sep 2025 23:51:31 +0200 Subject: bpf: Implement exclusive map creation Exclusive maps allow maps to only be accessed by program with a program with a matching hash which is specified in the excl_prog_hash attr. For the signing use-case, this allows the trusted loader program to load the map and verify the integrity Signed-off-by: KP Singh Link: https://lore.kernel.org/r/20250914215141.15144-3-kpsingh@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 6 ++++++ kernel/bpf/syscall.c | 31 +++++++++++++++++++++++++++---- kernel/bpf/verifier.c | 6 ++++++ tools/include/uapi/linux/bpf.h | 6 ++++++ 5 files changed, 46 insertions(+), 4 deletions(-) (limited to 'tools/include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d75902074bd1..c6a6ee1b2938 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -329,6 +329,7 @@ struct bpf_map { atomic64_t sleepable_refcnt; s64 __percpu *elem_count; u64 cookie; /* write-once */ + char *excl_prog_sha; }; static inline const char *btf_field_type_name(enum btf_field_type type) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 233de8677382..57687b2e1c47 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1522,6 +1522,12 @@ union bpf_attr { * If provided, map_flags should have BPF_F_TOKEN_FD flag set. */ __s32 map_token_fd; + + /* Hash of the program that has exclusive access to the map. + */ + __aligned_u64 excl_prog_hash; + /* Size of the passed excl_prog_hash. */ + __u32 excl_prog_hash_size; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3f178a0f8eb1..c8ef91acfe98 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -860,6 +860,7 @@ static void bpf_map_free(struct bpf_map *map) * the free of values or special fields allocated from bpf memory * allocator. */ + kfree(map->excl_prog_sha); migrate_disable(); map->ops->map_free(map); migrate_enable(); @@ -1338,9 +1339,9 @@ static bool bpf_net_capable(void) return capable(CAP_NET_ADMIN) || capable(CAP_SYS_ADMIN); } -#define BPF_MAP_CREATE_LAST_FIELD map_token_fd +#define BPF_MAP_CREATE_LAST_FIELD excl_prog_hash_size /* called via syscall */ -static int map_create(union bpf_attr *attr, bool kernel) +static int map_create(union bpf_attr *attr, bpfptr_t uattr) { const struct bpf_map_ops *ops; struct bpf_token *token = NULL; @@ -1534,7 +1535,29 @@ static int map_create(union bpf_attr *attr, bool kernel) attr->btf_vmlinux_value_type_id; } - err = security_bpf_map_create(map, attr, token, kernel); + if (attr->excl_prog_hash) { + bpfptr_t uprog_hash = make_bpfptr(attr->excl_prog_hash, uattr.is_kernel); + + if (attr->excl_prog_hash_size != SHA256_DIGEST_SIZE) { + err = -EINVAL; + goto free_map; + } + + map->excl_prog_sha = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); + if (!map->excl_prog_sha) { + err = -ENOMEM; + goto free_map; + } + + if (copy_from_bpfptr(map->excl_prog_sha, uprog_hash, SHA256_DIGEST_SIZE)) { + err = -EFAULT; + goto free_map; + } + } else if (attr->excl_prog_hash_size) { + return -EINVAL; + } + + err = security_bpf_map_create(map, attr, token, uattr.is_kernel); if (err) goto free_map_sec; @@ -6008,7 +6031,7 @@ static int __sys_bpf(enum bpf_cmd cmd, bpfptr_t uattr, unsigned int size) switch (cmd) { case BPF_MAP_CREATE: - err = map_create(&attr, uattr.is_kernel); + err = map_create(&attr, uattr); break; case BPF_MAP_LOOKUP_ELEM: err = map_lookup_elem(&attr); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6625570ac23d..aef6b266f08d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -20407,6 +20407,12 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, { enum bpf_prog_type prog_type = resolve_prog_type(prog); + if (map->excl_prog_sha && + memcmp(map->excl_prog_sha, prog->digest, SHA256_DIGEST_SIZE)) { + verbose(env, "program's hash doesn't match map's excl_prog_hash\n"); + return -EACCES; + } + if (btf_record_has_field(map->record, BPF_LIST_HEAD) || btf_record_has_field(map->record, BPF_RB_ROOT)) { if (is_tracing_prog_type(prog_type)) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 233de8677382..57687b2e1c47 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1522,6 +1522,12 @@ union bpf_attr { * If provided, map_flags should have BPF_F_TOKEN_FD flag set. */ __s32 map_token_fd; + + /* Hash of the program that has exclusive access to the map. + */ + __aligned_u64 excl_prog_hash; + /* Size of the passed excl_prog_hash. */ + __u32 excl_prog_hash_size; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */ -- cgit v1.2.3 From ea2e6467ac36bf3d785defc89e58269b15d182f7 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 14 Sep 2025 23:51:35 +0200 Subject: bpf: Return hashes of maps in BPF_OBJ_GET_INFO_BY_FD Currently only array maps are supported, but the implementation can be extended for other maps and objects. The hash is memoized only for exclusive and frozen maps as their content is stable until the exclusive program modifies the map. This is required for BPF signing, enabling a trusted loader program to verify a map's integrity. The loader retrieves the map's runtime hash from the kernel and compares it against an expected hash computed at build time. Signed-off-by: KP Singh Link: https://lore.kernel.org/r/20250914215141.15144-7-kpsingh@kernel.org Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 3 +++ include/uapi/linux/bpf.h | 2 ++ kernel/bpf/arraymap.c | 13 ++++++++++++ kernel/bpf/syscall.c | 23 ++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 2 ++ .../testing/selftests/bpf/progs/verifier_map_ptr.c | 7 +++++-- 6 files changed, 48 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c6a6ee1b2938..e0c2c78a5faa 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -110,6 +111,7 @@ struct bpf_map_ops { long (*map_pop_elem)(struct bpf_map *map, void *value); long (*map_peek_elem)(struct bpf_map *map, void *value); void *(*map_lookup_percpu_elem)(struct bpf_map *map, void *key, u32 cpu); + int (*map_get_hash)(struct bpf_map *map, u32 hash_buf_size, void *hash_buf); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, @@ -289,6 +291,7 @@ struct bpf_map_owner { }; struct bpf_map { + u8 sha[SHA256_DIGEST_SIZE]; const struct bpf_map_ops *ops; struct bpf_map *inner_map_meta; #ifdef CONFIG_SECURITY diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 57687b2e1c47..0987b52d5648 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6672,6 +6672,8 @@ struct bpf_map_info { __u32 btf_value_type_id; __u32 btf_vmlinux_id; __u64 map_extra; + __aligned_u64 hash; + __u32 hash_size; } __attribute__((aligned(8))); struct bpf_btf_info { diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 3d080916faf9..26d5dda989bc 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "map_in_map.h" @@ -174,6 +175,17 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) return array->value + (u64)array->elem_size * (index & array->index_mask); } +static int array_map_get_hash(struct bpf_map *map, u32 hash_buf_size, + void *hash_buf) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + + sha256(array->value, (u64)array->elem_size * array->map.max_entries, + hash_buf); + memcpy(array->map.sha, hash_buf, sizeof(array->map.sha)); + return 0; +} + static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, u32 off) { @@ -800,6 +812,7 @@ const struct bpf_map_ops array_map_ops = { .map_mem_usage = array_map_mem_usage, .map_btf_id = &array_map_btf_ids[0], .iter_seq_info = &iter_seq_info, + .map_get_hash = &array_map_get_hash, }; const struct bpf_map_ops percpu_array_map_ops = { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c8ef91acfe98..cf7173b1bb83 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com */ +#include #include #include #include @@ -5184,6 +5185,9 @@ static int bpf_map_get_info_by_fd(struct file *file, info_len = min_t(u32, sizeof(info), info_len); memset(&info, 0, sizeof(info)); + if (copy_from_user(&info, uinfo, info_len)) + return -EFAULT; + info.type = map->map_type; info.id = map->id; info.key_size = map->key_size; @@ -5208,6 +5212,25 @@ static int bpf_map_get_info_by_fd(struct file *file, return err; } + if (info.hash) { + char __user *uhash = u64_to_user_ptr(info.hash); + + if (!map->ops->map_get_hash) + return -EINVAL; + + if (info.hash_size != SHA256_DIGEST_SIZE) + return -EINVAL; + + err = map->ops->map_get_hash(map, SHA256_DIGEST_SIZE, map->sha); + if (err != 0) + return err; + + if (copy_to_user(uhash, map->sha, SHA256_DIGEST_SIZE) != 0) + return -EFAULT; + } else if (info.hash_size) { + return -EINVAL; + } + if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) return -EFAULT; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 57687b2e1c47..0987b52d5648 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6672,6 +6672,8 @@ struct bpf_map_info { __u32 btf_value_type_id; __u32 btf_vmlinux_id; __u64 map_extra; + __aligned_u64 hash; + __u32 hash_size; } __attribute__((aligned(8))); struct bpf_btf_info { diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c index 11a079145966..e2767d27d8aa 100644 --- a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c @@ -70,10 +70,13 @@ __naked void bpf_map_ptr_write_rejected(void) : __clobber_all); } +/* The first element of struct bpf_map is a SHA256 hash of 32 bytes, accessing + * into this array is valid. The opts field is now at offset 33. + */ SEC("socket") __description("bpf_map_ptr: read non-existent field rejected") __failure -__msg("cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4") +__msg("cannot access ptr member ops with moff 32 in struct bpf_map with off 33 size 4") __failure_unpriv __msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN") __flag(BPF_F_ANY_ALIGNMENT) @@ -82,7 +85,7 @@ __naked void read_non_existent_field_rejected(void) asm volatile (" \ r6 = 0; \ r1 = %[map_array_48b] ll; \ - r6 = *(u32*)(r1 + 1); \ + r6 = *(u32*)(r1 + 33); \ r0 = 1; \ exit; \ " : -- cgit v1.2.3 From 87a1716c7d6551e59ca4bd9cdcdf04c67deaa337 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 12 Sep 2025 13:52:54 +0200 Subject: tools: update nsfs.h uapi header Update the nsfs.h tools header to the uapi/nsfs.h header so we can rely on it in the selftests. Signed-off-by: Christian Brauner --- tools/include/uapi/linux/nsfs.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h index 34127653fd00..33c9b578b3b2 100644 --- a/tools/include/uapi/linux/nsfs.h +++ b/tools/include/uapi/linux/nsfs.h @@ -16,8 +16,6 @@ #define NS_GET_NSTYPE _IO(NSIO, 0x3) /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) -/* Get the id for a mount namespace */ -#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64) /* Translate pid from target pid namespace into the caller's pid namespace. */ #define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) /* Return thread-group leader id of pid in the callers pid namespace. */ @@ -42,4 +40,19 @@ struct mnt_ns_info { /* Get previous namespace. */ #define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info) +/* Retrieve namespace identifiers. */ +#define NS_GET_MNTNS_ID _IOR(NSIO, 5, __u64) +#define NS_GET_ID _IOR(NSIO, 13, __u64) + +enum init_ns_ino { + IPC_NS_INIT_INO = 0xEFFFFFFFU, + UTS_NS_INIT_INO = 0xEFFFFFFEU, + USER_NS_INIT_INO = 0xEFFFFFFDU, + PID_NS_INIT_INO = 0xEFFFFFFCU, + CGROUP_NS_INIT_INO = 0xEFFFFFFBU, + TIME_NS_INIT_INO = 0xEFFFFFFAU, + NET_NS_INIT_INO = 0xEFFFFFF9U, + MNT_NS_INIT_INO = 0xEFFFFFF8U, +}; + #endif /* __LINUX_NSFS_H */ -- cgit v1.2.3 From 0ff52df6b32a6b04a7c9dfe3d7a387aff215b482 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Fri, 19 Sep 2025 01:46:43 +0000 Subject: tools/nolibc: make time_t robust if __kernel_old_time_t is missing in host headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit d5094bcb5bfd ("tools/nolibc: define time_t in terms of __kernel_old_time_t") made nolibc use the kernel's time type so that `time_t` matches `timespec::tv_sec` on all ABIs (notably x32). But since __kernel_old_time_t is fairly new, notably from 2020 in commit 94c467ddb273 ("y2038: add __kernel_old_timespec and __kernel_old_time_t"), nolibc builds that rely on host headers may fail. Switch to __kernel_time_t, which is the same as __kernel_old_time_t and has existed for longer. Tested in PPC VM of Open Source Lab of Oregon State University (./tools/testing/selftests/rcutorture/bin/mkinitrd.sh) Fixes: d5094bcb5bfd ("tools/nolibc: define time_t in terms of __kernel_old_time_t") Signed-off-by: Zhouyi Zhou [Thomas: Reformat commit and its message a bit] Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/std.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h index ba950f0e7338..2c1ad23b9b5c 100644 --- a/tools/include/nolibc/std.h +++ b/tools/include/nolibc/std.h @@ -29,6 +29,6 @@ typedef unsigned long nlink_t; typedef signed long off_t; typedef signed long blksize_t; typedef signed long blkcnt_t; -typedef __kernel_old_time_t time_t; +typedef __kernel_time_t time_t; #endif /* _NOLIBC_STD_H */ -- cgit v1.2.3 From 349271568303695f0ac3563af153d2b4542f6986 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Sun, 21 Sep 2025 18:01:16 +0200 Subject: bpf: Implement signature verification for BPF programs This patch extends the BPF_PROG_LOAD command by adding three new fields to `union bpf_attr` in the user-space API: - signature: A pointer to the signature blob. - signature_size: The size of the signature blob. - keyring_id: The serial number of a loaded kernel keyring (e.g., the user or session keyring) containing the trusted public keys. When a BPF program is loaded with a signature, the kernel: 1. Retrieves the trusted keyring using the provided `keyring_id`. 2. Verifies the supplied signature against the BPF program's instruction buffer. 3. If the signature is valid and was generated by a key in the trusted keyring, the program load proceeds. 4. If no signature is provided, the load proceeds as before, allowing for backward compatibility. LSMs can chose to restrict unsigned programs and implement a security policy. 5. If signature verification fails for any reason, the program is not loaded. Tested-by: syzbot@syzkaller.appspotmail.com Signed-off-by: KP Singh Link: https://lore.kernel.org/r/20250921160120.9711-2-kpsingh@kernel.org Signed-off-by: Alexei Starovoitov --- crypto/asymmetric_keys/pkcs7_verify.c | 1 + include/linux/verification.h | 1 + include/uapi/linux/bpf.h | 10 ++++++++ kernel/bpf/helpers.c | 2 +- kernel/bpf/syscall.c | 45 ++++++++++++++++++++++++++++++++++- tools/include/uapi/linux/bpf.h | 10 ++++++++ tools/lib/bpf/bpf.c | 2 +- 7 files changed, 68 insertions(+), 3 deletions(-) (limited to 'tools/include') diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c index f0d4ff3c20a8..6d6475e3a9bf 100644 --- a/crypto/asymmetric_keys/pkcs7_verify.c +++ b/crypto/asymmetric_keys/pkcs7_verify.c @@ -429,6 +429,7 @@ int pkcs7_verify(struct pkcs7_message *pkcs7, /* Authattr presence checked in parser */ break; case VERIFYING_UNSPECIFIED_SIGNATURE: + case VERIFYING_BPF_SIGNATURE: if (pkcs7->data_type != OID_data) { pr_warn("Invalid unspecified sig (not pkcs7-data)\n"); return -EKEYREJECTED; diff --git a/include/linux/verification.h b/include/linux/verification.h index 4f3022d081c3..dec7f2beabfd 100644 --- a/include/linux/verification.h +++ b/include/linux/verification.h @@ -36,6 +36,7 @@ enum key_being_used_for { VERIFYING_KEY_SIGNATURE, VERIFYING_KEY_SELF_SIGNATURE, VERIFYING_UNSPECIFIED_SIGNATURE, + VERIFYING_BPF_SIGNATURE, NR__KEY_BEING_USED_FOR }; #ifdef CONFIG_SYSTEM_DATA_VERIFICATION diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0987b52d5648..f3b173e48b0f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1611,6 +1611,16 @@ union bpf_attr { * continuous. */ __u32 fd_array_cnt; + /* Pointer to a buffer containing the signature of the BPF + * program. + */ + __aligned_u64 signature; + /* Size of the signature buffer in bytes. */ + __u32 signature_size; + /* ID of the kernel keyring to be used for signature + * verification. + */ + __s32 keyring_id; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index ef4ede8bb74f..969f63f8ca28 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -3898,7 +3898,7 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p, return verify_pkcs7_signature(data, data_len, sig, sig_len, trusted_keyring->key, - VERIFYING_UNSPECIFIED_SIGNATURE, NULL, + VERIFYING_BPF_SIGNATURE, NULL, NULL); #else return -EOPNOTSUPP; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index cf7173b1bb83..8a3c3d26f6e2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -2785,8 +2786,44 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) } } +static int bpf_prog_verify_signature(struct bpf_prog *prog, union bpf_attr *attr, + bool is_kernel) +{ + bpfptr_t usig = make_bpfptr(attr->signature, is_kernel); + struct bpf_dynptr_kern sig_ptr, insns_ptr; + struct bpf_key *key = NULL; + void *sig; + int err = 0; + + if (system_keyring_id_check(attr->keyring_id) == 0) + key = bpf_lookup_system_key(attr->keyring_id); + else + key = bpf_lookup_user_key(attr->keyring_id, 0); + + if (!key) + return -EINVAL; + + sig = kvmemdup_bpfptr(usig, attr->signature_size); + if (IS_ERR(sig)) { + bpf_key_put(key); + return -ENOMEM; + } + + bpf_dynptr_init(&sig_ptr, sig, BPF_DYNPTR_TYPE_LOCAL, 0, + attr->signature_size); + bpf_dynptr_init(&insns_ptr, prog->insnsi, BPF_DYNPTR_TYPE_LOCAL, 0, + prog->len * sizeof(struct bpf_insn)); + + err = bpf_verify_pkcs7_signature((struct bpf_dynptr *)&insns_ptr, + (struct bpf_dynptr *)&sig_ptr, key); + + bpf_key_put(key); + kvfree(sig); + return err; +} + /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD fd_array_cnt +#define BPF_PROG_LOAD_LAST_FIELD keyring_id static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { @@ -2950,6 +2987,12 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) /* eBPF programs must be GPL compatible to use GPL-ed functions */ prog->gpl_compatible = license_is_gpl_compatible(license) ? 1 : 0; + if (attr->signature) { + err = bpf_prog_verify_signature(prog, attr, uattr.is_kernel); + if (err) + goto free_prog; + } + prog->orig_prog = NULL; prog->jited = 0; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0987b52d5648..f3b173e48b0f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1611,6 +1611,16 @@ union bpf_attr { * continuous. */ __u32 fd_array_cnt; + /* Pointer to a buffer containing the signature of the BPF + * program. + */ + __aligned_u64 signature; + /* Size of the signature buffer in bytes. */ + __u32 signature_size; + /* ID of the kernel keyring to be used for signature + * verification. + */ + __s32 keyring_id; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 19ad7bcf0c2f..339b19797237 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -240,7 +240,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, fd_array_cnt); + const size_t attr_sz = offsetofend(union bpf_attr, keyring_id); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; -- cgit v1.2.3 From 5c8fd7e2b5b0a527cf88740da122166695382a78 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Tue, 23 Sep 2025 12:24:00 +0100 Subject: bpf: bpf task work plumbing This patch adds necessary plumbing in verifier, syscall and maps to support handling new kfunc bpf_task_work_schedule and kernel structure bpf_task_work. The idea is similar to how we already handle bpf_wq and bpf_timer. verifier changes validate calls to bpf_task_work_schedule to make sure it is safe and expected invariants hold. btf part is required to detect bpf_task_work structure inside map value and store its offset, which will be used in the next patch to calculate key and value addresses. arraymap and hashtab changes are needed to handle freeing of the bpf_task_work: run code needed to deinitialize it, for example cancel task_work callback if possible. The use of bpf_task_work and proper implementation for kfuncs are introduced in the next patch. Signed-off-by: Mykyta Yatsenko Acked-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/r/20250923112404.668720-6-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 11 ++++ include/uapi/linux/bpf.h | 4 ++ kernel/bpf/arraymap.c | 8 +-- kernel/bpf/btf.c | 7 +++ kernel/bpf/hashtab.c | 19 ++++--- kernel/bpf/helpers.c | 40 ++++++++++++++ kernel/bpf/syscall.c | 16 +++++- kernel/bpf/verifier.c | 117 ++++++++++++++++++++++++++++++++++++++--- tools/include/uapi/linux/bpf.h | 4 ++ 9 files changed, 208 insertions(+), 18 deletions(-) (limited to 'tools/include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index dfc1a27b56d5..a2ab51fa8b0a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -209,6 +209,7 @@ enum btf_field_type { BPF_WORKQUEUE = (1 << 10), BPF_UPTR = (1 << 11), BPF_RES_SPIN_LOCK = (1 << 12), + BPF_TASK_WORK = (1 << 13), }; enum bpf_cgroup_storage_type { @@ -262,6 +263,7 @@ struct btf_record { int timer_off; int wq_off; int refcount_off; + int task_work_off; struct btf_field fields[]; }; @@ -363,6 +365,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type) return "bpf_rb_node"; case BPF_REFCOUNT: return "bpf_refcount"; + case BPF_TASK_WORK: + return "bpf_task_work"; default: WARN_ON_ONCE(1); return "unknown"; @@ -401,6 +405,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type) return sizeof(struct bpf_rb_node); case BPF_REFCOUNT: return sizeof(struct bpf_refcount); + case BPF_TASK_WORK: + return sizeof(struct bpf_task_work); default: WARN_ON_ONCE(1); return 0; @@ -433,6 +439,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type) return __alignof__(struct bpf_rb_node); case BPF_REFCOUNT: return __alignof__(struct bpf_refcount); + case BPF_TASK_WORK: + return __alignof__(struct bpf_task_work); default: WARN_ON_ONCE(1); return 0; @@ -464,6 +472,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr) case BPF_KPTR_REF: case BPF_KPTR_PERCPU: case BPF_UPTR: + case BPF_TASK_WORK: break; default: WARN_ON_ONCE(1); @@ -600,6 +609,7 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src, bool lock_src); void bpf_timer_cancel_and_free(void *timer); void bpf_wq_cancel_and_free(void *timer); +void bpf_task_work_cancel_and_free(void *timer); void bpf_list_head_free(const struct btf_field *field, void *list_head, struct bpf_spin_lock *spin_lock); void bpf_rb_root_free(const struct btf_field *field, void *rb_root, @@ -2426,6 +2436,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec); bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); void bpf_obj_free_timer(const struct btf_record *rec, void *obj); void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj); +void bpf_obj_free_task_work(const struct btf_record *rec, void *obj); void bpf_obj_free_fields(const struct btf_record *rec, void *obj); void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f3b173e48b0f..ae83d8649ef1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7436,6 +7436,10 @@ struct bpf_timer { __u64 __opaque[2]; } __attribute__((aligned(8))); +struct bpf_task_work { + __u64 __opaque; +} __attribute__((aligned(8))); + struct bpf_wq { __u64 __opaque[2]; } __attribute__((aligned(8))); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 26d5dda989bc..80b1765a3159 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -443,7 +443,7 @@ static void *array_map_vmalloc_addr(struct bpf_array *array) return (void *)round_down((unsigned long)array, PAGE_SIZE); } -static void array_map_free_timers_wq(struct bpf_map *map) +static void array_map_free_internal_structs(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; @@ -451,12 +451,14 @@ static void array_map_free_timers_wq(struct bpf_map *map) /* We don't reset or free fields other than timer and workqueue * on uref dropping to zero. */ - if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE)) { + if (btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) { for (i = 0; i < array->map.max_entries; i++) { if (btf_record_has_field(map->record, BPF_TIMER)) bpf_obj_free_timer(map->record, array_map_elem_ptr(array, i)); if (btf_record_has_field(map->record, BPF_WORKQUEUE)) bpf_obj_free_workqueue(map->record, array_map_elem_ptr(array, i)); + if (btf_record_has_field(map->record, BPF_TASK_WORK)) + bpf_obj_free_task_work(map->record, array_map_elem_ptr(array, i)); } } } @@ -795,7 +797,7 @@ const struct bpf_map_ops array_map_ops = { .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, - .map_release_uref = array_map_free_timers_wq, + .map_release_uref = array_map_free_internal_structs, .map_lookup_elem = array_map_lookup_elem, .map_update_elem = array_map_update_elem, .map_delete_elem = array_map_delete_elem, diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index c51e16bbf0c1..9f47a3aa7ff8 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3490,6 +3490,7 @@ static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_ { BPF_RES_SPIN_LOCK, "bpf_res_spin_lock", true }, { BPF_TIMER, "bpf_timer", true }, { BPF_WORKQUEUE, "bpf_wq", true }, + { BPF_TASK_WORK, "bpf_task_work", true }, { BPF_LIST_HEAD, "bpf_list_head", false }, { BPF_LIST_NODE, "bpf_list_node", false }, { BPF_RB_ROOT, "bpf_rb_root", false }, @@ -3675,6 +3676,7 @@ static int btf_find_field_one(const struct btf *btf, case BPF_LIST_NODE: case BPF_RB_NODE: case BPF_REFCOUNT: + case BPF_TASK_WORK: ret = btf_find_struct(btf, var_type, off, sz, field_type, info_cnt ? &info[0] : &tmp); if (ret < 0) @@ -3967,6 +3969,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type rec->timer_off = -EINVAL; rec->wq_off = -EINVAL; rec->refcount_off = -EINVAL; + rec->task_work_off = -EINVAL; for (i = 0; i < cnt; i++) { field_type_size = btf_field_type_size(info_arr[i].type); if (info_arr[i].off + field_type_size > value_size) { @@ -4006,6 +4009,10 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type /* Cache offset for faster lookup at runtime */ rec->wq_off = rec->fields[i].offset; break; + case BPF_TASK_WORK: + WARN_ON_ONCE(rec->task_work_off >= 0); + rec->task_work_off = rec->fields[i].offset; + break; case BPF_REFCOUNT: WARN_ON_ONCE(rec->refcount_off >= 0); /* Cache offset for faster lookup at runtime */ diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 2319f8f8fa3e..c2fcd0cd51e5 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -223,9 +223,12 @@ static void htab_free_internal_structs(struct bpf_htab *htab, struct htab_elem * if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE)) bpf_obj_free_workqueue(htab->map.record, htab_elem_value(elem, htab->map.key_size)); + if (btf_record_has_field(htab->map.record, BPF_TASK_WORK)) + bpf_obj_free_task_work(htab->map.record, + htab_elem_value(elem, htab->map.key_size)); } -static void htab_free_prealloced_timers_and_wq(struct bpf_htab *htab) +static void htab_free_prealloced_internal_structs(struct bpf_htab *htab) { u32 num_entries = htab->map.max_entries; int i; @@ -1495,7 +1498,7 @@ static void delete_all_elements(struct bpf_htab *htab) } } -static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab) +static void htab_free_malloced_internal_structs(struct bpf_htab *htab) { int i; @@ -1514,16 +1517,16 @@ static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab) rcu_read_unlock(); } -static void htab_map_free_timers_and_wq(struct bpf_map *map) +static void htab_map_free_internal_structs(struct bpf_map *map) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); /* We only free timer and workqueue on uref dropping to zero */ - if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE)) { + if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) { if (!htab_is_prealloc(htab)) - htab_free_malloced_timers_and_wq(htab); + htab_free_malloced_internal_structs(htab); else - htab_free_prealloced_timers_and_wq(htab); + htab_free_prealloced_internal_structs(htab); } } @@ -2255,7 +2258,7 @@ const struct bpf_map_ops htab_map_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, - .map_release_uref = htab_map_free_timers_and_wq, + .map_release_uref = htab_map_free_internal_structs, .map_lookup_elem = htab_map_lookup_elem, .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem, .map_update_elem = htab_map_update_elem, @@ -2276,7 +2279,7 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_alloc = htab_map_alloc, .map_free = htab_map_free, .map_get_next_key = htab_map_get_next_key, - .map_release_uref = htab_map_free_timers_and_wq, + .map_release_uref = htab_map_free_internal_structs, .map_lookup_elem = htab_lru_map_lookup_elem, .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem, .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 969f63f8ca28..7f5e528df13b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -3906,8 +3906,48 @@ __bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p, } #endif /* CONFIG_KEYS */ +typedef int (*bpf_task_work_callback_t)(struct bpf_map *map, void *key, void *value); + +/** + * bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add with TWA_SIGNAL mode + * @task: Task struct for which callback should be scheduled + * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping + * @map__map: bpf_map that embeds struct bpf_task_work in the values + * @callback: pointer to BPF subprogram to call + * @aux__prog: user should pass NULL + * + * Return: 0 if task work has been scheduled successfully, negative error code otherwise + */ +__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct bpf_task_work *tw, + void *map__map, bpf_task_work_callback_t callback, + void *aux__prog) +{ + return 0; +} + +/** + * bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add with TWA_RESUME mode + * @task: Task struct for which callback should be scheduled + * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping + * @map__map: bpf_map that embeds struct bpf_task_work in the values + * @callback: pointer to BPF subprogram to call + * @aux__prog: user should pass NULL + * + * Return: 0 if task work has been scheduled successfully, negative error code otherwise + */ +__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct bpf_task_work *tw, + void *map__map, bpf_task_work_callback_t callback, + void *aux__prog) +{ + return 0; +} + __bpf_kfunc_end_defs(); +void bpf_task_work_cancel_and_free(void *val) +{ +} + BTF_KFUNCS_START(generic_btf_ids) #ifdef CONFIG_CRASH_DUMP BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8a3c3d26f6e2..adb05d235011 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -674,6 +674,7 @@ void btf_record_free(struct btf_record *rec) case BPF_TIMER: case BPF_REFCOUNT: case BPF_WORKQUEUE: + case BPF_TASK_WORK: /* Nothing to release */ break; default: @@ -727,6 +728,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec) case BPF_TIMER: case BPF_REFCOUNT: case BPF_WORKQUEUE: + case BPF_TASK_WORK: /* Nothing to acquire */ break; default: @@ -785,6 +787,13 @@ void bpf_obj_free_workqueue(const struct btf_record *rec, void *obj) bpf_wq_cancel_and_free(obj + rec->wq_off); } +void bpf_obj_free_task_work(const struct btf_record *rec, void *obj) +{ + if (WARN_ON_ONCE(!btf_record_has_field(rec, BPF_TASK_WORK))) + return; + bpf_task_work_cancel_and_free(obj + rec->task_work_off); +} + void bpf_obj_free_fields(const struct btf_record *rec, void *obj) { const struct btf_field *fields; @@ -809,6 +818,9 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj) case BPF_WORKQUEUE: bpf_wq_cancel_and_free(field_ptr); break; + case BPF_TASK_WORK: + bpf_task_work_cancel_and_free(field_ptr); + break; case BPF_KPTR_UNREF: WRITE_ONCE(*(u64 *)field_ptr, 0); break; @@ -1240,7 +1252,8 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, map->record = btf_parse_fields(btf, value_type, BPF_SPIN_LOCK | BPF_RES_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD | - BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR, + BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR | + BPF_TASK_WORK, map->value_size); if (!IS_ERR_OR_NULL(map->record)) { int i; @@ -1272,6 +1285,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token, break; case BPF_TIMER: case BPF_WORKQUEUE: + case BPF_TASK_WORK: if (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_LRU_HASH && map->map_type != BPF_MAP_TYPE_ARRAY) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 02b93a54a446..ceeb0ffe7d67 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2224,10 +2224,10 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg) /* transfer reg's id which is unique for every map_lookup_elem * as UID of the inner map. */ - if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER)) - reg->map_uid = reg->id; - if (btf_record_has_field(map->inner_map_meta->record, BPF_WORKQUEUE)) + if (btf_record_has_field(map->inner_map_meta->record, + BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) { reg->map_uid = reg->id; + } } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { reg->type = PTR_TO_XDP_SOCK; } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || @@ -8461,6 +8461,9 @@ static int check_map_field_pointer(struct bpf_verifier_env *env, u32 regno, case BPF_TIMER: field_off = map->record->timer_off; break; + case BPF_TASK_WORK: + field_off = map->record->task_work_off; + break; default: verifier_bug(env, "unsupported BTF field type: %s\n", struct_name); return -EINVAL; @@ -8514,6 +8517,26 @@ static int process_wq_func(struct bpf_verifier_env *env, int regno, return 0; } +static int process_task_work_func(struct bpf_verifier_env *env, int regno, + struct bpf_kfunc_call_arg_meta *meta) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + struct bpf_map *map = reg->map_ptr; + int err; + + err = check_map_field_pointer(env, regno, BPF_TASK_WORK); + if (err) + return err; + + if (meta->map.ptr) { + verifier_bug(env, "Two map pointers in a bpf_task_work helper"); + return -EFAULT; + } + meta->map.uid = reg->map_uid; + meta->map.ptr = map; + return 0; +} + static int process_kptr_func(struct bpf_verifier_env *env, int regno, struct bpf_call_arg_meta *meta) { @@ -10343,6 +10366,8 @@ typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env, struct bpf_func_state *callee, int insn_idx); +static bool is_task_work_add_kfunc(u32 func_id); + static int set_callee_state(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee, int insn_idx); @@ -10561,7 +10586,8 @@ static int push_callback_call(struct bpf_verifier_env *env, struct bpf_insn *ins env->subprog_info[subprog].is_async_cb = true; async_cb = push_async_cb(env, env->subprog_info[subprog].start, insn_idx, subprog, - is_bpf_wq_set_callback_impl_kfunc(insn->imm)); + is_bpf_wq_set_callback_impl_kfunc(insn->imm) || + is_task_work_add_kfunc(insn->imm)); if (!async_cb) return -EFAULT; callee = async_cb->frame[0]; @@ -10876,6 +10902,36 @@ static int set_rbtree_add_callback_state(struct bpf_verifier_env *env, return 0; } +static int set_task_work_schedule_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + struct bpf_map *map_ptr = caller->regs[BPF_REG_3].map_ptr; + + /* + * callback_fn(struct bpf_map *map, void *key, void *value); + */ + callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP; + __mark_reg_known_zero(&callee->regs[BPF_REG_1]); + callee->regs[BPF_REG_1].map_ptr = map_ptr; + + callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY; + __mark_reg_known_zero(&callee->regs[BPF_REG_2]); + callee->regs[BPF_REG_2].map_ptr = map_ptr; + + callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE; + __mark_reg_known_zero(&callee->regs[BPF_REG_3]); + callee->regs[BPF_REG_3].map_ptr = map_ptr; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + callee->in_async_callback_fn = true; + callee->callback_ret_range = retval_range(S32_MIN, S32_MAX); + return 0; +} + static bool is_rbtree_lock_required_kfunc(u32 btf_id); /* Are we currently verifying the callback for a rbtree helper that must @@ -12000,6 +12056,7 @@ enum { KF_ARG_RB_NODE_ID, KF_ARG_WORKQUEUE_ID, KF_ARG_RES_SPIN_LOCK_ID, + KF_ARG_TASK_WORK_ID, }; BTF_ID_LIST(kf_arg_btf_ids) @@ -12010,6 +12067,7 @@ BTF_ID(struct, bpf_rb_root) BTF_ID(struct, bpf_rb_node) BTF_ID(struct, bpf_wq) BTF_ID(struct, bpf_res_spin_lock) +BTF_ID(struct, bpf_task_work) static bool __is_kfunc_ptr_arg_type(const struct btf *btf, const struct btf_param *arg, int type) @@ -12058,6 +12116,11 @@ static bool is_kfunc_arg_wq(const struct btf *btf, const struct btf_param *arg) return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_WORKQUEUE_ID); } +static bool is_kfunc_arg_task_work(const struct btf *btf, const struct btf_param *arg) +{ + return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_TASK_WORK_ID); +} + static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_param *arg) { return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID); @@ -12145,6 +12208,7 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_WORKQUEUE, KF_ARG_PTR_TO_IRQ_FLAG, KF_ARG_PTR_TO_RES_SPIN_LOCK, + KF_ARG_PTR_TO_TASK_WORK, }; enum special_kfunc_type { @@ -12194,6 +12258,8 @@ enum special_kfunc_type { KF_bpf_res_spin_lock_irqsave, KF_bpf_res_spin_unlock_irqrestore, KF___bpf_trap, + KF_bpf_task_work_schedule_signal, + KF_bpf_task_work_schedule_resume, }; BTF_ID_LIST(special_kfunc_list) @@ -12262,6 +12328,14 @@ BTF_ID(func, bpf_res_spin_unlock) BTF_ID(func, bpf_res_spin_lock_irqsave) BTF_ID(func, bpf_res_spin_unlock_irqrestore) BTF_ID(func, __bpf_trap) +BTF_ID(func, bpf_task_work_schedule_signal) +BTF_ID(func, bpf_task_work_schedule_resume) + +static bool is_task_work_add_kfunc(u32 func_id) +{ + return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] || + func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume]; +} static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) { @@ -12352,6 +12426,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_wq(meta->btf, &args[argno])) return KF_ARG_PTR_TO_WORKQUEUE; + if (is_kfunc_arg_task_work(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_TASK_WORK; + if (is_kfunc_arg_irq_flag(meta->btf, &args[argno])) return KF_ARG_PTR_TO_IRQ_FLAG; @@ -12695,7 +12772,8 @@ static bool is_sync_callback_calling_kfunc(u32 btf_id) static bool is_async_callback_calling_kfunc(u32 btf_id) { - return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl]; + return btf_id == special_kfunc_list[KF_bpf_wq_set_callback_impl] || + is_task_work_add_kfunc(btf_id); } static bool is_bpf_throw_kfunc(struct bpf_insn *insn) @@ -13076,7 +13154,8 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ verbose(env, "pointer in R%d isn't map pointer\n", regno); return -EINVAL; } - if (meta->map.ptr && reg->map_ptr->record->wq_off >= 0) { + if (meta->map.ptr && (reg->map_ptr->record->wq_off >= 0 || + reg->map_ptr->record->task_work_off >= 0)) { /* Use map_uid (which is unique id of inner map) to reject: * inner_map1 = bpf_map_lookup_elem(outer_map, key1) * inner_map2 = bpf_map_lookup_elem(outer_map, key2) @@ -13091,6 +13170,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ */ if (meta->map.ptr != reg->map_ptr || meta->map.uid != reg->map_uid) { + if (reg->map_ptr->record->task_work_off >= 0) { + verbose(env, + "bpf_task_work pointer in R2 map_uid=%d doesn't match map pointer in R3 map_uid=%d\n", + meta->map.uid, reg->map_uid); + return -EINVAL; + } verbose(env, "workqueue pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n", meta->map.uid, reg->map_uid); @@ -13129,6 +13214,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_REFCOUNTED_KPTR: case KF_ARG_PTR_TO_CONST_STR: case KF_ARG_PTR_TO_WORKQUEUE: + case KF_ARG_PTR_TO_TASK_WORK: case KF_ARG_PTR_TO_IRQ_FLAG: case KF_ARG_PTR_TO_RES_SPIN_LOCK: break; @@ -13422,6 +13508,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (ret < 0) return ret; break; + case KF_ARG_PTR_TO_TASK_WORK: + if (reg->type != PTR_TO_MAP_VALUE) { + verbose(env, "arg#%d doesn't point to a map value\n", i); + return -EINVAL; + } + ret = process_task_work_func(env, regno, meta); + if (ret < 0) + return ret; + break; case KF_ARG_PTR_TO_IRQ_FLAG: if (reg->type != PTR_TO_STACK) { verbose(env, "arg#%d doesn't point to an irq flag on stack\n", i); @@ -13788,6 +13883,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, } } + if (is_task_work_add_kfunc(meta.func_id)) { + err = push_callback_call(env, insn, insn_idx, meta.subprogno, + set_task_work_schedule_callback_state); + if (err) { + verbose(env, "kfunc %s#%d failed callback verification\n", + func_name, meta.func_id); + return err; + } + } + rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta); rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f3b173e48b0f..ae83d8649ef1 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7436,6 +7436,10 @@ struct bpf_timer { __u64 __opaque[2]; } __attribute__((aligned(8))); +struct bpf_task_work { + __u64 __opaque; +} __attribute__((aligned(8))); + struct bpf_wq { __u64 __opaque[2]; } __attribute__((aligned(8))); -- cgit v1.2.3 From 23ef9d439769d5f35353650e771c63d13824235b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 23 Sep 2025 14:34:19 -0700 Subject: kcfi: Rename CONFIG_CFI_CLANG to CONFIG_CFI The kernel's CFI implementation uses the KCFI ABI specifically, and is not strictly tied to a particular compiler. In preparation for GCC supporting KCFI, rename CONFIG_CFI_CLANG to CONFIG_CFI (along with associated options). Use new "transitional" Kconfig option for old CONFIG_CFI_CLANG that will enable CONFIG_CFI during olddefconfig. Reviewed-by: Linus Walleij Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20250923213422.1105654-3-kees@kernel.org Signed-off-by: Kees Cook --- Makefile | 2 +- arch/Kconfig | 36 +++++++++++++++++++++------------ arch/arm/Kconfig | 2 +- arch/arm/kernel/hw_breakpoint.c | 2 +- arch/arm/mm/Makefile | 2 +- arch/arm/mm/cache-fa.S | 2 +- arch/arm/mm/cache-v4.S | 2 +- arch/arm/mm/cache-v4wb.S | 4 ++-- arch/arm/mm/cache-v4wt.S | 2 +- arch/arm/mm/cache-v6.S | 2 +- arch/arm/mm/cache-v7.S | 2 +- arch/arm/mm/cache-v7m.S | 2 +- arch/arm/mm/proc-arm1020.S | 2 +- arch/arm/mm/proc-arm1020e.S | 2 +- arch/arm/mm/proc-arm1022.S | 2 +- arch/arm/mm/proc-arm1026.S | 2 +- arch/arm/mm/proc-arm920.S | 2 +- arch/arm/mm/proc-arm922.S | 2 +- arch/arm/mm/proc-arm925.S | 2 +- arch/arm/mm/proc-arm926.S | 2 +- arch/arm/mm/proc-arm940.S | 2 +- arch/arm/mm/proc-arm946.S | 2 +- arch/arm/mm/proc-feroceon.S | 2 +- arch/arm/mm/proc-mohawk.S | 2 +- arch/arm/mm/proc-xsc3.S | 2 +- arch/arm/mm/tlb-v4.S | 2 +- arch/arm64/Kconfig | 4 ++-- arch/arm64/kernel/debug-monitors.c | 2 +- arch/arm64/kernel/traps.c | 4 ++-- arch/arm64/kvm/handle_exit.c | 2 +- arch/arm64/net/bpf_jit_comp.c | 2 +- arch/riscv/Kconfig | 6 +++--- arch/riscv/include/asm/cfi.h | 4 ++-- arch/riscv/kernel/Makefile | 2 +- arch/riscv/net/bpf_jit_comp64.c | 4 ++-- arch/riscv/purgatory/Makefile | 2 +- arch/x86/Kconfig | 12 +++++------ arch/x86/include/asm/cfi.h | 4 ++-- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/alternative.c | 4 ++-- arch/x86/kernel/kprobes/core.c | 2 +- arch/x86/purgatory/Makefile | 2 +- drivers/misc/lkdtm/cfi.c | 2 +- include/asm-generic/vmlinux.lds.h | 2 +- include/linux/cfi.h | 6 +++--- include/linux/cfi_types.h | 8 ++++---- include/linux/compiler.h | 2 +- init/Kconfig | 4 ++-- kernel/Makefile | 2 +- kernel/configs/hardening.config | 4 ++-- kernel/module/Kconfig | 2 +- kernel/module/tree_lookup.c | 2 +- lib/Kconfig.debug | 2 +- tools/include/linux/cfi_types.h | 6 +++--- tools/perf/util/include/linux/linkage.h | 2 +- 55 files changed, 100 insertions(+), 90 deletions(-) (limited to 'tools/include') diff --git a/Makefile b/Makefile index d1adb78c3596..437989d6e0be 100644 --- a/Makefile +++ b/Makefile @@ -1020,7 +1020,7 @@ KBUILD_AFLAGS += -fno-lto export CC_FLAGS_LTO endif -ifdef CONFIG_CFI_CLANG +ifdef CONFIG_CFI CC_FLAGS_CFI := -fsanitize=kcfi ifdef CONFIG_CFI_ICALL_NORMALIZE_INTEGERS CC_FLAGS_CFI += -fsanitize-cfi-icall-experimental-normalize-integers diff --git a/arch/Kconfig b/arch/Kconfig index d1b4ffd6e085..97642c08a124 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -867,22 +867,26 @@ config PROPELLER_CLANG If unsure, say N. -config ARCH_SUPPORTS_CFI_CLANG +config ARCH_SUPPORTS_CFI bool help - An architecture should select this option if it can support Clang's - Control-Flow Integrity (CFI) checking. + An architecture should select this option if it can support Kernel + Control-Flow Integrity (CFI) checking (-fsanitize=kcfi). config ARCH_USES_CFI_TRAPS bool + help + An architecture should select this option if it requires the + .kcfi_traps section for KCFI trap handling. -config CFI_CLANG - bool "Use Clang's Control Flow Integrity (CFI)" - depends on ARCH_SUPPORTS_CFI_CLANG +config CFI + bool "Use Kernel Control Flow Integrity (kCFI)" + default CFI_CLANG + depends on ARCH_SUPPORTS_CFI depends on $(cc-option,-fsanitize=kcfi) help - This option enables Clang's forward-edge Control Flow Integrity - (CFI) checking, where the compiler injects a runtime check to each + This option enables forward-edge Control Flow Integrity (CFI) + checking, where the compiler injects a runtime check to each indirect function call to ensure the target is a valid function with the correct static type. This restricts possible call targets and makes it more difficult for an attacker to exploit bugs that allow @@ -891,10 +895,16 @@ config CFI_CLANG https://clang.llvm.org/docs/ControlFlowIntegrity.html +config CFI_CLANG + bool + transitional + help + Transitional config for CFI_CLANG to CFI migration. + config CFI_ICALL_NORMALIZE_INTEGERS bool "Normalize CFI tags for integers" - depends on CFI_CLANG - depends on HAVE_CFI_ICALL_NORMALIZE_INTEGERS_CLANG + depends on CFI + depends on HAVE_CFI_ICALL_NORMALIZE_INTEGERS help This option normalizes the CFI tags for integer types so that all integer types of the same size and signedness receive the same CFI @@ -907,7 +917,7 @@ config CFI_ICALL_NORMALIZE_INTEGERS This option is necessary for using CFI with Rust. If unsure, say N. -config HAVE_CFI_ICALL_NORMALIZE_INTEGERS_CLANG +config HAVE_CFI_ICALL_NORMALIZE_INTEGERS def_bool y depends on $(cc-option,-fsanitize=kcfi -fsanitize-cfi-icall-experimental-normalize-integers) # With GCOV/KASAN we need this fix: https://github.com/llvm/llvm-project/pull/104826 @@ -915,7 +925,7 @@ config HAVE_CFI_ICALL_NORMALIZE_INTEGERS_CLANG config HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC def_bool y - depends on HAVE_CFI_ICALL_NORMALIZE_INTEGERS_CLANG + depends on HAVE_CFI_ICALL_NORMALIZE_INTEGERS depends on RUSTC_VERSION >= 107900 # With GCOV/KASAN we need this fix: https://github.com/rust-lang/rust/pull/129373 depends on (RUSTC_LLVM_VERSION >= 190103 && RUSTC_VERSION >= 108200) || \ @@ -923,7 +933,7 @@ config HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC config CFI_PERMISSIVE bool "Use CFI in permissive mode" - depends on CFI_CLANG + depends on CFI help When selected, Control Flow Integrity (CFI) violations result in a warning instead of a kernel panic. This option should only be used diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b1f3df39ed40..36ab8625be72 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -38,7 +38,7 @@ config ARM select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 select ARCH_NEED_CMPXCHG_1_EMU if CPU_V6 select ARCH_SUPPORTS_ATOMIC_RMW - select ARCH_SUPPORTS_CFI_CLANG + select ARCH_SUPPORTS_CFI select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE select ARCH_SUPPORTS_PER_VMA_LOCK select ARCH_USE_BUILTIN_BSWAP diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index a12efd0f43e8..cd4b34c96e35 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -904,7 +904,7 @@ unlock: watchpoint_single_step_handler(addr); } -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI static void hw_breakpoint_cfi_handler(struct pt_regs *regs) { /* diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index a195cd1d3e6d..1e2201013371 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -89,7 +89,7 @@ obj-$(CONFIG_CPU_V6) += proc-v6.o obj-$(CONFIG_CPU_V6K) += proc-v6.o obj-$(CONFIG_CPU_V7) += proc-v7.o proc-v7-bugs.o obj-$(CONFIG_CPU_V7M) += proc-v7m.o -obj-$(CONFIG_CFI_CLANG) += proc.o +obj-$(CONFIG_CFI) += proc.o obj-$(CONFIG_OUTER_CACHE) += l2c-common.o obj-$(CONFIG_CACHE_B15_RAC) += cache-b15-rac.o diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S index 4a3668b52a2d..e1641799569b 100644 --- a/arch/arm/mm/cache-fa.S +++ b/arch/arm/mm/cache-fa.S @@ -112,7 +112,7 @@ SYM_FUNC_END(fa_flush_user_cache_range) * - end - virtual end address */ SYM_TYPED_FUNC_START(fa_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b fa_coherent_user_range #endif SYM_FUNC_END(fa_coherent_kern_range) diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S index 0e94e5193dbd..001d7042bd46 100644 --- a/arch/arm/mm/cache-v4.S +++ b/arch/arm/mm/cache-v4.S @@ -104,7 +104,7 @@ SYM_FUNC_END(v4_coherent_user_range) * - size - region size */ SYM_TYPED_FUNC_START(v4_flush_kern_dcache_area) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b v4_dma_flush_range #endif SYM_FUNC_END(v4_flush_kern_dcache_area) diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S index ce55a2eef5da..874fe5310f9a 100644 --- a/arch/arm/mm/cache-v4wb.S +++ b/arch/arm/mm/cache-v4wb.S @@ -136,7 +136,7 @@ SYM_FUNC_END(v4wb_flush_user_cache_range) */ SYM_TYPED_FUNC_START(v4wb_flush_kern_dcache_area) add r1, r0, r1 -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b v4wb_coherent_user_range #endif SYM_FUNC_END(v4wb_flush_kern_dcache_area) @@ -152,7 +152,7 @@ SYM_FUNC_END(v4wb_flush_kern_dcache_area) * - end - virtual end address */ SYM_TYPED_FUNC_START(v4wb_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b v4wb_coherent_user_range #endif SYM_FUNC_END(v4wb_coherent_kern_range) diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S index a97dc267b3b0..2ee62e4b2b07 100644 --- a/arch/arm/mm/cache-v4wt.S +++ b/arch/arm/mm/cache-v4wt.S @@ -108,7 +108,7 @@ SYM_FUNC_END(v4wt_flush_user_cache_range) * - end - virtual end address */ SYM_TYPED_FUNC_START(v4wt_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b v4wt_coherent_user_range #endif SYM_FUNC_END(v4wt_coherent_kern_range) diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 9f415476e218..5ceea8965ea1 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -117,7 +117,7 @@ SYM_FUNC_END(v6_flush_user_cache_range) * - the Icache does not read data from the write buffer */ SYM_TYPED_FUNC_START(v6_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b v6_coherent_user_range #endif SYM_FUNC_END(v6_coherent_kern_range) diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 201ca05436fa..726681fb7d4d 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -261,7 +261,7 @@ SYM_FUNC_END(v7_flush_user_cache_range) * - the Icache does not read data from the write buffer */ SYM_TYPED_FUNC_START(v7_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b v7_coherent_user_range #endif SYM_FUNC_END(v7_coherent_kern_range) diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S index 14d719eba729..7f9cfad2ea21 100644 --- a/arch/arm/mm/cache-v7m.S +++ b/arch/arm/mm/cache-v7m.S @@ -286,7 +286,7 @@ SYM_FUNC_END(v7m_flush_user_cache_range) * - the Icache does not read data from the write buffer */ SYM_TYPED_FUNC_START(v7m_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b v7m_coherent_user_range #endif SYM_FUNC_END(v7m_coherent_kern_range) diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index d0ce3414a13e..4612a4961e81 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -203,7 +203,7 @@ SYM_FUNC_END(arm1020_flush_user_cache_range) * - end - virtual end address */ SYM_TYPED_FUNC_START(arm1020_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b arm1020_coherent_user_range #endif SYM_FUNC_END(arm1020_coherent_kern_range) diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 64f031bf6eff..b4a8a3a8eda3 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -200,7 +200,7 @@ SYM_FUNC_END(arm1020e_flush_user_cache_range) * - end - virtual end address */ SYM_TYPED_FUNC_START(arm1020e_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b arm1020e_coherent_user_range #endif SYM_FUNC_END(arm1020e_coherent_kern_range) diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index 42ed5ed07252..709870e99e19 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -199,7 +199,7 @@ SYM_FUNC_END(arm1022_flush_user_cache_range) * - end - virtual end address */ SYM_TYPED_FUNC_START(arm1022_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b arm1022_coherent_user_range #endif SYM_FUNC_END(arm1022_coherent_kern_range) diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index b3ae62cd553a..02f7370a8c5c 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -194,7 +194,7 @@ SYM_FUNC_END(arm1026_flush_user_cache_range) * - end - virtual end address */ SYM_TYPED_FUNC_START(arm1026_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b arm1026_coherent_user_range #endif SYM_FUNC_END(arm1026_coherent_kern_range) diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index a30df54ad5fa..4727f4b5b6e8 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -180,7 +180,7 @@ SYM_FUNC_END(arm920_flush_user_cache_range) * - end - virtual end address */ SYM_TYPED_FUNC_START(arm920_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b arm920_coherent_user_range #endif SYM_FUNC_END(arm920_coherent_kern_range) diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index aac4e048100d..5a4a3f4f2683 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -182,7 +182,7 @@ SYM_FUNC_END(arm922_flush_user_cache_range) * - end - virtual end address */ SYM_TYPED_FUNC_START(arm922_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b arm922_coherent_user_range #endif SYM_FUNC_END(arm922_coherent_kern_range) diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index 035941faeb2e..1c4830afe1d3 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -229,7 +229,7 @@ SYM_FUNC_END(arm925_flush_user_cache_range) * - end - virtual end address */ SYM_TYPED_FUNC_START(arm925_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b arm925_coherent_user_range #endif SYM_FUNC_END(arm925_coherent_kern_range) diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 6f43d6af2d9a..a09cc3e02efd 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -192,7 +192,7 @@ SYM_FUNC_END(arm926_flush_user_cache_range) * - end - virtual end address */ SYM_TYPED_FUNC_START(arm926_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b arm926_coherent_user_range #endif SYM_FUNC_END(arm926_coherent_kern_range) diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index 0d30bb25c42b..545c076c36d2 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -153,7 +153,7 @@ SYM_FUNC_END(arm940_coherent_kern_range) * - end - virtual end address */ SYM_TYPED_FUNC_START(arm940_coherent_user_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b arm940_flush_kern_dcache_area #endif SYM_FUNC_END(arm940_coherent_user_range) diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index 27750ace2ced..f3d4e18c3fba 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -173,7 +173,7 @@ SYM_FUNC_END(arm946_flush_user_cache_range) * - end - virtual end address */ SYM_TYPED_FUNC_START(arm946_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b arm946_coherent_user_range #endif SYM_FUNC_END(arm946_coherent_kern_range) diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index f67b2ffac854..7f08d06c9625 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -208,7 +208,7 @@ SYM_FUNC_END(feroceon_flush_user_cache_range) */ .align 5 SYM_TYPED_FUNC_START(feroceon_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b feroceon_coherent_user_range #endif SYM_FUNC_END(feroceon_coherent_kern_range) diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index 8e9f38da863a..4669c63e3121 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -163,7 +163,7 @@ SYM_FUNC_END(mohawk_flush_user_cache_range) * - end - virtual end address */ SYM_TYPED_FUNC_START(mohawk_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b mohawk_coherent_user_range #endif SYM_FUNC_END(mohawk_coherent_kern_range) diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index 14927b380452..fd25634a2ed5 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -223,7 +223,7 @@ SYM_FUNC_END(xsc3_flush_user_cache_range) * it also trashes the mini I-cache used by JTAG debuggers. */ SYM_TYPED_FUNC_START(xsc3_coherent_kern_range) -#ifdef CONFIG_CFI_CLANG /* Fallthrough if !CFI */ +#ifdef CONFIG_CFI /* Fallthrough if !CFI */ b xsc3_coherent_user_range #endif SYM_FUNC_END(xsc3_coherent_kern_range) diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S index 09ff69008d94..079774a02be6 100644 --- a/arch/arm/mm/tlb-v4.S +++ b/arch/arm/mm/tlb-v4.S @@ -52,7 +52,7 @@ SYM_FUNC_END(v4_flush_user_tlb_range) * - start - virtual address (may not be aligned) * - end - virtual address (may not be aligned) */ -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI SYM_TYPED_FUNC_START(v4_flush_kern_tlb_range) b .v4_flush_kern_tlb_range SYM_FUNC_END(v4_flush_kern_tlb_range) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e9bbfacc35a6..1e38b8885a46 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -100,7 +100,7 @@ config ARM64 select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK select ARCH_SUPPORTS_LTO_CLANG if CPU_LITTLE_ENDIAN select ARCH_SUPPORTS_LTO_CLANG_THIN - select ARCH_SUPPORTS_CFI_CLANG + select ARCH_SUPPORTS_CFI select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_SUPPORTS_NUMA_BALANCING @@ -212,7 +212,7 @@ config ARM64 select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS \ if DYNAMIC_FTRACE_WITH_ARGS && DYNAMIC_FTRACE_WITH_CALL_OPS select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS \ - if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG && \ + if (DYNAMIC_FTRACE_WITH_ARGS && !CFI && \ (CC_IS_CLANG || !CC_OPTIMIZE_FOR_SIZE)) select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \ if DYNAMIC_FTRACE_WITH_ARGS diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 110d9ff54174..ebf010443e22 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -212,7 +212,7 @@ static int call_el1_break_hook(struct pt_regs *regs, unsigned long esr) if (esr_brk_comment(esr) == BUG_BRK_IMM) return bug_brk_handler(regs, esr); - if (IS_ENABLED(CONFIG_CFI_CLANG) && esr_is_cfi_brk(esr)) + if (IS_ENABLED(CONFIG_CFI) && esr_is_cfi_brk(esr)) return cfi_brk_handler(regs, esr); if (esr_brk_comment(esr) == FAULT_BRK_IMM) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index f528b6041f6a..5041817af267 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -1015,7 +1015,7 @@ int bug_brk_handler(struct pt_regs *regs, unsigned long esr) return DBG_HOOK_HANDLED; } -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI int cfi_brk_handler(struct pt_regs *regs, unsigned long esr) { unsigned long target; @@ -1039,7 +1039,7 @@ int cfi_brk_handler(struct pt_regs *regs, unsigned long esr) arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); return DBG_HOOK_HANDLED; } -#endif /* CONFIG_CFI_CLANG */ +#endif /* CONFIG_CFI */ int reserved_fault_brk_handler(struct pt_regs *regs, unsigned long esr) { diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index a598072f36d2..8bdb1eed090a 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -545,7 +545,7 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line); else print_nvhe_hyp_panic("BUG", panic_addr); - } else if (IS_ENABLED(CONFIG_CFI_CLANG) && esr_is_cfi_brk(esr)) { + } else if (IS_ENABLED(CONFIG_CFI) && esr_is_cfi_brk(esr)) { kvm_nvhe_report_cfi_failure(panic_addr); } else if (IS_ENABLED(CONFIG_UBSAN_KVM_EL2) && ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 && diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 52ffe115a8c4..28996e0a9b00 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -185,7 +185,7 @@ static inline void emit_bti(u32 insn, struct jit_ctx *ctx) static inline void emit_kcfi(u32 hash, struct jit_ctx *ctx) { - if (IS_ENABLED(CONFIG_CFI_CLANG)) + if (IS_ENABLED(CONFIG_CFI)) emit_u32_data(hash, ctx); } diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a4b233a0659e..6043ad82b73c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -60,7 +60,7 @@ config RISCV select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW # clang >= 17: https://github.com/llvm/llvm-project/commit/62fa708ceb027713b386c7e0efda994f8bdc27e2 - select ARCH_SUPPORTS_CFI_CLANG if CLANG_VERSION >= 170000 + select ARCH_SUPPORTS_CFI if (!CC_IS_CLANG || CLANG_VERSION >= 170000) select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_HUGETLBFS if MMU @@ -76,7 +76,7 @@ config RISCV select ARCH_USE_MEMTEST select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_SYM_ANNOTATIONS - select ARCH_USES_CFI_TRAPS if CFI_CLANG + select ARCH_USES_CFI_TRAPS if CFI select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if MMU select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS @@ -154,7 +154,7 @@ config RISCV select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && (CLANG_SUPPORTS_DYNAMIC_FTRACE || GCC_SUPPORTS_DYNAMIC_FTRACE) select FUNCTION_ALIGNMENT_4B if HAVE_DYNAMIC_FTRACE && RISCV_ISA_C select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS if HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS - select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if (DYNAMIC_FTRACE_WITH_ARGS && !CFI_CLANG) + select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if (DYNAMIC_FTRACE_WITH_ARGS && !CFI) select HAVE_DYNAMIC_FTRACE_WITH_ARGS if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_GRAPH_FUNC select HAVE_FUNCTION_GRAPH_TRACER if HAVE_DYNAMIC_FTRACE_WITH_ARGS diff --git a/arch/riscv/include/asm/cfi.h b/arch/riscv/include/asm/cfi.h index 4508aaa7a2fd..710aa8192edd 100644 --- a/arch/riscv/include/asm/cfi.h +++ b/arch/riscv/include/asm/cfi.h @@ -11,7 +11,7 @@ struct pt_regs; -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI enum bug_trap_type handle_cfi_failure(struct pt_regs *regs); #define __bpfcall #else @@ -19,6 +19,6 @@ static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs) { return BUG_TRAP_TYPE_NONE; } -#endif /* CONFIG_CFI_CLANG */ +#endif /* CONFIG_CFI */ #endif /* _ASM_RISCV_CFI_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index c7b542573407..f60fce69b725 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -113,7 +113,7 @@ obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o -obj-$(CONFIG_CFI_CLANG) += cfi.o +obj-$(CONFIG_CFI) += cfi.o obj-$(CONFIG_EFI) += efi.o obj-$(CONFIG_COMPAT) += compat_syscall_table.o diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 10e01ff06312..24ba546a1c0e 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -18,7 +18,7 @@ #define RV_MAX_REG_ARGS 8 #define RV_FENTRY_NINSNS 2 #define RV_FENTRY_NBYTES (RV_FENTRY_NINSNS * 4) -#define RV_KCFI_NINSNS (IS_ENABLED(CONFIG_CFI_CLANG) ? 1 : 0) +#define RV_KCFI_NINSNS (IS_ENABLED(CONFIG_CFI) ? 1 : 0) /* imm that allows emit_imm to emit max count insns */ #define RV_MAX_COUNT_IMM 0x7FFF7FF7FF7FF7FF @@ -469,7 +469,7 @@ static int emit_call(u64 addr, bool fixed_addr, struct rv_jit_context *ctx) static inline void emit_kcfi(u32 hash, struct rv_jit_context *ctx) { - if (IS_ENABLED(CONFIG_CFI_CLANG)) + if (IS_ENABLED(CONFIG_CFI)) emit(hash, ctx); } diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index 240592e3f5c2..530e497ca2f9 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -71,7 +71,7 @@ ifdef CONFIG_STACKPROTECTOR_STRONG PURGATORY_CFLAGS_REMOVE += -fstack-protector-strong endif -ifdef CONFIG_CFI_CLANG +ifdef CONFIG_CFI PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_CFI) endif diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 58d890fe2100..b6da2d37cfd1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -127,8 +127,8 @@ config X86 select ARCH_SUPPORTS_PAGE_TABLE_CHECK if X86_64 select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096 - select ARCH_SUPPORTS_CFI_CLANG if X86_64 - select ARCH_USES_CFI_TRAPS if X86_64 && CFI_CLANG + select ARCH_SUPPORTS_CFI if X86_64 + select ARCH_USES_CFI_TRAPS if X86_64 && CFI select ARCH_SUPPORTS_LTO_CLANG select ARCH_SUPPORTS_LTO_CLANG_THIN select ARCH_SUPPORTS_RT @@ -2396,11 +2396,11 @@ config FUNCTION_PADDING_CFI default 3 if FUNCTION_ALIGNMENT_8B default 0 -# Basically: FUNCTION_ALIGNMENT - 5*CFI_CLANG +# Basically: FUNCTION_ALIGNMENT - 5*CFI # except Kconfig can't do arithmetic :/ config FUNCTION_PADDING_BYTES int - default FUNCTION_PADDING_CFI if CFI_CLANG + default FUNCTION_PADDING_CFI if CFI default FUNCTION_ALIGNMENT config CALL_PADDING @@ -2410,7 +2410,7 @@ config CALL_PADDING config FINEIBT def_bool y - depends on X86_KERNEL_IBT && CFI_CLANG && MITIGATION_RETPOLINE + depends on X86_KERNEL_IBT && CFI && MITIGATION_RETPOLINE select CALL_PADDING config FINEIBT_BHI @@ -2427,7 +2427,7 @@ config CALL_THUNKS config PREFIX_SYMBOLS def_bool y - depends on CALL_PADDING && !CFI_CLANG + depends on CALL_PADDING && !CFI menuconfig CPU_MITIGATIONS bool "Mitigations for CPU vulnerabilities" diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h index 1751f1eb95ef..976b90a3d190 100644 --- a/arch/x86/include/asm/cfi.h +++ b/arch/x86/include/asm/cfi.h @@ -113,7 +113,7 @@ extern bhi_thunk __bhi_args_end[]; struct pt_regs; -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI enum bug_trap_type handle_cfi_failure(struct pt_regs *regs); #define __bpfcall @@ -157,7 +157,7 @@ static inline int cfi_get_func_arity(void *func) { return 0; } -#endif /* CONFIG_CFI_CLANG */ +#endif /* CONFIG_CFI */ #if HAS_KERNEL_IBT == 1 #define CFI_NOSEAL(x) asm(IBT_NOSEAL(__stringify(x))) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0d2a6d953be9..bc184dd38d99 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -148,7 +148,7 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o -obj-$(CONFIG_CFI_CLANG) += cfi.o +obj-$(CONFIG_CFI) += cfi.o obj-$(CONFIG_CALL_THUNKS) += callthunks.o diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 7bde68247b5f..79ae9cb50019 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -1170,7 +1170,7 @@ void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { } #ifdef CONFIG_CFI_AUTO_DEFAULT # define __CFI_DEFAULT CFI_AUTO -#elif defined(CONFIG_CFI_CLANG) +#elif defined(CONFIG_CFI) # define __CFI_DEFAULT CFI_KCFI #else # define __CFI_DEFAULT CFI_OFF @@ -1182,7 +1182,7 @@ enum cfi_mode cfi_mode __ro_after_init = __CFI_DEFAULT; bool cfi_bhi __ro_after_init = false; #endif -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI u32 cfi_get_func_hash(void *func) { u32 hash; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6079d15dab8c..3863d7709386 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -339,7 +339,7 @@ static bool can_probe(unsigned long paddr) if (is_exception_insn(&insn)) return false; - if (IS_ENABLED(CONFIG_CFI_CLANG)) { + if (IS_ENABLED(CONFIG_CFI)) { /* * The compiler generates the following instruction sequence * for indirect call checks and cfi.c decodes this; diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index e0a607a14e7e..5ce1d4263000 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -57,7 +57,7 @@ ifdef CONFIG_MITIGATION_RETPOLINE PURGATORY_CFLAGS_REMOVE += $(RETPOLINE_CFLAGS) endif -ifdef CONFIG_CFI_CLANG +ifdef CONFIG_CFI PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_CFI) endif diff --git a/drivers/misc/lkdtm/cfi.c b/drivers/misc/lkdtm/cfi.c index 6a33889d0902..c3971f7caa65 100644 --- a/drivers/misc/lkdtm/cfi.c +++ b/drivers/misc/lkdtm/cfi.c @@ -43,7 +43,7 @@ static void lkdtm_CFI_FORWARD_PROTO(void) lkdtm_indirect_call((void *)lkdtm_increment_int); pr_err("FAIL: survived mismatched prototype function call!\n"); - pr_expected_config(CONFIG_CFI_CLANG); + pr_expected_config(CONFIG_CFI); } /* diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ae2d2359b79e..a65a87366c48 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -157,7 +157,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) #define PATCHABLE_DISCARDS *(__patchable_function_entries) #endif -#ifndef CONFIG_ARCH_SUPPORTS_CFI_CLANG +#ifndef CONFIG_ARCH_SUPPORTS_CFI /* * Simply points to ftrace_stub, but with the proper protocol. * Defined by the linker script in linux/vmlinux.lds.h diff --git a/include/linux/cfi.h b/include/linux/cfi.h index 52a98886a455..1fd22ea6eba4 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -11,7 +11,7 @@ #include #include -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI extern bool cfi_warn; enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, @@ -52,7 +52,7 @@ static inline u32 cfi_get_func_hash(void *func) extern u32 cfi_bpf_hash; extern u32 cfi_bpf_subprog_hash; -#else /* CONFIG_CFI_CLANG */ +#else /* CONFIG_CFI */ static inline int cfi_get_offset(void) { return 0; } static inline u32 cfi_get_func_hash(void *func) { return 0; } @@ -60,7 +60,7 @@ static inline u32 cfi_get_func_hash(void *func) { return 0; } #define cfi_bpf_hash 0U #define cfi_bpf_subprog_hash 0U -#endif /* CONFIG_CFI_CLANG */ +#endif /* CONFIG_CFI */ #ifdef CONFIG_ARCH_USES_CFI_TRAPS bool is_cfi_trap(unsigned long addr); diff --git a/include/linux/cfi_types.h b/include/linux/cfi_types.h index 685f7181780f..a86af9bc8bdc 100644 --- a/include/linux/cfi_types.h +++ b/include/linux/cfi_types.h @@ -8,7 +8,7 @@ #ifdef __ASSEMBLY__ #include -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI /* * Use the __kcfi_typeid_ type identifier symbol to * annotate indirectly called assembly functions. The compiler emits @@ -29,12 +29,12 @@ #define SYM_TYPED_START(name, linkage, align...) \ SYM_TYPED_ENTRY(name, linkage, align) -#else /* CONFIG_CFI_CLANG */ +#else /* CONFIG_CFI */ #define SYM_TYPED_START(name, linkage, align...) \ SYM_START(name, linkage, align) -#endif /* CONFIG_CFI_CLANG */ +#endif /* CONFIG_CFI */ #ifndef SYM_TYPED_FUNC_START #define SYM_TYPED_FUNC_START(name) \ @@ -43,7 +43,7 @@ #else /* __ASSEMBLY__ */ -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI #define DEFINE_CFI_TYPE(name, func) \ /* \ * Force a reference to the function so the compiler generates \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 6f04a1d8c720..fb27da2221ee 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -248,7 +248,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #endif /* __KERNEL__ */ -#if defined(CONFIG_CFI_CLANG) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#if defined(CONFIG_CFI) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) /* * Force a reference to the external symbol so the compiler generates * __kcfi_typid. diff --git a/init/Kconfig b/init/Kconfig index 836320251219..67f10d8a33b7 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -2063,8 +2063,8 @@ config RUST depends on !GCC_PLUGIN_RANDSTRUCT depends on !RANDSTRUCT depends on !DEBUG_INFO_BTF || (PAHOLE_HAS_LANG_EXCLUDE && !LTO) - depends on !CFI_CLANG || HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC - select CFI_ICALL_NORMALIZE_INTEGERS if CFI_CLANG + depends on !CFI || HAVE_CFI_ICALL_NORMALIZE_INTEGERS_RUSTC + select CFI_ICALL_NORMALIZE_INTEGERS if CFI depends on !CALL_PADDING || RUSTC_VERSION >= 108100 depends on !KASAN_SW_TAGS depends on !(MITIGATION_RETHUNK && KASAN) || RUSTC_VERSION >= 108300 diff --git a/kernel/Makefile b/kernel/Makefile index c60623448235..27e0e6a33610 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -122,7 +122,7 @@ obj-$(CONFIG_KCSAN) += kcsan/ obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call_inline.o -obj-$(CONFIG_CFI_CLANG) += cfi.o +obj-$(CONFIG_CFI) += cfi.o obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/configs/hardening.config b/kernel/configs/hardening.config index 64caaf997fc0..7c3924614e01 100644 --- a/kernel/configs/hardening.config +++ b/kernel/configs/hardening.config @@ -93,8 +93,8 @@ CONFIG_SECCOMP_FILTER=y # Provides some protections against SYN flooding. CONFIG_SYN_COOKIES=y -# Enable Kernel Control Flow Integrity (currently Clang only). -CONFIG_CFI_CLANG=y +# Enable Kernel Control Flow Integrity. +CONFIG_CFI=y # CONFIG_CFI_PERMISSIVE is not set # Attack surface reduction: do not autoload TTY line disciplines. diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index 39278737bb68..2a1beebf1d37 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -460,6 +460,6 @@ config UNUSED_KSYMS_WHITELIST config MODULES_TREE_LOOKUP def_bool y - depends on PERF_EVENTS || TRACING || CFI_CLANG + depends on PERF_EVENTS || TRACING || CFI endif # MODULES diff --git a/kernel/module/tree_lookup.c b/kernel/module/tree_lookup.c index d3204c5c74eb..f8e8c126705c 100644 --- a/kernel/module/tree_lookup.c +++ b/kernel/module/tree_lookup.c @@ -14,7 +14,7 @@ * Use a latched RB-tree for __module_address(); this allows us to use * RCU lookups of the address from any context. * - * This is conditional on PERF_EVENTS || TRACING || CFI_CLANG because those can + * This is conditional on PERF_EVENTS || TRACING || CFI because those can * really hit __module_address() hard by doing a lot of stack unwinding; * potentially from NMI context. */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index dc0e0c6ed075..e3e69df19e78 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2894,7 +2894,7 @@ config FORTIFY_KUNIT_TEST config LONGEST_SYM_KUNIT_TEST tristate "Test the longest symbol possible" if !KUNIT_ALL_TESTS depends on KUNIT && KPROBES - depends on !PREFIX_SYMBOLS && !CFI_CLANG && !GCOV_KERNEL + depends on !PREFIX_SYMBOLS && !CFI && !GCOV_KERNEL default KUNIT_ALL_TESTS help Tests the longest symbol possible diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h index 6b8713675765..2e098274e45c 100644 --- a/tools/include/linux/cfi_types.h +++ b/tools/include/linux/cfi_types.h @@ -8,7 +8,7 @@ #ifdef __ASSEMBLY__ #include -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI /* * Use the __kcfi_typeid_ type identifier symbol to * annotate indirectly called assembly functions. The compiler emits @@ -29,12 +29,12 @@ #define SYM_TYPED_START(name, linkage, align...) \ SYM_TYPED_ENTRY(name, linkage, align) -#else /* CONFIG_CFI_CLANG */ +#else /* CONFIG_CFI */ #define SYM_TYPED_START(name, linkage, align...) \ SYM_START(name, linkage, align) -#endif /* CONFIG_CFI_CLANG */ +#endif /* CONFIG_CFI */ #ifndef SYM_TYPED_FUNC_START #define SYM_TYPED_FUNC_START(name) \ diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index 89979ca23c3f..34e2fdfe7300 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -120,7 +120,7 @@ #endif // In the kernel sources (include/linux/cfi_types.h), this has a different -// definition when CONFIG_CFI_CLANG is used, for tools/ just use the !clang +// definition when CONFIG_CFI is used, for tools/ just use the !cfi // definition: #ifndef SYM_TYPED_START #define SYM_TYPED_START(name, linkage, align...) \ -- cgit v1.2.3 From 2d965c1ae4135ed6f505661458f6dabd39488dac Mon Sep 17 00:00:00 2001 From: André Almeida Date: Thu, 25 Sep 2025 11:14:23 -0300 Subject: tools/nolibc: add stdbool.h to nolibc includes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise tests compiled with only "-include nolibc.h" will fail with "error: unknown type name 'bool'", even though a stdbool.h is available from nolibc. Fixes: ae1f550efc11 ("tools/nolibc: add stdbool.h header") Fixes: f2662ec26b26 ("selftests: kselftest: Create ksft_print_dbg_msg()") Reported-by: Mark Brown Closes: https://lore.kernel.org/lkml/833f5ae5-190e-47ec-9ad9-127ad166c80c@sirena.org.uk/ Signed-off-by: André Almeida [Thomas: add Fixes tags and massage commit message a bit] Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/nolibc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index c199ade200c2..d2f5aa085f8e 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -116,6 +116,7 @@ #include "sched.h" #include "signal.h" #include "unistd.h" +#include "stdbool.h" #include "stdio.h" #include "stdlib.h" #include "string.h" -- cgit v1.2.3 From c4fb7f0a79771dfd18838bfc5015650a9730e9c0 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 3 Sep 2025 14:59:53 +0200 Subject: tools/testing: Add support for changes to slab for sheaves The slab changes for sheaves requires more effort in the testing code. Unite all the kmem_cache work into the tools/include slab header for both the vma and maple tree testing. The vma test code also requires importing more #defines to allow for seamless use of the shared kmem_cache code. This adds the pthread header to the slab header in the tools directory to allow for the pthread_mutex in linux.c. Signed-off-by: Liam R. Howlett Reviewed-by: Suren Baghdasaryan Signed-off-by: Vlastimil Babka --- tools/include/linux/slab.h | 137 ++++++++++++++++++++++++++++++++++++-- tools/testing/shared/linux.c | 26 ++------ tools/testing/shared/maple-shim.c | 1 + tools/testing/vma/vma_internal.h | 92 +------------------------ 4 files changed, 142 insertions(+), 114 deletions(-) (limited to 'tools/include') diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h index c87051e2b26f..c5c5cc6db566 100644 --- a/tools/include/linux/slab.h +++ b/tools/include/linux/slab.h @@ -4,11 +4,31 @@ #include #include +#include -#define SLAB_PANIC 2 #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define kzalloc_node(size, flags, node) kmalloc(size, flags) +enum _slab_flag_bits { + _SLAB_KMALLOC, + _SLAB_HWCACHE_ALIGN, + _SLAB_PANIC, + _SLAB_TYPESAFE_BY_RCU, + _SLAB_ACCOUNT, + _SLAB_FLAGS_LAST_BIT +}; + +#define __SLAB_FLAG_BIT(nr) ((unsigned int __force)(1U << (nr))) +#define __SLAB_FLAG_UNUSED ((unsigned int __force)(0U)) + +#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN) +#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC) +#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU) +#ifdef CONFIG_MEMCG +# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT) +#else +# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED +#endif void *kmalloc(size_t size, gfp_t gfp); void kfree(void *p); @@ -23,6 +43,86 @@ enum slab_state { FULL }; +struct kmem_cache { + pthread_mutex_t lock; + unsigned int size; + unsigned int align; + unsigned int sheaf_capacity; + int nr_objs; + void *objs; + void (*ctor)(void *); + bool non_kernel_enabled; + unsigned int non_kernel; + unsigned long nr_allocated; + unsigned long nr_tallocated; + bool exec_callback; + void (*callback)(void *); + void *private; +}; + +struct kmem_cache_args { + /** + * @align: The required alignment for the objects. + * + * %0 means no specific alignment is requested. + */ + unsigned int align; + /** + * @sheaf_capacity: The maximum size of the sheaf. + */ + unsigned int sheaf_capacity; + /** + * @useroffset: Usercopy region offset. + * + * %0 is a valid offset, when @usersize is non-%0 + */ + unsigned int useroffset; + /** + * @usersize: Usercopy region size. + * + * %0 means no usercopy region is specified. + */ + unsigned int usersize; + /** + * @freeptr_offset: Custom offset for the free pointer + * in &SLAB_TYPESAFE_BY_RCU caches + * + * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer + * outside of the object. This might cause the object to grow in size. + * Cache creators that have a reason to avoid this can specify a custom + * free pointer offset in their struct where the free pointer will be + * placed. + * + * Note that placing the free pointer inside the object requires the + * caller to ensure that no fields are invalidated that are required to + * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for + * details). + * + * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset + * is specified, %use_freeptr_offset must be set %true. + * + * Note that @ctor currently isn't supported with custom free pointers + * as a @ctor requires an external free pointer. + */ + unsigned int freeptr_offset; + /** + * @use_freeptr_offset: Whether a @freeptr_offset is used. + */ + bool use_freeptr_offset; + /** + * @ctor: A constructor for the objects. + * + * The constructor is invoked for each object in a newly allocated slab + * page. It is the cache user's responsibility to free object in the + * same state as after calling the constructor, or deal appropriately + * with any differences between a freshly constructed and a reallocated + * object. + * + * %NULL means no constructor. + */ + void (*ctor)(void *); +}; + static inline void *kzalloc(size_t size, gfp_t gfp) { return kmalloc(size, gfp | __GFP_ZERO); @@ -37,9 +137,38 @@ static inline void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) } void kmem_cache_free(struct kmem_cache *cachep, void *objp); -struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, - unsigned int align, unsigned int flags, - void (*ctor)(void *)); + +struct kmem_cache * +__kmem_cache_create_args(const char *name, unsigned int size, + struct kmem_cache_args *args, unsigned int flags); + +/* If NULL is passed for @args, use this variant with default arguments. */ +static inline struct kmem_cache * +__kmem_cache_default_args(const char *name, unsigned int size, + struct kmem_cache_args *args, unsigned int flags) +{ + struct kmem_cache_args kmem_default_args = {}; + + return __kmem_cache_create_args(name, size, &kmem_default_args, flags); +} + +static inline struct kmem_cache * +__kmem_cache_create(const char *name, unsigned int size, unsigned int align, + unsigned int flags, void (*ctor)(void *)) +{ + struct kmem_cache_args kmem_args = { + .align = align, + .ctor = ctor, + }; + + return __kmem_cache_create_args(name, size, &kmem_args, flags); +} + +#define kmem_cache_create(__name, __object_size, __args, ...) \ + _Generic((__args), \ + struct kmem_cache_args *: __kmem_cache_create_args, \ + void *: __kmem_cache_default_args, \ + default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__) void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list); int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c index 0f97fb0d19e1..97b8412ccbb6 100644 --- a/tools/testing/shared/linux.c +++ b/tools/testing/shared/linux.c @@ -16,21 +16,6 @@ int nr_allocated; int preempt_count; int test_verbose; -struct kmem_cache { - pthread_mutex_t lock; - unsigned int size; - unsigned int align; - int nr_objs; - void *objs; - void (*ctor)(void *); - unsigned int non_kernel; - unsigned long nr_allocated; - unsigned long nr_tallocated; - bool exec_callback; - void (*callback)(void *); - void *private; -}; - void kmem_cache_set_callback(struct kmem_cache *cachep, void (*callback)(void *)) { cachep->callback = callback; @@ -234,23 +219,26 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, } struct kmem_cache * -kmem_cache_create(const char *name, unsigned int size, unsigned int align, - unsigned int flags, void (*ctor)(void *)) +__kmem_cache_create_args(const char *name, unsigned int size, + struct kmem_cache_args *args, + unsigned int flags) { struct kmem_cache *ret = malloc(sizeof(*ret)); pthread_mutex_init(&ret->lock, NULL); ret->size = size; - ret->align = align; + ret->align = args->align; + ret->sheaf_capacity = args->sheaf_capacity; ret->nr_objs = 0; ret->nr_allocated = 0; ret->nr_tallocated = 0; ret->objs = NULL; - ret->ctor = ctor; + ret->ctor = args->ctor; ret->non_kernel = 0; ret->exec_callback = false; ret->callback = NULL; ret->private = NULL; + return ret; } diff --git a/tools/testing/shared/maple-shim.c b/tools/testing/shared/maple-shim.c index 640df76f483e..9d7b74341566 100644 --- a/tools/testing/shared/maple-shim.c +++ b/tools/testing/shared/maple-shim.c @@ -3,5 +3,6 @@ /* Very simple shim around the maple tree. */ #include "maple-shared.h" +#include #include "../../../lib/maple_tree.c" diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 6b6e2b05918c..d5b87fa6a133 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -26,6 +26,7 @@ #include #include #include +#include extern unsigned long stack_guard_gap; #ifdef CONFIG_MMU @@ -509,65 +510,6 @@ struct pagetable_move_control { .len_in = len_, \ } -struct kmem_cache_args { - /** - * @align: The required alignment for the objects. - * - * %0 means no specific alignment is requested. - */ - unsigned int align; - /** - * @useroffset: Usercopy region offset. - * - * %0 is a valid offset, when @usersize is non-%0 - */ - unsigned int useroffset; - /** - * @usersize: Usercopy region size. - * - * %0 means no usercopy region is specified. - */ - unsigned int usersize; - /** - * @freeptr_offset: Custom offset for the free pointer - * in &SLAB_TYPESAFE_BY_RCU caches - * - * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer - * outside of the object. This might cause the object to grow in size. - * Cache creators that have a reason to avoid this can specify a custom - * free pointer offset in their struct where the free pointer will be - * placed. - * - * Note that placing the free pointer inside the object requires the - * caller to ensure that no fields are invalidated that are required to - * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for - * details). - * - * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset - * is specified, %use_freeptr_offset must be set %true. - * - * Note that @ctor currently isn't supported with custom free pointers - * as a @ctor requires an external free pointer. - */ - unsigned int freeptr_offset; - /** - * @use_freeptr_offset: Whether a @freeptr_offset is used. - */ - bool use_freeptr_offset; - /** - * @ctor: A constructor for the objects. - * - * The constructor is invoked for each object in a newly allocated slab - * page. It is the cache user's responsibility to free object in the - * same state as after calling the constructor, or deal appropriately - * with any differences between a freshly constructed and a reallocated - * object. - * - * %NULL means no constructor. - */ - void (*ctor)(void *); -}; - static inline void vma_iter_invalidate(struct vma_iterator *vmi) { mas_pause(&vmi->mas); @@ -652,38 +594,6 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) vma->vm_lock_seq = UINT_MAX; } -struct kmem_cache { - const char *name; - size_t object_size; - struct kmem_cache_args *args; -}; - -static inline struct kmem_cache *__kmem_cache_create(const char *name, - size_t object_size, - struct kmem_cache_args *args) -{ - struct kmem_cache *ret = malloc(sizeof(struct kmem_cache)); - - ret->name = name; - ret->object_size = object_size; - ret->args = args; - - return ret; -} - -#define kmem_cache_create(__name, __object_size, __args, ...) \ - __kmem_cache_create((__name), (__object_size), (__args)) - -static inline void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) -{ - return calloc(1, s->object_size); -} - -static inline void kmem_cache_free(struct kmem_cache *s, void *x) -{ - free(x); -} - /* * These are defined in vma.h, but sadly vm_stat_account() is referenced by * kernel/fork.c, so we have to these broadly available there, and temporarily -- cgit v1.2.3 From fdbebab19f147af6b1459c821bc11162911245fa Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 3 Sep 2025 15:00:00 +0200 Subject: tools/testing: Add support for prefilled slab sheafs Add the prefilled sheaf structs to the slab header and the associated functions to the testing/shared/linux.c file. Signed-off-by: Liam R. Howlett Reviewed-by: Suren Baghdasaryan Signed-off-by: Vlastimil Babka --- tools/include/linux/slab.h | 28 ++++++++++++++ tools/testing/shared/linux.c | 89 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) (limited to 'tools/include') diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h index c5c5cc6db566..94937a699402 100644 --- a/tools/include/linux/slab.h +++ b/tools/include/linux/slab.h @@ -123,6 +123,18 @@ struct kmem_cache_args { void (*ctor)(void *); }; +struct slab_sheaf { + union { + struct list_head barn_list; + /* only used for prefilled sheafs */ + unsigned int capacity; + }; + struct kmem_cache *cache; + unsigned int size; + int node; /* only used for rcu_sheaf */ + void *objects[]; +}; + static inline void *kzalloc(size_t size, gfp_t gfp) { return kmalloc(size, gfp | __GFP_ZERO); @@ -173,5 +185,21 @@ __kmem_cache_create(const char *name, unsigned int size, unsigned int align, void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list); int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, void **list); +struct slab_sheaf * +kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size); + +void * +kmem_cache_alloc_from_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf); + +void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf); +int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf **sheafp, unsigned int size); + +static inline unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf) +{ + return sheaf->size; +} #endif /* _TOOLS_SLAB_H */ diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c index 97b8412ccbb6..4ceff7969b78 100644 --- a/tools/testing/shared/linux.c +++ b/tools/testing/shared/linux.c @@ -137,6 +137,12 @@ void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list) if (kmalloc_verbose) pr_debug("Bulk free %p[0-%zu]\n", list, size - 1); + if (cachep->exec_callback) { + if (cachep->callback) + cachep->callback(cachep->private); + cachep->exec_callback = false; + } + pthread_mutex_lock(&cachep->lock); for (int i = 0; i < size; i++) kmem_cache_free_locked(cachep, list[i]); @@ -242,6 +248,89 @@ __kmem_cache_create_args(const char *name, unsigned int size, return ret; } +struct slab_sheaf * +kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size) +{ + struct slab_sheaf *sheaf; + unsigned int capacity; + + if (s->exec_callback) { + if (s->callback) + s->callback(s->private); + s->exec_callback = false; + } + + capacity = max(size, s->sheaf_capacity); + + sheaf = calloc(1, sizeof(*sheaf) + sizeof(void *) * capacity); + if (!sheaf) + return NULL; + + sheaf->cache = s; + sheaf->capacity = capacity; + sheaf->size = kmem_cache_alloc_bulk(s, gfp, size, sheaf->objects); + if (!sheaf->size) { + free(sheaf); + return NULL; + } + + return sheaf; +} + +int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf **sheafp, unsigned int size) +{ + struct slab_sheaf *sheaf = *sheafp; + int refill; + + if (sheaf->size >= size) + return 0; + + if (size > sheaf->capacity) { + sheaf = kmem_cache_prefill_sheaf(s, gfp, size); + if (!sheaf) + return -ENOMEM; + + kmem_cache_return_sheaf(s, gfp, *sheafp); + *sheafp = sheaf; + return 0; + } + + refill = kmem_cache_alloc_bulk(s, gfp, size - sheaf->size, + &sheaf->objects[sheaf->size]); + if (!refill) + return -ENOMEM; + + sheaf->size += refill; + return 0; +} + +void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf) +{ + if (sheaf->size) + kmem_cache_free_bulk(s, sheaf->size, &sheaf->objects[0]); + + free(sheaf); +} + +void * +kmem_cache_alloc_from_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf) +{ + void *obj; + + if (sheaf->size == 0) { + printf("Nothing left in sheaf!\n"); + return NULL; + } + + obj = sheaf->objects[--sheaf->size]; + sheaf->objects[sheaf->size] = NULL; + + return obj; +} + /* * Test the test infrastructure for kem_cache_alloc/free and bulk counterparts. */ -- cgit v1.2.3 From f38ce0209ab4553906b44bd1159e35c740a84161 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 5 Sep 2025 15:47:06 -0700 Subject: tools bitmap: Add missing asm-generic/bitsperlong.h include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit small_const_nbits is defined in asm-generic/bitsperlong.h which bitmap.h uses but doesn't include causing build failures in some build systems. Add the missing #include. Note the bitmap.h in tools has diverged from that of the kernel, so no changes are made there. Signed-off-by: Ian Rogers Acked-by: Yury Norov Cc: Adrian Hunter Cc: Alexander Shishkin Cc: André Almeida Cc: Daniel Borkmann Cc: Darren Hart Cc: David S. Miller Cc: Davidlohr Bueso Cc: Ido Schimmel Cc: Ingo Molnar Cc: Jakub Kicinski Cc: Jamal Hadi Salim Cc: Jason Xing Cc: Jiri Olsa Cc: Jonas Gottlieb Cc: Kan Liang Cc: Mark Rutland Cc: Maurice Lambert Cc: Namhyung Kim Cc: Paolo Abeni Cc: Peter Zijlstra Cc: Petr Machata Cc: Rasmus Villemoes Cc: Thomas Gleixner Cc: Yuyang Huang Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/bitmap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include') diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index d4d300040d01..0d992245c600 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -3,6 +3,7 @@ #define _TOOLS_LINUX_BITMAP_H #include +#include #include #include #include -- cgit v1.2.3 From 57a64919f21ef4bd620da7d2ed8c683d55413b16 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 5 Sep 2025 15:47:07 -0700 Subject: tools include: Replace tools linux/gfp_types.h with kernel version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the header gfp_types.h in tools points to the gfp_types.h in include/linux. This is a problem for tools like perf, since the tools header is supposed to be independent of the kernel headers. Therefore this patch copies the kernel header to the tools header and adds a header check. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: André Almeida Cc: Daniel Borkmann Cc: Darren Hart Cc: David S. Miller Cc: Davidlohr Bueso Cc: Ido Schimmel Cc: Ingo Molnar Cc: Jakub Kicinski Cc: Jamal Hadi Salim Cc: Jason Xing Cc: Jiri Olsa Cc: Jonas Gottlieb Cc: Kan Liang Cc: Mark Rutland Cc: Maurice Lambert Cc: Namhyung Kim Cc: Paolo Abeni Cc: Peter Zijlstra Cc: Petr Machata Cc: Rasmus Villemoes Cc: Thomas Gleixner Cc: Yury Norov Cc: Yuyang Huang Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/gfp_types.h | 393 +++++++++++++++++++++++++++++++++++++++- tools/perf/check-headers.sh | 1 + 2 files changed, 393 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/linux/gfp_types.h b/tools/include/linux/gfp_types.h index 5f9f1ed190a0..65db9349f905 100644 --- a/tools/include/linux/gfp_types.h +++ b/tools/include/linux/gfp_types.h @@ -1 +1,392 @@ -#include "../../../include/linux/gfp_types.h" +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GFP_TYPES_H +#define __LINUX_GFP_TYPES_H + +#include + +/* The typedef is in types.h but we want the documentation here */ +#if 0 +/** + * typedef gfp_t - Memory allocation flags. + * + * GFP flags are commonly used throughout Linux to indicate how memory + * should be allocated. The GFP acronym stands for get_free_pages(), + * the underlying memory allocation function. Not every GFP flag is + * supported by every function which may allocate memory. Most users + * will want to use a plain ``GFP_KERNEL``. + */ +typedef unsigned int __bitwise gfp_t; +#endif + +/* + * In case of changes, please don't forget to update + * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c + */ + +enum { + ___GFP_DMA_BIT, + ___GFP_HIGHMEM_BIT, + ___GFP_DMA32_BIT, + ___GFP_MOVABLE_BIT, + ___GFP_RECLAIMABLE_BIT, + ___GFP_HIGH_BIT, + ___GFP_IO_BIT, + ___GFP_FS_BIT, + ___GFP_ZERO_BIT, + ___GFP_UNUSED_BIT, /* 0x200u unused */ + ___GFP_DIRECT_RECLAIM_BIT, + ___GFP_KSWAPD_RECLAIM_BIT, + ___GFP_WRITE_BIT, + ___GFP_NOWARN_BIT, + ___GFP_RETRY_MAYFAIL_BIT, + ___GFP_NOFAIL_BIT, + ___GFP_NORETRY_BIT, + ___GFP_MEMALLOC_BIT, + ___GFP_COMP_BIT, + ___GFP_NOMEMALLOC_BIT, + ___GFP_HARDWALL_BIT, + ___GFP_THISNODE_BIT, + ___GFP_ACCOUNT_BIT, + ___GFP_ZEROTAGS_BIT, +#ifdef CONFIG_KASAN_HW_TAGS + ___GFP_SKIP_ZERO_BIT, + ___GFP_SKIP_KASAN_BIT, +#endif +#ifdef CONFIG_LOCKDEP + ___GFP_NOLOCKDEP_BIT, +#endif +#ifdef CONFIG_SLAB_OBJ_EXT + ___GFP_NO_OBJ_EXT_BIT, +#endif + ___GFP_LAST_BIT +}; + +/* Plain integer GFP bitmasks. Do not use this directly. */ +#define ___GFP_DMA BIT(___GFP_DMA_BIT) +#define ___GFP_HIGHMEM BIT(___GFP_HIGHMEM_BIT) +#define ___GFP_DMA32 BIT(___GFP_DMA32_BIT) +#define ___GFP_MOVABLE BIT(___GFP_MOVABLE_BIT) +#define ___GFP_RECLAIMABLE BIT(___GFP_RECLAIMABLE_BIT) +#define ___GFP_HIGH BIT(___GFP_HIGH_BIT) +#define ___GFP_IO BIT(___GFP_IO_BIT) +#define ___GFP_FS BIT(___GFP_FS_BIT) +#define ___GFP_ZERO BIT(___GFP_ZERO_BIT) +/* 0x200u unused */ +#define ___GFP_DIRECT_RECLAIM BIT(___GFP_DIRECT_RECLAIM_BIT) +#define ___GFP_KSWAPD_RECLAIM BIT(___GFP_KSWAPD_RECLAIM_BIT) +#define ___GFP_WRITE BIT(___GFP_WRITE_BIT) +#define ___GFP_NOWARN BIT(___GFP_NOWARN_BIT) +#define ___GFP_RETRY_MAYFAIL BIT(___GFP_RETRY_MAYFAIL_BIT) +#define ___GFP_NOFAIL BIT(___GFP_NOFAIL_BIT) +#define ___GFP_NORETRY BIT(___GFP_NORETRY_BIT) +#define ___GFP_MEMALLOC BIT(___GFP_MEMALLOC_BIT) +#define ___GFP_COMP BIT(___GFP_COMP_BIT) +#define ___GFP_NOMEMALLOC BIT(___GFP_NOMEMALLOC_BIT) +#define ___GFP_HARDWALL BIT(___GFP_HARDWALL_BIT) +#define ___GFP_THISNODE BIT(___GFP_THISNODE_BIT) +#define ___GFP_ACCOUNT BIT(___GFP_ACCOUNT_BIT) +#define ___GFP_ZEROTAGS BIT(___GFP_ZEROTAGS_BIT) +#ifdef CONFIG_KASAN_HW_TAGS +#define ___GFP_SKIP_ZERO BIT(___GFP_SKIP_ZERO_BIT) +#define ___GFP_SKIP_KASAN BIT(___GFP_SKIP_KASAN_BIT) +#else +#define ___GFP_SKIP_ZERO 0 +#define ___GFP_SKIP_KASAN 0 +#endif +#ifdef CONFIG_LOCKDEP +#define ___GFP_NOLOCKDEP BIT(___GFP_NOLOCKDEP_BIT) +#else +#define ___GFP_NOLOCKDEP 0 +#endif +#ifdef CONFIG_SLAB_OBJ_EXT +#define ___GFP_NO_OBJ_EXT BIT(___GFP_NO_OBJ_EXT_BIT) +#else +#define ___GFP_NO_OBJ_EXT 0 +#endif + +/* + * Physical address zone modifiers (see linux/mmzone.h - low four bits) + * + * Do not put any conditional on these. If necessary modify the definitions + * without the underscores and use them consistently. The definitions here may + * be used in bit comparisons. + */ +#define __GFP_DMA ((__force gfp_t)___GFP_DMA) +#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) +#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) +#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ +#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) + +/** + * DOC: Page mobility and placement hints + * + * Page mobility and placement hints + * --------------------------------- + * + * These flags provide hints about how mobile the page is. Pages with similar + * mobility are placed within the same pageblocks to minimise problems due + * to external fragmentation. + * + * %__GFP_MOVABLE (also a zone modifier) indicates that the page can be + * moved by page migration during memory compaction or can be reclaimed. + * + * %__GFP_RECLAIMABLE is used for slab allocations that specify + * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers. + * + * %__GFP_WRITE indicates the caller intends to dirty the page. Where possible, + * these pages will be spread between local zones to avoid all the dirty + * pages being in one zone (fair zone allocation policy). + * + * %__GFP_HARDWALL enforces the cpuset memory allocation policy. + * + * %__GFP_THISNODE forces the allocation to be satisfied from the requested + * node with no fallbacks or placement policy enforcements. + * + * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. + * + * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension. + */ +#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) +#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) +#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) +#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) +#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) +#define __GFP_NO_OBJ_EXT ((__force gfp_t)___GFP_NO_OBJ_EXT) + +/** + * DOC: Watermark modifiers + * + * Watermark modifiers -- controls access to emergency reserves + * ------------------------------------------------------------ + * + * %__GFP_HIGH indicates that the caller is high-priority and that granting + * the request is necessary before the system can make forward progress. + * For example creating an IO context to clean pages and requests + * from atomic context. + * + * %__GFP_MEMALLOC allows access to all memory. This should only be used when + * the caller guarantees the allocation will allow more memory to be freed + * very shortly e.g. process exiting or swapping. Users either should + * be the MM or co-ordinating closely with the VM (e.g. swap over NFS). + * Users of this flag have to be extremely careful to not deplete the reserve + * completely and implement a throttling mechanism which controls the + * consumption of the reserve based on the amount of freed memory. + * Usage of a pre-allocated pool (e.g. mempool) should be always considered + * before using this flag. + * + * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. + * This takes precedence over the %__GFP_MEMALLOC flag if both are set. + */ +#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) +#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) +#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) + +/** + * DOC: Reclaim modifiers + * + * Reclaim modifiers + * ----------------- + * Please note that all the following flags are only applicable to sleepable + * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them). + * + * %__GFP_IO can start physical IO. + * + * %__GFP_FS can call down to the low-level FS. Clearing the flag avoids the + * allocator recursing into the filesystem which might already be holding + * locks. + * + * %__GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim. + * This flag can be cleared to avoid unnecessary delays when a fallback + * option is available. + * + * %__GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when + * the low watermark is reached and have it reclaim pages until the high + * watermark is reached. A caller may wish to clear this flag when fallback + * options are available and the reclaim is likely to disrupt the system. The + * canonical example is THP allocation where a fallback is cheap but + * reclaim/compaction may cause indirect stalls. + * + * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. + * + * The default allocator behavior depends on the request size. We have a concept + * of so-called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER). + * !costly allocations are too essential to fail so they are implicitly + * non-failing by default (with some exceptions like OOM victims might fail so + * the caller still has to check for failures) while costly requests try to be + * not disruptive and back off even without invoking the OOM killer. + * The following three modifiers might be used to override some of these + * implicit rules. Please note that all of them must be used along with + * %__GFP_DIRECT_RECLAIM flag. + * + * %__GFP_NORETRY: The VM implementation will try only very lightweight + * memory direct reclaim to get some memory under memory pressure (thus + * it can sleep). It will avoid disruptive actions like OOM killer. The + * caller must handle the failure which is quite likely to happen under + * heavy memory pressure. The flag is suitable when failure can easily be + * handled at small cost, such as reduced throughput. + * + * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim + * procedures that have previously failed if there is some indication + * that progress has been made elsewhere. It can wait for other + * tasks to attempt high-level approaches to freeing memory such as + * compaction (which removes fragmentation) and page-out. + * There is still a definite limit to the number of retries, but it is + * a larger limit than with %__GFP_NORETRY. + * Allocations with this flag may fail, but only when there is + * genuinely little unused memory. While these allocations do not + * directly trigger the OOM killer, their failure indicates that + * the system is likely to need to use the OOM killer soon. The + * caller must handle failure, but can reasonably do so by failing + * a higher-level request, or completing it only in a much less + * efficient manner. + * If the allocation does fail, and the caller is in a position to + * free some non-essential memory, doing so could benefit the system + * as a whole. + * + * %__GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller + * cannot handle allocation failures. The allocation could block + * indefinitely but will never return with failure. Testing for + * failure is pointless. + * It _must_ be blockable and used together with __GFP_DIRECT_RECLAIM. + * It should _never_ be used in non-sleepable contexts. + * New users should be evaluated carefully (and the flag should be + * used only when there is no reasonable failure policy) but it is + * definitely preferable to use the flag rather than opencode endless + * loop around allocator. + * Allocating pages from the buddy with __GFP_NOFAIL and order > 1 is + * not supported. Please consider using kvmalloc() instead. + */ +#define __GFP_IO ((__force gfp_t)___GFP_IO) +#define __GFP_FS ((__force gfp_t)___GFP_FS) +#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ +#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ +#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL) +#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) +#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) + +/** + * DOC: Action modifiers + * + * Action modifiers + * ---------------- + * + * %__GFP_NOWARN suppresses allocation failure reports. + * + * %__GFP_COMP address compound page metadata. + * + * %__GFP_ZERO returns a zeroed page on success. + * + * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself + * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that + * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting + * memory tags at the same time as zeroing memory has minimal additional + * performance impact. + * + * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation. + * Used for userspace and vmalloc pages; the latter are unpoisoned by + * kasan_unpoison_vmalloc instead. For userspace pages, results in + * poisoning being skipped as well, see should_skip_kasan_poison for + * details. Only effective in HW_TAGS mode. + */ +#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) +#define __GFP_COMP ((__force gfp_t)___GFP_COMP) +#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) +#define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) +#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO) +#define __GFP_SKIP_KASAN ((__force gfp_t)___GFP_SKIP_KASAN) + +/* Disable lockdep for GFP context tracking */ +#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) + +/* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT ___GFP_LAST_BIT +#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) + +/** + * DOC: Useful GFP flag combinations + * + * Useful GFP flag combinations + * ---------------------------- + * + * Useful GFP flag combinations that are commonly used. It is recommended + * that subsystems start with one of these combinations and then set/clear + * %__GFP_FOO flags as necessary. + * + * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower + * watermark is applied to allow access to "atomic reserves". + * The current implementation doesn't support NMI and few other strict + * non-preemptive contexts (e.g. raw_spin_lock). The same applies to %GFP_NOWAIT. + * + * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires + * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim. + * + * %GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is + * accounted to kmemcg. + * + * %GFP_NOWAIT is for kernel allocations that should not stall for direct + * reclaim, start physical IO or use any filesystem callback. It is very + * likely to fail to allocate memory, even for very small allocations. + * + * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages + * that do not require the starting of any physical IO. + * Please try to avoid using this flag directly and instead use + * memalloc_noio_{save,restore} to mark the whole scope which cannot + * perform any IO with a short explanation why. All allocation requests + * will inherit GFP_NOIO implicitly. + * + * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. + * Please try to avoid using this flag directly and instead use + * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't + * recurse into the FS layer with a short explanation why. All allocation + * requests will inherit GFP_NOFS implicitly. + * + * %GFP_USER is for userspace allocations that also need to be directly + * accessibly by the kernel or hardware. It is typically used by hardware + * for buffers that are mapped to userspace (e.g. graphics) that hardware + * still must DMA to. cpuset limits are enforced for these allocations. + * + * %GFP_DMA exists for historical reasons and should be avoided where possible. + * The flags indicates that the caller requires that the lowest zone be + * used (%ZONE_DMA or 16M on x86-64). Ideally, this would be removed but + * it would require careful auditing as some users really require it and + * others use the flag to avoid lowmem reserves in %ZONE_DMA and treat the + * lowest zone as a type of emergency reserve. + * + * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit + * address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory + * because the DMA32 kmalloc cache array is not implemented. + * (Reason: there is no such user in kernel). + * + * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, + * do not need to be directly accessible by the kernel but that cannot + * move once in use. An example may be a hardware allocation that maps + * data directly into userspace but has no addressing limitations. + * + * %GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not + * need direct access to but can use kmap() when access is required. They + * are expected to be movable via page reclaim or page migration. Typically, + * pages on the LRU would also be allocated with %GFP_HIGHUSER_MOVABLE. + * + * %GFP_TRANSHUGE and %GFP_TRANSHUGE_LIGHT are used for THP allocations. They + * are compound allocations that will generally fail quickly if memory is not + * available and will not wake kswapd/kcompactd on failure. The _LIGHT + * version does not attempt reclaim/compaction at all and is by default used + * in page fault path, while the non-light is used by khugepaged. + */ +#define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM) +#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) +#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) +#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM | __GFP_NOWARN) +#define GFP_NOIO (__GFP_RECLAIM) +#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) +#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_DMA __GFP_DMA +#define GFP_DMA32 __GFP_DMA32 +#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) +#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | __GFP_SKIP_KASAN) +#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ + __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) +#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) + +#endif /* __LINUX_GFP_TYPES_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 0987062896bb..e733b6a2e1b8 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -23,6 +23,7 @@ declare -a FILES=( "include/linux/const.h" "include/vdso/const.h" "include/vdso/unaligned.h" + "include/linux/gfp_types.h" "include/linux/hash.h" "include/linux/list-sort.h" "include/uapi/linux/hw_breakpoint.h" -- cgit v1.2.3 From f0015d8149de3b54af04147527163422eb34ec00 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 5 Sep 2025 15:47:08 -0700 Subject: tools include: Add headers to make tools builds more hermetic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tools/lib/bpf/netlink.c depends on rtnetlink.h and genetlink.h (via nlattr.h) which then depends on if_addr.h. tools/bpf/bpftool/link.c depends on netfilter_arp.h which then depends on netfilter.h. Update check-headers.sh to keep these in sync. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: André Almeida Cc: Daniel Borkmann Cc: Darren Hart Cc: David S. Miller Cc: Davidlohr Bueso Cc: Ido Schimmel Cc: Ingo Molnar Cc: Jakub Kicinski Cc: Jamal Hadi Salim Cc: Jason Xing Cc: Jiri Olsa Cc: Jonas Gottlieb Cc: Kan Liang Cc: Mark Rutland Cc: Maurice Lambert Cc: Namhyung Kim Cc: Paolo Abeni Cc: Peter Zijlstra Cc: Petr Machata Cc: Rasmus Villemoes Cc: Thomas Gleixner Cc: Yury Norov Cc: Yuyang Huang Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/genetlink.h | 103 ++++ tools/include/uapi/linux/if_addr.h | 79 +++ tools/include/uapi/linux/neighbour.h | 229 +++++++++ tools/include/uapi/linux/netfilter.h | 80 +++ tools/include/uapi/linux/netfilter_arp.h | 23 + tools/include/uapi/linux/rtnetlink.h | 848 +++++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 8 +- 7 files changed, 1369 insertions(+), 1 deletion(-) create mode 100644 tools/include/uapi/linux/genetlink.h create mode 100644 tools/include/uapi/linux/if_addr.h create mode 100644 tools/include/uapi/linux/neighbour.h create mode 100644 tools/include/uapi/linux/netfilter.h create mode 100644 tools/include/uapi/linux/netfilter_arp.h create mode 100644 tools/include/uapi/linux/rtnetlink.h (limited to 'tools/include') diff --git a/tools/include/uapi/linux/genetlink.h b/tools/include/uapi/linux/genetlink.h new file mode 100644 index 000000000000..ddba3ca01e39 --- /dev/null +++ b/tools/include/uapi/linux/genetlink.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_GENERIC_NETLINK_H +#define _UAPI__LINUX_GENERIC_NETLINK_H + +#include +#include + +#define GENL_NAMSIZ 16 /* length of family name */ + +#define GENL_MIN_ID NLMSG_MIN_TYPE +#define GENL_MAX_ID 1023 + +struct genlmsghdr { + __u8 cmd; + __u8 version; + __u16 reserved; +}; + +#define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr)) + +#define GENL_ADMIN_PERM 0x01 +#define GENL_CMD_CAP_DO 0x02 +#define GENL_CMD_CAP_DUMP 0x04 +#define GENL_CMD_CAP_HASPOL 0x08 +#define GENL_UNS_ADMIN_PERM 0x10 + +/* + * List of reserved static generic netlink identifiers: + */ +#define GENL_ID_CTRL NLMSG_MIN_TYPE +#define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) +#define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) +/* must be last reserved + 1 */ +#define GENL_START_ALLOC (NLMSG_MIN_TYPE + 3) + +/************************************************************************** + * Controller + **************************************************************************/ + +enum { + CTRL_CMD_UNSPEC, + CTRL_CMD_NEWFAMILY, + CTRL_CMD_DELFAMILY, + CTRL_CMD_GETFAMILY, + CTRL_CMD_NEWOPS, + CTRL_CMD_DELOPS, + CTRL_CMD_GETOPS, + CTRL_CMD_NEWMCAST_GRP, + CTRL_CMD_DELMCAST_GRP, + CTRL_CMD_GETMCAST_GRP, /* unused */ + CTRL_CMD_GETPOLICY, + __CTRL_CMD_MAX, +}; + +#define CTRL_CMD_MAX (__CTRL_CMD_MAX - 1) + +enum { + CTRL_ATTR_UNSPEC, + CTRL_ATTR_FAMILY_ID, + CTRL_ATTR_FAMILY_NAME, + CTRL_ATTR_VERSION, + CTRL_ATTR_HDRSIZE, + CTRL_ATTR_MAXATTR, + CTRL_ATTR_OPS, + CTRL_ATTR_MCAST_GROUPS, + CTRL_ATTR_POLICY, + CTRL_ATTR_OP_POLICY, + CTRL_ATTR_OP, + __CTRL_ATTR_MAX, +}; + +#define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1) + +enum { + CTRL_ATTR_OP_UNSPEC, + CTRL_ATTR_OP_ID, + CTRL_ATTR_OP_FLAGS, + __CTRL_ATTR_OP_MAX, +}; + +#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1) + +enum { + CTRL_ATTR_MCAST_GRP_UNSPEC, + CTRL_ATTR_MCAST_GRP_NAME, + CTRL_ATTR_MCAST_GRP_ID, + __CTRL_ATTR_MCAST_GRP_MAX, +}; + +#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) + +enum { + CTRL_ATTR_POLICY_UNSPEC, + CTRL_ATTR_POLICY_DO, + CTRL_ATTR_POLICY_DUMP, + + __CTRL_ATTR_POLICY_DUMP_MAX, + CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1 +}; + +#define CTRL_ATTR_POLICY_MAX (__CTRL_ATTR_POLICY_DUMP_MAX - 1) + +#endif /* _UAPI__LINUX_GENERIC_NETLINK_H */ diff --git a/tools/include/uapi/linux/if_addr.h b/tools/include/uapi/linux/if_addr.h new file mode 100644 index 000000000000..aa7958b4e41d --- /dev/null +++ b/tools/include/uapi/linux/if_addr.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_IF_ADDR_H +#define _UAPI__LINUX_IF_ADDR_H + +#include +#include + +struct ifaddrmsg { + __u8 ifa_family; + __u8 ifa_prefixlen; /* The prefix length */ + __u8 ifa_flags; /* Flags */ + __u8 ifa_scope; /* Address scope */ + __u32 ifa_index; /* Link index */ +}; + +/* + * Important comment: + * IFA_ADDRESS is prefix address, rather than local interface address. + * It makes no difference for normally configured broadcast interfaces, + * but for point-to-point IFA_ADDRESS is DESTINATION address, + * local address is supplied in IFA_LOCAL attribute. + * + * IFA_FLAGS is a u32 attribute that extends the u8 field ifa_flags. + * If present, the value from struct ifaddrmsg will be ignored. + */ +enum { + IFA_UNSPEC, + IFA_ADDRESS, + IFA_LOCAL, + IFA_LABEL, + IFA_BROADCAST, + IFA_ANYCAST, + IFA_CACHEINFO, + IFA_MULTICAST, + IFA_FLAGS, + IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ + IFA_TARGET_NETNSID, + IFA_PROTO, /* u8, address protocol */ + __IFA_MAX, +}; + +#define IFA_MAX (__IFA_MAX - 1) + +/* ifa_flags */ +#define IFA_F_SECONDARY 0x01 +#define IFA_F_TEMPORARY IFA_F_SECONDARY + +#define IFA_F_NODAD 0x02 +#define IFA_F_OPTIMISTIC 0x04 +#define IFA_F_DADFAILED 0x08 +#define IFA_F_HOMEADDRESS 0x10 +#define IFA_F_DEPRECATED 0x20 +#define IFA_F_TENTATIVE 0x40 +#define IFA_F_PERMANENT 0x80 +#define IFA_F_MANAGETEMPADDR 0x100 +#define IFA_F_NOPREFIXROUTE 0x200 +#define IFA_F_MCAUTOJOIN 0x400 +#define IFA_F_STABLE_PRIVACY 0x800 + +struct ifa_cacheinfo { + __u32 ifa_prefered; + __u32 ifa_valid; + __u32 cstamp; /* created timestamp, hundredths of seconds */ + __u32 tstamp; /* updated timestamp, hundredths of seconds */ +}; + +/* backwards compatibility for userspace */ +#ifndef __KERNEL__ +#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg)))) +#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg)) +#endif + +/* ifa_proto */ +#define IFAPROT_UNSPEC 0 +#define IFAPROT_KERNEL_LO 1 /* loopback */ +#define IFAPROT_KERNEL_RA 2 /* set by kernel from router announcement */ +#define IFAPROT_KERNEL_LL 3 /* link-local set by kernel */ + +#endif diff --git a/tools/include/uapi/linux/neighbour.h b/tools/include/uapi/linux/neighbour.h new file mode 100644 index 000000000000..c34a81245f87 --- /dev/null +++ b/tools/include/uapi/linux/neighbour.h @@ -0,0 +1,229 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_NEIGHBOUR_H +#define _UAPI__LINUX_NEIGHBOUR_H + +#include +#include + +struct ndmsg { + __u8 ndm_family; + __u8 ndm_pad1; + __u16 ndm_pad2; + __s32 ndm_ifindex; + __u16 ndm_state; + __u8 ndm_flags; + __u8 ndm_type; +}; + +enum { + NDA_UNSPEC, + NDA_DST, + NDA_LLADDR, + NDA_CACHEINFO, + NDA_PROBES, + NDA_VLAN, + NDA_PORT, + NDA_VNI, + NDA_IFINDEX, + NDA_MASTER, + NDA_LINK_NETNSID, + NDA_SRC_VNI, + NDA_PROTOCOL, /* Originator of entry */ + NDA_NH_ID, + NDA_FDB_EXT_ATTRS, + NDA_FLAGS_EXT, + NDA_NDM_STATE_MASK, + NDA_NDM_FLAGS_MASK, + __NDA_MAX +}; + +#define NDA_MAX (__NDA_MAX - 1) + +/* + * Neighbor Cache Entry Flags + */ + +#define NTF_USE (1 << 0) +#define NTF_SELF (1 << 1) +#define NTF_MASTER (1 << 2) +#define NTF_PROXY (1 << 3) /* == ATF_PUBL */ +#define NTF_EXT_LEARNED (1 << 4) +#define NTF_OFFLOADED (1 << 5) +#define NTF_STICKY (1 << 6) +#define NTF_ROUTER (1 << 7) +/* Extended flags under NDA_FLAGS_EXT: */ +#define NTF_EXT_MANAGED (1 << 0) +#define NTF_EXT_LOCKED (1 << 1) +#define NTF_EXT_EXT_VALIDATED (1 << 2) + +/* + * Neighbor Cache Entry States. + */ + +#define NUD_INCOMPLETE 0x01 +#define NUD_REACHABLE 0x02 +#define NUD_STALE 0x04 +#define NUD_DELAY 0x08 +#define NUD_PROBE 0x10 +#define NUD_FAILED 0x20 + +/* Dummy states */ +#define NUD_NOARP 0x40 +#define NUD_PERMANENT 0x80 +#define NUD_NONE 0x00 + +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no + * address resolution or NUD. + * + * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true + * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier + * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED + * flagged entries explicitly are (which is also consistent with the routing + * subsystem). + * + * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry + * states don't make sense and thus are ignored. Such entries don't age and + * can roam. + * + * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf + * of a user space control plane, and automatically refreshed so that (if + * possible) they remain in NUD_REACHABLE state. + * + * NTF_EXT_LOCKED flagged bridge FDB entries are entries generated by the + * bridge in response to a host trying to communicate via a locked bridge port + * with MAB enabled. Their purpose is to notify user space that a host requires + * authentication. + * + * NTF_EXT_EXT_VALIDATED flagged neighbor entries were externally validated by + * a user space control plane. The kernel will not remove or invalidate them, + * but it can probe them and notify user space when they become reachable. + */ + +struct nda_cacheinfo { + __u32 ndm_confirmed; + __u32 ndm_used; + __u32 ndm_updated; + __u32 ndm_refcnt; +}; + +/***************************************************************** + * Neighbour tables specific messages. + * + * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the + * NLM_F_DUMP flag set. Every neighbour table configuration is + * spread over multiple messages to avoid running into message + * size limits on systems with many interfaces. The first message + * in the sequence transports all not device specific data such as + * statistics, configuration, and the default parameter set. + * This message is followed by 0..n messages carrying device + * specific parameter sets. + * Although the ordering should be sufficient, NDTA_NAME can be + * used to identify sequences. The initial message can be identified + * by checking for NDTA_CONFIG. The device specific messages do + * not contain this TLV but have NDTPA_IFINDEX set to the + * corresponding interface index. + * + * To change neighbour table attributes, send RTM_SETNEIGHTBL + * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], + * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked + * otherwise. Device specific parameter sets can be changed by + * setting NDTPA_IFINDEX to the interface index of the corresponding + * device. + ****/ + +struct ndt_stats { + __u64 ndts_allocs; + __u64 ndts_destroys; + __u64 ndts_hash_grows; + __u64 ndts_res_failed; + __u64 ndts_lookups; + __u64 ndts_hits; + __u64 ndts_rcv_probes_mcast; + __u64 ndts_rcv_probes_ucast; + __u64 ndts_periodic_gc_runs; + __u64 ndts_forced_gc_runs; + __u64 ndts_table_fulls; +}; + +enum { + NDTPA_UNSPEC, + NDTPA_IFINDEX, /* u32, unchangeable */ + NDTPA_REFCNT, /* u32, read-only */ + NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ + NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ + NDTPA_RETRANS_TIME, /* u64, msecs */ + NDTPA_GC_STALETIME, /* u64, msecs */ + NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ + NDTPA_QUEUE_LEN, /* u32 */ + NDTPA_APP_PROBES, /* u32 */ + NDTPA_UCAST_PROBES, /* u32 */ + NDTPA_MCAST_PROBES, /* u32 */ + NDTPA_ANYCAST_DELAY, /* u64, msecs */ + NDTPA_PROXY_DELAY, /* u64, msecs */ + NDTPA_PROXY_QLEN, /* u32 */ + NDTPA_LOCKTIME, /* u64, msecs */ + NDTPA_QUEUE_LENBYTES, /* u32 */ + NDTPA_MCAST_REPROBES, /* u32 */ + NDTPA_PAD, + NDTPA_INTERVAL_PROBE_TIME_MS, /* u64, msecs */ + __NDTPA_MAX +}; +#define NDTPA_MAX (__NDTPA_MAX - 1) + +struct ndtmsg { + __u8 ndtm_family; + __u8 ndtm_pad1; + __u16 ndtm_pad2; +}; + +struct ndt_config { + __u16 ndtc_key_len; + __u16 ndtc_entry_size; + __u32 ndtc_entries; + __u32 ndtc_last_flush; /* delta to now in msecs */ + __u32 ndtc_last_rand; /* delta to now in msecs */ + __u32 ndtc_hash_rnd; + __u32 ndtc_hash_mask; + __u32 ndtc_hash_chain_gc; + __u32 ndtc_proxy_qlen; +}; + +enum { + NDTA_UNSPEC, + NDTA_NAME, /* char *, unchangeable */ + NDTA_THRESH1, /* u32 */ + NDTA_THRESH2, /* u32 */ + NDTA_THRESH3, /* u32 */ + NDTA_CONFIG, /* struct ndt_config, read-only */ + NDTA_PARMS, /* nested TLV NDTPA_* */ + NDTA_STATS, /* struct ndt_stats, read-only */ + NDTA_GC_INTERVAL, /* u64, msecs */ + NDTA_PAD, + __NDTA_MAX +}; +#define NDTA_MAX (__NDTA_MAX - 1) + + /* FDB activity notification bits used in NFEA_ACTIVITY_NOTIFY: + * - FDB_NOTIFY_BIT - notify on activity/expire for any entry + * - FDB_NOTIFY_INACTIVE_BIT - mark as inactive to avoid multiple notifications + */ +enum { + FDB_NOTIFY_BIT = (1 << 0), + FDB_NOTIFY_INACTIVE_BIT = (1 << 1) +}; + +/* embedded into NDA_FDB_EXT_ATTRS: + * [NDA_FDB_EXT_ATTRS] = { + * [NFEA_ACTIVITY_NOTIFY] + * ... + * } + */ +enum { + NFEA_UNSPEC, + NFEA_ACTIVITY_NOTIFY, + NFEA_DONT_REFRESH, + __NFEA_MAX +}; +#define NFEA_MAX (__NFEA_MAX - 1) + +#endif diff --git a/tools/include/uapi/linux/netfilter.h b/tools/include/uapi/linux/netfilter.h new file mode 100644 index 000000000000..5a79ccb76701 --- /dev/null +++ b/tools/include/uapi/linux/netfilter.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_NETFILTER_H +#define _UAPI__LINUX_NETFILTER_H + +#include +#include +#include +#include + +/* Responses from hook functions. */ +#define NF_DROP 0 +#define NF_ACCEPT 1 +#define NF_STOLEN 2 +#define NF_QUEUE 3 +#define NF_REPEAT 4 +#define NF_STOP 5 /* Deprecated, for userspace nf_queue compatibility. */ +#define NF_MAX_VERDICT NF_STOP + +/* we overload the higher bits for encoding auxiliary data such as the queue + * number or errno values. Not nice, but better than additional function + * arguments. */ +#define NF_VERDICT_MASK 0x000000ff + +/* extra verdict flags have mask 0x0000ff00 */ +#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000 + +/* queue number (NF_QUEUE) or errno (NF_DROP) */ +#define NF_VERDICT_QMASK 0xffff0000 +#define NF_VERDICT_QBITS 16 + +#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE) + +#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP) + +/* only for userspace compatibility */ +#ifndef __KERNEL__ + +/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */ +#define NF_VERDICT_BITS 16 +#endif + +enum nf_inet_hooks { + NF_INET_PRE_ROUTING, + NF_INET_LOCAL_IN, + NF_INET_FORWARD, + NF_INET_LOCAL_OUT, + NF_INET_POST_ROUTING, + NF_INET_NUMHOOKS, + NF_INET_INGRESS = NF_INET_NUMHOOKS, +}; + +enum nf_dev_hooks { + NF_NETDEV_INGRESS, + NF_NETDEV_EGRESS, + NF_NETDEV_NUMHOOKS +}; + +enum { + NFPROTO_UNSPEC = 0, + NFPROTO_INET = 1, + NFPROTO_IPV4 = 2, + NFPROTO_ARP = 3, + NFPROTO_NETDEV = 5, + NFPROTO_BRIDGE = 7, + NFPROTO_IPV6 = 10, +#ifndef __KERNEL__ /* no longer supported by kernel */ + NFPROTO_DECNET = 12, +#endif + NFPROTO_NUMPROTO, +}; + +union nf_inet_addr { + __u32 all[4]; + __be32 ip; + __be32 ip6[4]; + struct in_addr in; + struct in6_addr in6; +}; + +#endif /* _UAPI__LINUX_NETFILTER_H */ diff --git a/tools/include/uapi/linux/netfilter_arp.h b/tools/include/uapi/linux/netfilter_arp.h new file mode 100644 index 000000000000..791dfc5ae907 --- /dev/null +++ b/tools/include/uapi/linux/netfilter_arp.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */ +#ifndef __LINUX_ARP_NETFILTER_H +#define __LINUX_ARP_NETFILTER_H + +/* ARP-specific defines for netfilter. + * (C)2002 Rusty Russell IBM -- This code is GPL. + */ + +#include + +/* There is no PF_ARP. */ +#define NF_ARP 0 + +/* ARP Hooks */ +#define NF_ARP_IN 0 +#define NF_ARP_OUT 1 +#define NF_ARP_FORWARD 2 + +#ifndef __KERNEL__ +#define NF_ARP_NUMHOOKS 3 +#endif + +#endif /* __LINUX_ARP_NETFILTER_H */ diff --git a/tools/include/uapi/linux/rtnetlink.h b/tools/include/uapi/linux/rtnetlink.h new file mode 100644 index 000000000000..dab9493c791b --- /dev/null +++ b/tools/include/uapi/linux/rtnetlink.h @@ -0,0 +1,848 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_RTNETLINK_H +#define _UAPI__LINUX_RTNETLINK_H + +#include +#include +#include +#include +#include + +/* rtnetlink families. Values up to 127 are reserved for real address + * families, values above 128 may be used arbitrarily. + */ +#define RTNL_FAMILY_IPMR 128 +#define RTNL_FAMILY_IP6MR 129 +#define RTNL_FAMILY_MAX 129 + +/**** + * Routing/neighbour discovery messages. + ****/ + +/* Types of messages */ + +enum { + RTM_BASE = 16, +#define RTM_BASE RTM_BASE + + RTM_NEWLINK = 16, +#define RTM_NEWLINK RTM_NEWLINK + RTM_DELLINK, +#define RTM_DELLINK RTM_DELLINK + RTM_GETLINK, +#define RTM_GETLINK RTM_GETLINK + RTM_SETLINK, +#define RTM_SETLINK RTM_SETLINK + + RTM_NEWADDR = 20, +#define RTM_NEWADDR RTM_NEWADDR + RTM_DELADDR, +#define RTM_DELADDR RTM_DELADDR + RTM_GETADDR, +#define RTM_GETADDR RTM_GETADDR + + RTM_NEWROUTE = 24, +#define RTM_NEWROUTE RTM_NEWROUTE + RTM_DELROUTE, +#define RTM_DELROUTE RTM_DELROUTE + RTM_GETROUTE, +#define RTM_GETROUTE RTM_GETROUTE + + RTM_NEWNEIGH = 28, +#define RTM_NEWNEIGH RTM_NEWNEIGH + RTM_DELNEIGH, +#define RTM_DELNEIGH RTM_DELNEIGH + RTM_GETNEIGH, +#define RTM_GETNEIGH RTM_GETNEIGH + + RTM_NEWRULE = 32, +#define RTM_NEWRULE RTM_NEWRULE + RTM_DELRULE, +#define RTM_DELRULE RTM_DELRULE + RTM_GETRULE, +#define RTM_GETRULE RTM_GETRULE + + RTM_NEWQDISC = 36, +#define RTM_NEWQDISC RTM_NEWQDISC + RTM_DELQDISC, +#define RTM_DELQDISC RTM_DELQDISC + RTM_GETQDISC, +#define RTM_GETQDISC RTM_GETQDISC + + RTM_NEWTCLASS = 40, +#define RTM_NEWTCLASS RTM_NEWTCLASS + RTM_DELTCLASS, +#define RTM_DELTCLASS RTM_DELTCLASS + RTM_GETTCLASS, +#define RTM_GETTCLASS RTM_GETTCLASS + + RTM_NEWTFILTER = 44, +#define RTM_NEWTFILTER RTM_NEWTFILTER + RTM_DELTFILTER, +#define RTM_DELTFILTER RTM_DELTFILTER + RTM_GETTFILTER, +#define RTM_GETTFILTER RTM_GETTFILTER + + RTM_NEWACTION = 48, +#define RTM_NEWACTION RTM_NEWACTION + RTM_DELACTION, +#define RTM_DELACTION RTM_DELACTION + RTM_GETACTION, +#define RTM_GETACTION RTM_GETACTION + + RTM_NEWPREFIX = 52, +#define RTM_NEWPREFIX RTM_NEWPREFIX + + RTM_NEWMULTICAST = 56, +#define RTM_NEWMULTICAST RTM_NEWMULTICAST + RTM_DELMULTICAST, +#define RTM_DELMULTICAST RTM_DELMULTICAST + RTM_GETMULTICAST, +#define RTM_GETMULTICAST RTM_GETMULTICAST + + RTM_NEWANYCAST = 60, +#define RTM_NEWANYCAST RTM_NEWANYCAST + RTM_DELANYCAST, +#define RTM_DELANYCAST RTM_DELANYCAST + RTM_GETANYCAST, +#define RTM_GETANYCAST RTM_GETANYCAST + + RTM_NEWNEIGHTBL = 64, +#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL + RTM_GETNEIGHTBL = 66, +#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL + RTM_SETNEIGHTBL, +#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL + + RTM_NEWNDUSEROPT = 68, +#define RTM_NEWNDUSEROPT RTM_NEWNDUSEROPT + + RTM_NEWADDRLABEL = 72, +#define RTM_NEWADDRLABEL RTM_NEWADDRLABEL + RTM_DELADDRLABEL, +#define RTM_DELADDRLABEL RTM_DELADDRLABEL + RTM_GETADDRLABEL, +#define RTM_GETADDRLABEL RTM_GETADDRLABEL + + RTM_GETDCB = 78, +#define RTM_GETDCB RTM_GETDCB + RTM_SETDCB, +#define RTM_SETDCB RTM_SETDCB + + RTM_NEWNETCONF = 80, +#define RTM_NEWNETCONF RTM_NEWNETCONF + RTM_DELNETCONF, +#define RTM_DELNETCONF RTM_DELNETCONF + RTM_GETNETCONF = 82, +#define RTM_GETNETCONF RTM_GETNETCONF + + RTM_NEWMDB = 84, +#define RTM_NEWMDB RTM_NEWMDB + RTM_DELMDB = 85, +#define RTM_DELMDB RTM_DELMDB + RTM_GETMDB = 86, +#define RTM_GETMDB RTM_GETMDB + + RTM_NEWNSID = 88, +#define RTM_NEWNSID RTM_NEWNSID + RTM_DELNSID = 89, +#define RTM_DELNSID RTM_DELNSID + RTM_GETNSID = 90, +#define RTM_GETNSID RTM_GETNSID + + RTM_NEWSTATS = 92, +#define RTM_NEWSTATS RTM_NEWSTATS + RTM_GETSTATS = 94, +#define RTM_GETSTATS RTM_GETSTATS + RTM_SETSTATS, +#define RTM_SETSTATS RTM_SETSTATS + + RTM_NEWCACHEREPORT = 96, +#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT + + RTM_NEWCHAIN = 100, +#define RTM_NEWCHAIN RTM_NEWCHAIN + RTM_DELCHAIN, +#define RTM_DELCHAIN RTM_DELCHAIN + RTM_GETCHAIN, +#define RTM_GETCHAIN RTM_GETCHAIN + + RTM_NEWNEXTHOP = 104, +#define RTM_NEWNEXTHOP RTM_NEWNEXTHOP + RTM_DELNEXTHOP, +#define RTM_DELNEXTHOP RTM_DELNEXTHOP + RTM_GETNEXTHOP, +#define RTM_GETNEXTHOP RTM_GETNEXTHOP + + RTM_NEWLINKPROP = 108, +#define RTM_NEWLINKPROP RTM_NEWLINKPROP + RTM_DELLINKPROP, +#define RTM_DELLINKPROP RTM_DELLINKPROP + RTM_GETLINKPROP, +#define RTM_GETLINKPROP RTM_GETLINKPROP + + RTM_NEWVLAN = 112, +#define RTM_NEWVLAN RTM_NEWVLAN + RTM_DELVLAN, +#define RTM_DELVLAN RTM_DELVLAN + RTM_GETVLAN, +#define RTM_GETVLAN RTM_GETVLAN + + RTM_NEWNEXTHOPBUCKET = 116, +#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET + RTM_DELNEXTHOPBUCKET, +#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET + RTM_GETNEXTHOPBUCKET, +#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET + + RTM_NEWTUNNEL = 120, +#define RTM_NEWTUNNEL RTM_NEWTUNNEL + RTM_DELTUNNEL, +#define RTM_DELTUNNEL RTM_DELTUNNEL + RTM_GETTUNNEL, +#define RTM_GETTUNNEL RTM_GETTUNNEL + + __RTM_MAX, +#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) +}; + +#define RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE) +#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2) +#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2) + +/* + Generic structure for encapsulation of optional route information. + It is reminiscent of sockaddr, but with sa_family replaced + with attribute type. + */ + +struct rtattr { + unsigned short rta_len; + unsigned short rta_type; +}; + +/* Macros to handle rtattributes */ + +#define RTA_ALIGNTO 4U +#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) ) +#define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \ + (rta)->rta_len >= sizeof(struct rtattr) && \ + (rta)->rta_len <= (len)) +#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \ + (struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len))) +#define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len)) +#define RTA_SPACE(len) RTA_ALIGN(RTA_LENGTH(len)) +#define RTA_DATA(rta) ((void*)(((char*)(rta)) + RTA_LENGTH(0))) +#define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0)) + + + + +/****************************************************************************** + * Definitions used in routing table administration. + ****/ + +struct rtmsg { + unsigned char rtm_family; + unsigned char rtm_dst_len; + unsigned char rtm_src_len; + unsigned char rtm_tos; + + unsigned char rtm_table; /* Routing table id */ + unsigned char rtm_protocol; /* Routing protocol; see below */ + unsigned char rtm_scope; /* See below */ + unsigned char rtm_type; /* See below */ + + unsigned rtm_flags; +}; + +/* rtm_type */ + +enum { + RTN_UNSPEC, + RTN_UNICAST, /* Gateway or direct route */ + RTN_LOCAL, /* Accept locally */ + RTN_BROADCAST, /* Accept locally as broadcast, + send as broadcast */ + RTN_ANYCAST, /* Accept locally as broadcast, + but send as unicast */ + RTN_MULTICAST, /* Multicast route */ + RTN_BLACKHOLE, /* Drop */ + RTN_UNREACHABLE, /* Destination is unreachable */ + RTN_PROHIBIT, /* Administratively prohibited */ + RTN_THROW, /* Not in this table */ + RTN_NAT, /* Translate this address */ + RTN_XRESOLVE, /* Use external resolver */ + __RTN_MAX +}; + +#define RTN_MAX (__RTN_MAX - 1) + + +/* rtm_protocol */ + +#define RTPROT_UNSPEC 0 +#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects; + not used by current IPv4 */ +#define RTPROT_KERNEL 2 /* Route installed by kernel */ +#define RTPROT_BOOT 3 /* Route installed during boot */ +#define RTPROT_STATIC 4 /* Route installed by administrator */ + +/* Values of protocol >= RTPROT_STATIC are not interpreted by kernel; + they are just passed from user and back as is. + It will be used by hypothetical multiple routing daemons. + Note that protocol values should be standardized in order to + avoid conflicts. + */ + +#define RTPROT_GATED 8 /* Apparently, GateD */ +#define RTPROT_RA 9 /* RDISC/ND router advertisements */ +#define RTPROT_MRT 10 /* Merit MRT */ +#define RTPROT_ZEBRA 11 /* Zebra */ +#define RTPROT_BIRD 12 /* BIRD */ +#define RTPROT_DNROUTED 13 /* DECnet routing daemon */ +#define RTPROT_XORP 14 /* XORP */ +#define RTPROT_NTK 15 /* Netsukuku */ +#define RTPROT_DHCP 16 /* DHCP client */ +#define RTPROT_MROUTED 17 /* Multicast daemon */ +#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */ +#define RTPROT_BABEL 42 /* Babel daemon */ +#define RTPROT_OVN 84 /* OVN daemon */ +#define RTPROT_OPENR 99 /* Open Routing (Open/R) Routes */ +#define RTPROT_BGP 186 /* BGP Routes */ +#define RTPROT_ISIS 187 /* ISIS Routes */ +#define RTPROT_OSPF 188 /* OSPF Routes */ +#define RTPROT_RIP 189 /* RIP Routes */ +#define RTPROT_EIGRP 192 /* EIGRP Routes */ + +/* rtm_scope + + Really it is not scope, but sort of distance to the destination. + NOWHERE are reserved for not existing destinations, HOST is our + local addresses, LINK are destinations, located on directly attached + link and UNIVERSE is everywhere in the Universe. + + Intermediate values are also possible f.e. interior routes + could be assigned a value between UNIVERSE and LINK. +*/ + +enum rt_scope_t { + RT_SCOPE_UNIVERSE=0, +/* User defined values */ + RT_SCOPE_SITE=200, + RT_SCOPE_LINK=253, + RT_SCOPE_HOST=254, + RT_SCOPE_NOWHERE=255 +}; + +/* rtm_flags */ + +#define RTM_F_NOTIFY 0x100 /* Notify user of route change */ +#define RTM_F_CLONED 0x200 /* This route is cloned */ +#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */ +#define RTM_F_PREFIX 0x800 /* Prefix addresses */ +#define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */ +#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */ +#define RTM_F_OFFLOAD 0x4000 /* route is offloaded */ +#define RTM_F_TRAP 0x8000 /* route is trapping packets */ +#define RTM_F_OFFLOAD_FAILED 0x20000000 /* route offload failed, this value + * is chosen to avoid conflicts with + * other flags defined in + * include/uapi/linux/ipv6_route.h + */ + +/* Reserved table identifiers */ + +enum rt_class_t { + RT_TABLE_UNSPEC=0, +/* User defined values */ + RT_TABLE_COMPAT=252, + RT_TABLE_DEFAULT=253, + RT_TABLE_MAIN=254, + RT_TABLE_LOCAL=255, + RT_TABLE_MAX=0xFFFFFFFF +}; + + +/* Routing message attributes */ + +enum rtattr_type_t { + RTA_UNSPEC, + RTA_DST, + RTA_SRC, + RTA_IIF, + RTA_OIF, + RTA_GATEWAY, + RTA_PRIORITY, + RTA_PREFSRC, + RTA_METRICS, + RTA_MULTIPATH, + RTA_PROTOINFO, /* no longer used */ + RTA_FLOW, + RTA_CACHEINFO, + RTA_SESSION, /* no longer used */ + RTA_MP_ALGO, /* no longer used */ + RTA_TABLE, + RTA_MARK, + RTA_MFC_STATS, + RTA_VIA, + RTA_NEWDST, + RTA_PREF, + RTA_ENCAP_TYPE, + RTA_ENCAP, + RTA_EXPIRES, + RTA_PAD, + RTA_UID, + RTA_TTL_PROPAGATE, + RTA_IP_PROTO, + RTA_SPORT, + RTA_DPORT, + RTA_NH_ID, + RTA_FLOWLABEL, + __RTA_MAX +}; + +#define RTA_MAX (__RTA_MAX - 1) + +#define RTM_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtmsg)))) +#define RTM_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct rtmsg)) + +/* RTM_MULTIPATH --- array of struct rtnexthop. + * + * "struct rtnexthop" describes all necessary nexthop information, + * i.e. parameters of path to a destination via this nexthop. + * + * At the moment it is impossible to set different prefsrc, mtu, window + * and rtt for different paths from multipath. + */ + +struct rtnexthop { + unsigned short rtnh_len; + unsigned char rtnh_flags; + unsigned char rtnh_hops; + int rtnh_ifindex; +}; + +/* rtnh_flags */ + +#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ +#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ +#define RTNH_F_ONLINK 4 /* Gateway is forced on link */ +#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */ +#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */ +#define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */ +#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */ + +#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \ + RTNH_F_OFFLOAD | RTNH_F_TRAP) + +/* Macros to handle hexthops */ + +#define RTNH_ALIGNTO 4 +#define RTNH_ALIGN(len) ( ((len)+RTNH_ALIGNTO-1) & ~(RTNH_ALIGNTO-1) ) +#define RTNH_OK(rtnh,len) ((rtnh)->rtnh_len >= sizeof(struct rtnexthop) && \ + ((int)(rtnh)->rtnh_len) <= (len)) +#define RTNH_NEXT(rtnh) ((struct rtnexthop*)(((char*)(rtnh)) + RTNH_ALIGN((rtnh)->rtnh_len))) +#define RTNH_LENGTH(len) (RTNH_ALIGN(sizeof(struct rtnexthop)) + (len)) +#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len)) +#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0))) + +/* RTA_VIA */ +struct rtvia { + __kernel_sa_family_t rtvia_family; + __u8 rtvia_addr[]; +}; + +/* RTM_CACHEINFO */ + +struct rta_cacheinfo { + __u32 rta_clntref; + __u32 rta_lastuse; + __s32 rta_expires; + __u32 rta_error; + __u32 rta_used; + +#define RTNETLINK_HAVE_PEERINFO 1 + __u32 rta_id; + __u32 rta_ts; + __u32 rta_tsage; +}; + +/* RTM_METRICS --- array of struct rtattr with types of RTAX_* */ + +enum { + RTAX_UNSPEC, +#define RTAX_UNSPEC RTAX_UNSPEC + RTAX_LOCK, +#define RTAX_LOCK RTAX_LOCK + RTAX_MTU, +#define RTAX_MTU RTAX_MTU + RTAX_WINDOW, +#define RTAX_WINDOW RTAX_WINDOW + RTAX_RTT, +#define RTAX_RTT RTAX_RTT + RTAX_RTTVAR, +#define RTAX_RTTVAR RTAX_RTTVAR + RTAX_SSTHRESH, +#define RTAX_SSTHRESH RTAX_SSTHRESH + RTAX_CWND, +#define RTAX_CWND RTAX_CWND + RTAX_ADVMSS, +#define RTAX_ADVMSS RTAX_ADVMSS + RTAX_REORDERING, +#define RTAX_REORDERING RTAX_REORDERING + RTAX_HOPLIMIT, +#define RTAX_HOPLIMIT RTAX_HOPLIMIT + RTAX_INITCWND, +#define RTAX_INITCWND RTAX_INITCWND + RTAX_FEATURES, +#define RTAX_FEATURES RTAX_FEATURES + RTAX_RTO_MIN, +#define RTAX_RTO_MIN RTAX_RTO_MIN + RTAX_INITRWND, +#define RTAX_INITRWND RTAX_INITRWND + RTAX_QUICKACK, +#define RTAX_QUICKACK RTAX_QUICKACK + RTAX_CC_ALGO, +#define RTAX_CC_ALGO RTAX_CC_ALGO + RTAX_FASTOPEN_NO_COOKIE, +#define RTAX_FASTOPEN_NO_COOKIE RTAX_FASTOPEN_NO_COOKIE + __RTAX_MAX +}; + +#define RTAX_MAX (__RTAX_MAX - 1) + +#define RTAX_FEATURE_ECN (1 << 0) +#define RTAX_FEATURE_SACK (1 << 1) /* unused */ +#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */ +#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */ +#define RTAX_FEATURE_TCP_USEC_TS (1 << 4) + +#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \ + RTAX_FEATURE_SACK | \ + RTAX_FEATURE_TIMESTAMP | \ + RTAX_FEATURE_ALLFRAG | \ + RTAX_FEATURE_TCP_USEC_TS) + +struct rta_session { + __u8 proto; + __u8 pad1; + __u16 pad2; + + union { + struct { + __u16 sport; + __u16 dport; + } ports; + + struct { + __u8 type; + __u8 code; + __u16 ident; + } icmpt; + + __u32 spi; + } u; +}; + +struct rta_mfc_stats { + __u64 mfcs_packets; + __u64 mfcs_bytes; + __u64 mfcs_wrong_if; +}; + +/**** + * General form of address family dependent message. + ****/ + +struct rtgenmsg { + unsigned char rtgen_family; +}; + +/***************************************************************** + * Link layer specific messages. + ****/ + +/* struct ifinfomsg + * passes link level specific information, not dependent + * on network protocol. + */ + +struct ifinfomsg { + unsigned char ifi_family; + unsigned char __ifi_pad; + unsigned short ifi_type; /* ARPHRD_* */ + int ifi_index; /* Link index */ + unsigned ifi_flags; /* IFF_* flags */ + unsigned ifi_change; /* IFF_* change mask */ +}; + +/******************************************************************** + * prefix information + ****/ + +struct prefixmsg { + unsigned char prefix_family; + unsigned char prefix_pad1; + unsigned short prefix_pad2; + int prefix_ifindex; + unsigned char prefix_type; + unsigned char prefix_len; + unsigned char prefix_flags; + unsigned char prefix_pad3; +}; + +enum +{ + PREFIX_UNSPEC, + PREFIX_ADDRESS, + PREFIX_CACHEINFO, + __PREFIX_MAX +}; + +#define PREFIX_MAX (__PREFIX_MAX - 1) + +struct prefix_cacheinfo { + __u32 preferred_time; + __u32 valid_time; +}; + + +/***************************************************************** + * Traffic control messages. + ****/ + +struct tcmsg { + unsigned char tcm_family; + unsigned char tcm__pad1; + unsigned short tcm__pad2; + int tcm_ifindex; + __u32 tcm_handle; + __u32 tcm_parent; +/* tcm_block_index is used instead of tcm_parent + * in case tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK + */ +#define tcm_block_index tcm_parent + __u32 tcm_info; +}; + +/* For manipulation of filters in shared block, tcm_ifindex is set to + * TCM_IFINDEX_MAGIC_BLOCK, and tcm_parent is aliased to tcm_block_index + * which is the block index. + */ +#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU) + +enum { + TCA_UNSPEC, + TCA_KIND, + TCA_OPTIONS, + TCA_STATS, + TCA_XSTATS, + TCA_RATE, + TCA_FCNT, + TCA_STATS2, + TCA_STAB, + TCA_PAD, + TCA_DUMP_INVISIBLE, + TCA_CHAIN, + TCA_HW_OFFLOAD, + TCA_INGRESS_BLOCK, + TCA_EGRESS_BLOCK, + TCA_DUMP_FLAGS, + TCA_EXT_WARN_MSG, + __TCA_MAX +}; + +#define TCA_MAX (__TCA_MAX - 1) + +#define TCA_DUMP_FLAGS_TERSE (1 << 0) /* Means that in dump user gets only basic + * data necessary to identify the objects + * (handle, cookie, etc.) and stats. + */ + +#define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) +#define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) + +/******************************************************************** + * Neighbor Discovery userland options + ****/ + +struct nduseroptmsg { + unsigned char nduseropt_family; + unsigned char nduseropt_pad1; + unsigned short nduseropt_opts_len; /* Total length of options */ + int nduseropt_ifindex; + __u8 nduseropt_icmp_type; + __u8 nduseropt_icmp_code; + unsigned short nduseropt_pad2; + unsigned int nduseropt_pad3; + /* Followed by one or more ND options */ +}; + +enum { + NDUSEROPT_UNSPEC, + NDUSEROPT_SRCADDR, + __NDUSEROPT_MAX +}; + +#define NDUSEROPT_MAX (__NDUSEROPT_MAX - 1) + +#ifndef __KERNEL__ +/* RTnetlink multicast groups - backwards compatibility for userspace */ +#define RTMGRP_LINK 1 +#define RTMGRP_NOTIFY 2 +#define RTMGRP_NEIGH 4 +#define RTMGRP_TC 8 + +#define RTMGRP_IPV4_IFADDR 0x10 +#define RTMGRP_IPV4_MROUTE 0x20 +#define RTMGRP_IPV4_ROUTE 0x40 +#define RTMGRP_IPV4_RULE 0x80 + +#define RTMGRP_IPV6_IFADDR 0x100 +#define RTMGRP_IPV6_MROUTE 0x200 +#define RTMGRP_IPV6_ROUTE 0x400 +#define RTMGRP_IPV6_IFINFO 0x800 + +#define RTMGRP_DECnet_IFADDR 0x1000 +#define RTMGRP_DECnet_ROUTE 0x4000 + +#define RTMGRP_IPV6_PREFIX 0x20000 +#endif + +/* RTnetlink multicast groups */ +enum rtnetlink_groups { + RTNLGRP_NONE, +#define RTNLGRP_NONE RTNLGRP_NONE + RTNLGRP_LINK, +#define RTNLGRP_LINK RTNLGRP_LINK + RTNLGRP_NOTIFY, +#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY + RTNLGRP_NEIGH, +#define RTNLGRP_NEIGH RTNLGRP_NEIGH + RTNLGRP_TC, +#define RTNLGRP_TC RTNLGRP_TC + RTNLGRP_IPV4_IFADDR, +#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR + RTNLGRP_IPV4_MROUTE, +#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE + RTNLGRP_IPV4_ROUTE, +#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE + RTNLGRP_IPV4_RULE, +#define RTNLGRP_IPV4_RULE RTNLGRP_IPV4_RULE + RTNLGRP_IPV6_IFADDR, +#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR + RTNLGRP_IPV6_MROUTE, +#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE + RTNLGRP_IPV6_ROUTE, +#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE + RTNLGRP_IPV6_IFINFO, +#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO + RTNLGRP_DECnet_IFADDR, +#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR + RTNLGRP_NOP2, + RTNLGRP_DECnet_ROUTE, +#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE + RTNLGRP_DECnet_RULE, +#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE + RTNLGRP_NOP4, + RTNLGRP_IPV6_PREFIX, +#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + RTNLGRP_IPV6_RULE, +#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE + RTNLGRP_ND_USEROPT, +#define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT + RTNLGRP_PHONET_IFADDR, +#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR + RTNLGRP_PHONET_ROUTE, +#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE + RTNLGRP_DCB, +#define RTNLGRP_DCB RTNLGRP_DCB + RTNLGRP_IPV4_NETCONF, +#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF + RTNLGRP_IPV6_NETCONF, +#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF + RTNLGRP_MDB, +#define RTNLGRP_MDB RTNLGRP_MDB + RTNLGRP_MPLS_ROUTE, +#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE + RTNLGRP_NSID, +#define RTNLGRP_NSID RTNLGRP_NSID + RTNLGRP_MPLS_NETCONF, +#define RTNLGRP_MPLS_NETCONF RTNLGRP_MPLS_NETCONF + RTNLGRP_IPV4_MROUTE_R, +#define RTNLGRP_IPV4_MROUTE_R RTNLGRP_IPV4_MROUTE_R + RTNLGRP_IPV6_MROUTE_R, +#define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_R + RTNLGRP_NEXTHOP, +#define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP + RTNLGRP_BRVLAN, +#define RTNLGRP_BRVLAN RTNLGRP_BRVLAN + RTNLGRP_MCTP_IFADDR, +#define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR + RTNLGRP_TUNNEL, +#define RTNLGRP_TUNNEL RTNLGRP_TUNNEL + RTNLGRP_STATS, +#define RTNLGRP_STATS RTNLGRP_STATS + RTNLGRP_IPV4_MCADDR, +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR + RTNLGRP_IPV6_MCADDR, +#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR + RTNLGRP_IPV6_ACADDR, +#define RTNLGRP_IPV6_ACADDR RTNLGRP_IPV6_ACADDR + __RTNLGRP_MAX +}; +#define RTNLGRP_MAX (__RTNLGRP_MAX - 1) + +/* TC action piece */ +struct tcamsg { + unsigned char tca_family; + unsigned char tca__pad1; + unsigned short tca__pad2; +}; + +enum { + TCA_ROOT_UNSPEC, + TCA_ROOT_TAB, +#define TCA_ACT_TAB TCA_ROOT_TAB +#define TCAA_MAX TCA_ROOT_TAB + TCA_ROOT_FLAGS, + TCA_ROOT_COUNT, + TCA_ROOT_TIME_DELTA, /* in msecs */ + TCA_ROOT_EXT_WARN_MSG, + __TCA_ROOT_MAX, +#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1) +}; + +#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg)))) +#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg)) +/* tcamsg flags stored in attribute TCA_ROOT_FLAGS + * + * TCA_ACT_FLAG_LARGE_DUMP_ON user->kernel to request for larger than + * TCA_ACT_MAX_PRIO actions in a dump. All dump responses will contain the + * number of actions being dumped stored in for user app's consumption in + * TCA_ROOT_COUNT + * + * TCA_ACT_FLAG_TERSE_DUMP user->kernel to request terse (brief) dump that only + * includes essential action info (kind, index, etc.) + * + */ +#define TCA_FLAG_LARGE_DUMP_ON (1 << 0) +#define TCA_ACT_FLAG_LARGE_DUMP_ON TCA_FLAG_LARGE_DUMP_ON +#define TCA_ACT_FLAG_TERSE_DUMP (1 << 1) + +/* New extended info filters for IFLA_EXT_MASK */ +#define RTEXT_FILTER_VF (1 << 0) +#define RTEXT_FILTER_BRVLAN (1 << 1) +#define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2) +#define RTEXT_FILTER_SKIP_STATS (1 << 3) +#define RTEXT_FILTER_MRP (1 << 4) +#define RTEXT_FILTER_CFM_CONFIG (1 << 5) +#define RTEXT_FILTER_CFM_STATUS (1 << 6) +#define RTEXT_FILTER_MST (1 << 7) + +/* End of information exported to user level */ + + + +#endif /* _UAPI__LINUX_RTNETLINK_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index e733b6a2e1b8..e0537f275da2 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -11,10 +11,16 @@ declare -a FILES=( "include/uapi/linux/bits.h" "include/uapi/linux/fadvise.h" "include/uapi/linux/fscrypt.h" + "include/uapi/linux/genetlink.h" + "include/uapi/linux/if_addr.h" + "include/uapi/linux/in.h" "include/uapi/linux/kcmp.h" "include/uapi/linux/kvm.h" - "include/uapi/linux/in.h" + "include/uapi/linux/neighbour.h" + "include/uapi/linux/netfilter.h" + "include/uapi/linux/netfilter_arp.h" "include/uapi/linux/perf_event.h" + "include/uapi/linux/rtnetlink.h" "include/uapi/linux/seccomp.h" "include/uapi/linux/stat.h" "include/linux/bits.h" -- cgit v1.2.3 From de7342228b7343774d6a9981c2ddbfb5e201044b Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Sat, 4 Oct 2025 22:23:29 +0800 Subject: bpf: Finish constification of 1st parameter of bpf_d_path() The commit 1b8abbb12128 ("bpf...d_path(): constify path argument") constified the first parameter of the bpf_d_path(), but failed to update it in all places. Finish constification. Otherwise the selftest fail to build: .../selftests/bpf/bpf_experimental.h:222:12: error: conflicting types for 'bpf_path_d_path' 222 | extern int bpf_path_d_path(const struct path *path, char *buf, size_t buf__sz) __ksym; | ^ .../selftests/bpf/tools/include/vmlinux.h:153922:12: note: previous declaration is here 153922 | extern int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz) __weak __ksym; Fixes: 1b8abbb12128 ("bpf...d_path(): constify path argument") Signed-off-by: Rong Tao Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 +- scripts/bpf_doc.py | 1 + tools/include/uapi/linux/bpf.h | 2 +- tools/testing/selftests/bpf/progs/verifier_vfs_accept.c | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ae83d8649ef1..6829936d33f5 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4891,7 +4891,7 @@ union bpf_attr { * * **-ENOENT** if the bpf_local_storage cannot be found. * - * long bpf_d_path(struct path *path, char *buf, u32 sz) + * long bpf_d_path(const struct path *path, char *buf, u32 sz) * Description * Return full path for given **struct path** object, which * needs to be the kernel BTF *path* object. The path is diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index c77dc40f7689..15d113a1bc1d 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -788,6 +788,7 @@ class PrinterHelpersHeader(Printer): 'struct task_struct', 'struct cgroup', 'struct path', + 'const struct path', 'struct btf_ptr', 'struct inode', 'struct socket', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index ae83d8649ef1..6829936d33f5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4891,7 +4891,7 @@ union bpf_attr { * * **-ENOENT** if the bpf_local_storage cannot be found. * - * long bpf_d_path(struct path *path, char *buf, u32 sz) + * long bpf_d_path(const struct path *path, char *buf, u32 sz) * Description * Return full path for given **struct path** object, which * needs to be the kernel BTF *path* object. The path is diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c index 3e2d76ee8050..55398c04290a 100644 --- a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c +++ b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c @@ -70,7 +70,7 @@ __success int BPF_PROG(path_d_path_from_file_argument, struct file *file) { int ret; - struct path *path; + const struct path *path; /* The f_path member is a path which is embedded directly within a * file. Therefore, a pointer to such embedded members are still -- cgit v1.2.3 From b157dd228cf0ee24b2414712abd82bd3a8d5b009 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Mon, 6 Oct 2025 22:51:32 +0000 Subject: tools headers: kcfi: rename missed CONFIG_CFI_CLANG Commit 23ef9d439769 ("kcfi: Rename CONFIG_CFI_CLANG to CONFIG_CFI") missed one instance of CONFIG_CFI_CLANG. Rename it to match the original kernel header. This addresses the following build warning: Warning: Kernel ABI header differences: diff -u tools/include/linux/cfi_types.h include/linux/cfi_types.h Cc: Kees Cook Fixes: a5ba183bdeee ("Merge tag 'hardening-v6.18-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux") Reviewed-by: Nathan Chancellor Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20251006225148.1636486-1-cmllamas@google.com Signed-off-by: Kees Cook --- tools/include/linux/cfi_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h index fb8d90bff92e..a86af9bc8bdc 100644 --- a/tools/include/linux/cfi_types.h +++ b/tools/include/linux/cfi_types.h @@ -43,7 +43,7 @@ #else /* __ASSEMBLY__ */ -#ifdef CONFIG_CFI_CLANG +#ifdef CONFIG_CFI #define DEFINE_CFI_TYPE(name, func) \ /* \ * Force a reference to the function so the compiler generates \ -- cgit v1.2.3 From f8950b47db707565543d6c4a9fdb7e36b3329722 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Nov 2025 10:33:26 -0300 Subject: tools headers UAPI: Update tools's copy of drm.h to pick DRM_IOCTL_GEM_CHANGE_HANDLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Picking the changes from: 0864197382fa7c8c ("drm: Move drm_gem ioctl kerneldoc to uapi file") 53096728b8910c69 ("drm: Add DRM prime interface to reassign GEM handle") Addressing these perf build warnings: Warning: Kernel ABI header differences: Now 'perf trace' and other code that might use the tools/perf/trace/beauty autogenerated tables will be able to translate this new ioctl command into a string: $ tools/perf/trace/beauty/drm_ioctl.sh > before $ cp include/uapi/drm/drm.h tools/include/uapi/drm/drm.h $ tools/perf/trace/beauty/drm_ioctl.sh > after $ diff -u before after --- before 2025-11-03 09:57:34.832553174 -0300 +++ after 2025-11-03 09:57:47.969409428 -0300 @@ -111,6 +111,7 @@ [0xCF] = "SYNCOBJ_EVENTFD", [0xD0] = "MODE_CLOSEFB", [0xD1] = "SET_CLIENT_NAME", + [0xD2] = "GEM_CHANGE_HANDLE", [DRM_COMMAND_BASE + 0x00] = "I915_INIT", [DRM_COMMAND_BASE + 0x01] = "I915_FLUSH", [DRM_COMMAND_BASE + 0x02] = "I915_FLIP", $ Please see tools/include/uapi/README for further details. Cc: Christian König Cc: David Francis Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 63 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 12 deletions(-) (limited to 'tools/include') diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index e63a71d3c607..3cd5cf15e3c9 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -597,34 +597,65 @@ struct drm_set_version { int drm_dd_minor; }; -/* DRM_IOCTL_GEM_CLOSE ioctl argument type */ +/** + * struct drm_gem_close - Argument for &DRM_IOCTL_GEM_CLOSE ioctl. + * @handle: Handle of the object to be closed. + * @pad: Padding. + * + * Releases the handle to an mm object. + */ struct drm_gem_close { - /** Handle of the object to be closed. */ __u32 handle; __u32 pad; }; -/* DRM_IOCTL_GEM_FLINK ioctl argument type */ +/** + * struct drm_gem_flink - Argument for &DRM_IOCTL_GEM_FLINK ioctl. + * @handle: Handle for the object being named. + * @name: Returned global name. + * + * Create a global name for an object, returning the name. + * + * Note that the name does not hold a reference; when the object + * is freed, the name goes away. + */ struct drm_gem_flink { - /** Handle for the object being named */ __u32 handle; - - /** Returned global name */ __u32 name; }; -/* DRM_IOCTL_GEM_OPEN ioctl argument type */ +/** + * struct drm_gem_open - Argument for &DRM_IOCTL_GEM_OPEN ioctl. + * @name: Name of object being opened. + * @handle: Returned handle for the object. + * @size: Returned size of the object + * + * Open an object using the global name, returning a handle and the size. + * + * This handle (of course) holds a reference to the object, so the object + * will not go away until the handle is deleted. + */ struct drm_gem_open { - /** Name of object being opened */ __u32 name; - - /** Returned handle for the object */ __u32 handle; - - /** Returned size of the object */ __u64 size; }; +/** + * struct drm_gem_change_handle - Argument for &DRM_IOCTL_GEM_CHANGE_HANDLE ioctl. + * @handle: The handle of a gem object. + * @new_handle: An available gem handle. + * + * This ioctl changes the handle of a GEM object to the specified one. + * The new handle must be unused. On success the old handle is closed + * and all further IOCTL should refer to the new handle only. + * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle. + */ +struct drm_gem_change_handle { + __u32 handle; + __u32 new_handle; +}; + /** * DRM_CAP_DUMB_BUFFER * @@ -1309,6 +1340,14 @@ extern "C" { */ #define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name) +/** + * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle + * + * Some applications (notably CRIU) need objects to have specific gem handles. + * This ioctl changes the object at one gem handle to use a new gem handle. + */ +#define DRM_IOCTL_GEM_CHANGE_HANDLE DRM_IOWR(0xD2, struct drm_gem_change_handle) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. -- cgit v1.2.3 From 29e4d12a2957e4e7dcad2418c7251f36285d99d8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Nov 2025 10:53:46 -0300 Subject: tools headers UAPI: Sync linux/kvm.h with the kernel sources To pick the changes in: fe2bf6234e947bf5 ("KVM: guest_memfd: Add INIT_SHARED flag, reject user page faults if not set") d2042d8f96ddefde ("KVM: Rework KVM_CAP_GUEST_MEMFD_MMAP into KVM_CAP_GUEST_MEMFD_FLAGS") 3d3a04fad25a6621 ("KVM: Allow and advertise support for host mmap() on guest_memfd files") That just rebuilds perf, as these patches don't add any new KVM ioctl to be harvested for the 'perf trace' ioctl syscall argument beautifiers. This addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Please see tools/include/uapi/README for further details. Cc: Sean Christopherson Cc: Fuad Tabba Cc: Paolo Bonzini Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index f0f0d49d2544..52f6000ab020 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -962,6 +962,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_EL2_E2H0 241 #define KVM_CAP_RISCV_MP_STATE_RESET 242 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 +#define KVM_CAP_GUEST_MEMFD_FLAGS 244 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1598,6 +1599,8 @@ struct kvm_memory_attributes { #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) +#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0) +#define GUEST_MEMFD_FLAG_INIT_SHARED (1ULL << 1) struct kvm_create_guest_memfd { __u64 size; -- cgit v1.2.3 From 549042f16716b4c72bfa9813d9e38f352c539dad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Nov 2025 13:35:06 -0300 Subject: tools headers asm: Sync fls headers header with the kernel sources To pick the changes in: 6606c8c7e8188656 ("bitops: Add __attribute_const__ to generic ffs()-family implementations") This addresses these tools build warnings: Warning: Kernel ABI header differences: diff -u tools/include/asm-generic/bitops/__fls.h include/asm-generic/bitops/__fls.h diff -u tools/include/asm-generic/bitops/fls.h include/asm-generic/bitops/fls.h diff -u tools/include/asm-generic/bitops/fls64.h include/asm-generic/bitops/fls64.h Please see tools/include/uapi/README for further details. Cc: Kees Cook Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/asm-generic/bitops/__fls.h | 2 +- tools/include/asm-generic/bitops/fls.h | 2 +- tools/include/asm-generic/bitops/fls64.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/include') diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h index e974ec932ec1..35f33780ca6c 100644 --- a/tools/include/asm-generic/bitops/__fls.h +++ b/tools/include/asm-generic/bitops/__fls.h @@ -10,7 +10,7 @@ * * Undefined if no set bit exists, so code should check against 0 first. */ -static __always_inline unsigned int generic___fls(unsigned long word) +static __always_inline __attribute_const__ unsigned int generic___fls(unsigned long word) { unsigned int num = BITS_PER_LONG - 1; diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h index 26f3ce1dd6e4..8eed3437edb9 100644 --- a/tools/include/asm-generic/bitops/fls.h +++ b/tools/include/asm-generic/bitops/fls.h @@ -10,7 +10,7 @@ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static __always_inline int generic_fls(unsigned int x) +static __always_inline __attribute_const__ int generic_fls(unsigned int x) { int r = 32; diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h index 866f2b2304ff..b5f58dd261a3 100644 --- a/tools/include/asm-generic/bitops/fls64.h +++ b/tools/include/asm-generic/bitops/fls64.h @@ -16,7 +16,7 @@ * at position 64. */ #if BITS_PER_LONG == 32 -static __always_inline int fls64(__u64 x) +static __always_inline __attribute_const__ int fls64(__u64 x) { __u32 h = x >> 32; if (h) @@ -24,7 +24,7 @@ static __always_inline int fls64(__u64 x) return fls(x); } #elif BITS_PER_LONG == 64 -static __always_inline int fls64(__u64 x) +static __always_inline __attribute_const__ int fls64(__u64 x) { if (x == 0) return 0; -- cgit v1.2.3