diff options
Diffstat (limited to 'tools/include')
93 files changed, 6507 insertions, 1238 deletions
diff --git a/tools/include/asm-generic/io.h b/tools/include/asm-generic/io.h new file mode 100644 index 000000000000..e5a0b07ad452 --- /dev/null +++ b/tools/include/asm-generic/io.h @@ -0,0 +1,482 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_ASM_GENERIC_IO_H +#define _TOOLS_ASM_GENERIC_IO_H + +#include <asm/barrier.h> +#include <asm/byteorder.h> + +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/types.h> + +#ifndef mmiowb_set_pending +#define mmiowb_set_pending() do { } while (0) +#endif + +#ifndef __io_br +#define __io_br() barrier() +#endif + +/* prevent prefetching of coherent DMA data ahead of a dma-complete */ +#ifndef __io_ar +#ifdef rmb +#define __io_ar(v) rmb() +#else +#define __io_ar(v) barrier() +#endif +#endif + +/* flush writes to coherent DMA data before possibly triggering a DMA read */ +#ifndef __io_bw +#ifdef wmb +#define __io_bw() wmb() +#else +#define __io_bw() barrier() +#endif +#endif + +/* serialize device access against a spin_unlock, usually handled there. */ +#ifndef __io_aw +#define __io_aw() mmiowb_set_pending() +#endif + +#ifndef __io_pbw +#define __io_pbw() __io_bw() +#endif + +#ifndef __io_paw +#define __io_paw() __io_aw() +#endif + +#ifndef __io_pbr +#define __io_pbr() __io_br() +#endif + +#ifndef __io_par +#define __io_par(v) __io_ar(v) +#endif + +#ifndef _THIS_IP_ +#define _THIS_IP_ 0 +#endif + +static inline void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_read_mmio(u8 width, const volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} +static inline void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr, + unsigned long caller_addr, unsigned long caller_addr0) {} + +/* + * __raw_{read,write}{b,w,l,q}() access memory in native endianness. + * + * On some architectures memory mapped IO needs to be accessed differently. + * On the simple architectures, we just read/write the memory location + * directly. + */ + +#ifndef __raw_readb +#define __raw_readb __raw_readb +static inline u8 __raw_readb(const volatile void __iomem *addr) +{ + return *(const volatile u8 __force *)addr; +} +#endif + +#ifndef __raw_readw +#define __raw_readw __raw_readw +static inline u16 __raw_readw(const volatile void __iomem *addr) +{ + return *(const volatile u16 __force *)addr; +} +#endif + +#ifndef __raw_readl +#define __raw_readl __raw_readl +static inline u32 __raw_readl(const volatile void __iomem *addr) +{ + return *(const volatile u32 __force *)addr; +} +#endif + +#ifndef __raw_readq +#define __raw_readq __raw_readq +static inline u64 __raw_readq(const volatile void __iomem *addr) +{ + return *(const volatile u64 __force *)addr; +} +#endif + +#ifndef __raw_writeb +#define __raw_writeb __raw_writeb +static inline void __raw_writeb(u8 value, volatile void __iomem *addr) +{ + *(volatile u8 __force *)addr = value; +} +#endif + +#ifndef __raw_writew +#define __raw_writew __raw_writew +static inline void __raw_writew(u16 value, volatile void __iomem *addr) +{ + *(volatile u16 __force *)addr = value; +} +#endif + +#ifndef __raw_writel +#define __raw_writel __raw_writel +static inline void __raw_writel(u32 value, volatile void __iomem *addr) +{ + *(volatile u32 __force *)addr = value; +} +#endif + +#ifndef __raw_writeq +#define __raw_writeq __raw_writeq +static inline void __raw_writeq(u64 value, volatile void __iomem *addr) +{ + *(volatile u64 __force *)addr = value; +} +#endif + +/* + * {read,write}{b,w,l,q}() access little endian memory and return result in + * native endianness. + */ + +#ifndef readb +#define readb readb +static inline u8 readb(const volatile void __iomem *addr) +{ + u8 val; + + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __raw_readb(addr); + __io_ar(val); + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readw +#define readw readw +static inline u16 readw(const volatile void __iomem *addr) +{ + u16 val; + + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); + __io_ar(val); + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readl +#define readl readl +static inline u32 readl(const volatile void __iomem *addr) +{ + u32 val; + + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); + __io_ar(val); + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readq +#define readq readq +static inline u64 readq(const volatile void __iomem *addr) +{ + u64 val; + + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); + __io_br(); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); + __io_ar(val); + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef writeb +#define writeb writeb +static inline void writeb(u8 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writeb(value, addr); + __io_aw(); + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writew +#define writew writew +static inline void writew(u16 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writew((u16 __force)cpu_to_le16(value), addr); + __io_aw(); + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writel +#define writel writel +static inline void writel(u32 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); + __io_aw(); + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writeq +#define writeq writeq +static inline void writeq(u64 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + __io_bw(); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); + __io_aw(); + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); +} +#endif + +/* + * {read,write}{b,w,l,q}_relaxed() are like the regular version, but + * are not guaranteed to provide ordering against spinlocks or memory + * accesses. + */ +#ifndef readb_relaxed +#define readb_relaxed readb_relaxed +static inline u8 readb_relaxed(const volatile void __iomem *addr) +{ + u8 val; + + log_read_mmio(8, addr, _THIS_IP_, _RET_IP_); + val = __raw_readb(addr); + log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readw_relaxed +#define readw_relaxed readw_relaxed +static inline u16 readw_relaxed(const volatile void __iomem *addr) +{ + u16 val; + + log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); + log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef readl_relaxed +#define readl_relaxed readl_relaxed +static inline u32 readl_relaxed(const volatile void __iomem *addr) +{ + u32 val; + + log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); + log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#if defined(readq) && !defined(readq_relaxed) +#define readq_relaxed readq_relaxed +static inline u64 readq_relaxed(const volatile void __iomem *addr) +{ + u64 val; + + log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); + log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); + return val; +} +#endif + +#ifndef writeb_relaxed +#define writeb_relaxed writeb_relaxed +static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); + __raw_writeb(value, addr); + log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writew_relaxed +#define writew_relaxed writew_relaxed +static inline void writew_relaxed(u16 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); + __raw_writew((u16 __force)cpu_to_le16(value), addr); + log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#ifndef writel_relaxed +#define writel_relaxed writel_relaxed +static inline void writel_relaxed(u32 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); + log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); +} +#endif + +#if defined(writeq) && !defined(writeq_relaxed) +#define writeq_relaxed writeq_relaxed +static inline void writeq_relaxed(u64 value, volatile void __iomem *addr) +{ + log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); + log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); +} +#endif + +/* + * {read,write}s{b,w,l,q}() repeatedly access the same memory address in + * native endianness in 8-, 16-, 32- or 64-bit chunks (@count times). + */ +#ifndef readsb +#define readsb readsb +static inline void readsb(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u8 *buf = buffer; + + do { + u8 x = __raw_readb(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef readsw +#define readsw readsw +static inline void readsw(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u16 *buf = buffer; + + do { + u16 x = __raw_readw(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef readsl +#define readsl readsl +static inline void readsl(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u32 *buf = buffer; + + do { + u32 x = __raw_readl(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef readsq +#define readsq readsq +static inline void readsq(const volatile void __iomem *addr, void *buffer, + unsigned int count) +{ + if (count) { + u64 *buf = buffer; + + do { + u64 x = __raw_readq(addr); + *buf++ = x; + } while (--count); + } +} +#endif + +#ifndef writesb +#define writesb writesb +static inline void writesb(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u8 *buf = buffer; + + do { + __raw_writeb(*buf++, addr); + } while (--count); + } +} +#endif + +#ifndef writesw +#define writesw writesw +static inline void writesw(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u16 *buf = buffer; + + do { + __raw_writew(*buf++, addr); + } while (--count); + } +} +#endif + +#ifndef writesl +#define writesl writesl +static inline void writesl(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u32 *buf = buffer; + + do { + __raw_writel(*buf++, addr); + } while (--count); + } +} +#endif + +#ifndef writesq +#define writesq writesq +static inline void writesq(volatile void __iomem *addr, const void *buffer, + unsigned int count) +{ + if (count) { + const u64 *buf = buffer; + + do { + __raw_writeq(*buf++, addr); + } while (--count); + } +} +#endif + +#endif /* _TOOLS_ASM_GENERIC_IO_H */ diff --git a/tools/include/asm/io.h b/tools/include/asm/io.h new file mode 100644 index 000000000000..eed5066f25c4 --- /dev/null +++ b/tools/include/asm/io.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_ASM_IO_H +#define _TOOLS_ASM_IO_H + +#if defined(__i386__) || defined(__x86_64__) +#include "../../arch/x86/include/asm/io.h" +#else +#include <asm-generic/io.h> +#endif + +#endif /* _TOOLS_ASM_IO_H */ diff --git a/tools/include/linux/args.h b/tools/include/linux/args.h new file mode 100644 index 000000000000..2e8e65d975c7 --- /dev/null +++ b/tools/include/linux/args.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_ARGS_H +#define _LINUX_ARGS_H + +/* + * How do these macros work? + * + * In __COUNT_ARGS() _0 to _12 are just placeholders from the start + * in order to make sure _n is positioned over the correct number + * from 12 to 0 (depending on X, which is a variadic argument list). + * They serve no purpose other than occupying a position. Since each + * macro parameter must have a distinct identifier, those identifiers + * are as good as any. + * + * In COUNT_ARGS() we use actual integers, so __COUNT_ARGS() returns + * that as _n. + */ + +/* This counts to 15. Any more, it will return 16th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +/* Concatenate two parameters, but allow them to be expanded beforehand. */ +#define __CONCAT(a, b) a ## b +#define CONCATENATE(a, b) __CONCAT(a, b) + +#endif /* _LINUX_ARGS_H */ diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h index 01907b33537e..50c66ba9ada5 100644 --- a/tools/include/linux/atomic.h +++ b/tools/include/linux/atomic.h @@ -12,4 +12,26 @@ void atomic_long_set(atomic_long_t *v, long i); #define atomic_cmpxchg_release atomic_cmpxchg #endif /* atomic_cmpxchg_relaxed */ +static inline bool atomic_try_cmpxchg(atomic_t *ptr, int *oldp, int new) +{ + int ret, old = *oldp; + + ret = atomic_cmpxchg(ptr, old, new); + if (ret != old) + *oldp = ret; + return ret == old; +} + +static inline bool atomic_inc_unless_negative(atomic_t *v) +{ + int c = atomic_read(v); + + do { + if (unlikely(c < 0)) + return false; + } while (!atomic_try_cmpxchg(v, &c, c + 1)); + + return true; +} + #endif /* __TOOLS_LINUX_ATOMIC_H */ diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index d4d300040d01..0d992245c600 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -3,6 +3,7 @@ #define _TOOLS_LINUX_BITMAP_H #include <string.h> +#include <asm-generic/bitsperlong.h> #include <linux/align.h> #include <linux/bitops.h> #include <linux/find.h> diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 8de2914e6510..a40cc861b3a7 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -2,16 +2,15 @@ #ifndef __LINUX_BITS_H #define __LINUX_BITS_H -#include <linux/const.h> #include <vdso/bits.h> #include <uapi/linux/bits.h> -#include <asm/bitsperlong.h> #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) #define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) #define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) #define BITS_PER_BYTE 8 +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) /* * Create a contiguous bitmask starting at bit position @l and ending at @@ -19,36 +18,72 @@ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ #if !defined(__ASSEMBLY__) + +/* + * Missing asm support + * + * GENMASK_U*() and BIT_U*() depend on BITS_PER_TYPE() which relies on sizeof(), + * something not available in asm. Nevertheless, fixed width integers is a C + * concept. Assembly code can rely on the long and long long versions instead. + */ + #include <linux/build_bug.h> -#define GENMASK_INPUT_CHECK(h, l) \ - (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \ - __is_constexpr((l) > (h)), (l) > (h), 0))) -#else +#include <linux/compiler.h> +#include <linux/overflow.h> + +#define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) + /* - * BUILD_BUG_ON_ZERO is not available in h files included from asm files, - * disable the input check if that is the case. + * Generate a mask for the specified type @t. Additional checks are made to + * guarantee the value returned fits in that type, relying on + * -Wshift-count-overflow compiler check to detect incompatible arguments. + * For example, all these create build errors or warnings: + * + * - GENMASK(15, 20): wrong argument order + * - GENMASK(72, 15): doesn't fit unsigned long + * - GENMASK_U32(33, 15): doesn't fit in a u32 */ -#define GENMASK_INPUT_CHECK(h, l) 0 -#endif +#define GENMASK_TYPE(t, h, l) \ + ((t)(GENMASK_INPUT_CHECK(h, l) + \ + (type_max(t) << (l) & \ + type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) -#define GENMASK(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) -#define GENMASK_ULL(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) +#define GENMASK(h, l) GENMASK_TYPE(unsigned long, h, l) +#define GENMASK_ULL(h, l) GENMASK_TYPE(unsigned long long, h, l) + +#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) +#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) +#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) +#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) +#define GENMASK_U128(h, l) GENMASK_TYPE(u128, h, l) -#if !defined(__ASSEMBLY__) /* - * Missing asm support + * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The + * following examples generate compiler warnings due to -Wshift-count-overflow: * - * __GENMASK_U128() depends on _BIT128() which would not work - * in the asm code, as it shifts an 'unsigned __int128' data - * type instead of direct representation of 128 bit constants - * such as long and unsigned long. The fundamental problem is - * that a 128 bit constant will get silently truncated by the - * gcc compiler. + * - BIT_U8(8) + * - BIT_U32(-1) + * - BIT_U32(40) */ -#define GENMASK_U128(h, l) \ - (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) -#endif +#define BIT_INPUT_CHECK(type, nr) \ + BUILD_BUG_ON_ZERO(const_true((nr) >= BITS_PER_TYPE(type))) + +#define BIT_TYPE(type, nr) ((type)(BIT_INPUT_CHECK(type, nr) + BIT_ULL(nr))) + +#define BIT_U8(nr) BIT_TYPE(u8, nr) +#define BIT_U16(nr) BIT_TYPE(u16, nr) +#define BIT_U32(nr) BIT_TYPE(u32, nr) +#define BIT_U64(nr) BIT_TYPE(u64, nr) + +#else /* defined(__ASSEMBLY__) */ + +/* + * BUILD_BUG_ON_ZERO is not available in h files included from asm files, + * disable the input check if that is the case. + */ +#define GENMASK(h, l) __GENMASK(h, l) +#define GENMASK_ULL(h, l) __GENMASK_ULL(h, l) + +#endif /* !defined(__ASSEMBLY__) */ #endif /* __LINUX_BITS_H */ diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index b4898ff085de..ab2aa97bd8ce 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -4,17 +4,17 @@ #include <linux/compiler.h> -#ifdef __CHECKER__ -#define BUILD_BUG_ON_ZERO(e) (0) -#else /* __CHECKER__ */ /* * Force a compilation error if condition is true, but also produce a * result (of value 0 and type int), so the expression can be used * e.g. in a structure initializer (or where-ever else comma expressions * aren't permitted). + * + * Take an error message as an optional second argument. If omitted, + * default to the stringification of the tested expression. */ -#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); }))) -#endif /* __CHECKER__ */ +#define BUILD_BUG_ON_ZERO(e, ...) \ + __BUILD_BUG_ON_ZERO_MSG(e, ##__VA_ARGS__, #e " is true") /* Force a compilation error if a constant expression is not a power of 2 */ #define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h new file mode 100644 index 000000000000..a86af9bc8bdc --- /dev/null +++ b/tools/include/linux/cfi_types.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Clang Control Flow Integrity (CFI) type definitions. + */ +#ifndef _LINUX_CFI_TYPES_H +#define _LINUX_CFI_TYPES_H + +#ifdef __ASSEMBLY__ +#include <linux/linkage.h> + +#ifdef CONFIG_CFI +/* + * Use the __kcfi_typeid_<function> type identifier symbol to + * annotate indirectly called assembly functions. The compiler emits + * these symbols for all address-taken function declarations in C + * code. + */ +#ifndef __CFI_TYPE +#define __CFI_TYPE(name) \ + .4byte __kcfi_typeid_##name +#endif + +#define SYM_TYPED_ENTRY(name, linkage, align...) \ + linkage(name) ASM_NL \ + align ASM_NL \ + __CFI_TYPE(name) ASM_NL \ + name: + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_TYPED_ENTRY(name, linkage, align) + +#else /* CONFIG_CFI */ + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_START(name, linkage, align) + +#endif /* CONFIG_CFI */ + +#ifndef SYM_TYPED_FUNC_START +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_CFI +#define DEFINE_CFI_TYPE(name, func) \ + /* \ + * Force a reference to the function so the compiler generates \ + * __kcfi_typeid_<func>. \ + */ \ + __ADDRESSABLE(func); \ + /* u32 name __ro_after_init = __kcfi_typeid_<func> */ \ + extern u32 name; \ + asm ( \ + " .pushsection .data..ro_after_init,\"aw\",\%progbits \n" \ + " .type " #name ",\%object \n" \ + " .globl " #name " \n" \ + " .p2align 2, 0x0 \n" \ + #name ": \n" \ + " .4byte __kcfi_typeid_" #func " \n" \ + " .size " #name ", 4 \n" \ + " .popsection \n" \ + ); +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _LINUX_CFI_TYPES_H */ diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 9c05a59f0184..f40bd2b04c29 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -81,6 +81,28 @@ #define __is_constexpr(x) \ (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) +/* + * Similar to statically_true() but produces a constant expression + * + * To be used in conjunction with macros, such as BUILD_BUG_ON_ZERO(), + * which require their input to be a constant expression and for which + * statically_true() would otherwise fail. + * + * This is a trade-off: const_true() requires all its operands to be + * compile time constants. Else, it would always returns false even on + * the most trivial cases like: + * + * true || non_const_var + * + * On the opposite, statically_true() is able to fold more complex + * tautologies and will return true on expressions such as: + * + * !(non_const_var * 8 % 4) + * + * For the general case, statically_true() is better. + */ +#define const_true(x) __builtin_choose_expr(__is_constexpr(x), x, false) + #ifdef __ANDROID__ /* * FIXME: Big hammer to get rid of tons of: @@ -116,6 +138,10 @@ # define __force #endif +#ifndef __iomem +# define __iomem +#endif + #ifndef __weak # define __weak __attribute__((weak)) #endif @@ -222,6 +248,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __asm__ ("" : "=r" (var) : "0" (var)) #endif +#ifndef __BUILD_BUG_ON_ZERO_MSG +#if defined(__clang__) +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)(sizeof(struct { int:(-!!(e)); }))) +#else +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)sizeof(struct {_Static_assert(!(e), msg);})) +#endif +#endif + #endif /* __ASSEMBLY__ */ #endif /* _TOOLS_LINUX_COMPILER_H */ diff --git a/tools/include/linux/gfp_types.h b/tools/include/linux/gfp_types.h index 5f9f1ed190a0..65db9349f905 100644 --- a/tools/include/linux/gfp_types.h +++ b/tools/include/linux/gfp_types.h @@ -1 +1,392 @@ -#include "../../../include/linux/gfp_types.h" +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_GFP_TYPES_H +#define __LINUX_GFP_TYPES_H + +#include <linux/bits.h> + +/* The typedef is in types.h but we want the documentation here */ +#if 0 +/** + * typedef gfp_t - Memory allocation flags. + * + * GFP flags are commonly used throughout Linux to indicate how memory + * should be allocated. The GFP acronym stands for get_free_pages(), + * the underlying memory allocation function. Not every GFP flag is + * supported by every function which may allocate memory. Most users + * will want to use a plain ``GFP_KERNEL``. + */ +typedef unsigned int __bitwise gfp_t; +#endif + +/* + * In case of changes, please don't forget to update + * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c + */ + +enum { + ___GFP_DMA_BIT, + ___GFP_HIGHMEM_BIT, + ___GFP_DMA32_BIT, + ___GFP_MOVABLE_BIT, + ___GFP_RECLAIMABLE_BIT, + ___GFP_HIGH_BIT, + ___GFP_IO_BIT, + ___GFP_FS_BIT, + ___GFP_ZERO_BIT, + ___GFP_UNUSED_BIT, /* 0x200u unused */ + ___GFP_DIRECT_RECLAIM_BIT, + ___GFP_KSWAPD_RECLAIM_BIT, + ___GFP_WRITE_BIT, + ___GFP_NOWARN_BIT, + ___GFP_RETRY_MAYFAIL_BIT, + ___GFP_NOFAIL_BIT, + ___GFP_NORETRY_BIT, + ___GFP_MEMALLOC_BIT, + ___GFP_COMP_BIT, + ___GFP_NOMEMALLOC_BIT, + ___GFP_HARDWALL_BIT, + ___GFP_THISNODE_BIT, + ___GFP_ACCOUNT_BIT, + ___GFP_ZEROTAGS_BIT, +#ifdef CONFIG_KASAN_HW_TAGS + ___GFP_SKIP_ZERO_BIT, + ___GFP_SKIP_KASAN_BIT, +#endif +#ifdef CONFIG_LOCKDEP + ___GFP_NOLOCKDEP_BIT, +#endif +#ifdef CONFIG_SLAB_OBJ_EXT + ___GFP_NO_OBJ_EXT_BIT, +#endif + ___GFP_LAST_BIT +}; + +/* Plain integer GFP bitmasks. Do not use this directly. */ +#define ___GFP_DMA BIT(___GFP_DMA_BIT) +#define ___GFP_HIGHMEM BIT(___GFP_HIGHMEM_BIT) +#define ___GFP_DMA32 BIT(___GFP_DMA32_BIT) +#define ___GFP_MOVABLE BIT(___GFP_MOVABLE_BIT) +#define ___GFP_RECLAIMABLE BIT(___GFP_RECLAIMABLE_BIT) +#define ___GFP_HIGH BIT(___GFP_HIGH_BIT) +#define ___GFP_IO BIT(___GFP_IO_BIT) +#define ___GFP_FS BIT(___GFP_FS_BIT) +#define ___GFP_ZERO BIT(___GFP_ZERO_BIT) +/* 0x200u unused */ +#define ___GFP_DIRECT_RECLAIM BIT(___GFP_DIRECT_RECLAIM_BIT) +#define ___GFP_KSWAPD_RECLAIM BIT(___GFP_KSWAPD_RECLAIM_BIT) +#define ___GFP_WRITE BIT(___GFP_WRITE_BIT) +#define ___GFP_NOWARN BIT(___GFP_NOWARN_BIT) +#define ___GFP_RETRY_MAYFAIL BIT(___GFP_RETRY_MAYFAIL_BIT) +#define ___GFP_NOFAIL BIT(___GFP_NOFAIL_BIT) +#define ___GFP_NORETRY BIT(___GFP_NORETRY_BIT) +#define ___GFP_MEMALLOC BIT(___GFP_MEMALLOC_BIT) +#define ___GFP_COMP BIT(___GFP_COMP_BIT) +#define ___GFP_NOMEMALLOC BIT(___GFP_NOMEMALLOC_BIT) +#define ___GFP_HARDWALL BIT(___GFP_HARDWALL_BIT) +#define ___GFP_THISNODE BIT(___GFP_THISNODE_BIT) +#define ___GFP_ACCOUNT BIT(___GFP_ACCOUNT_BIT) +#define ___GFP_ZEROTAGS BIT(___GFP_ZEROTAGS_BIT) +#ifdef CONFIG_KASAN_HW_TAGS +#define ___GFP_SKIP_ZERO BIT(___GFP_SKIP_ZERO_BIT) +#define ___GFP_SKIP_KASAN BIT(___GFP_SKIP_KASAN_BIT) +#else +#define ___GFP_SKIP_ZERO 0 +#define ___GFP_SKIP_KASAN 0 +#endif +#ifdef CONFIG_LOCKDEP +#define ___GFP_NOLOCKDEP BIT(___GFP_NOLOCKDEP_BIT) +#else +#define ___GFP_NOLOCKDEP 0 +#endif +#ifdef CONFIG_SLAB_OBJ_EXT +#define ___GFP_NO_OBJ_EXT BIT(___GFP_NO_OBJ_EXT_BIT) +#else +#define ___GFP_NO_OBJ_EXT 0 +#endif + +/* + * Physical address zone modifiers (see linux/mmzone.h - low four bits) + * + * Do not put any conditional on these. If necessary modify the definitions + * without the underscores and use them consistently. The definitions here may + * be used in bit comparisons. + */ +#define __GFP_DMA ((__force gfp_t)___GFP_DMA) +#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) +#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) +#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ +#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) + +/** + * DOC: Page mobility and placement hints + * + * Page mobility and placement hints + * --------------------------------- + * + * These flags provide hints about how mobile the page is. Pages with similar + * mobility are placed within the same pageblocks to minimise problems due + * to external fragmentation. + * + * %__GFP_MOVABLE (also a zone modifier) indicates that the page can be + * moved by page migration during memory compaction or can be reclaimed. + * + * %__GFP_RECLAIMABLE is used for slab allocations that specify + * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers. + * + * %__GFP_WRITE indicates the caller intends to dirty the page. Where possible, + * these pages will be spread between local zones to avoid all the dirty + * pages being in one zone (fair zone allocation policy). + * + * %__GFP_HARDWALL enforces the cpuset memory allocation policy. + * + * %__GFP_THISNODE forces the allocation to be satisfied from the requested + * node with no fallbacks or placement policy enforcements. + * + * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg. + * + * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension. + */ +#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) +#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) +#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) +#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) +#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) +#define __GFP_NO_OBJ_EXT ((__force gfp_t)___GFP_NO_OBJ_EXT) + +/** + * DOC: Watermark modifiers + * + * Watermark modifiers -- controls access to emergency reserves + * ------------------------------------------------------------ + * + * %__GFP_HIGH indicates that the caller is high-priority and that granting + * the request is necessary before the system can make forward progress. + * For example creating an IO context to clean pages and requests + * from atomic context. + * + * %__GFP_MEMALLOC allows access to all memory. This should only be used when + * the caller guarantees the allocation will allow more memory to be freed + * very shortly e.g. process exiting or swapping. Users either should + * be the MM or co-ordinating closely with the VM (e.g. swap over NFS). + * Users of this flag have to be extremely careful to not deplete the reserve + * completely and implement a throttling mechanism which controls the + * consumption of the reserve based on the amount of freed memory. + * Usage of a pre-allocated pool (e.g. mempool) should be always considered + * before using this flag. + * + * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. + * This takes precedence over the %__GFP_MEMALLOC flag if both are set. + */ +#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) +#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) +#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) + +/** + * DOC: Reclaim modifiers + * + * Reclaim modifiers + * ----------------- + * Please note that all the following flags are only applicable to sleepable + * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them). + * + * %__GFP_IO can start physical IO. + * + * %__GFP_FS can call down to the low-level FS. Clearing the flag avoids the + * allocator recursing into the filesystem which might already be holding + * locks. + * + * %__GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim. + * This flag can be cleared to avoid unnecessary delays when a fallback + * option is available. + * + * %__GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when + * the low watermark is reached and have it reclaim pages until the high + * watermark is reached. A caller may wish to clear this flag when fallback + * options are available and the reclaim is likely to disrupt the system. The + * canonical example is THP allocation where a fallback is cheap but + * reclaim/compaction may cause indirect stalls. + * + * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. + * + * The default allocator behavior depends on the request size. We have a concept + * of so-called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER). + * !costly allocations are too essential to fail so they are implicitly + * non-failing by default (with some exceptions like OOM victims might fail so + * the caller still has to check for failures) while costly requests try to be + * not disruptive and back off even without invoking the OOM killer. + * The following three modifiers might be used to override some of these + * implicit rules. Please note that all of them must be used along with + * %__GFP_DIRECT_RECLAIM flag. + * + * %__GFP_NORETRY: The VM implementation will try only very lightweight + * memory direct reclaim to get some memory under memory pressure (thus + * it can sleep). It will avoid disruptive actions like OOM killer. The + * caller must handle the failure which is quite likely to happen under + * heavy memory pressure. The flag is suitable when failure can easily be + * handled at small cost, such as reduced throughput. + * + * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim + * procedures that have previously failed if there is some indication + * that progress has been made elsewhere. It can wait for other + * tasks to attempt high-level approaches to freeing memory such as + * compaction (which removes fragmentation) and page-out. + * There is still a definite limit to the number of retries, but it is + * a larger limit than with %__GFP_NORETRY. + * Allocations with this flag may fail, but only when there is + * genuinely little unused memory. While these allocations do not + * directly trigger the OOM killer, their failure indicates that + * the system is likely to need to use the OOM killer soon. The + * caller must handle failure, but can reasonably do so by failing + * a higher-level request, or completing it only in a much less + * efficient manner. + * If the allocation does fail, and the caller is in a position to + * free some non-essential memory, doing so could benefit the system + * as a whole. + * + * %__GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller + * cannot handle allocation failures. The allocation could block + * indefinitely but will never return with failure. Testing for + * failure is pointless. + * It _must_ be blockable and used together with __GFP_DIRECT_RECLAIM. + * It should _never_ be used in non-sleepable contexts. + * New users should be evaluated carefully (and the flag should be + * used only when there is no reasonable failure policy) but it is + * definitely preferable to use the flag rather than opencode endless + * loop around allocator. + * Allocating pages from the buddy with __GFP_NOFAIL and order > 1 is + * not supported. Please consider using kvmalloc() instead. + */ +#define __GFP_IO ((__force gfp_t)___GFP_IO) +#define __GFP_FS ((__force gfp_t)___GFP_FS) +#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ +#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ +#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL) +#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) +#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) + +/** + * DOC: Action modifiers + * + * Action modifiers + * ---------------- + * + * %__GFP_NOWARN suppresses allocation failure reports. + * + * %__GFP_COMP address compound page metadata. + * + * %__GFP_ZERO returns a zeroed page on success. + * + * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself + * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that + * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting + * memory tags at the same time as zeroing memory has minimal additional + * performance impact. + * + * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation. + * Used for userspace and vmalloc pages; the latter are unpoisoned by + * kasan_unpoison_vmalloc instead. For userspace pages, results in + * poisoning being skipped as well, see should_skip_kasan_poison for + * details. Only effective in HW_TAGS mode. + */ +#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) +#define __GFP_COMP ((__force gfp_t)___GFP_COMP) +#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) +#define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) +#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO) +#define __GFP_SKIP_KASAN ((__force gfp_t)___GFP_SKIP_KASAN) + +/* Disable lockdep for GFP context tracking */ +#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) + +/* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT ___GFP_LAST_BIT +#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) + +/** + * DOC: Useful GFP flag combinations + * + * Useful GFP flag combinations + * ---------------------------- + * + * Useful GFP flag combinations that are commonly used. It is recommended + * that subsystems start with one of these combinations and then set/clear + * %__GFP_FOO flags as necessary. + * + * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower + * watermark is applied to allow access to "atomic reserves". + * The current implementation doesn't support NMI and few other strict + * non-preemptive contexts (e.g. raw_spin_lock). The same applies to %GFP_NOWAIT. + * + * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires + * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim. + * + * %GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is + * accounted to kmemcg. + * + * %GFP_NOWAIT is for kernel allocations that should not stall for direct + * reclaim, start physical IO or use any filesystem callback. It is very + * likely to fail to allocate memory, even for very small allocations. + * + * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages + * that do not require the starting of any physical IO. + * Please try to avoid using this flag directly and instead use + * memalloc_noio_{save,restore} to mark the whole scope which cannot + * perform any IO with a short explanation why. All allocation requests + * will inherit GFP_NOIO implicitly. + * + * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. + * Please try to avoid using this flag directly and instead use + * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't + * recurse into the FS layer with a short explanation why. All allocation + * requests will inherit GFP_NOFS implicitly. + * + * %GFP_USER is for userspace allocations that also need to be directly + * accessibly by the kernel or hardware. It is typically used by hardware + * for buffers that are mapped to userspace (e.g. graphics) that hardware + * still must DMA to. cpuset limits are enforced for these allocations. + * + * %GFP_DMA exists for historical reasons and should be avoided where possible. + * The flags indicates that the caller requires that the lowest zone be + * used (%ZONE_DMA or 16M on x86-64). Ideally, this would be removed but + * it would require careful auditing as some users really require it and + * others use the flag to avoid lowmem reserves in %ZONE_DMA and treat the + * lowest zone as a type of emergency reserve. + * + * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit + * address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory + * because the DMA32 kmalloc cache array is not implemented. + * (Reason: there is no such user in kernel). + * + * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, + * do not need to be directly accessible by the kernel but that cannot + * move once in use. An example may be a hardware allocation that maps + * data directly into userspace but has no addressing limitations. + * + * %GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not + * need direct access to but can use kmap() when access is required. They + * are expected to be movable via page reclaim or page migration. Typically, + * pages on the LRU would also be allocated with %GFP_HIGHUSER_MOVABLE. + * + * %GFP_TRANSHUGE and %GFP_TRANSHUGE_LIGHT are used for THP allocations. They + * are compound allocations that will generally fail quickly if memory is not + * available and will not wake kswapd/kcompactd on failure. The _LIGHT + * version does not attempt reclaim/compaction at all and is by default used + * in page fault path, while the non-light is used by khugepaged. + */ +#define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM) +#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) +#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) +#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM | __GFP_NOWARN) +#define GFP_NOIO (__GFP_RECLAIM) +#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) +#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_DMA __GFP_DMA +#define GFP_DMA32 __GFP_DMA32 +#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) +#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | __GFP_SKIP_KASAN) +#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ + __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) +#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) + +#endif /* __LINUX_GFP_TYPES_H */ diff --git a/tools/include/linux/io.h b/tools/include/linux/io.h index e129871fe661..4b94b84160b8 100644 --- a/tools/include/linux/io.h +++ b/tools/include/linux/io.h @@ -2,4 +2,6 @@ #ifndef _TOOLS_IO_H #define _TOOLS_IO_H -#endif +#include <asm/io.h> + +#endif /* _TOOLS_IO_H */ diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index 5a37ccbec54f..f61a01dd7eb7 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -18,6 +18,7 @@ static inline const char *kallsyms_lookup(unsigned long addr, return NULL; } +#ifdef HAVE_BACKTRACE_SUPPORT #include <execinfo.h> #include <stdlib.h> static inline void print_ip_sym(const char *loglvl, unsigned long ip) @@ -30,5 +31,8 @@ static inline void print_ip_sym(const char *loglvl, unsigned long ip) free(name); } +#else +static inline void print_ip_sym(const char *loglvl, unsigned long ip) {} +#endif #endif diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index df5d9fa84dba..aceac94632c8 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -65,5 +65,6 @@ struct unwind_hint { #define ANNOTYPE_IGNORE_ALTS 6 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 #define ANNOTYPE_REACHABLE 8 +#define ANNOTYPE_NOCFI 9 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/linux/pci_ids.h b/tools/include/linux/pci_ids.h new file mode 120000 index 000000000000..1c9e88f41261 --- /dev/null +++ b/tools/include/linux/pci_ids.h @@ -0,0 +1 @@ +../../../include/linux/pci_ids.h
\ No newline at end of file diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h index c87051e2b26f..94937a699402 100644 --- a/tools/include/linux/slab.h +++ b/tools/include/linux/slab.h @@ -4,11 +4,31 @@ #include <linux/types.h> #include <linux/gfp.h> +#include <pthread.h> -#define SLAB_PANIC 2 #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define kzalloc_node(size, flags, node) kmalloc(size, flags) +enum _slab_flag_bits { + _SLAB_KMALLOC, + _SLAB_HWCACHE_ALIGN, + _SLAB_PANIC, + _SLAB_TYPESAFE_BY_RCU, + _SLAB_ACCOUNT, + _SLAB_FLAGS_LAST_BIT +}; + +#define __SLAB_FLAG_BIT(nr) ((unsigned int __force)(1U << (nr))) +#define __SLAB_FLAG_UNUSED ((unsigned int __force)(0U)) + +#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN) +#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC) +#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU) +#ifdef CONFIG_MEMCG +# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT) +#else +# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED +#endif void *kmalloc(size_t size, gfp_t gfp); void kfree(void *p); @@ -23,6 +43,98 @@ enum slab_state { FULL }; +struct kmem_cache { + pthread_mutex_t lock; + unsigned int size; + unsigned int align; + unsigned int sheaf_capacity; + int nr_objs; + void *objs; + void (*ctor)(void *); + bool non_kernel_enabled; + unsigned int non_kernel; + unsigned long nr_allocated; + unsigned long nr_tallocated; + bool exec_callback; + void (*callback)(void *); + void *private; +}; + +struct kmem_cache_args { + /** + * @align: The required alignment for the objects. + * + * %0 means no specific alignment is requested. + */ + unsigned int align; + /** + * @sheaf_capacity: The maximum size of the sheaf. + */ + unsigned int sheaf_capacity; + /** + * @useroffset: Usercopy region offset. + * + * %0 is a valid offset, when @usersize is non-%0 + */ + unsigned int useroffset; + /** + * @usersize: Usercopy region size. + * + * %0 means no usercopy region is specified. + */ + unsigned int usersize; + /** + * @freeptr_offset: Custom offset for the free pointer + * in &SLAB_TYPESAFE_BY_RCU caches + * + * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer + * outside of the object. This might cause the object to grow in size. + * Cache creators that have a reason to avoid this can specify a custom + * free pointer offset in their struct where the free pointer will be + * placed. + * + * Note that placing the free pointer inside the object requires the + * caller to ensure that no fields are invalidated that are required to + * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for + * details). + * + * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset + * is specified, %use_freeptr_offset must be set %true. + * + * Note that @ctor currently isn't supported with custom free pointers + * as a @ctor requires an external free pointer. + */ + unsigned int freeptr_offset; + /** + * @use_freeptr_offset: Whether a @freeptr_offset is used. + */ + bool use_freeptr_offset; + /** + * @ctor: A constructor for the objects. + * + * The constructor is invoked for each object in a newly allocated slab + * page. It is the cache user's responsibility to free object in the + * same state as after calling the constructor, or deal appropriately + * with any differences between a freshly constructed and a reallocated + * object. + * + * %NULL means no constructor. + */ + void (*ctor)(void *); +}; + +struct slab_sheaf { + union { + struct list_head barn_list; + /* only used for prefilled sheafs */ + unsigned int capacity; + }; + struct kmem_cache *cache; + unsigned int size; + int node; /* only used for rcu_sheaf */ + void *objects[]; +}; + static inline void *kzalloc(size_t size, gfp_t gfp) { return kmalloc(size, gfp | __GFP_ZERO); @@ -37,12 +149,57 @@ static inline void *kmem_cache_alloc(struct kmem_cache *cachep, int flags) } void kmem_cache_free(struct kmem_cache *cachep, void *objp); -struct kmem_cache *kmem_cache_create(const char *name, unsigned int size, - unsigned int align, unsigned int flags, - void (*ctor)(void *)); + +struct kmem_cache * +__kmem_cache_create_args(const char *name, unsigned int size, + struct kmem_cache_args *args, unsigned int flags); + +/* If NULL is passed for @args, use this variant with default arguments. */ +static inline struct kmem_cache * +__kmem_cache_default_args(const char *name, unsigned int size, + struct kmem_cache_args *args, unsigned int flags) +{ + struct kmem_cache_args kmem_default_args = {}; + + return __kmem_cache_create_args(name, size, &kmem_default_args, flags); +} + +static inline struct kmem_cache * +__kmem_cache_create(const char *name, unsigned int size, unsigned int align, + unsigned int flags, void (*ctor)(void *)) +{ + struct kmem_cache_args kmem_args = { + .align = align, + .ctor = ctor, + }; + + return __kmem_cache_create_args(name, size, &kmem_args, flags); +} + +#define kmem_cache_create(__name, __object_size, __args, ...) \ + _Generic((__args), \ + struct kmem_cache_args *: __kmem_cache_create_args, \ + void *: __kmem_cache_default_args, \ + default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__) void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list); int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, void **list); +struct slab_sheaf * +kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size); + +void * +kmem_cache_alloc_from_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf); + +void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf *sheaf); +int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, + struct slab_sheaf **sheafp, unsigned int size); + +static inline unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf) +{ + return sheaf->size; +} #endif /* _TOOLS_SLAB_H */ diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index f9702877ac21..143c2d2c2ba6 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -23,25 +23,47 @@ else Q=@ endif -nolibc_arch := $(patsubst arm64,aarch64,$(ARCH)) -arch_file := arch-$(nolibc_arch).h +arch_file := arch-$(ARCH).h all_files := \ compiler.h \ crt.h \ ctype.h \ dirent.h \ + elf.h \ errno.h \ + fcntl.h \ + getopt.h \ limits.h \ + math.h \ nolibc.h \ + poll.h \ + sched.h \ signal.h \ stackprotector.h \ std.h \ stdarg.h \ stdbool.h \ + stddef.h \ stdint.h \ stdlib.h \ string.h \ sys.h \ + sys/auxv.h \ + sys/ioctl.h \ + sys/mman.h \ + sys/mount.h \ + sys/prctl.h \ + sys/random.h \ + sys/reboot.h \ + sys/resource.h \ + sys/stat.h \ + sys/syscall.h \ + sys/sysmacros.h \ + sys/time.h \ + sys/timerfd.h \ + sys/types.h \ + sys/utsname.h \ + sys/wait.h \ time.h \ types.h \ unistd.h \ @@ -68,18 +90,12 @@ help: @echo " OUTPUT = $(OUTPUT)" @echo "" -# Note: when ARCH is "x86" we concatenate both x86_64 and i386 headers: $(Q)mkdir -p $(OUTPUT)sysroot $(Q)mkdir -p $(OUTPUT)sysroot/include - $(Q)cp $(all_files) $(OUTPUT)sysroot/include/ - $(Q)if [ "$(ARCH)" = "x86" ]; then \ - sed -e \ - 's,^#ifndef _NOLIBC_ARCH_X86_64_H,#if !defined(_NOLIBC_ARCH_X86_64_H) \&\& defined(__x86_64__),' \ - arch-x86_64.h; \ - sed -e \ - 's,^#ifndef _NOLIBC_ARCH_I386_H,#if !defined(_NOLIBC_ARCH_I386_H) \&\& !defined(__x86_64__),' \ - arch-i386.h; \ + $(Q)cp --parents $(all_files) $(OUTPUT)sysroot/include/ + $(Q)if [ "$(ARCH)" = "i386" -o "$(ARCH)" = "x86_64" ]; then \ + cat arch-x86.h; \ elif [ -e "$(arch_file)" ]; then \ cat $(arch_file); \ else \ @@ -91,5 +107,11 @@ headers_standalone: headers $(Q)$(MAKE) -C $(srctree) headers $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)sysroot +headers_check: headers_standalone + $(Q)for header in $(filter-out crt.h std.h,$(all_files)); do \ + $(CC) $(CLANG_CROSS_FLAGS) -Wall -Werror -nostdinc -fsyntax-only -x c /dev/null \ + -I$(or $(objtree),$(srctree))/usr/include -include $$header -include $$header || exit 1; \ + done + clean: $(call QUIET_CLEAN, nolibc) rm -rf "$(OUTPUT)sysroot" diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h index 6180ff99ab43..1f66e7e5a444 100644 --- a/tools/include/nolibc/arch-arm.h +++ b/tools/include/nolibc/arch-arm.h @@ -189,8 +189,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s { __asm__ volatile ( "mov r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */ - "and ip, r0, #-8\n" /* sp must be 8-byte aligned in the callee */ - "mov sp, ip\n" "bl _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-arm64.h index 06fdef7b291a..02a3f74c8ec8 100644 --- a/tools/include/nolibc/arch-aarch64.h +++ b/tools/include/nolibc/arch-arm64.h @@ -1,16 +1,16 @@ /* SPDX-License-Identifier: LGPL-2.1 OR MIT */ /* - * AARCH64 specific definitions for NOLIBC + * ARM64 specific definitions for NOLIBC * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ -#ifndef _NOLIBC_ARCH_AARCH64_H -#define _NOLIBC_ARCH_AARCH64_H +#ifndef _NOLIBC_ARCH_ARM64_H +#define _NOLIBC_ARCH_ARM64_H #include "compiler.h" #include "crt.h" -/* Syscalls for AARCH64 : +/* Syscalls for ARM64 : * - registers are 64-bit * - stack is 16-byte aligned * - syscall number is passed in x8 @@ -146,9 +146,8 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s { __asm__ volatile ( "mov x0, sp\n" /* save stack pointer to x0, as arg1 of _start_c */ - "and sp, x0, -16\n" /* sp must be 16-byte aligned in the callee */ "bl _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); } -#endif /* _NOLIBC_ARCH_AARCH64_H */ +#endif /* _NOLIBC_ARCH_ARM64_H */ diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h deleted file mode 100644 index ff5afc35bbd8..000000000000 --- a/tools/include/nolibc/arch-i386.h +++ /dev/null @@ -1,180 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * i386 specific definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> - */ - -#ifndef _NOLIBC_ARCH_I386_H -#define _NOLIBC_ARCH_I386_H - -#include "compiler.h" -#include "crt.h" - -/* Syscalls for i386 : - * - mostly similar to x86_64 - * - registers are 32-bit - * - syscall number is passed in eax - * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively - * - all registers are preserved (except eax of course) - * - the system call is performed by calling int $0x80 - * - syscall return comes in eax - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - the syscall number is always specified last in order to allow to force - * some registers before (gcc refuses a %-register at the last position). - * - * Also, i386 supports the old_select syscall if newselect is not available - */ -#define __ARCH_WANT_SYS_OLD_SELECT - -#define my_syscall0(num) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - register long _arg3 __asm__ ("edx") = (long)(arg3); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - register long _arg3 __asm__ ("edx") = (long)(arg3); \ - register long _arg4 __asm__ ("esi") = (long)(arg4); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - register long _arg3 __asm__ ("edx") = (long)(arg3); \ - register long _arg4 __asm__ ("esi") = (long)(arg4); \ - register long _arg5 __asm__ ("edi") = (long)(arg5); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - long _eax = (long)(num); \ - long _arg6 = (long)(arg6); /* Always in memory */ \ - __asm__ volatile ( \ - "pushl %[_arg6]\n\t" \ - "pushl %%ebp\n\t" \ - "movl 4(%%esp),%%ebp\n\t" \ - "int $0x80\n\t" \ - "popl %%ebp\n\t" \ - "addl $4,%%esp\n\t" \ - : "+a"(_eax) /* %eax */ \ - : "b"(arg1), /* %ebx */ \ - "c"(arg2), /* %ecx */ \ - "d"(arg3), /* %edx */ \ - "S"(arg4), /* %esi */ \ - "D"(arg5), /* %edi */ \ - [_arg6]"m"(_arg6) /* memory */ \ - : "memory", "cc" \ - ); \ - _eax; \ -}) - -/* startup code */ -/* - * i386 System V ABI mandates: - * 1) last pushed argument must be 16-byte aligned. - * 2) The deepest stack frame should be set to zero - * - */ -void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) -{ - __asm__ volatile ( - "xor %ebp, %ebp\n" /* zero the stack frame */ - "mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */ - "add $12, %esp\n" /* avoid over-estimating after the 'and' & 'sub' below */ - "and $-16, %esp\n" /* the %esp must be 16-byte aligned on 'call' */ - "sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */ - "push %eax\n" /* push arg1 on stack to support plain stack modes too */ - "call _start_c\n" /* transfer to c runtime */ - "hlt\n" /* ensure it does not return */ - ); - __nolibc_entrypoint_epilogue(); -} - -#endif /* _NOLIBC_ARCH_I386_H */ diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h index fb519545959e..5511705303ea 100644 --- a/tools/include/nolibc/arch-loongarch.h +++ b/tools/include/nolibc/arch-loongarch.h @@ -142,18 +142,11 @@ _arg1; \ }) -#if __loongarch_grlen == 32 -#define LONG_BSTRINS "bstrins.w" -#else /* __loongarch_grlen == 64 */ -#define LONG_BSTRINS "bstrins.d" -#endif - /* startup code */ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ - LONG_BSTRINS " $sp, $zero, 3, 0\n" /* $sp must be 16-byte aligned */ "bl _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); diff --git a/tools/include/nolibc/arch-m68k.h b/tools/include/nolibc/arch-m68k.h new file mode 100644 index 000000000000..6dac1845f298 --- /dev/null +++ b/tools/include/nolibc/arch-m68k.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * m68k specific definitions for NOLIBC + * Copyright (C) 2025 Daniel Palmer<daniel@thingy.jp> + * + * Roughly based on one or more of the other arch files. + * + */ + +#ifndef _NOLIBC_ARCH_M68K_H +#define _NOLIBC_ARCH_M68K_H + +#include "compiler.h" +#include "crt.h" + +#define _NOLIBC_SYSCALL_CLOBBERLIST "memory" + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_num) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_arg1) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_arg1), "r"(_arg2) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r"(_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + register long _arg4 __asm__ ("d4") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + register long _arg4 __asm__ ("d4") = (long)(arg4); \ + register long _arg5 __asm__ ("d5") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("d0") = (num); \ + register long _arg1 __asm__ ("d1") = (long)(arg1); \ + register long _arg2 __asm__ ("d2") = (long)(arg2); \ + register long _arg3 __asm__ ("d3") = (long)(arg3); \ + register long _arg4 __asm__ ("d4") = (long)(arg4); \ + register long _arg5 __asm__ ("d5") = (long)(arg5); \ + register long _arg6 __asm__ ("a0") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "trap #0\n" \ + : "+r" (_num) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _num; \ +}) + +void _start(void); +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +{ + __asm__ volatile ( + "movel %sp, %sp@-\n" + "jsr _start_c\n" + ); + __nolibc_entrypoint_epilogue(); +} + +#endif /* _NOLIBC_ARCH_M68K_H */ diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 753a8ed2cf69..0cbac63b249a 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -10,7 +10,7 @@ #include "compiler.h" #include "crt.h" -#if !defined(_ABIO32) +#if !defined(_ABIO32) && !defined(_ABIN32) && !defined(_ABI64) #error Unsupported MIPS ABI #endif @@ -32,11 +32,32 @@ * - the arguments are cast to long and assigned into the target registers * which are then simply passed as registers to the asm code, so that we * don't have to experience issues with register constraints. + * + * Syscalls for MIPS ABI N32, same as ABI O32 with the following differences : + * - arguments are in a0, a1, a2, a3, t0, t1, t2, t3. + * t0..t3 are also known as a4..a7. + * - stack is 16-byte aligned */ +#if defined(_ABIO32) + #define _NOLIBC_SYSCALL_CLOBBERLIST \ "memory", "cc", "at", "v1", "hi", "lo", \ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" +#define _NOLIBC_SYSCALL_STACK_RESERVE "addiu $sp, $sp, -32\n" +#define _NOLIBC_SYSCALL_STACK_UNRESERVE "addiu $sp, $sp, 32\n" + +#else /* _ABIN32 || _ABI64 */ + +/* binutils, GCC and clang disagree about register aliases, use numbers instead. */ +#define _NOLIBC_SYSCALL_CLOBBERLIST \ + "memory", "cc", "at", "v1", \ + "10", "11", "12", "13", "14", "15", "24", "25" + +#define _NOLIBC_SYSCALL_STACK_RESERVE +#define _NOLIBC_SYSCALL_STACK_UNRESERVE + +#endif /* _ABIO32 */ #define my_syscall0(num) \ ({ \ @@ -44,9 +65,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "r"(_num) \ : _NOLIBC_SYSCALL_CLOBBERLIST \ @@ -61,9 +82,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1) \ @@ -80,9 +101,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2) \ @@ -100,9 +121,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3) \ @@ -120,9 +141,9 @@ register long _arg4 __asm__ ("a3") = (long)(arg4); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ @@ -131,6 +152,8 @@ _arg4 ? -_num : _num; \ }) +#if defined(_ABIO32) + #define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ({ \ register long _num __asm__ ("v0") = (num); \ @@ -141,10 +164,10 @@ register long _arg5 = (long)(arg5); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "sw %7, 16($sp)\n" \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ @@ -164,11 +187,53 @@ register long _arg6 = (long)(arg6); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "sw %7, 16($sp)\n" \ "sw %8, 20($sp)\n" \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#else /* _ABIN32 || _ABI64 */ + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("$4") = (long)(arg1); \ + register long _arg2 __asm__ ("$5") = (long)(arg2); \ + register long _arg3 __asm__ ("$6") = (long)(arg3); \ + register long _arg4 __asm__ ("$7") = (long)(arg4); \ + register long _arg5 __asm__ ("$8") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("$4") = (long)(arg1); \ + register long _arg2 __asm__ ("$5") = (long)(arg2); \ + register long _arg3 __asm__ ("$6") = (long)(arg3); \ + register long _arg4 __asm__ ("$7") = (long)(arg4); \ + register long _arg5 __asm__ ("$8") = (long)(arg5); \ + register long _arg6 __asm__ ("$9") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "syscall\n" \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ @@ -178,28 +243,26 @@ _arg4 ? -_num : _num; \ }) +#endif /* _ABIO32 */ + /* startup code, note that it's called __start on MIPS */ void __start(void); void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void) { __asm__ volatile ( - ".set push\n" - ".set noreorder\n" - "bal 1f\n" /* prime $ra for .cpload */ - "nop\n" - "1:\n" - ".cpload $ra\n" "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ - "addiu $sp, $sp, -4\n" /* space for .cprestore to store $gp */ - ".cprestore 0\n" - "li $t0, -8\n" - "and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */ +#if defined(_ABIO32) "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ +#endif /* _ABIO32 */ "lui $t9, %hi(_start_c)\n" /* ABI requires current function address in $t9 */ "ori $t9, %lo(_start_c)\n" +#if defined(_ABI64) + "lui $t0, %highest(_start_c)\n" + "ori $t0, %higher(_start_c)\n" + "dsll $t0, 0x20\n" + "or $t9, $t0\n" +#endif /* _ABI64 */ "jalr $t9\n" /* transfer to c runtime */ - " nop\n" /* delayed slot */ - ".set pop\n" ); __nolibc_entrypoint_epilogue(); } diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h index ee2fdb8d601d..204564bbcd32 100644 --- a/tools/include/nolibc/arch-powerpc.h +++ b/tools/include/nolibc/arch-powerpc.h @@ -201,7 +201,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s __asm__ volatile ( "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */ - "clrrdi 1, 1, 4\n" /* align the stack to 16 bytes */ "li 0, 0\n" /* zero the frame pointer */ "stdu 1, -32(1)\n" /* the initial stack frame */ "bl _start_c\n" /* transfer to c runtime */ @@ -209,7 +208,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s #else __asm__ volatile ( "mr 3, 1\n" /* save stack pointer to r3, as arg1 of _start_c */ - "clrrwi 1, 1, 4\n" /* align the stack to 16 bytes */ "li 0, 0\n" /* zero the frame pointer */ "stwu 1, -16(1)\n" /* the initial stack frame */ "bl _start_c\n" /* transfer to c runtime */ diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h index 8827bf936212..885383a86c38 100644 --- a/tools/include/nolibc/arch-riscv.h +++ b/tools/include/nolibc/arch-riscv.h @@ -148,7 +148,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s "lla gp, __global_pointer$\n" ".option pop\n" "mv a0, sp\n" /* save stack pointer to a0, as arg1 of _start_c */ - "andi sp, a0, -16\n" /* sp must be 16-byte aligned */ "call _start_c\n" /* transfer to c runtime */ ); __nolibc_entrypoint_epilogue(); diff --git a/tools/include/nolibc/arch-sh.h b/tools/include/nolibc/arch-sh.h new file mode 100644 index 000000000000..a96b8914607e --- /dev/null +++ b/tools/include/nolibc/arch-sh.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * SuperH specific definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +#ifndef _NOLIBC_ARCH_SH_H +#define _NOLIBC_ARCH_SH_H + +#include "compiler.h" +#include "crt.h" + +/* + * Syscalls for SuperH: + * - registers are 32bit wide + * - syscall number is passed in r3 + * - arguments are in r4, r5, r6, r7, r0, r1, r2 + * - the system call is performed by calling trapa #31 + * - syscall return value is in r0 + */ + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + register long _arg4 __asm__ ("r7") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + register long _arg4 __asm__ ("r7") = (long)(arg4); \ + register long _arg5 __asm__ ("r0") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_arg5) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + register long _arg4 __asm__ ("r7") = (long)(arg4); \ + register long _arg5 __asm__ ("r0") = (long)(arg5); \ + register long _arg6 __asm__ ("r1") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_arg5), "r"(_arg6) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +/* startup code */ +void _start_wrapper(void); +void __attribute__((weak,noreturn)) __nolibc_entrypoint __no_stack_protector _start_wrapper(void) +{ + __asm__ volatile ( + ".global _start\n" /* The C function will have a prologue, */ + ".type _start, @function\n" /* corrupting "sp" */ + ".weak _start\n" + "_start:\n" + + "mov sp, r4\n" /* save argc pointer to r4, as arg1 of _start_c */ + "bsr _start_c\n" /* transfer to c runtime */ + "nop\n" /* delay slot */ + + ".size _start, .-_start\n" + ); + __nolibc_entrypoint_epilogue(); +} + +#endif /* _NOLIBC_ARCH_SH_H */ diff --git a/tools/include/nolibc/arch-sparc.h b/tools/include/nolibc/arch-sparc.h new file mode 100644 index 000000000000..ca420d843e25 --- /dev/null +++ b/tools/include/nolibc/arch-sparc.h @@ -0,0 +1,207 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * SPARC (32bit and 64bit) specific definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +#ifndef _NOLIBC_ARCH_SPARC_H +#define _NOLIBC_ARCH_SPARC_H + +#include <linux/unistd.h> + +#include "compiler.h" +#include "crt.h" + +/* + * Syscalls for SPARC: + * - registers are native word size + * - syscall number is passed in g1 + * - arguments are in o0-o5 + * - the system call is performed by calling a trap instruction + * - syscall return value is in o0 + * - syscall error flag is in the carry bit of the processor status register + */ + +#ifdef __arch64__ + +#define _NOLIBC_SYSCALL "t 0x6d\n" \ + "bcs,a %%xcc, 1f\n" \ + "sub %%g0, %%o0, %%o0\n" \ + "1:\n" + +#else + +#define _NOLIBC_SYSCALL "t 0x10\n" \ + "bcs,a 1f\n" \ + "sub %%g0, %%o0, %%o0\n" \ + "1:\n" + +#endif /* __arch64__ */ + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0"); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + register long _arg5 __asm__ ("o4") = (long)(arg5); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("g1") = (num); \ + register long _arg1 __asm__ ("o0") = (long)(arg1); \ + register long _arg2 __asm__ ("o1") = (long)(arg2); \ + register long _arg3 __asm__ ("o2") = (long)(arg3); \ + register long _arg4 __asm__ ("o3") = (long)(arg4); \ + register long _arg5 __asm__ ("o4") = (long)(arg5); \ + register long _arg6 __asm__ ("o5") = (long)(arg6); \ + \ + __asm__ volatile ( \ + _NOLIBC_SYSCALL \ + : "+r"(_arg1) \ + : "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* startup code */ +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +{ + __asm__ volatile ( + /* + * Save argc pointer to o0, as arg1 of _start_c. + * Account for the window save area, which is 16 registers wide. + */ +#ifdef __arch64__ + "add %sp, 128 + 2047, %o0\n" /* on sparc64 / v9 the stack is offset by 2047 */ +#else + "add %sp, 64, %o0\n" +#endif + "b,a _start_c\n" /* transfer to c runtime */ + ); + __nolibc_entrypoint_epilogue(); +} + +static pid_t getpid(void); + +static __attribute__((unused)) +pid_t sys_fork(void) +{ + pid_t parent, ret; + + parent = getpid(); + ret = my_syscall0(__NR_fork); + + /* The syscall returns the parent pid in the child instead of 0 */ + if (ret == parent) + return 0; + else + return ret; +} +#define sys_fork sys_fork + +static __attribute__((unused)) +pid_t sys_vfork(void) +{ + pid_t parent, ret; + + parent = getpid(); + ret = my_syscall0(__NR_vfork); + + /* The syscall returns the parent pid in the child instead of 0 */ + if (ret == parent) + return 0; + else + return ret; +} +#define sys_vfork sys_vfork + +#endif /* _NOLIBC_ARCH_SPARC_H */ diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86.h index 1e40620a2b33..d3efc0c3b8ad 100644 --- a/tools/include/nolibc/arch-x86_64.h +++ b/tools/include/nolibc/arch-x86.h @@ -1,15 +1,184 @@ /* SPDX-License-Identifier: LGPL-2.1 OR MIT */ /* - * x86_64 specific definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + * x86 specific definitions for NOLIBC (both 32- and 64-bit) + * Copyright (C) 2017-2025 Willy Tarreau <w@1wt.eu> */ -#ifndef _NOLIBC_ARCH_X86_64_H -#define _NOLIBC_ARCH_X86_64_H +#ifndef _NOLIBC_ARCH_X86_H +#define _NOLIBC_ARCH_X86_H #include "compiler.h" #include "crt.h" +#if !defined(__x86_64__) + +/* Syscalls for i386 : + * - mostly similar to x86_64 + * - registers are 32-bit + * - syscall number is passed in eax + * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively + * - all registers are preserved (except eax of course) + * - the system call is performed by calling int $0x80 + * - syscall return comes in eax + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * + * Also, i386 supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#define my_syscall0(num) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + register long _arg4 __asm__ ("esi") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + register long _arg4 __asm__ ("esi") = (long)(arg4); \ + register long _arg5 __asm__ ("edi") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + long _eax = (long)(num); \ + long _arg6 = (long)(arg6); /* Always in memory */ \ + __asm__ volatile ( \ + "pushl %[_arg6]\n\t" \ + "pushl %%ebp\n\t" \ + "movl 4(%%esp),%%ebp\n\t" \ + "int $0x80\n\t" \ + "popl %%ebp\n\t" \ + "addl $4,%%esp\n\t" \ + : "+a"(_eax) /* %eax */ \ + : "b"(arg1), /* %ebx */ \ + "c"(arg2), /* %ecx */ \ + "d"(arg3), /* %edx */ \ + "S"(arg4), /* %esi */ \ + "D"(arg5), /* %edi */ \ + [_arg6]"m"(_arg6) /* memory */ \ + : "memory", "cc" \ + ); \ + _eax; \ +}) + +/* startup code */ +/* + * i386 System V ABI mandates: + * 1) last pushed argument must be 16-byte aligned. + * 2) The deepest stack frame should be set to zero + * + */ +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +{ + __asm__ volatile ( + "xor %ebp, %ebp\n" /* zero the stack frame */ + "mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */ + "sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */ + "push %eax\n" /* push arg1 on stack to support plain stack modes too */ + "call _start_c\n" /* transfer to c runtime */ + "hlt\n" /* ensure it does not return */ + ); + __nolibc_entrypoint_epilogue(); +} + +#else /* !defined(__x86_64__) */ + /* Syscalls for x86_64 : * - registers are 64-bit * - syscall number is passed in rax @@ -166,7 +335,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s __asm__ volatile ( "xor %ebp, %ebp\n" /* zero the stack frame */ "mov %rsp, %rdi\n" /* save stack pointer to %rdi, as arg1 of _start_c */ - "and $-16, %rsp\n" /* %rsp must be 16-byte aligned before call */ "call _start_c\n" /* transfer to c runtime */ "hlt\n" /* ensure it does not return */ ); @@ -215,4 +383,5 @@ __asm__ ( "retq\n" ); -#endif /* _NOLIBC_ARCH_X86_64_H */ +#endif /* !defined(__x86_64__) */ +#endif /* _NOLIBC_ARCH_X86_H */ diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h index 8a2c143c0fba..426c89198135 100644 --- a/tools/include/nolibc/arch.h +++ b/tools/include/nolibc/arch.h @@ -15,14 +15,12 @@ #ifndef _NOLIBC_ARCH_H #define _NOLIBC_ARCH_H -#if defined(__x86_64__) -#include "arch-x86_64.h" -#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) -#include "arch-i386.h" +#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) +#include "arch-x86.h" #elif defined(__ARM_EABI__) #include "arch-arm.h" #elif defined(__aarch64__) -#include "arch-aarch64.h" +#include "arch-arm64.h" #elif defined(__mips__) #include "arch-mips.h" #elif defined(__powerpc__) @@ -33,6 +31,12 @@ #include "arch-s390.h" #elif defined(__loongarch__) #include "arch-loongarch.h" +#elif defined(__sparc__) +#include "arch-sparc.h" +#elif defined(__m68k__) +#include "arch-m68k.h" +#elif defined(__sh__) +#include "arch-sh.h" #else #error Unsupported Architecture #endif diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h index fa1f547e7f13..369cfb5a0e78 100644 --- a/tools/include/nolibc/compiler.h +++ b/tools/include/nolibc/compiler.h @@ -12,6 +12,15 @@ # define __nolibc_has_attribute(attr) 0 #endif +#if defined(__has_feature) +# define __nolibc_has_feature(feature) __has_feature(feature) +#else +# define __nolibc_has_feature(feature) 0 +#endif + +#define __nolibc_aligned(alignment) __attribute__((aligned(alignment))) +#define __nolibc_aligned_as(type) __nolibc_aligned(__alignof__(type)) + #if __nolibc_has_attribute(naked) # define __nolibc_entrypoint __attribute__((naked)) # define __nolibc_entrypoint_epilogue() diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h index c4b10103bbec..961cfe777c35 100644 --- a/tools/include/nolibc/crt.h +++ b/tools/include/nolibc/crt.h @@ -7,6 +7,8 @@ #ifndef _NOLIBC_CRT_H #define _NOLIBC_CRT_H +#include "compiler.h" + char **environ __attribute__((weak)); const unsigned long *_auxv __attribute__((weak)); @@ -25,6 +27,9 @@ extern void (*const __fini_array_end[])(void) __attribute__((weak)); void _start_c(long *sp); __attribute__((weak,used)) +#if __nolibc_has_feature(undefined_behavior_sanitizer) + __attribute__((no_sanitize("function"))) +#endif void _start_c(long *sp) { long argc; diff --git a/tools/include/nolibc/ctype.h b/tools/include/nolibc/ctype.h index 6f90706d0644..470fdf34394a 100644 --- a/tools/include/nolibc/ctype.h +++ b/tools/include/nolibc/ctype.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_CTYPE_H #define _NOLIBC_CTYPE_H @@ -96,7 +99,4 @@ int ispunct(int c) return isgraph(c) && !isalnum(c); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_CTYPE_H */ diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h index c5c30d0dd680..758b95c48e7a 100644 --- a/tools/include/nolibc/dirent.h +++ b/tools/include/nolibc/dirent.h @@ -4,11 +4,16 @@ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_DIRENT_H #define _NOLIBC_DIRENT_H +#include "compiler.h" #include "stdint.h" #include "types.h" +#include "fcntl.h" #include <linux/limits.h> @@ -58,7 +63,7 @@ int closedir(DIR *dirp) static __attribute__((unused)) int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result) { - char buf[sizeof(struct linux_dirent64) + NAME_MAX + 1]; + char buf[sizeof(struct linux_dirent64) + NAME_MAX + 1] __nolibc_aligned_as(struct linux_dirent64); struct linux_dirent64 *ldir = (void *)buf; intptr_t i = (intptr_t)dirp; int fd, ret; @@ -92,7 +97,4 @@ int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result) return 0; } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_DIRENT_H */ diff --git a/tools/include/nolibc/elf.h b/tools/include/nolibc/elf.h new file mode 100644 index 000000000000..3e2c5228bf3d --- /dev/null +++ b/tools/include/nolibc/elf.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Shim elf.h header for NOLIBC. + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_SYS_ELF_H +#define _NOLIBC_SYS_ELF_H + +#include <linux/elf.h> + +#endif /* _NOLIBC_SYS_ELF_H */ diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h index 1d8d8033e8ff..08a33c40ec0c 100644 --- a/tools/include/nolibc/errno.h +++ b/tools/include/nolibc/errno.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_ERRNO_H #define _NOLIBC_ERRNO_H @@ -22,7 +25,4 @@ int errno __attribute__((weak)); */ #define MAX_ERRNO 4095 -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_ERRNO_H */ diff --git a/tools/include/nolibc/fcntl.h b/tools/include/nolibc/fcntl.h new file mode 100644 index 000000000000..bff2e542f20f --- /dev/null +++ b/tools/include/nolibc/fcntl.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * fcntl definition for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_FCNTL_H +#define _NOLIBC_FCNTL_H + +#include "arch.h" +#include "types.h" +#include "sys.h" + +/* + * int openat(int dirfd, const char *path, int flags[, mode_t mode]); + */ + +static __attribute__((unused)) +int sys_openat(int dirfd, const char *path, int flags, mode_t mode) +{ + return my_syscall4(__NR_openat, dirfd, path, flags, mode); +} + +static __attribute__((unused)) +int openat(int dirfd, const char *path, int flags, ...) +{ + mode_t mode = 0; + + if (flags & O_CREAT) { + va_list args; + + va_start(args, flags); + mode = va_arg(args, mode_t); + va_end(args); + } + + return __sysret(sys_openat(dirfd, path, flags, mode)); +} + +/* + * int open(const char *path, int flags[, mode_t mode]); + */ + +static __attribute__((unused)) +int sys_open(const char *path, int flags, mode_t mode) +{ + return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); +} + +static __attribute__((unused)) +int open(const char *path, int flags, ...) +{ + mode_t mode = 0; + + if (flags & O_CREAT) { + va_list args; + + va_start(args, flags); + mode = va_arg(args, mode_t); + va_end(args); + } + + return __sysret(sys_open(path, flags, mode)); +} + +#endif /* _NOLIBC_FCNTL_H */ diff --git a/tools/include/nolibc/getopt.h b/tools/include/nolibc/getopt.h new file mode 100644 index 000000000000..217abb95264b --- /dev/null +++ b/tools/include/nolibc/getopt.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * getopt function definitions for NOLIBC, adapted from musl libc + * Copyright (C) 2005-2020 Rich Felker, et al. + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_GETOPT_H +#define _NOLIBC_GETOPT_H + +struct FILE; +static struct FILE *const stderr; +static int fprintf(struct FILE *stream, const char *fmt, ...); + +__attribute__((weak,unused,section(".data.nolibc_getopt"))) +char *optarg; + +__attribute__((weak,unused,section(".data.nolibc_getopt"))) +int optind = 1, opterr = 1, optopt; + +static __attribute__((unused)) +int getopt(int argc, char * const argv[], const char *optstring) +{ + static int __optpos; + int i; + char c, d; + char *optchar; + + if (!optind) { + __optpos = 0; + optind = 1; + } + + if (optind >= argc || !argv[optind]) + return -1; + + if (argv[optind][0] != '-') { + if (optstring[0] == '-') { + optarg = argv[optind++]; + return 1; + } + return -1; + } + + if (!argv[optind][1]) + return -1; + + if (argv[optind][1] == '-' && !argv[optind][2]) + return optind++, -1; + + if (!__optpos) + __optpos++; + c = argv[optind][__optpos]; + optchar = argv[optind] + __optpos; + __optpos++; + + if (!argv[optind][__optpos]) { + optind++; + __optpos = 0; + } + + if (optstring[0] == '-' || optstring[0] == '+') + optstring++; + + i = 0; + d = 0; + do { + d = optstring[i++]; + } while (d && d != c); + + if (d != c || c == ':') { + optopt = c; + if (optstring[0] != ':' && opterr) + fprintf(stderr, "%s: unrecognized option: %c\n", argv[0], *optchar); + return '?'; + } + if (optstring[i] == ':') { + optarg = 0; + if (optstring[i + 1] != ':' || __optpos) { + optarg = argv[optind++]; + if (__optpos) + optarg += __optpos; + __optpos = 0; + } + if (optind > argc) { + optopt = c; + if (optstring[0] == ':') + return ':'; + if (opterr) + fprintf(stderr, "%s: option requires argument: %c\n", + argv[0], *optchar); + return '?'; + } + } + return c; +} + +#endif /* _NOLIBC_GETOPT_H */ diff --git a/tools/include/nolibc/math.h b/tools/include/nolibc/math.h new file mode 100644 index 000000000000..9df823ddd412 --- /dev/null +++ b/tools/include/nolibc/math.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * math definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_SYS_MATH_H +#define _NOLIBC_SYS_MATH_H + +static __inline__ +double fabs(double x) +{ + return x >= 0 ? x : -x; +} + +static __inline__ +float fabsf(float x) +{ + return x >= 0 ? x : -x; +} + +static __inline__ +long double fabsl(long double x) +{ + return x >= 0 ? x : -x; +} + +#endif /* _NOLIBC_SYS_MATH_H */ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 70872401aca8..d2f5aa085f8e 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -96,15 +96,37 @@ #include "arch.h" #include "types.h" #include "sys.h" +#include "sys/auxv.h" +#include "sys/ioctl.h" +#include "sys/mman.h" +#include "sys/mount.h" +#include "sys/prctl.h" +#include "sys/random.h" +#include "sys/reboot.h" +#include "sys/resource.h" +#include "sys/stat.h" +#include "sys/syscall.h" +#include "sys/sysmacros.h" +#include "sys/time.h" +#include "sys/timerfd.h" +#include "sys/utsname.h" +#include "sys/wait.h" #include "ctype.h" +#include "elf.h" +#include "sched.h" #include "signal.h" #include "unistd.h" +#include "stdbool.h" #include "stdio.h" #include "stdlib.h" #include "string.h" #include "time.h" #include "stackprotector.h" #include "dirent.h" +#include "fcntl.h" +#include "getopt.h" +#include "poll.h" +#include "math.h" /* Used by programs to avoid std includes */ #define NOLIBC diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h new file mode 100644 index 000000000000..0d053f93ea99 --- /dev/null +++ b/tools/include/nolibc/poll.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * poll definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_POLL_H +#define _NOLIBC_POLL_H + +#include "arch.h" +#include "sys.h" + +#include <linux/poll.h> +#include <linux/time.h> + +/* + * int poll(struct pollfd *fds, int nfds, int timeout); + */ + +static __attribute__((unused)) +int sys_poll(struct pollfd *fds, int nfds, int timeout) +{ +#if defined(__NR_ppoll) + struct timespec t; + + if (timeout >= 0) { + t.tv_sec = timeout / 1000; + t.tv_nsec = (timeout % 1000) * 1000000; + } + return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); +#elif defined(__NR_ppoll_time64) + struct __kernel_timespec t; + + if (timeout >= 0) { + t.tv_sec = timeout / 1000; + t.tv_nsec = (timeout % 1000) * 1000000; + } + return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); +#else + return my_syscall3(__NR_poll, fds, nfds, timeout); +#endif +} + +static __attribute__((unused)) +int poll(struct pollfd *fds, int nfds, int timeout) +{ + return __sysret(sys_poll(fds, nfds, timeout)); +} + +#endif /* _NOLIBC_POLL_H */ diff --git a/tools/include/nolibc/sched.h b/tools/include/nolibc/sched.h new file mode 100644 index 000000000000..32221562c166 --- /dev/null +++ b/tools/include/nolibc/sched.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * sched function definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_SCHED_H +#define _NOLIBC_SCHED_H + +#include "sys.h" + +#include <linux/sched.h> + +/* + * int setns(int fd, int nstype); + */ + +static __attribute__((unused)) +int sys_setns(int fd, int nstype) +{ + return my_syscall2(__NR_setns, fd, nstype); +} + +static __attribute__((unused)) +int setns(int fd, int nstype) +{ + return __sysret(sys_setns(fd, nstype)); +} + + +/* + * int unshare(int flags); + */ + +static __attribute__((unused)) +int sys_unshare(int flags) +{ + return my_syscall1(__NR_unshare, flags); +} + +static __attribute__((unused)) +int unshare(int flags) +{ + return __sysret(sys_unshare(flags)); +} + +#endif /* _NOLIBC_SCHED_H */ diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h index cdcc5904c51e..ac13e53ac31d 100644 --- a/tools/include/nolibc/signal.h +++ b/tools/include/nolibc/signal.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_SIGNAL_H #define _NOLIBC_SIGNAL_H @@ -20,7 +23,4 @@ int raise(int signal) return sys_kill(sys_getpid(), signal); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_SIGNAL_H */ diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h index 933bc0be7e1c..2c1ad23b9b5c 100644 --- a/tools/include/nolibc/std.h +++ b/tools/include/nolibc/std.h @@ -13,12 +13,10 @@ * syscall-specific stuff, as this file is expected to be included very early. */ -/* note: may already be defined */ -#ifndef NULL -#define NULL ((void *)0) -#endif - #include "stdint.h" +#include "stddef.h" + +#include <linux/types.h> /* those are commonly provided by sys/types.h */ typedef unsigned int dev_t; @@ -31,6 +29,6 @@ typedef unsigned long nlink_t; typedef signed long off_t; typedef signed long blksize_t; typedef signed long blkcnt_t; -typedef signed long time_t; +typedef __kernel_time_t time_t; #endif /* _NOLIBC_STD_H */ diff --git a/tools/include/nolibc/stddef.h b/tools/include/nolibc/stddef.h new file mode 100644 index 000000000000..ecbd13eab1f5 --- /dev/null +++ b/tools/include/nolibc/stddef.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Stddef definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "nolibc.h" + +#ifndef _NOLIBC_STDDEF_H +#define _NOLIBC_STDDEF_H + +#include "stdint.h" + +/* note: may already be defined */ +#ifndef NULL +#define NULL ((void *)0) +#endif + +#ifndef offsetof +#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD) +#endif + +#endif /* _NOLIBC_STDDEF_H */ diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h index cd79ddd6170e..b052ad6303c3 100644 --- a/tools/include/nolibc/stdint.h +++ b/tools/include/nolibc/stdint.h @@ -39,8 +39,8 @@ typedef size_t uint_fast32_t; typedef int64_t int_fast64_t; typedef uint64_t uint_fast64_t; -typedef int64_t intmax_t; -typedef uint64_t uintmax_t; +typedef __INTMAX_TYPE__ intmax_t; +typedef __UINTMAX_TYPE__ uintmax_t; /* limits of integral types */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index a403351dbf60..7630234408c5 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -4,12 +4,16 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_STDIO_H #define _NOLIBC_STDIO_H #include "std.h" #include "arch.h" #include "errno.h" +#include "fcntl.h" #include "types.h" #include "sys.h" #include "stdarg.h" @@ -17,6 +21,8 @@ #include "string.h" #include "compiler.h" +static const char *strerror(int errnum); + #ifndef EOF #define EOF (-1) #endif @@ -50,6 +56,32 @@ FILE *fdopen(int fd, const char *mode __attribute__((unused))) return (FILE*)(intptr_t)~fd; } +static __attribute__((unused)) +FILE *fopen(const char *pathname, const char *mode) +{ + int flags, fd; + + switch (*mode) { + case 'r': + flags = O_RDONLY; + break; + case 'w': + flags = O_WRONLY | O_CREAT | O_TRUNC; + break; + case 'a': + flags = O_WRONLY | O_CREAT | O_APPEND; + break; + default: + SET_ERRNO(EINVAL); return NULL; + } + + if (mode[1] == '+') + flags = (flags & ~(O_RDONLY | O_WRONLY)) | O_RDWR; + + fd = open(pathname, flags, 0666); + return fdopen(fd, mode); +} + /* provides the fd of stream. */ static __attribute__((unused)) int fileno(FILE *stream) @@ -208,28 +240,40 @@ char *fgets(char *s, int size, FILE *stream) } -/* minimal vfprintf(). It supports the following formats: +/* minimal printf(). It supports the following formats: * - %[l*]{d,u,c,x,p} * - %s * - unknown modifiers are ignored. */ -static __attribute__((unused, format(printf, 2, 0))) -int vfprintf(FILE *stream, const char *fmt, va_list args) +typedef int (*__nolibc_printf_cb)(intptr_t state, const char *buf, size_t size); + +static __attribute__((unused, format(printf, 4, 0))) +int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char *fmt, va_list args) { char escape, lpref, c; unsigned long long v; - unsigned int written; - size_t len, ofs; + unsigned int written, width; + size_t len, ofs, w; char tmpbuf[21]; const char *outstr; written = ofs = escape = lpref = 0; while (1) { c = fmt[ofs++]; + width = 0; if (escape) { /* we're in an escape sequence, ofs == 1 */ escape = 0; + + /* width */ + while (c >= '0' && c <= '9') { + width *= 10; + width += c - '0'; + + c = fmt[ofs++]; + } + if (c == 'c' || c == 'd' || c == 'u' || c == 'x' || c == 'p') { char *out = tmpbuf; @@ -277,6 +321,11 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) if (!outstr) outstr="(null)"; } +#ifndef NOLIBC_IGNORE_ERRNO + else if (c == 'm') { + outstr = strerror(errno); + } +#endif /* NOLIBC_IGNORE_ERRNO */ else if (c == '%') { /* queue it verbatim */ continue; @@ -286,6 +335,8 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) if (c == 'l') { /* long format prefix, maintain the escape */ lpref++; + } else if (c == 'j') { + lpref = 2; } escape = 1; goto do_escape; @@ -302,8 +353,17 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) outstr = fmt; len = ofs - 1; flush_str: - if (_fwrite(outstr, len, stream) != 0) - break; + if (n) { + w = len < n ? len : n; + n -= w; + while (width-- > w) { + if (cb(state, " ", 1) != 0) + return -1; + written += 1; + } + if (cb(state, outstr, w) != 0) + return -1; + } written += len; do_escape: @@ -319,6 +379,17 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) return written; } +static int __nolibc_fprintf_cb(intptr_t state, const char *buf, size_t size) +{ + return _fwrite(buf, size, (FILE *)state); +} + +static __attribute__((unused, format(printf, 2, 0))) +int vfprintf(FILE *stream, const char *fmt, va_list args) +{ + return __nolibc_printf(__nolibc_fprintf_cb, (intptr_t)stream, SIZE_MAX, fmt, args); +} + static __attribute__((unused, format(printf, 1, 0))) int vprintf(const char *fmt, va_list args) { @@ -349,6 +420,85 @@ int printf(const char *fmt, ...) return ret; } +static __attribute__((unused, format(printf, 2, 0))) +int vdprintf(int fd, const char *fmt, va_list args) +{ + FILE *stream; + + stream = fdopen(fd, NULL); + if (!stream) + return -1; + /* Technically 'stream' is leaked, but as it's only a wrapper around 'fd' that is fine */ + return vfprintf(stream, fmt, args); +} + +static __attribute__((unused, format(printf, 2, 3))) +int dprintf(int fd, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vdprintf(fd, fmt, args); + va_end(args); + + return ret; +} + +static int __nolibc_sprintf_cb(intptr_t _state, const char *buf, size_t size) +{ + char **state = (char **)_state; + + memcpy(*state, buf, size); + *state += size; + return 0; +} + +static __attribute__((unused, format(printf, 3, 0))) +int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + char *state = buf; + int ret; + + ret = __nolibc_printf(__nolibc_sprintf_cb, (intptr_t)&state, size, fmt, args); + if (ret < 0) + return ret; + buf[(size_t)ret < size ? (size_t)ret : size - 1] = '\0'; + return ret; +} + +static __attribute__((unused, format(printf, 3, 4))) +int snprintf(char *buf, size_t size, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vsnprintf(buf, size, fmt, args); + va_end(args); + + return ret; +} + +static __attribute__((unused, format(printf, 2, 0))) +int vsprintf(char *buf, const char *fmt, va_list args) +{ + return vsnprintf(buf, SIZE_MAX, fmt, args); +} + +static __attribute__((unused, format(printf, 2, 3))) +int sprintf(char *buf, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vsprintf(buf, fmt, args); + va_end(args); + + return ret; +} + static __attribute__((unused)) int vsscanf(const char *str, const char *format, va_list args) { @@ -485,7 +635,4 @@ const char *strerror(int errno) return buf; } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_STDIO_H */ diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h index 86ad378ab1ea..5fd99a480f82 100644 --- a/tools/include/nolibc/stdlib.h +++ b/tools/include/nolibc/stdlib.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_STDLIB_H #define _NOLIBC_STDLIB_H @@ -29,6 +32,24 @@ static __attribute__((unused)) char itoa_buffer[21]; * As much as possible, please keep functions alphabetically sorted. */ +static __inline__ +int abs(int j) +{ + return j >= 0 ? j : -j; +} + +static __inline__ +long labs(long j) +{ + return j >= 0 ? j : -j; +} + +static __inline__ +long long llabs(long long j) +{ + return j >= 0 ? j : -j; +} + /* must be exported, as it's used by libgcc for various divide functions */ void abort(void); __attribute__((weak,unused,noreturn,section(".text.nolibc_abort"))) @@ -103,32 +124,6 @@ char *getenv(const char *name) } static __attribute__((unused)) -unsigned long getauxval(unsigned long type) -{ - const unsigned long *auxv = _auxv; - unsigned long ret; - - if (!auxv) - return 0; - - while (1) { - if (!auxv[0] && !auxv[1]) { - ret = 0; - break; - } - - if (auxv[0] == type) { - ret = auxv[1]; - break; - } - - auxv += 2; - } - - return ret; -} - -static __attribute__((unused)) void *malloc(size_t len) { struct nolibc_heap *heap; @@ -275,7 +270,7 @@ int itoa_r(long in, char *buffer) int len = 0; if (in < 0) { - in = -in; + in = -(unsigned long)in; *(ptr++) = '-'; len++; } @@ -411,7 +406,7 @@ int i64toa_r(int64_t in, char *buffer) int len = 0; if (in < 0) { - in = -in; + in = -(uint64_t)in; *(ptr++) = '-'; len++; } @@ -548,7 +543,4 @@ uintmax_t strtoumax(const char *nptr, char **endptr, int base) return __strtox(nptr, endptr, base, 0, UINTMAX_MAX); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_STDLIB_H */ diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h index ba84ab700e30..163a17e7dd38 100644 --- a/tools/include/nolibc/string.h +++ b/tools/include/nolibc/string.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_STRING_H #define _NOLIBC_STRING_H @@ -289,7 +292,40 @@ char *strrchr(const char *s, int c) return (char *)ret; } -/* make sure to include all global symbols */ -#include "nolibc.h" +static __attribute__((unused)) +char *strstr(const char *haystack, const char *needle) +{ + size_t len_haystack, len_needle; + + len_needle = strlen(needle); + if (!len_needle) + return NULL; + + len_haystack = strlen(haystack); + while (len_haystack >= len_needle) { + if (!memcmp(haystack, needle, len_needle)) + return (char *)haystack; + haystack++; + len_haystack--; + } + + return NULL; +} + +static __attribute__((unused)) +int tolower(int c) +{ + if (c >= 'A' && c <= 'Z') + return c - 'A' + 'a'; + return c; +} + +static __attribute__((unused)) +int toupper(int c) +{ + if (c >= 'a' && c <= 'z') + return c - 'a' + 'A'; + return c; +} #endif /* _NOLIBC_STRING_H */ diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 08c1c074bec8..c5564f57deec 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_SYS_H #define _NOLIBC_SYS_H @@ -19,12 +22,9 @@ #include <linux/time.h> #include <linux/auxvec.h> #include <linux/fcntl.h> /* for O_* and AT_* */ +#include <linux/sched.h> /* for clone_args */ #include <linux/stat.h> /* for statx() */ -#include <linux/prctl.h> -#include <linux/resource.h> -#include <linux/utsname.h> -#include "arch.h" #include "errno.h" #include "stdarg.h" #include "types.h" @@ -140,12 +140,10 @@ int chdir(const char *path) static __attribute__((unused)) int sys_chmod(const char *path, mode_t mode) { -#ifdef __NR_fchmodat +#if defined(__NR_fchmodat) return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); -#elif defined(__NR_chmod) - return my_syscall2(__NR_chmod, path, mode); #else - return __nolibc_enosys(__func__, path, mode); + return my_syscall2(__NR_chmod, path, mode); #endif } @@ -163,12 +161,10 @@ int chmod(const char *path, mode_t mode) static __attribute__((unused)) int sys_chown(const char *path, uid_t owner, gid_t group) { -#ifdef __NR_fchownat +#if defined(__NR_fchownat) return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); -#elif defined(__NR_chown) - return my_syscall3(__NR_chown, path, owner, group); #else - return __nolibc_enosys(__func__, path, owner, group); + return my_syscall3(__NR_chown, path, owner, group); #endif } @@ -237,12 +233,23 @@ int dup(int fd) static __attribute__((unused)) int sys_dup2(int old, int new) { -#ifdef __NR_dup3 +#if defined(__NR_dup3) + int ret, nr_fcntl; + +#ifdef __NR_fcntl64 + nr_fcntl = __NR_fcntl64; +#else + nr_fcntl = __NR_fcntl; +#endif + + if (old == new) { + ret = my_syscall2(nr_fcntl, old, F_GETFD); + return ret < 0 ? ret : old; + } + return my_syscall3(__NR_dup3, old, new, 0); -#elif defined(__NR_dup2) - return my_syscall2(__NR_dup2, old, new); #else - return __nolibc_enosys(__func__, old, new); + return my_syscall2(__NR_dup2, old, new); #endif } @@ -257,7 +264,7 @@ int dup2(int old, int new) * int dup3(int old, int new, int flags); */ -#ifdef __NR_dup3 +#if defined(__NR_dup3) static __attribute__((unused)) int sys_dup3(int old, int new, int flags) { @@ -301,11 +308,17 @@ void sys_exit(int status) } static __attribute__((noreturn,unused)) -void exit(int status) +void _exit(int status) { sys_exit(status); } +static __attribute__((noreturn,unused)) +void exit(int status) +{ + _exit(status); +} + /* * pid_t fork(void); @@ -315,16 +328,14 @@ void exit(int status) static __attribute__((unused)) pid_t sys_fork(void) { -#ifdef __NR_clone +#if defined(__NR_clone) /* note: some archs only have clone() and not fork(). Different archs * have a different API, but most archs have the flags on first arg and * will not use the rest with no other flag. */ return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0); -#elif defined(__NR_fork) - return my_syscall0(__NR_fork); #else - return __nolibc_enosys(__func__); + return my_syscall0(__NR_fork); #endif } #endif @@ -335,6 +346,32 @@ pid_t fork(void) return __sysret(sys_fork()); } +#ifndef sys_vfork +static __attribute__((unused)) +pid_t sys_vfork(void) +{ +#if defined(__NR_vfork) + return my_syscall0(__NR_vfork); +#else + /* + * clone() could be used but has different argument orders per + * architecture. + */ + struct clone_args args = { + .flags = CLONE_VM | CLONE_VFORK, + .exit_signal = SIGCHLD, + }; + + return my_syscall2(__NR_clone3, &args, sizeof(args)); +#endif +} +#endif + +static __attribute__((unused)) +pid_t vfork(void) +{ + return __sysret(sys_vfork()); +} /* * int fsync(int fd); @@ -377,7 +414,7 @@ int getdents64(int fd, struct linux_dirent64 *dirp, int count) static __attribute__((unused)) uid_t sys_geteuid(void) { -#ifdef __NR_geteuid32 +#if defined(__NR_geteuid32) return my_syscall0(__NR_geteuid32); #else return my_syscall0(__NR_geteuid); @@ -489,34 +526,13 @@ int getpagesize(void) /* - * int gettimeofday(struct timeval *tv, struct timezone *tz); - */ - -static __attribute__((unused)) -int sys_gettimeofday(struct timeval *tv, struct timezone *tz) -{ -#ifdef __NR_gettimeofday - return my_syscall2(__NR_gettimeofday, tv, tz); -#else - return __nolibc_enosys(__func__, tv, tz); -#endif -} - -static __attribute__((unused)) -int gettimeofday(struct timeval *tv, struct timezone *tz) -{ - return __sysret(sys_gettimeofday(tv, tz)); -} - - -/* * uid_t getuid(void); */ static __attribute__((unused)) uid_t sys_getuid(void) { -#ifdef __NR_getuid32 +#if defined(__NR_getuid32) return my_syscall0(__NR_getuid32); #else return my_syscall0(__NR_getuid); @@ -531,18 +547,6 @@ uid_t getuid(void) /* - * int ioctl(int fd, unsigned long cmd, ... arg); - */ - -static __attribute__((unused)) -long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - return my_syscall3(__NR_ioctl, fd, cmd, arg); -} - -#define ioctl(fd, cmd, arg) __sysret(sys_ioctl(fd, cmd, (unsigned long)(arg))) - -/* * int kill(pid_t pid, int signal); */ @@ -566,12 +570,10 @@ int kill(pid_t pid, int signal) static __attribute__((unused)) int sys_link(const char *old, const char *new) { -#ifdef __NR_linkat +#if defined(__NR_linkat) return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); -#elif defined(__NR_link) - return my_syscall2(__NR_link, old, new); #else - return __nolibc_enosys(__func__, old, new); + return my_syscall2(__NR_link, old, new); #endif } @@ -589,44 +591,30 @@ int link(const char *old, const char *new) static __attribute__((unused)) off_t sys_lseek(int fd, off_t offset, int whence) { -#ifdef __NR_lseek +#if defined(__NR_lseek) return my_syscall3(__NR_lseek, fd, offset, whence); #else - return __nolibc_enosys(__func__, fd, offset, whence); -#endif -} + __kernel_loff_t loff = 0; + off_t result; + int ret; -static __attribute__((unused)) -int sys_llseek(int fd, unsigned long offset_high, unsigned long offset_low, - __kernel_loff_t *result, int whence) -{ -#ifdef __NR_llseek - return my_syscall5(__NR_llseek, fd, offset_high, offset_low, result, whence); -#else - return __nolibc_enosys(__func__, fd, offset_high, offset_low, result, whence); + /* Only exists on 32bit where nolibc off_t is also 32bit */ + ret = my_syscall5(__NR_llseek, fd, 0, offset, &loff, whence); + if (ret < 0) + result = ret; + else if (loff != (off_t)loff) + result = -EOVERFLOW; + else + result = loff; + + return result; #endif } static __attribute__((unused)) off_t lseek(int fd, off_t offset, int whence) { - __kernel_loff_t loff = 0; - off_t result; - int ret; - - result = sys_lseek(fd, offset, whence); - if (result == -ENOSYS) { - /* Only exists on 32bit where nolibc off_t is also 32bit */ - ret = sys_llseek(fd, 0, offset, &loff, whence); - if (ret < 0) - result = ret; - else if (loff != (off_t)loff) - result = -EOVERFLOW; - else - result = loff; - } - - return __sysret(result); + return __sysret(sys_lseek(fd, offset, whence)); } @@ -637,12 +625,10 @@ off_t lseek(int fd, off_t offset, int whence) static __attribute__((unused)) int sys_mkdir(const char *path, mode_t mode) { -#ifdef __NR_mkdirat +#if defined(__NR_mkdirat) return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); -#elif defined(__NR_mkdir) - return my_syscall2(__NR_mkdir, path, mode); #else - return __nolibc_enosys(__func__, path, mode); + return my_syscall2(__NR_mkdir, path, mode); #endif } @@ -659,12 +645,10 @@ int mkdir(const char *path, mode_t mode) static __attribute__((unused)) int sys_rmdir(const char *path) { -#ifdef __NR_rmdir +#if defined(__NR_rmdir) return my_syscall1(__NR_rmdir, path); -#elif defined(__NR_unlinkat) - return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR); #else - return __nolibc_enosys(__func__, path); + return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR); #endif } @@ -682,12 +666,10 @@ int rmdir(const char *path) static __attribute__((unused)) long sys_mknod(const char *path, mode_t mode, dev_t dev) { -#ifdef __NR_mknodat +#if defined(__NR_mknodat) return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); -#elif defined(__NR_mknod) - return my_syscall3(__NR_mknod, path, mode, dev); #else - return __nolibc_enosys(__func__, path, mode, dev); + return my_syscall3(__NR_mknod, path, mode, dev); #endif } @@ -697,125 +679,6 @@ int mknod(const char *path, mode_t mode, dev_t dev) return __sysret(sys_mknod(path, mode, dev)); } -#ifndef sys_mmap -static __attribute__((unused)) -void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, - off_t offset) -{ - int n; - -#if defined(__NR_mmap2) - n = __NR_mmap2; - offset >>= 12; -#else - n = __NR_mmap; -#endif - - return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset); -} -#endif - -/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret() - * which returns -1 upon error and still satisfy user land that checks for - * MAP_FAILED. - */ - -static __attribute__((unused)) -void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) -{ - void *ret = sys_mmap(addr, length, prot, flags, fd, offset); - - if ((unsigned long)ret >= -4095UL) { - SET_ERRNO(-(long)ret); - ret = MAP_FAILED; - } - return ret; -} - -static __attribute__((unused)) -int sys_munmap(void *addr, size_t length) -{ - return my_syscall2(__NR_munmap, addr, length); -} - -static __attribute__((unused)) -int munmap(void *addr, size_t length) -{ - return __sysret(sys_munmap(addr, length)); -} - -/* - * int mount(const char *source, const char *target, - * const char *fstype, unsigned long flags, - * const void *data); - */ -static __attribute__((unused)) -int sys_mount(const char *src, const char *tgt, const char *fst, - unsigned long flags, const void *data) -{ - return my_syscall5(__NR_mount, src, tgt, fst, flags, data); -} - -static __attribute__((unused)) -int mount(const char *src, const char *tgt, - const char *fst, unsigned long flags, - const void *data) -{ - return __sysret(sys_mount(src, tgt, fst, flags, data)); -} - -/* - * int openat(int dirfd, const char *path, int flags[, mode_t mode]); - */ - -static __attribute__((unused)) -int sys_openat(int dirfd, const char *path, int flags, mode_t mode) -{ - return my_syscall4(__NR_openat, dirfd, path, flags, mode); -} - -static __attribute__((unused)) -int openat(int dirfd, const char *path, int flags, ...) -{ - mode_t mode = 0; - - if (flags & O_CREAT) { - va_list args; - - va_start(args, flags); - mode = va_arg(args, mode_t); - va_end(args); - } - - return __sysret(sys_openat(dirfd, path, flags, mode)); -} - -/* - * int open(const char *path, int flags[, mode_t mode]); - */ - -static __attribute__((unused)) -int sys_open(const char *path, int flags, mode_t mode) -{ - return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); -} - -static __attribute__((unused)) -int open(const char *path, int flags, ...) -{ - mode_t mode = 0; - - if (flags & O_CREAT) { - va_list args; - - va_start(args, flags); - mode = va_arg(args, mode_t); - va_end(args); - } - - return __sysret(sys_open(path, flags, mode)); -} - /* * int pipe2(int pipefd[2], int flags); @@ -842,26 +705,6 @@ int pipe(int pipefd[2]) /* - * int prctl(int option, unsigned long arg2, unsigned long arg3, - * unsigned long arg4, unsigned long arg5); - */ - -static __attribute__((unused)) -int sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) -{ - return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5); -} - -static __attribute__((unused)) -int prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) -{ - return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5)); -} - - -/* * int pivot_root(const char *new, const char *old); */ @@ -879,35 +722,6 @@ int pivot_root(const char *new, const char *old) /* - * int poll(struct pollfd *fds, int nfds, int timeout); - */ - -static __attribute__((unused)) -int sys_poll(struct pollfd *fds, int nfds, int timeout) -{ -#if defined(__NR_ppoll) - struct timespec t; - - if (timeout >= 0) { - t.tv_sec = timeout / 1000; - t.tv_nsec = (timeout % 1000) * 1000000; - } - return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); -#elif defined(__NR_poll) - return my_syscall3(__NR_poll, fds, nfds, timeout); -#else - return __nolibc_enosys(__func__, fds, nfds, timeout); -#endif -} - -static __attribute__((unused)) -int poll(struct pollfd *fds, int nfds, int timeout) -{ - return __sysret(sys_poll(fds, nfds, timeout)); -} - - -/* * ssize_t read(int fd, void *buf, size_t count); */ @@ -925,61 +739,6 @@ ssize_t read(int fd, void *buf, size_t count) /* - * int reboot(int cmd); - * <cmd> is among LINUX_REBOOT_CMD_* - */ - -static __attribute__((unused)) -ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) -{ - return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); -} - -static __attribute__((unused)) -int reboot(int cmd) -{ - return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0)); -} - - -/* - * int getrlimit(int resource, struct rlimit *rlim); - * int setrlimit(int resource, const struct rlimit *rlim); - */ - -static __attribute__((unused)) -int sys_prlimit64(pid_t pid, int resource, - const struct rlimit64 *new_limit, struct rlimit64 *old_limit) -{ - return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit); -} - -static __attribute__((unused)) -int getrlimit(int resource, struct rlimit *rlim) -{ - struct rlimit64 rlim64; - int ret; - - ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64)); - rlim->rlim_cur = rlim64.rlim_cur; - rlim->rlim_max = rlim64.rlim_max; - - return ret; -} - -static __attribute__((unused)) -int setrlimit(int resource, const struct rlimit *rlim) -{ - struct rlimit64 rlim64 = { - .rlim_cur = rlim->rlim_cur, - .rlim_max = rlim->rlim_max, - }; - - return __sysret(sys_prlimit64(0, resource, &rlim64, NULL)); -} - - -/* * int sched_yield(void); */ @@ -1024,7 +783,13 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva } return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); #else - return __nolibc_enosys(__func__, nfds, rfds, wfds, efds, timeout); + struct __kernel_timespec t; + + if (timeout) { + t.tv_sec = timeout->tv_sec; + t.tv_nsec = timeout->tv_usec * 1000; + } + return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); #endif } @@ -1051,77 +816,31 @@ int setpgid(pid_t pid, pid_t pgid) return __sysret(sys_setpgid(pid, pgid)); } - /* - * pid_t setsid(void); + * pid_t setpgrp(void) */ static __attribute__((unused)) -pid_t sys_setsid(void) +pid_t setpgrp(void) { - return my_syscall0(__NR_setsid); + return setpgid(0, 0); } -static __attribute__((unused)) -pid_t setsid(void) -{ - return __sysret(sys_setsid()); -} /* - * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf); - * int stat(const char *path, struct stat *buf); + * pid_t setsid(void); */ static __attribute__((unused)) -int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) -{ -#ifdef __NR_statx - return my_syscall5(__NR_statx, fd, path, flags, mask, buf); -#else - return __nolibc_enosys(__func__, fd, path, flags, mask, buf); -#endif -} - -static __attribute__((unused)) -int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +pid_t sys_setsid(void) { - return __sysret(sys_statx(fd, path, flags, mask, buf)); + return my_syscall0(__NR_setsid); } - static __attribute__((unused)) -int stat(const char *path, struct stat *buf) +pid_t setsid(void) { - struct statx statx; - long ret; - - ret = __sysret(sys_statx(AT_FDCWD, path, AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx)); - if (ret == -1) - return ret; - - buf->st_dev = ((statx.stx_dev_minor & 0xff) - | (statx.stx_dev_major << 8) - | ((statx.stx_dev_minor & ~0xff) << 12)); - buf->st_ino = statx.stx_ino; - buf->st_mode = statx.stx_mode; - buf->st_nlink = statx.stx_nlink; - buf->st_uid = statx.stx_uid; - buf->st_gid = statx.stx_gid; - buf->st_rdev = ((statx.stx_rdev_minor & 0xff) - | (statx.stx_rdev_major << 8) - | ((statx.stx_rdev_minor & ~0xff) << 12)); - buf->st_size = statx.stx_size; - buf->st_blksize = statx.stx_blksize; - buf->st_blocks = statx.stx_blocks; - buf->st_atim.tv_sec = statx.stx_atime.tv_sec; - buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec; - buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec; - buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec; - buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec; - buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec; - - return 0; + return __sysret(sys_setsid()); } @@ -1132,12 +851,10 @@ int stat(const char *path, struct stat *buf) static __attribute__((unused)) int sys_symlink(const char *old, const char *new) { -#ifdef __NR_symlinkat +#if defined(__NR_symlinkat) return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); -#elif defined(__NR_symlink) - return my_syscall2(__NR_symlink, old, new); #else - return __nolibc_enosys(__func__, old, new); + return my_syscall2(__NR_symlink, old, new); #endif } @@ -1183,44 +900,16 @@ int umount2(const char *path, int flags) /* - * int uname(struct utsname *buf); - */ - -struct utsname { - char sysname[65]; - char nodename[65]; - char release[65]; - char version[65]; - char machine[65]; - char domainname[65]; -}; - -static __attribute__((unused)) -int sys_uname(struct utsname *buf) -{ - return my_syscall1(__NR_uname, buf); -} - -static __attribute__((unused)) -int uname(struct utsname *buf) -{ - return __sysret(sys_uname(buf)); -} - - -/* * int unlink(const char *path); */ static __attribute__((unused)) int sys_unlink(const char *path) { -#ifdef __NR_unlinkat +#if defined(__NR_unlinkat) return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); -#elif defined(__NR_unlink) - return my_syscall1(__NR_unlink, path); #else - return __nolibc_enosys(__func__, path); + return my_syscall1(__NR_unlink, path); #endif } @@ -1232,59 +921,6 @@ int unlink(const char *path) /* - * pid_t wait(int *status); - * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage); - * pid_t waitpid(pid_t pid, int *status, int options); - */ - -static __attribute__((unused)) -pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ -#ifdef __NR_wait4 - return my_syscall4(__NR_wait4, pid, status, options, rusage); -#else - return __nolibc_enosys(__func__, pid, status, options, rusage); -#endif -} - -static __attribute__((unused)) -pid_t wait(int *status) -{ - return __sysret(sys_wait4(-1, status, 0, NULL)); -} - -static __attribute__((unused)) -pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) -{ - return __sysret(sys_wait4(pid, status, options, rusage)); -} - - -static __attribute__((unused)) -pid_t waitpid(pid_t pid, int *status, int options) -{ - return __sysret(sys_wait4(pid, status, options, NULL)); -} - - -/* - * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); - */ - -static __attribute__((unused)) -int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage) -{ - return my_syscall5(__NR_waitid, which, pid, infop, options, rusage); -} - -static __attribute__((unused)) -int waitid(int which, pid_t pid, siginfo_t *infop, int options) -{ - return __sysret(sys_waitid(which, pid, infop, options, NULL)); -} - - -/* * ssize_t write(int fd, const void *buf, size_t count); */ @@ -1317,7 +953,4 @@ int memfd_create(const char *name, unsigned int flags) return __sysret(sys_memfd_create(name, flags)); } -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_SYS_H */ diff --git a/tools/include/nolibc/sys/auxv.h b/tools/include/nolibc/sys/auxv.h new file mode 100644 index 000000000000..c52463d6c18d --- /dev/null +++ b/tools/include/nolibc/sys/auxv.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * auxv definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_AUXV_H +#define _NOLIBC_SYS_AUXV_H + +#include "../crt.h" + +static __attribute__((unused)) +unsigned long getauxval(unsigned long type) +{ + const unsigned long *auxv = _auxv; + unsigned long ret; + + if (!auxv) + return 0; + + while (1) { + if (!auxv[0] && !auxv[1]) { + ret = 0; + break; + } + + if (auxv[0] == type) { + ret = auxv[1]; + break; + } + + auxv += 2; + } + + return ret; +} + +#endif /* _NOLIBC_SYS_AUXV_H */ diff --git a/tools/include/nolibc/sys/ioctl.h b/tools/include/nolibc/sys/ioctl.h new file mode 100644 index 000000000000..fc880687e02a --- /dev/null +++ b/tools/include/nolibc/sys/ioctl.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Ioctl definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_IOCTL_H +#define _NOLIBC_SYS_IOCTL_H + +#include "../sys.h" + +#include <linux/ioctl.h> + +/* + * int ioctl(int fd, unsigned long cmd, ... arg); + */ + +static __attribute__((unused)) +long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +{ + return my_syscall3(__NR_ioctl, fd, cmd, arg); +} + +#define ioctl(fd, cmd, arg) __sysret(sys_ioctl(fd, cmd, (unsigned long)(arg))) + +#endif /* _NOLIBC_SYS_IOCTL_H */ diff --git a/tools/include/nolibc/sys/mman.h b/tools/include/nolibc/sys/mman.h new file mode 100644 index 000000000000..5228751b458c --- /dev/null +++ b/tools/include/nolibc/sys/mman.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * mm definition for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_MMAN_H +#define _NOLIBC_SYS_MMAN_H + +#include "../arch.h" +#include "../sys.h" + +#ifndef sys_mmap +static __attribute__((unused)) +void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd, + off_t offset) +{ + int n; + +#if defined(__NR_mmap2) + n = __NR_mmap2; + offset >>= 12; +#else + n = __NR_mmap; +#endif + + return (void *)my_syscall6(n, addr, length, prot, flags, fd, offset); +} +#endif + +/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret() + * which returns -1 upon error and still satisfy user land that checks for + * MAP_FAILED. + */ + +static __attribute__((unused)) +void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) +{ + void *ret = sys_mmap(addr, length, prot, flags, fd, offset); + + if ((unsigned long)ret >= -4095UL) { + SET_ERRNO(-(long)ret); + ret = MAP_FAILED; + } + return ret; +} + +static __attribute__((unused)) +void *sys_mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) +{ + return (void *)my_syscall5(__NR_mremap, old_address, old_size, + new_size, flags, new_address); +} + +static __attribute__((unused)) +void *mremap(void *old_address, size_t old_size, size_t new_size, int flags, void *new_address) +{ + void *ret = sys_mremap(old_address, old_size, new_size, flags, new_address); + + if ((unsigned long)ret >= -4095UL) { + SET_ERRNO(-(long)ret); + ret = MAP_FAILED; + } + return ret; +} + +static __attribute__((unused)) +int sys_munmap(void *addr, size_t length) +{ + return my_syscall2(__NR_munmap, addr, length); +} + +static __attribute__((unused)) +int munmap(void *addr, size_t length) +{ + return __sysret(sys_munmap(addr, length)); +} + +#endif /* _NOLIBC_SYS_MMAN_H */ diff --git a/tools/include/nolibc/sys/mount.h b/tools/include/nolibc/sys/mount.h new file mode 100644 index 000000000000..e39ec02ea24c --- /dev/null +++ b/tools/include/nolibc/sys/mount.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Mount definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_MOUNT_H +#define _NOLIBC_SYS_MOUNT_H + +#include "../sys.h" + +#include <linux/mount.h> + +/* + * int mount(const char *source, const char *target, + * const char *fstype, unsigned long flags, + * const void *data); + */ +static __attribute__((unused)) +int sys_mount(const char *src, const char *tgt, const char *fst, + unsigned long flags, const void *data) +{ + return my_syscall5(__NR_mount, src, tgt, fst, flags, data); +} + +static __attribute__((unused)) +int mount(const char *src, const char *tgt, + const char *fst, unsigned long flags, + const void *data) +{ + return __sysret(sys_mount(src, tgt, fst, flags, data)); +} + +#endif /* _NOLIBC_SYS_MOUNT_H */ diff --git a/tools/include/nolibc/sys/prctl.h b/tools/include/nolibc/sys/prctl.h new file mode 100644 index 000000000000..0205907b6ac8 --- /dev/null +++ b/tools/include/nolibc/sys/prctl.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Prctl definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_PRCTL_H +#define _NOLIBC_SYS_PRCTL_H + +#include "../sys.h" + +#include <linux/prctl.h> + +/* + * int prctl(int option, unsigned long arg2, unsigned long arg3, + * unsigned long arg4, unsigned long arg5); + */ + +static __attribute__((unused)) +int sys_prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + return my_syscall5(__NR_prctl, option, arg2, arg3, arg4, arg5); +} + +static __attribute__((unused)) +int prctl(int option, unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + return __sysret(sys_prctl(option, arg2, arg3, arg4, arg5)); +} + +#endif /* _NOLIBC_SYS_PRCTL_H */ diff --git a/tools/include/nolibc/sys/random.h b/tools/include/nolibc/sys/random.h new file mode 100644 index 000000000000..cd5d25c571a8 --- /dev/null +++ b/tools/include/nolibc/sys/random.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * random definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_RANDOM_H +#define _NOLIBC_SYS_RANDOM_H + +#include "../arch.h" +#include "../sys.h" + +#include <linux/random.h> + +/* + * ssize_t getrandom(void *buf, size_t buflen, unsigned int flags); + */ + +static __attribute__((unused)) +ssize_t sys_getrandom(void *buf, size_t buflen, unsigned int flags) +{ + return my_syscall3(__NR_getrandom, buf, buflen, flags); +} + +static __attribute__((unused)) +ssize_t getrandom(void *buf, size_t buflen, unsigned int flags) +{ + return __sysret(sys_getrandom(buf, buflen, flags)); +} + +#endif /* _NOLIBC_SYS_RANDOM_H */ diff --git a/tools/include/nolibc/sys/reboot.h b/tools/include/nolibc/sys/reboot.h new file mode 100644 index 000000000000..4a1e435be669 --- /dev/null +++ b/tools/include/nolibc/sys/reboot.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Reboot definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_REBOOT_H +#define _NOLIBC_SYS_REBOOT_H + +#include "../sys.h" + +#include <linux/reboot.h> + +/* + * int reboot(int cmd); + * <cmd> is among LINUX_REBOOT_CMD_* + */ + +static __attribute__((unused)) +ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) +{ + return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); +} + +static __attribute__((unused)) +int reboot(int cmd) +{ + return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0)); +} + +#endif /* _NOLIBC_SYS_REBOOT_H */ diff --git a/tools/include/nolibc/sys/resource.h b/tools/include/nolibc/sys/resource.h new file mode 100644 index 000000000000..b990f914dc56 --- /dev/null +++ b/tools/include/nolibc/sys/resource.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Resource definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_RESOURCE_H +#define _NOLIBC_SYS_RESOURCE_H + +#include "../sys.h" + +#include <linux/resource.h> + +/* + * int getrlimit(int resource, struct rlimit *rlim); + * int setrlimit(int resource, const struct rlimit *rlim); + */ + +static __attribute__((unused)) +int sys_prlimit64(pid_t pid, int resource, + const struct rlimit64 *new_limit, struct rlimit64 *old_limit) +{ + return my_syscall4(__NR_prlimit64, pid, resource, new_limit, old_limit); +} + +static __attribute__((unused)) +int getrlimit(int resource, struct rlimit *rlim) +{ + struct rlimit64 rlim64; + int ret; + + ret = __sysret(sys_prlimit64(0, resource, NULL, &rlim64)); + rlim->rlim_cur = rlim64.rlim_cur; + rlim->rlim_max = rlim64.rlim_max; + + return ret; +} + +static __attribute__((unused)) +int setrlimit(int resource, const struct rlimit *rlim) +{ + struct rlimit64 rlim64 = { + .rlim_cur = rlim->rlim_cur, + .rlim_max = rlim->rlim_max, + }; + + return __sysret(sys_prlimit64(0, resource, &rlim64, NULL)); +} + +#endif /* _NOLIBC_SYS_RESOURCE_H */ diff --git a/tools/include/nolibc/sys/stat.h b/tools/include/nolibc/sys/stat.h new file mode 100644 index 000000000000..8b4d80e3ea03 --- /dev/null +++ b/tools/include/nolibc/sys/stat.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * stat definition for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_STAT_H +#define _NOLIBC_SYS_STAT_H + +#include "../arch.h" +#include "../types.h" +#include "../sys.h" + +/* + * int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf); + * int stat(const char *path, struct stat *buf); + * int fstatat(int fd, const char *path, struct stat *buf, int flag); + * int fstat(int fildes, struct stat *buf); + * int lstat(const char *path, struct stat *buf); + */ + +static __attribute__((unused)) +int sys_statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +{ +#ifdef __NR_statx + return my_syscall5(__NR_statx, fd, path, flags, mask, buf); +#else + return __nolibc_enosys(__func__, fd, path, flags, mask, buf); +#endif +} + +static __attribute__((unused)) +int statx(int fd, const char *path, int flags, unsigned int mask, struct statx *buf) +{ + return __sysret(sys_statx(fd, path, flags, mask, buf)); +} + + +static __attribute__((unused)) +int fstatat(int fd, const char *path, struct stat *buf, int flag) +{ + struct statx statx; + long ret; + + ret = __sysret(sys_statx(fd, path, flag | AT_NO_AUTOMOUNT, STATX_BASIC_STATS, &statx)); + if (ret == -1) + return ret; + + buf->st_dev = ((statx.stx_dev_minor & 0xff) + | (statx.stx_dev_major << 8) + | ((statx.stx_dev_minor & ~0xff) << 12)); + buf->st_ino = statx.stx_ino; + buf->st_mode = statx.stx_mode; + buf->st_nlink = statx.stx_nlink; + buf->st_uid = statx.stx_uid; + buf->st_gid = statx.stx_gid; + buf->st_rdev = ((statx.stx_rdev_minor & 0xff) + | (statx.stx_rdev_major << 8) + | ((statx.stx_rdev_minor & ~0xff) << 12)); + buf->st_size = statx.stx_size; + buf->st_blksize = statx.stx_blksize; + buf->st_blocks = statx.stx_blocks; + buf->st_atim.tv_sec = statx.stx_atime.tv_sec; + buf->st_atim.tv_nsec = statx.stx_atime.tv_nsec; + buf->st_mtim.tv_sec = statx.stx_mtime.tv_sec; + buf->st_mtim.tv_nsec = statx.stx_mtime.tv_nsec; + buf->st_ctim.tv_sec = statx.stx_ctime.tv_sec; + buf->st_ctim.tv_nsec = statx.stx_ctime.tv_nsec; + + return 0; +} + +static __attribute__((unused)) +int stat(const char *path, struct stat *buf) +{ + return fstatat(AT_FDCWD, path, buf, 0); +} + +static __attribute__((unused)) +int fstat(int fildes, struct stat *buf) +{ + return fstatat(fildes, "", buf, AT_EMPTY_PATH); +} + +static __attribute__((unused)) +int lstat(const char *path, struct stat *buf) +{ + return fstatat(AT_FDCWD, path, buf, AT_SYMLINK_NOFOLLOW); +} + +#endif /* _NOLIBC_SYS_STAT_H */ diff --git a/tools/include/nolibc/sys/syscall.h b/tools/include/nolibc/sys/syscall.h new file mode 100644 index 000000000000..4bf97f1386a0 --- /dev/null +++ b/tools/include/nolibc/sys/syscall.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * syscall() definition for NOLIBC + * Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_SYSCALL_H +#define _NOLIBC_SYS_SYSCALL_H + +#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N +#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0) +#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__)) +#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__) +#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__) + +#endif /* _NOLIBC_SYS_SYSCALL_H */ diff --git a/tools/include/nolibc/sys/sysmacros.h b/tools/include/nolibc/sys/sysmacros.h new file mode 100644 index 000000000000..37c33f030f02 --- /dev/null +++ b/tools/include/nolibc/sys/sysmacros.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Sysmacro definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_SYSMACROS_H +#define _NOLIBC_SYS_SYSMACROS_H + +#include "../std.h" + +/* WARNING, it only deals with the 4096 first majors and 256 first minors */ +#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff))) +#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff)) +#define minor(dev) ((unsigned int)((dev) & 0xff)) + +#endif /* _NOLIBC_SYS_SYSMACROS_H */ diff --git a/tools/include/nolibc/sys/time.h b/tools/include/nolibc/sys/time.h new file mode 100644 index 000000000000..33782a19aae9 --- /dev/null +++ b/tools/include/nolibc/sys/time.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * time definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_TIME_H +#define _NOLIBC_SYS_TIME_H + +#include "../arch.h" +#include "../sys.h" + +static int sys_clock_gettime(clockid_t clockid, struct timespec *tp); + +/* + * int gettimeofday(struct timeval *tv, struct timezone *tz); + */ + +static __attribute__((unused)) +int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ +#ifdef __NR_gettimeofday + return my_syscall2(__NR_gettimeofday, tv, tz); +#else + (void) tz; /* Non-NULL tz is undefined behaviour */ + + struct timespec tp; + int ret; + + ret = sys_clock_gettime(CLOCK_REALTIME, &tp); + if (!ret && tv) { + tv->tv_sec = tp.tv_sec; + tv->tv_usec = tp.tv_nsec / 1000; + } + + return ret; +#endif +} + +static __attribute__((unused)) +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return __sysret(sys_gettimeofday(tv, tz)); +} + +#endif /* _NOLIBC_SYS_TIME_H */ diff --git a/tools/include/nolibc/sys/timerfd.h b/tools/include/nolibc/sys/timerfd.h new file mode 100644 index 000000000000..5dd61030c991 --- /dev/null +++ b/tools/include/nolibc/sys/timerfd.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * timerfd definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_TIMERFD_H +#define _NOLIBC_SYS_TIMERFD_H + +#include "../sys.h" +#include "../time.h" + +#include <linux/timerfd.h> + + +static __attribute__((unused)) +int sys_timerfd_create(int clockid, int flags) +{ + return my_syscall2(__NR_timerfd_create, clockid, flags); +} + +static __attribute__((unused)) +int timerfd_create(int clockid, int flags) +{ + return __sysret(sys_timerfd_create(clockid, flags)); +} + + +static __attribute__((unused)) +int sys_timerfd_gettime(int fd, struct itimerspec *curr_value) +{ +#if defined(__NR_timerfd_gettime) + return my_syscall2(__NR_timerfd_gettime, fd, curr_value); +#else + struct __kernel_itimerspec kcurr_value; + int ret; + + ret = my_syscall2(__NR_timerfd_gettime64, fd, &kcurr_value); + __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval); + __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value); + return ret; +#endif +} + +static __attribute__((unused)) +int timerfd_gettime(int fd, struct itimerspec *curr_value) +{ + return __sysret(sys_timerfd_gettime(fd, curr_value)); +} + + +static __attribute__((unused)) +int sys_timerfd_settime(int fd, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ +#if defined(__NR_timerfd_settime) + return my_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value); +#else + struct __kernel_itimerspec knew_value, kold_value; + int ret; + + __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value); + __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval); + ret = my_syscall4(__NR_timerfd_settime64, fd, flags, &knew_value, &kold_value); + if (old_value) { + __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval); + __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value); + } + return ret; +#endif +} + +static __attribute__((unused)) +int timerfd_settime(int fd, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ + return __sysret(sys_timerfd_settime(fd, flags, new_value, old_value)); +} + +#endif /* _NOLIBC_SYS_TIMERFD_H */ diff --git a/tools/include/nolibc/sys/types.h b/tools/include/nolibc/sys/types.h new file mode 100644 index 000000000000..8a264a13275c --- /dev/null +++ b/tools/include/nolibc/sys/types.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * sys/types.h shim for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <thomas.weissschuh@linutronix.de> + */ + +#include "../types.h" diff --git a/tools/include/nolibc/sys/utsname.h b/tools/include/nolibc/sys/utsname.h new file mode 100644 index 000000000000..01023e1bb439 --- /dev/null +++ b/tools/include/nolibc/sys/utsname.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Utsname definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_UTSNAME_H +#define _NOLIBC_SYS_UTSNAME_H + +#include "../sys.h" + +#include <linux/utsname.h> + +/* + * int uname(struct utsname *buf); + */ + +struct utsname { + char sysname[65]; + char nodename[65]; + char release[65]; + char version[65]; + char machine[65]; + char domainname[65]; +}; + +static __attribute__((unused)) +int sys_uname(struct utsname *buf) +{ + return my_syscall1(__NR_uname, buf); +} + +static __attribute__((unused)) +int uname(struct utsname *buf) +{ + return __sysret(sys_uname(buf)); +} + +#endif /* _NOLIBC_SYS_UTSNAME_H */ diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h new file mode 100644 index 000000000000..4e66e1f7a03e --- /dev/null +++ b/tools/include/nolibc/sys/wait.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * wait definitions for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_WAIT_H +#define _NOLIBC_SYS_WAIT_H + +#include "../arch.h" +#include "../std.h" +#include "../types.h" + +/* + * pid_t wait(int *status); + * pid_t waitpid(pid_t pid, int *status, int options); + * int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); + */ + +static __attribute__((unused)) +int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage) +{ + return my_syscall5(__NR_waitid, which, pid, infop, options, rusage); +} + +static __attribute__((unused)) +int waitid(int which, pid_t pid, siginfo_t *infop, int options) +{ + return __sysret(sys_waitid(which, pid, infop, options, NULL)); +} + + +static __attribute__((unused)) +pid_t waitpid(pid_t pid, int *status, int options) +{ + int idtype, ret; + siginfo_t info; + pid_t id; + + if (pid == INT_MIN) { + SET_ERRNO(ESRCH); + return -1; + } else if (pid < -1) { + idtype = P_PGID; + id = -pid; + } else if (pid == -1) { + idtype = P_ALL; + id = 0; + } else if (pid == 0) { + idtype = P_PGID; + id = 0; + } else { + idtype = P_PID; + id = pid; + } + + options |= WEXITED; + + ret = waitid(idtype, id, &info, options); + if (ret) + return -1; + + switch (info.si_code) { + case 0: + *status = 0; + break; + case CLD_EXITED: + *status = (info.si_status & 0xff) << 8; + break; + case CLD_KILLED: + *status = info.si_status & 0x7f; + break; + case CLD_DUMPED: + *status = (info.si_status & 0x7f) | 0x80; + break; + case CLD_STOPPED: + case CLD_TRAPPED: + *status = (info.si_status << 8) + 0x7f; + break; + case CLD_CONTINUED: + *status = 0xffff; + break; + default: + return -1; + } + + return info.si_pid; +} + +static __attribute__((unused)) +pid_t wait(int *status) +{ + return waitpid(-1, status, 0); +} + +#endif /* _NOLIBC_SYS_WAIT_H */ diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index 84655361b9ad..6c276b8d646a 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_TIME_H #define _NOLIBC_TIME_H @@ -12,6 +15,137 @@ #include "types.h" #include "sys.h" +#include <linux/signal.h> +#include <linux/time.h> + +static __inline__ +void __nolibc_timespec_user_to_kernel(const struct timespec *ts, struct __kernel_timespec *kts) +{ + kts->tv_sec = ts->tv_sec; + kts->tv_nsec = ts->tv_nsec; +} + +static __inline__ +void __nolibc_timespec_kernel_to_user(const struct __kernel_timespec *kts, struct timespec *ts) +{ + ts->tv_sec = kts->tv_sec; + ts->tv_nsec = kts->tv_nsec; +} + +/* + * int clock_getres(clockid_t clockid, struct timespec *res); + * int clock_gettime(clockid_t clockid, struct timespec *tp); + * int clock_settime(clockid_t clockid, const struct timespec *tp); + * int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + * struct timespec *rmtp) + */ + +static __attribute__((unused)) +int sys_clock_getres(clockid_t clockid, struct timespec *res) +{ +#if defined(__NR_clock_getres) + return my_syscall2(__NR_clock_getres, clockid, res); +#else + struct __kernel_timespec kres; + int ret; + + ret = my_syscall2(__NR_clock_getres_time64, clockid, &kres); + if (res) + __nolibc_timespec_kernel_to_user(&kres, res); + return ret; +#endif +} + +static __attribute__((unused)) +int clock_getres(clockid_t clockid, struct timespec *res) +{ + return __sysret(sys_clock_getres(clockid, res)); +} + +static __attribute__((unused)) +int sys_clock_gettime(clockid_t clockid, struct timespec *tp) +{ +#if defined(__NR_clock_gettime) + return my_syscall2(__NR_clock_gettime, clockid, tp); +#else + struct __kernel_timespec ktp; + int ret; + + ret = my_syscall2(__NR_clock_gettime64, clockid, &ktp); + if (tp) + __nolibc_timespec_kernel_to_user(&ktp, tp); + return ret; +#endif +} + +static __attribute__((unused)) +int clock_gettime(clockid_t clockid, struct timespec *tp) +{ + return __sysret(sys_clock_gettime(clockid, tp)); +} + +static __attribute__((unused)) +int sys_clock_settime(clockid_t clockid, struct timespec *tp) +{ +#if defined(__NR_clock_settime) + return my_syscall2(__NR_clock_settime, clockid, tp); +#elif defined(__NR_clock_settime64) + struct __kernel_timespec ktp; + + __nolibc_timespec_user_to_kernel(tp, &ktp); + return my_syscall2(__NR_clock_settime64, clockid, &ktp); +#else + return __nolibc_enosys(__func__, clockid, tp); +#endif +} + +static __attribute__((unused)) +int clock_settime(clockid_t clockid, struct timespec *tp) +{ + return __sysret(sys_clock_settime(clockid, tp)); +} + +static __attribute__((unused)) +int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + struct timespec *rmtp) +{ +#if defined(__NR_clock_nanosleep) + return my_syscall4(__NR_clock_nanosleep, clockid, flags, rqtp, rmtp); +#elif defined(__NR_clock_nanosleep_time64) + struct __kernel_timespec krqtp, krmtp; + int ret; + + __nolibc_timespec_user_to_kernel(rqtp, &krqtp); + ret = my_syscall4(__NR_clock_nanosleep_time64, clockid, flags, &krqtp, &krmtp); + if (rmtp) + __nolibc_timespec_kernel_to_user(&krmtp, rmtp); + return ret; +#else + return __nolibc_enosys(__func__, clockid, flags, rqtp, rmtp); +#endif +} + +static __attribute__((unused)) +int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + struct timespec *rmtp) +{ + /* Directly return a positive error number */ + return -sys_clock_nanosleep(clockid, flags, rqtp, rmtp); +} + +static __inline__ +double difftime(time_t time1, time_t time2) +{ + return time1 - time2; +} + +static __inline__ +int nanosleep(const struct timespec *rqtp, struct timespec *rmtp) +{ + return __sysret(sys_clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp)); +} + + static __attribute__((unused)) time_t time(time_t *tptr) { @@ -25,7 +159,89 @@ time_t time(time_t *tptr) return tv.tv_sec; } -/* make sure to include all global symbols */ -#include "nolibc.h" + +/* + * int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid); + * int timer_gettime(timer_t timerid, struct itimerspec *curr_value); + * int timer_settime(timer_t timerid, int flags, const struct itimerspec *new_value, struct itimerspec *old_value); + */ + +static __attribute__((unused)) +int sys_timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid) +{ + return my_syscall3(__NR_timer_create, clockid, evp, timerid); +} + +static __attribute__((unused)) +int timer_create(clockid_t clockid, struct sigevent *evp, timer_t *timerid) +{ + return __sysret(sys_timer_create(clockid, evp, timerid)); +} + +static __attribute__((unused)) +int sys_timer_delete(timer_t timerid) +{ + return my_syscall1(__NR_timer_delete, timerid); +} + +static __attribute__((unused)) +int timer_delete(timer_t timerid) +{ + return __sysret(sys_timer_delete(timerid)); +} + +static __attribute__((unused)) +int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value) +{ +#if defined(__NR_timer_gettime) + return my_syscall2(__NR_timer_gettime, timerid, curr_value); +#elif defined(__NR_timer_gettime64) + struct __kernel_itimerspec kcurr_value; + int ret; + + ret = my_syscall2(__NR_timer_gettime64, timerid, &kcurr_value); + __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval); + __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value); + return ret; +#else + return __nolibc_enosys(__func__, timerid, curr_value); +#endif +} + +static __attribute__((unused)) +int timer_gettime(timer_t timerid, struct itimerspec *curr_value) +{ + return __sysret(sys_timer_gettime(timerid, curr_value)); +} + +static __attribute__((unused)) +int sys_timer_settime(timer_t timerid, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ +#if defined(__NR_timer_settime) + return my_syscall4(__NR_timer_settime, timerid, flags, new_value, old_value); +#elif defined(__NR_timer_settime64) + struct __kernel_itimerspec knew_value, kold_value; + int ret; + + __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value); + __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval); + ret = my_syscall4(__NR_timer_settime64, timerid, flags, &knew_value, &kold_value); + if (old_value) { + __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval); + __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value); + } + return ret; +#else + return __nolibc_enosys(__func__, timerid, flags, new_value, old_value); +#endif +} + +static __attribute__((unused)) +int timer_settime(timer_t timerid, int flags, + const struct itimerspec *new_value, struct itimerspec *old_value) +{ + return __sysret(sys_timer_settime(timerid, flags, new_value, old_value)); +} #endif /* _NOLIBC_TIME_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index b26a5d0c417c..16c6e9ec9451 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -4,16 +4,17 @@ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_TYPES_H #define _NOLIBC_TYPES_H #include "std.h" #include <linux/mman.h> -#include <linux/reboot.h> /* for LINUX_REBOOT_* */ #include <linux/stat.h> #include <linux/time.h> #include <linux/wait.h> -#include <linux/resource.h> /* Only the generic macros and types may be defined here. The arch-specific @@ -127,7 +128,7 @@ typedef struct { int __fd = (fd); \ if (__fd >= 0) \ __set->fds[__fd / FD_SETIDXMASK] &= \ - ~(1U << (__fd & FX_SETBITMASK)); \ + ~(1U << (__fd & FD_SETBITMASK)); \ } while (0) #define FD_SET(fd, set) do { \ @@ -144,7 +145,7 @@ typedef struct { int __r = 0; \ if (__fd >= 0) \ __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \ -1U << (__fd & FD_SET_BITMASK)); \ +1U << (__fd & FD_SETBITMASK)); \ __r; \ }) @@ -156,20 +157,6 @@ typedef struct { __set->fds[__idx] = 0; \ } while (0) -/* for poll() */ -#define POLLIN 0x0001 -#define POLLPRI 0x0002 -#define POLLOUT 0x0004 -#define POLLERR 0x0008 -#define POLLHUP 0x0010 -#define POLLNVAL 0x0020 - -struct pollfd { - int fd; - short int events; - short int revents; -}; - /* for getdents64() */ struct linux_dirent64 { uint64_t d_ino; @@ -198,14 +185,8 @@ struct stat { union { time_t st_ctime; struct timespec st_ctim; }; /* time of last status change */ }; -/* WARNING, it only deals with the 4096 first majors and 256 first minors */ -#define makedev(major, minor) ((dev_t)((((major) & 0xfff) << 8) | ((minor) & 0xff))) -#define major(dev) ((unsigned int)(((dev) >> 8) & 0xfff)) -#define minor(dev) ((unsigned int)(((dev) & 0xff)) - -#ifndef offsetof -#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD) -#endif +typedef __kernel_clockid_t clockid_t; +typedef int timer_t; #ifndef container_of #define container_of(PTR, TYPE, FIELD) ({ \ @@ -214,7 +195,4 @@ struct stat { }) #endif -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_TYPES_H */ diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h index e38f3660c051..7405fa2b89ba 100644 --- a/tools/include/nolibc/unistd.h +++ b/tools/include/nolibc/unistd.h @@ -4,6 +4,9 @@ * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ +/* make sure to include all global symbols */ +#include "nolibc.h" + #ifndef _NOLIBC_UNISTD_H #define _NOLIBC_UNISTD_H @@ -17,6 +20,34 @@ #define STDOUT_FILENO 1 #define STDERR_FILENO 2 +#define F_OK 0 +#define X_OK 1 +#define W_OK 2 +#define R_OK 4 + +/* + * int access(const char *path, int amode); + * int faccessat(int fd, const char *path, int amode, int flag); + */ + +static __attribute__((unused)) +int sys_faccessat(int fd, const char *path, int amode, int flag) +{ + return my_syscall4(__NR_faccessat, fd, path, amode, flag); +} + +static __attribute__((unused)) +int faccessat(int fd, const char *path, int amode, int flag) +{ + return __sysret(sys_faccessat(fd, path, amode, flag)); +} + +static __attribute__((unused)) +int access(const char *path, int amode) +{ + return faccessat(AT_FDCWD, path, amode, 0); +} + static __attribute__((unused)) int msleep(unsigned int msecs) @@ -56,13 +87,4 @@ int tcsetpgrp(int fd, pid_t pid) return ioctl(fd, TIOCSPGRP, &pid); } -#define __syscall_narg(_0, _1, _2, _3, _4, _5, _6, N, ...) N -#define _syscall_narg(...) __syscall_narg(__VA_ARGS__, 6, 5, 4, 3, 2, 1, 0) -#define _syscall(N, ...) __sysret(my_syscall##N(__VA_ARGS__)) -#define _syscall_n(N, ...) _syscall(N, __VA_ARGS__) -#define syscall(...) _syscall_n(_syscall_narg(__VA_ARGS__), ##__VA_ARGS__) - -/* make sure to include all global symbols */ -#include "nolibc.h" - #endif /* _NOLIBC_UNISTD_H */ diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 1ea2c4c33b86..ef1c27fa3c57 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -85,6 +85,7 @@ /* compatibility flags */ #define MAP_FILE 0 +#define PKEY_UNRESTRICTED 0x0 #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index aa5016ff3d91..f333a0ac4ee4 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -145,6 +145,8 @@ #define SO_RCVPRIORITY 82 +#define SO_PASSRIGHTS 83 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 88dc393c2bca..04e0077fb4c9 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -849,9 +849,17 @@ __SYSCALL(__NR_getxattrat, sys_getxattrat) __SYSCALL(__NR_listxattrat, sys_listxattrat) #define __NR_removexattrat 466 __SYSCALL(__NR_removexattrat, sys_removexattrat) +#define __NR_open_tree_attr 467 +__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) + +/* fs/inode.c */ +#define __NR_file_getattr 468 +__SYSCALL(__NR_file_getattr, sys_file_getattr) +#define __NR_file_setattr 469 +__SYSCALL(__NR_file_setattr, sys_file_setattr) #undef __NR_syscalls -#define __NR_syscalls 467 +#define __NR_syscalls 470 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 7fba37b94401..e63a71d3c607 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -905,13 +905,17 @@ struct drm_syncobj_destroy { }; #define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE (1 << 1) #define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE (1 << 1) struct drm_syncobj_handle { __u32 handle; __u32 flags; __s32 fd; __u32 pad; + + __u64 point; }; struct drm_syncobj_transfer { diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h index 5ee30f882736..a04afef9efca 100644 --- a/tools/include/uapi/linux/bits.h +++ b/tools/include/uapi/linux/bits.h @@ -4,13 +4,9 @@ #ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) \ - (((~_UL(0)) - (_UL(1) << (l)) + 1) & \ - (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) \ - (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ - (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 28705ae67784..6829936d33f5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -450,6 +450,7 @@ union bpf_iter_link_info { * * **struct bpf_map_info** * * **struct bpf_btf_info** * * **struct bpf_link_info** + * * **struct bpf_token_info** * * Return * Returns zero on success. On error, -1 is returned and *errno* @@ -906,6 +907,17 @@ union bpf_iter_link_info { * A new file descriptor (a nonnegative integer), or -1 if an * error occurred (in which case, *errno* is set appropriately). * + * BPF_PROG_STREAM_READ_BY_FD + * Description + * Read data of a program's BPF stream. The program is identified + * by *prog_fd*, and the stream is identified by the *stream_id*. + * The data is copied to a buffer pointed to by *stream_buf*, and + * filled less than or equal to *stream_buf_len* bytes. + * + * Return + * Number of bytes read from the stream on success, or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -961,6 +973,7 @@ enum bpf_cmd { BPF_LINK_DETACH, BPF_PROG_BIND_MAP, BPF_TOKEN_CREATE, + BPF_PROG_STREAM_READ_BY_FD, __MAX_BPF_CMD, }; @@ -1463,6 +1476,11 @@ struct bpf_stack_build_id { #define BPF_OBJ_NAME_LEN 16U +enum { + BPF_STREAM_STDOUT = 1, + BPF_STREAM_STDERR = 2, +}; + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -1504,9 +1522,15 @@ union bpf_attr { * If provided, map_flags should have BPF_F_TOKEN_FD flag set. */ __s32 map_token_fd; + + /* Hash of the program that has exclusive access to the map. + */ + __aligned_u64 excl_prog_hash; + /* Size of the passed excl_prog_hash. */ + __u32 excl_prog_hash_size; }; - struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ + struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */ __u32 map_fd; __aligned_u64 key; union { @@ -1587,6 +1611,16 @@ union bpf_attr { * continuous. */ __u32 fd_array_cnt; + /* Pointer to a buffer containing the signature of the BPF + * program. + */ + __aligned_u64 signature; + /* Size of the signature buffer in bytes. */ + __u32 signature_size; + /* ID of the kernel keyring to be used for signature + * verification. + */ + __s32 keyring_id; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1794,6 +1828,13 @@ union bpf_attr { }; __u64 expected_revision; } netkit; + struct { + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + } cgroup; }; } link_create; @@ -1842,6 +1883,13 @@ union bpf_attr { __u32 bpffs_fd; } token_create; + struct { + __aligned_u64 stream_buf; + __u32 stream_buf_len; + __u32 stream_id; + __u32 prog_fd; + } prog_stream_read; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF @@ -1995,11 +2043,15 @@ union bpf_attr { * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet - * associated to *skb*, at *offset*. *flags* are a combination of - * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the - * checksum for the packet after storing the bytes) and - * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\ - * **->swhash** and *skb*\ **->l4hash** to 0). + * associated to *skb*, at *offset*. The *flags* are a combination + * of the following values: + * + * **BPF_F_RECOMPUTE_CSUM** + * Automatically update *skb*\ **->csum** after storing the + * bytes. + * **BPF_F_INVALIDATE_HASH** + * Set *skb*\ **->hash**, *skb*\ **->swhash** and *skb*\ + * **->l4hash** to 0. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers @@ -2051,7 +2103,8 @@ union bpf_attr { * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and * for updates resulting in a null checksum the value is set to * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates - * the checksum is to be computed against a pseudo-header. + * that the modified header field is part of the pseudo-header. + * Flag **BPF_F_IPV6** should be set for IPv6 packets. * * This helper works in combination with **bpf_csum_diff**\ (), * which does not update the checksum in-place, but offers more @@ -2398,7 +2451,7 @@ union bpf_attr { * into it. An example is available in file * *samples/bpf/trace_output_user.c* in the Linux kernel source * tree (the eBPF program counterpart is in - * *samples/bpf/trace_output_kern.c*). + * *samples/bpf/trace_output.bpf.c*). * * **bpf_perf_event_output**\ () achieves better performance * than **bpf_trace_printk**\ () for sharing data with user @@ -4838,7 +4891,7 @@ union bpf_attr { * * **-ENOENT** if the bpf_local_storage cannot be found. * - * long bpf_d_path(struct path *path, char *buf, u32 sz) + * long bpf_d_path(const struct path *path, char *buf, u32 sz) * Description * Return full path for given **struct path** object, which * needs to be the kernel BTF *path* object. The path is @@ -4968,6 +5021,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The @@ -6065,6 +6121,7 @@ enum { BPF_F_PSEUDO_HDR = (1ULL << 4), BPF_F_MARK_MANGLED_0 = (1ULL << 5), BPF_F_MARK_ENFORCE = (1ULL << 6), + BPF_F_IPV6 = (1ULL << 7), }; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ @@ -6625,6 +6682,8 @@ struct bpf_map_info { __u32 btf_value_type_id; __u32 btf_vmlinux_id; __u64 map_extra; + __aligned_u64 hash; + __u32 hash_size; } __attribute__((aligned(8))); struct bpf_btf_info { @@ -6644,11 +6703,15 @@ struct bpf_link_info { struct { __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */ __u32 tp_name_len; /* in/out: tp_name buffer len */ + __u32 :32; + __u64 cookie; } raw_tracepoint; struct { __u32 attach_type; __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */ __u32 target_btf_id; /* BTF type id inside the object */ + __u32 :32; + __u64 cookie; } tracing; struct { __u64 cgroup_id; @@ -6720,6 +6783,7 @@ struct bpf_link_info { __u32 name_len; __u32 offset; /* offset from file_name */ __u64 cookie; + __u64 ref_ctr_offset; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ @@ -6758,6 +6822,13 @@ struct bpf_link_info { }; } __attribute__((aligned(8))); +struct bpf_token_info { + __u64 allowed_cmds; + __u64 allowed_maps; + __u64 allowed_progs; + __u64 allowed_attachs; +} __attribute__((aligned(8))); + /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed * by user and intended to be used by socket (e.g. to bind to, depends on * attach type). @@ -7365,6 +7436,10 @@ struct bpf_timer { __u64 __opaque[2]; } __attribute__((aligned(8))); +struct bpf_task_work { + __u64 __opaque; +} __attribute__((aligned(8))); + struct bpf_wq { __u64 __opaque[2]; } __attribute__((aligned(8))); diff --git a/tools/include/uapi/linux/coredump.h b/tools/include/uapi/linux/coredump.h new file mode 100644 index 000000000000..dc3789b78af0 --- /dev/null +++ b/tools/include/uapi/linux/coredump.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_COREDUMP_H +#define _UAPI_LINUX_COREDUMP_H + +#include <linux/types.h> + +/** + * coredump_{req,ack} flags + * @COREDUMP_KERNEL: kernel writes coredump + * @COREDUMP_USERSPACE: userspace writes coredump + * @COREDUMP_REJECT: don't generate coredump + * @COREDUMP_WAIT: wait for coredump server + */ +enum { + COREDUMP_KERNEL = (1ULL << 0), + COREDUMP_USERSPACE = (1ULL << 1), + COREDUMP_REJECT = (1ULL << 2), + COREDUMP_WAIT = (1ULL << 3), +}; + +/** + * struct coredump_req - message kernel sends to userspace + * @size: size of struct coredump_req + * @size_ack: known size of struct coredump_ack on this kernel + * @mask: supported features + * + * When a coredump happens the kernel will connect to the coredump + * socket and send a coredump request to the coredump server. The @size + * member is set to the size of struct coredump_req and provides a hint + * to userspace how much data can be read. Userspace may use MSG_PEEK to + * peek the size of struct coredump_req and then choose to consume it in + * one go. Userspace may also simply read a COREDUMP_ACK_SIZE_VER0 + * request. If the size the kernel sends is larger userspace simply + * discards any remaining data. + * + * The coredump_req->mask member is set to the currently know features. + * Userspace may only set coredump_ack->mask to the bits raised by the + * kernel in coredump_req->mask. + * + * The coredump_req->size_ack member is set by the kernel to the size of + * struct coredump_ack the kernel knows. Userspace may only send up to + * coredump_req->size_ack bytes to the kernel and must set + * coredump_ack->size accordingly. + */ +struct coredump_req { + __u32 size; + __u32 size_ack; + __u64 mask; +}; + +enum { + COREDUMP_REQ_SIZE_VER0 = 16U, /* size of first published struct */ +}; + +/** + * struct coredump_ack - message userspace sends to kernel + * @size: size of the struct + * @spare: unused + * @mask: features kernel is supposed to use + * + * The @size member must be set to the size of struct coredump_ack. It + * may never exceed what the kernel returned in coredump_req->size_ack + * but it may of course be smaller (>= COREDUMP_ACK_SIZE_VER0 and <= + * coredump_req->size_ack). + * + * The @mask member must be set to the features the coredump server + * wants the kernel to use. Only bits the kernel returned in + * coredump_req->mask may be set. + */ +struct coredump_ack { + __u32 size; + __u32 spare; + __u64 mask; +}; + +enum { + COREDUMP_ACK_SIZE_VER0 = 16U, /* size of first published struct */ +}; + +/** + * enum coredump_mark - Markers for the coredump socket + * + * The kernel will place a single byte on the coredump socket. The + * markers notify userspace whether the coredump ack succeeded or + * failed. + * + * @COREDUMP_MARK_MINSIZE: the provided coredump_ack size was too small + * @COREDUMP_MARK_MAXSIZE: the provided coredump_ack size was too big + * @COREDUMP_MARK_UNSUPPORTED: the provided coredump_ack mask was invalid + * @COREDUMP_MARK_CONFLICTING: the provided coredump_ack mask has conflicting options + * @COREDUMP_MARK_REQACK: the coredump request and ack was successful + * @__COREDUMP_MARK_MAX: the maximum coredump mark value + */ +enum coredump_mark { + COREDUMP_MARK_REQACK = 0U, + COREDUMP_MARK_MINSIZE = 1U, + COREDUMP_MARK_MAXSIZE = 2U, + COREDUMP_MARK_UNSUPPORTED = 3U, + COREDUMP_MARK_CONFLICTING = 4U, + __COREDUMP_MARK_MAX = (1U << 31), +}; + +#endif /* _UAPI_LINUX_COREDUMP_H */ diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h new file mode 100644 index 000000000000..e710967c7c26 --- /dev/null +++ b/tools/include/uapi/linux/fanotify.h @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_FANOTIFY_H +#define _UAPI_LINUX_FANOTIFY_H + +#include <linux/types.h> + +/* the following events that user-space can register for */ +#define FAN_ACCESS 0x00000001 /* File was accessed */ +#define FAN_MODIFY 0x00000002 /* File was modified */ +#define FAN_ATTRIB 0x00000004 /* Metadata changed */ +#define FAN_CLOSE_WRITE 0x00000008 /* Writable file closed */ +#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ +#define FAN_OPEN 0x00000020 /* File was opened */ +#define FAN_MOVED_FROM 0x00000040 /* File was moved from X */ +#define FAN_MOVED_TO 0x00000080 /* File was moved to Y */ +#define FAN_CREATE 0x00000100 /* Subfile was created */ +#define FAN_DELETE 0x00000200 /* Subfile was deleted */ +#define FAN_DELETE_SELF 0x00000400 /* Self was deleted */ +#define FAN_MOVE_SELF 0x00000800 /* Self was moved */ +#define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ + +#define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ +#define FAN_FS_ERROR 0x00008000 /* Filesystem error */ + +#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ +#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ +#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ +/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ + +#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */ +#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ +#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ + +#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ + +#define FAN_RENAME 0x10000000 /* File was renamed */ + +#define FAN_ONDIR 0x40000000 /* Event occurred against dir */ + +/* helper events */ +#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ +#define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO) /* moves */ + +/* flags used for fanotify_init() */ +#define FAN_CLOEXEC 0x00000001 +#define FAN_NONBLOCK 0x00000002 + +/* These are NOT bitwise flags. Both bits are used together. */ +#define FAN_CLASS_NOTIF 0x00000000 +#define FAN_CLASS_CONTENT 0x00000004 +#define FAN_CLASS_PRE_CONTENT 0x00000008 + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ + FAN_CLASS_PRE_CONTENT) + +#define FAN_UNLIMITED_QUEUE 0x00000010 +#define FAN_UNLIMITED_MARKS 0x00000020 +#define FAN_ENABLE_AUDIT 0x00000040 + +/* Flags to determine fanotify event format */ +#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */ +#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ +#define FAN_REPORT_FID 0x00000200 /* Report unique file id */ +#define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ +#define FAN_REPORT_NAME 0x00000800 /* Report events with name */ +#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ +#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */ +#define FAN_REPORT_MNT 0x00004000 /* Report mount events */ + +/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ +#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) +/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */ +#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \ + FAN_REPORT_FID | FAN_REPORT_TARGET_FID) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ + FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\ + FAN_UNLIMITED_MARKS) + +/* flags used for fanotify_modify_mark() */ +#define FAN_MARK_ADD 0x00000001 +#define FAN_MARK_REMOVE 0x00000002 +#define FAN_MARK_DONT_FOLLOW 0x00000004 +#define FAN_MARK_ONLYDIR 0x00000008 +/* FAN_MARK_MOUNT is 0x00000010 */ +#define FAN_MARK_IGNORED_MASK 0x00000020 +#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 +#define FAN_MARK_FLUSH 0x00000080 +/* FAN_MARK_FILESYSTEM is 0x00000100 */ +#define FAN_MARK_EVICTABLE 0x00000200 +/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */ +#define FAN_MARK_IGNORE 0x00000400 + +/* These are NOT bitwise flags. Both bits can be used togther. */ +#define FAN_MARK_INODE 0x00000000 +#define FAN_MARK_MOUNT 0x00000010 +#define FAN_MARK_FILESYSTEM 0x00000100 +#define FAN_MARK_MNTNS 0x00000110 + +/* + * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY + * for non-inode mark types. + */ +#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ + FAN_MARK_REMOVE |\ + FAN_MARK_DONT_FOLLOW |\ + FAN_MARK_ONLYDIR |\ + FAN_MARK_MOUNT |\ + FAN_MARK_IGNORED_MASK |\ + FAN_MARK_IGNORED_SURV_MODIFY |\ + FAN_MARK_FLUSH) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_EVENTS (FAN_ACCESS |\ + FAN_MODIFY |\ + FAN_CLOSE |\ + FAN_OPEN) + +/* + * All events which require a permission response from userspace + */ +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\ + FAN_ACCESS_PERM) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\ + FAN_ALL_PERM_EVENTS |\ + FAN_Q_OVERFLOW) + +#define FANOTIFY_METADATA_VERSION 3 + +struct fanotify_event_metadata { + __u32 event_len; + __u8 vers; + __u8 reserved; + __u16 metadata_len; + __aligned_u64 mask; + __s32 fd; + __s32 pid; +}; + +#define FAN_EVENT_INFO_TYPE_FID 1 +#define FAN_EVENT_INFO_TYPE_DFID_NAME 2 +#define FAN_EVENT_INFO_TYPE_DFID 3 +#define FAN_EVENT_INFO_TYPE_PIDFD 4 +#define FAN_EVENT_INFO_TYPE_ERROR 5 +#define FAN_EVENT_INFO_TYPE_RANGE 6 +#define FAN_EVENT_INFO_TYPE_MNT 7 + +/* Special info types for FAN_RENAME */ +#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 +/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */ +#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12 +/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */ + +/* Variable length info record following event metadata */ +struct fanotify_event_info_header { + __u8 info_type; + __u8 pad; + __u16 len; +}; + +/* + * Unique file identifier info record. + * This structure is used for records of types FAN_EVENT_INFO_TYPE_FID, + * FAN_EVENT_INFO_TYPE_DFID and FAN_EVENT_INFO_TYPE_DFID_NAME. + * For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null terminated + * name immediately after the file handle. + */ +struct fanotify_event_info_fid { + struct fanotify_event_info_header hdr; + __kernel_fsid_t fsid; + /* + * Following is an opaque struct file_handle that can be passed as + * an argument to open_by_handle_at(2). + */ + unsigned char handle[]; +}; + +/* + * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD. + * It holds a pidfd for the pid that was responsible for generating an event. + */ +struct fanotify_event_info_pidfd { + struct fanotify_event_info_header hdr; + __s32 pidfd; +}; + +struct fanotify_event_info_error { + struct fanotify_event_info_header hdr; + __s32 error; + __u32 error_count; +}; + +struct fanotify_event_info_range { + struct fanotify_event_info_header hdr; + __u32 pad; + __u64 offset; + __u64 count; +}; + +struct fanotify_event_info_mnt { + struct fanotify_event_info_header hdr; + __u64 mnt_id; +}; + +/* + * User space may need to record additional information about its decision. + * The extra information type records what kind of information is included. + * The default is none. We also define an extra information buffer whose + * size is determined by the extra information type. + * + * If the information type is Audit Rule, then the information following + * is the rule number that triggered the user space decision that + * requires auditing. + */ + +#define FAN_RESPONSE_INFO_NONE 0 +#define FAN_RESPONSE_INFO_AUDIT_RULE 1 + +struct fanotify_response { + __s32 fd; + __u32 response; +}; + +struct fanotify_response_info_header { + __u8 type; + __u8 pad; + __u16 len; +}; + +struct fanotify_response_info_audit_rule { + struct fanotify_response_info_header hdr; + __u32 rule_number; + __u32 subj_trust; + __u32 obj_trust; +}; + +/* Legit userspace responses to a _PERM event */ +#define FAN_ALLOW 0x01 +#define FAN_DENY 0x02 +/* errno other than EPERM can specified in upper byte of deny response */ +#define FAN_ERRNO_BITS 8 +#define FAN_ERRNO_SHIFT (32 - FAN_ERRNO_BITS) +#define FAN_ERRNO_MASK ((1 << FAN_ERRNO_BITS) - 1) +#define FAN_DENY_ERRNO(err) \ + (FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT)) + +#define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */ +#define FAN_INFO 0x20 /* Bitmask to indicate additional information */ + +/* No fd set in event */ +#define FAN_NOFD -1 +#define FAN_NOPIDFD FAN_NOFD +#define FAN_EPIDFD -2 + +/* Helper functions to deal with fanotify_event_metadata buffers */ +#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) + +#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \ + (struct fanotify_event_metadata*)(((char *)(meta)) + \ + (meta)->event_len)) + +#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len <= (long)(len)) + +#endif /* _UAPI_LINUX_FANOTIFY_H */ diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index 8a27bc5c7a7f..24ddf7bc4f25 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -40,6 +40,15 @@ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) +/* flags for integrity meta */ +#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */ +#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */ +#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */ + +#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \ + IO_INTEGRITY_CHK_REFTAG | \ + IO_INTEGRITY_CHK_APPTAG) + #define SEEK_SET 0 /* seek relative to beginning of file */ #define SEEK_CUR 1 /* seek relative to current file position */ #define SEEK_END 2 /* seek relative to end of file */ @@ -329,9 +338,16 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO negation of O_APPEND */ #define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) +/* Atomic Write */ +#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) + +/* buffered IO that drops the cache after reading or writing data */ +#define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND | RWF_NOAPPEND) + RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\ + RWF_DONTCACHE) #define PROCFS_IOCTL_MAGIC 'f' @@ -347,6 +363,7 @@ typedef int __bitwise __kernel_rwf_t; #define PAGE_IS_PFNZERO (1 << 5) #define PAGE_IS_HUGE (1 << 6) #define PAGE_IS_SOFT_DIRTY (1 << 7) +#define PAGE_IS_GUARD (1 << 8) /* * struct page_region - Page region with flags diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 7a8f4c290187..3aff99f2696a 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -119,7 +119,7 @@ struct fscrypt_key_specifier { */ struct fscrypt_provisioning_key_payload { __u32 type; - __u32 __reserved; + __u32 flags; __u8 raw[]; }; @@ -128,7 +128,9 @@ struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; __u32 key_id; - __u32 __reserved[8]; +#define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001 + __u32 flags; + __u32 __reserved[7]; __u8 raw[]; }; diff --git a/tools/include/uapi/linux/genetlink.h b/tools/include/uapi/linux/genetlink.h new file mode 100644 index 000000000000..ddba3ca01e39 --- /dev/null +++ b/tools/include/uapi/linux/genetlink.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_GENERIC_NETLINK_H +#define _UAPI__LINUX_GENERIC_NETLINK_H + +#include <linux/types.h> +#include <linux/netlink.h> + +#define GENL_NAMSIZ 16 /* length of family name */ + +#define GENL_MIN_ID NLMSG_MIN_TYPE +#define GENL_MAX_ID 1023 + +struct genlmsghdr { + __u8 cmd; + __u8 version; + __u16 reserved; +}; + +#define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr)) + +#define GENL_ADMIN_PERM 0x01 +#define GENL_CMD_CAP_DO 0x02 +#define GENL_CMD_CAP_DUMP 0x04 +#define GENL_CMD_CAP_HASPOL 0x08 +#define GENL_UNS_ADMIN_PERM 0x10 + +/* + * List of reserved static generic netlink identifiers: + */ +#define GENL_ID_CTRL NLMSG_MIN_TYPE +#define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) +#define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) +/* must be last reserved + 1 */ +#define GENL_START_ALLOC (NLMSG_MIN_TYPE + 3) + +/************************************************************************** + * Controller + **************************************************************************/ + +enum { + CTRL_CMD_UNSPEC, + CTRL_CMD_NEWFAMILY, + CTRL_CMD_DELFAMILY, + CTRL_CMD_GETFAMILY, + CTRL_CMD_NEWOPS, + CTRL_CMD_DELOPS, + CTRL_CMD_GETOPS, + CTRL_CMD_NEWMCAST_GRP, + CTRL_CMD_DELMCAST_GRP, + CTRL_CMD_GETMCAST_GRP, /* unused */ + CTRL_CMD_GETPOLICY, + __CTRL_CMD_MAX, +}; + +#define CTRL_CMD_MAX (__CTRL_CMD_MAX - 1) + +enum { + CTRL_ATTR_UNSPEC, + CTRL_ATTR_FAMILY_ID, + CTRL_ATTR_FAMILY_NAME, + CTRL_ATTR_VERSION, + CTRL_ATTR_HDRSIZE, + CTRL_ATTR_MAXATTR, + CTRL_ATTR_OPS, + CTRL_ATTR_MCAST_GROUPS, + CTRL_ATTR_POLICY, + CTRL_ATTR_OP_POLICY, + CTRL_ATTR_OP, + __CTRL_ATTR_MAX, +}; + +#define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1) + +enum { + CTRL_ATTR_OP_UNSPEC, + CTRL_ATTR_OP_ID, + CTRL_ATTR_OP_FLAGS, + __CTRL_ATTR_OP_MAX, +}; + +#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1) + +enum { + CTRL_ATTR_MCAST_GRP_UNSPEC, + CTRL_ATTR_MCAST_GRP_NAME, + CTRL_ATTR_MCAST_GRP_ID, + __CTRL_ATTR_MCAST_GRP_MAX, +}; + +#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1) + +enum { + CTRL_ATTR_POLICY_UNSPEC, + CTRL_ATTR_POLICY_DO, + CTRL_ATTR_POLICY_DUMP, + + __CTRL_ATTR_POLICY_DUMP_MAX, + CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1 +}; + +#define CTRL_ATTR_POLICY_MAX (__CTRL_ATTR_POLICY_DUMP_MAX - 1) + +#endif /* _UAPI__LINUX_GENERIC_NETLINK_H */ diff --git a/tools/include/uapi/linux/if_addr.h b/tools/include/uapi/linux/if_addr.h new file mode 100644 index 000000000000..aa7958b4e41d --- /dev/null +++ b/tools/include/uapi/linux/if_addr.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_IF_ADDR_H +#define _UAPI__LINUX_IF_ADDR_H + +#include <linux/types.h> +#include <linux/netlink.h> + +struct ifaddrmsg { + __u8 ifa_family; + __u8 ifa_prefixlen; /* The prefix length */ + __u8 ifa_flags; /* Flags */ + __u8 ifa_scope; /* Address scope */ + __u32 ifa_index; /* Link index */ +}; + +/* + * Important comment: + * IFA_ADDRESS is prefix address, rather than local interface address. + * It makes no difference for normally configured broadcast interfaces, + * but for point-to-point IFA_ADDRESS is DESTINATION address, + * local address is supplied in IFA_LOCAL attribute. + * + * IFA_FLAGS is a u32 attribute that extends the u8 field ifa_flags. + * If present, the value from struct ifaddrmsg will be ignored. + */ +enum { + IFA_UNSPEC, + IFA_ADDRESS, + IFA_LOCAL, + IFA_LABEL, + IFA_BROADCAST, + IFA_ANYCAST, + IFA_CACHEINFO, + IFA_MULTICAST, + IFA_FLAGS, + IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */ + IFA_TARGET_NETNSID, + IFA_PROTO, /* u8, address protocol */ + __IFA_MAX, +}; + +#define IFA_MAX (__IFA_MAX - 1) + +/* ifa_flags */ +#define IFA_F_SECONDARY 0x01 +#define IFA_F_TEMPORARY IFA_F_SECONDARY + +#define IFA_F_NODAD 0x02 +#define IFA_F_OPTIMISTIC 0x04 +#define IFA_F_DADFAILED 0x08 +#define IFA_F_HOMEADDRESS 0x10 +#define IFA_F_DEPRECATED 0x20 +#define IFA_F_TENTATIVE 0x40 +#define IFA_F_PERMANENT 0x80 +#define IFA_F_MANAGETEMPADDR 0x100 +#define IFA_F_NOPREFIXROUTE 0x200 +#define IFA_F_MCAUTOJOIN 0x400 +#define IFA_F_STABLE_PRIVACY 0x800 + +struct ifa_cacheinfo { + __u32 ifa_prefered; + __u32 ifa_valid; + __u32 cstamp; /* created timestamp, hundredths of seconds */ + __u32 tstamp; /* updated timestamp, hundredths of seconds */ +}; + +/* backwards compatibility for userspace */ +#ifndef __KERNEL__ +#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg)))) +#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg)) +#endif + +/* ifa_proto */ +#define IFAPROT_UNSPEC 0 +#define IFAPROT_KERNEL_LO 1 /* loopback */ +#define IFAPROT_KERNEL_RA 2 /* set by kernel from router announcement */ +#define IFAPROT_KERNEL_LL 3 /* link-local set by kernel */ + +#endif diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 42869770776e..23a062781468 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -7,8 +7,8 @@ * Magnus Karlsson <magnus.karlsson@intel.com> */ -#ifndef _LINUX_IF_XDP_H -#define _LINUX_IF_XDP_H +#ifndef _UAPI_LINUX_IF_XDP_H +#define _UAPI_LINUX_IF_XDP_H #include <linux/types.h> @@ -79,6 +79,7 @@ struct xdp_mmap_offsets { #define XDP_UMEM_COMPLETION_RING 6 #define XDP_STATISTICS 7 #define XDP_OPTIONS 8 +#define XDP_MAX_TX_SKB_BUDGET 9 struct xdp_umem_reg { __u64 addr; /* Start of packet data area */ @@ -180,4 +181,4 @@ struct xdp_desc { /* TX packet carries valid metadata. */ #define XDP_TX_METADATA (1 << 1) -#endif /* _LINUX_IF_XDP_H */ +#endif /* _UAPI_LINUX_IF_XDP_H */ diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 5d32d53508d9..ced0fc3c3aa5 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -79,6 +79,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ #define IPPROTO_ETHERNET IPPROTO_ETHERNET + IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */ +#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_SMC = 256, /* Shared Memory Communications */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 502ea63b5d2e..f0f0d49d2544 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -178,6 +178,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_TDX 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -375,6 +376,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_WAKEUP 4 #define KVM_SYSTEM_EVENT_SUSPEND 5 #define KVM_SYSTEM_EVENT_SEV_TERM 6 +#define KVM_SYSTEM_EVENT_TDX_FATAL 7 __u32 type; __u32 ndata; union { @@ -446,6 +448,31 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_TDX */ + struct { + __u64 flags; + __u64 nr; + union { + struct { + __u64 ret; + __u64 data[5]; + } unknown; + struct { + __u64 ret; + __u64 gpa; + __u64 size; + } get_quote; + struct { + __u64 ret; + __u64 leaf; + __u64 r11, r12, r13, r14; + } get_tdvmcall_info; + struct { + __u64 ret; + __u64 vector; + } setup_event_notify; + }; + } tdx; /* Fix the size of the union. */ char padding[256]; }; @@ -617,10 +644,7 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) -#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE | \ - KVM_X86_DISABLE_EXITS_CSTATE) +#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { @@ -933,6 +957,11 @@ struct kvm_enable_cap { #define KVM_CAP_PRE_FAULT_MEMORY 236 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 +#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 +#define KVM_CAP_ARM_EL2 240 +#define KVM_CAP_ARM_EL2_E2H0 241 +#define KVM_CAP_RISCV_MP_STATE_RESET 242 +#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1070,6 +1099,10 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + #define KVM_REG_SIZE_U8 0x0000000000000000ULL #define KVM_REG_SIZE_U16 0x0010000000000000ULL #define KVM_REG_SIZE_U32 0x0020000000000000ULL diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h new file mode 100644 index 000000000000..7fa67c2031a5 --- /dev/null +++ b/tools/include/uapi/linux/mount.h @@ -0,0 +1,235 @@ +#ifndef _UAPI_LINUX_MOUNT_H +#define _UAPI_LINUX_MOUNT_H + +#include <linux/types.h> + +/* + * These are the fs-independent mount-flags: up to 32 flags are supported + * + * Usage of these is restricted within the kernel to core mount(2) code and + * callers of sys_mount() only. Filesystems should be using the SB_* + * equivalent instead. + */ +#define MS_RDONLY 1 /* Mount read-only */ +#define MS_NOSUID 2 /* Ignore suid and sgid bits */ +#define MS_NODEV 4 /* Disallow access to device special files */ +#define MS_NOEXEC 8 /* Disallow program execution */ +#define MS_SYNCHRONOUS 16 /* Writes are synced at once */ +#define MS_REMOUNT 32 /* Alter flags of a mounted FS */ +#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ +#define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ +#define MS_NOATIME 1024 /* Do not update access times. */ +#define MS_NODIRATIME 2048 /* Do not update directory access times */ +#define MS_BIND 4096 +#define MS_MOVE 8192 +#define MS_REC 16384 +#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. + MS_VERBOSE is deprecated. */ +#define MS_SILENT 32768 +#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define MS_UNBINDABLE (1<<17) /* change to unbindable */ +#define MS_PRIVATE (1<<18) /* change to private */ +#define MS_SLAVE (1<<19) /* change to slave */ +#define MS_SHARED (1<<20) /* change to shared */ +#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ +#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ +#define MS_I_VERSION (1<<23) /* Update inode I_version field */ +#define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ + +/* These sb flags are internal to the kernel */ +#define MS_SUBMOUNT (1<<26) +#define MS_NOREMOTELOCK (1<<27) +#define MS_NOSEC (1<<28) +#define MS_BORN (1<<29) +#define MS_ACTIVE (1<<30) +#define MS_NOUSER (1<<31) + +/* + * Superblock flags that can be altered by MS_REMOUNT + */ +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\ + MS_LAZYTIME) + +/* + * Old magic mount flag and mask + */ +#define MS_MGC_VAL 0xC0ED0000 +#define MS_MGC_MSK 0xffff0000 + +/* + * open_tree() flags. + */ +#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ +#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ + +/* + * move_mount() flags. + */ +#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */ +#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */ +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ +#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */ +#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */ +#define MOVE_MOUNT__MASK 0x00000377 + +/* + * fsopen() flags. + */ +#define FSOPEN_CLOEXEC 0x00000001 + +/* + * fspick() flags. + */ +#define FSPICK_CLOEXEC 0x00000001 +#define FSPICK_SYMLINK_NOFOLLOW 0x00000002 +#define FSPICK_NO_AUTOMOUNT 0x00000004 +#define FSPICK_EMPTY_PATH 0x00000008 + +/* + * The type of fsconfig() call made. + */ +enum fsconfig_command { + FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ + FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */ + FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ + FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */ +}; + +/* + * fsmount() flags. + */ +#define FSMOUNT_CLOEXEC 0x00000001 + +/* + * Mount attributes. + */ +#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ +#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ +#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ +#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ +#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ +#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ +#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ +#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ +#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ +#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */ + +/* + * mount_setattr() + */ +struct mount_attr { + __u64 attr_set; + __u64 attr_clr; + __u64 propagation; + __u64 userns_fd; +}; + +/* List of all mount_attr versions. */ +#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ + + +/* + * Structure for getting mount/superblock/filesystem info with statmount(2). + * + * The interface is similar to statx(2): individual fields or groups can be + * selected with the @mask argument of statmount(). Kernel will set the @mask + * field according to the supported fields. + * + * If string fields are selected, then the caller needs to pass a buffer that + * has space after the fixed part of the structure. Nul terminated strings are + * copied there and offsets relative to @str are stored in the relevant fields. + * If the buffer is too small, then EOVERFLOW is returned. The actually used + * size is returned in @size. + */ +struct statmount { + __u32 size; /* Total size, including strings */ + __u32 mnt_opts; /* [str] Options (comma separated, escaped) */ + __u64 mask; /* What results were written */ + __u32 sb_dev_major; /* Device ID */ + __u32 sb_dev_minor; + __u64 sb_magic; /* ..._SUPER_MAGIC */ + __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */ + __u32 fs_type; /* [str] Filesystem type */ + __u64 mnt_id; /* Unique ID of mount */ + __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */ + __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */ + __u32 mnt_parent_id_old; + __u64 mnt_attr; /* MOUNT_ATTR_... */ + __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */ + __u64 mnt_peer_group; /* ID of shared peer group */ + __u64 mnt_master; /* Mount receives propagation from this ID */ + __u64 propagate_from; /* Propagation from in current namespace */ + __u32 mnt_root; /* [str] Root of mount relative to root of fs */ + __u32 mnt_point; /* [str] Mountpoint relative to current root */ + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ + __u32 sb_source; /* [str] Source string of the mount */ + __u32 opt_num; /* Number of fs options */ + __u32 opt_array; /* [str] Array of nul terminated fs options */ + __u32 opt_sec_num; /* Number of security options */ + __u32 opt_sec_array; /* [str] Array of nul terminated security options */ + __u64 supported_mask; /* Mask flags that this kernel supports */ + __u32 mnt_uidmap_num; /* Number of uid mappings */ + __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */ + __u32 mnt_gidmap_num; /* Number of gid mappings */ + __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */ + __u64 __spare2[43]; + char str[]; /* Variable size part containing strings */ +}; + +/* + * Structure for passing mount ID and miscellaneous parameters to statmount(2) + * and listmount(2). + * + * For statmount(2) @param represents the request mask. + * For listmount(2) @param represents the last listed mount id (or zero). + */ +struct mnt_id_req { + __u32 size; + __u32 spare; + __u64 mnt_id; + __u64 param; + __u64 mnt_ns_id; +}; + +/* List of all mnt_id_req versions. */ +#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ + +/* + * @mask bits for statmount(2) + */ +#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */ +#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */ +#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */ +#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ +#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ +#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ +#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ +#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ +#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ +#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ +#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */ +#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */ +#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */ + +/* + * Special @mnt_id values that can be passed to listmount + */ +#define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ + +#endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/include/uapi/linux/neighbour.h b/tools/include/uapi/linux/neighbour.h new file mode 100644 index 000000000000..c34a81245f87 --- /dev/null +++ b/tools/include/uapi/linux/neighbour.h @@ -0,0 +1,229 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_NEIGHBOUR_H +#define _UAPI__LINUX_NEIGHBOUR_H + +#include <linux/types.h> +#include <linux/netlink.h> + +struct ndmsg { + __u8 ndm_family; + __u8 ndm_pad1; + __u16 ndm_pad2; + __s32 ndm_ifindex; + __u16 ndm_state; + __u8 ndm_flags; + __u8 ndm_type; +}; + +enum { + NDA_UNSPEC, + NDA_DST, + NDA_LLADDR, + NDA_CACHEINFO, + NDA_PROBES, + NDA_VLAN, + NDA_PORT, + NDA_VNI, + NDA_IFINDEX, + NDA_MASTER, + NDA_LINK_NETNSID, + NDA_SRC_VNI, + NDA_PROTOCOL, /* Originator of entry */ + NDA_NH_ID, + NDA_FDB_EXT_ATTRS, + NDA_FLAGS_EXT, + NDA_NDM_STATE_MASK, + NDA_NDM_FLAGS_MASK, + __NDA_MAX +}; + +#define NDA_MAX (__NDA_MAX - 1) + +/* + * Neighbor Cache Entry Flags + */ + +#define NTF_USE (1 << 0) +#define NTF_SELF (1 << 1) +#define NTF_MASTER (1 << 2) +#define NTF_PROXY (1 << 3) /* == ATF_PUBL */ +#define NTF_EXT_LEARNED (1 << 4) +#define NTF_OFFLOADED (1 << 5) +#define NTF_STICKY (1 << 6) +#define NTF_ROUTER (1 << 7) +/* Extended flags under NDA_FLAGS_EXT: */ +#define NTF_EXT_MANAGED (1 << 0) +#define NTF_EXT_LOCKED (1 << 1) +#define NTF_EXT_EXT_VALIDATED (1 << 2) + +/* + * Neighbor Cache Entry States. + */ + +#define NUD_INCOMPLETE 0x01 +#define NUD_REACHABLE 0x02 +#define NUD_STALE 0x04 +#define NUD_DELAY 0x08 +#define NUD_PROBE 0x10 +#define NUD_FAILED 0x20 + +/* Dummy states */ +#define NUD_NOARP 0x40 +#define NUD_PERMANENT 0x80 +#define NUD_NONE 0x00 + +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no + * address resolution or NUD. + * + * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true + * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier + * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED + * flagged entries explicitly are (which is also consistent with the routing + * subsystem). + * + * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry + * states don't make sense and thus are ignored. Such entries don't age and + * can roam. + * + * NTF_EXT_MANAGED flagged neigbor entries are managed by the kernel on behalf + * of a user space control plane, and automatically refreshed so that (if + * possible) they remain in NUD_REACHABLE state. + * + * NTF_EXT_LOCKED flagged bridge FDB entries are entries generated by the + * bridge in response to a host trying to communicate via a locked bridge port + * with MAB enabled. Their purpose is to notify user space that a host requires + * authentication. + * + * NTF_EXT_EXT_VALIDATED flagged neighbor entries were externally validated by + * a user space control plane. The kernel will not remove or invalidate them, + * but it can probe them and notify user space when they become reachable. + */ + +struct nda_cacheinfo { + __u32 ndm_confirmed; + __u32 ndm_used; + __u32 ndm_updated; + __u32 ndm_refcnt; +}; + +/***************************************************************** + * Neighbour tables specific messages. + * + * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the + * NLM_F_DUMP flag set. Every neighbour table configuration is + * spread over multiple messages to avoid running into message + * size limits on systems with many interfaces. The first message + * in the sequence transports all not device specific data such as + * statistics, configuration, and the default parameter set. + * This message is followed by 0..n messages carrying device + * specific parameter sets. + * Although the ordering should be sufficient, NDTA_NAME can be + * used to identify sequences. The initial message can be identified + * by checking for NDTA_CONFIG. The device specific messages do + * not contain this TLV but have NDTPA_IFINDEX set to the + * corresponding interface index. + * + * To change neighbour table attributes, send RTM_SETNEIGHTBL + * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], + * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked + * otherwise. Device specific parameter sets can be changed by + * setting NDTPA_IFINDEX to the interface index of the corresponding + * device. + ****/ + +struct ndt_stats { + __u64 ndts_allocs; + __u64 ndts_destroys; + __u64 ndts_hash_grows; + __u64 ndts_res_failed; + __u64 ndts_lookups; + __u64 ndts_hits; + __u64 ndts_rcv_probes_mcast; + __u64 ndts_rcv_probes_ucast; + __u64 ndts_periodic_gc_runs; + __u64 ndts_forced_gc_runs; + __u64 ndts_table_fulls; +}; + +enum { + NDTPA_UNSPEC, + NDTPA_IFINDEX, /* u32, unchangeable */ + NDTPA_REFCNT, /* u32, read-only */ + NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ + NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ + NDTPA_RETRANS_TIME, /* u64, msecs */ + NDTPA_GC_STALETIME, /* u64, msecs */ + NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ + NDTPA_QUEUE_LEN, /* u32 */ + NDTPA_APP_PROBES, /* u32 */ + NDTPA_UCAST_PROBES, /* u32 */ + NDTPA_MCAST_PROBES, /* u32 */ + NDTPA_ANYCAST_DELAY, /* u64, msecs */ + NDTPA_PROXY_DELAY, /* u64, msecs */ + NDTPA_PROXY_QLEN, /* u32 */ + NDTPA_LOCKTIME, /* u64, msecs */ + NDTPA_QUEUE_LENBYTES, /* u32 */ + NDTPA_MCAST_REPROBES, /* u32 */ + NDTPA_PAD, + NDTPA_INTERVAL_PROBE_TIME_MS, /* u64, msecs */ + __NDTPA_MAX +}; +#define NDTPA_MAX (__NDTPA_MAX - 1) + +struct ndtmsg { + __u8 ndtm_family; + __u8 ndtm_pad1; + __u16 ndtm_pad2; +}; + +struct ndt_config { + __u16 ndtc_key_len; + __u16 ndtc_entry_size; + __u32 ndtc_entries; + __u32 ndtc_last_flush; /* delta to now in msecs */ + __u32 ndtc_last_rand; /* delta to now in msecs */ + __u32 ndtc_hash_rnd; + __u32 ndtc_hash_mask; + __u32 ndtc_hash_chain_gc; + __u32 ndtc_proxy_qlen; +}; + +enum { + NDTA_UNSPEC, + NDTA_NAME, /* char *, unchangeable */ + NDTA_THRESH1, /* u32 */ + NDTA_THRESH2, /* u32 */ + NDTA_THRESH3, /* u32 */ + NDTA_CONFIG, /* struct ndt_config, read-only */ + NDTA_PARMS, /* nested TLV NDTPA_* */ + NDTA_STATS, /* struct ndt_stats, read-only */ + NDTA_GC_INTERVAL, /* u64, msecs */ + NDTA_PAD, + __NDTA_MAX +}; +#define NDTA_MAX (__NDTA_MAX - 1) + + /* FDB activity notification bits used in NFEA_ACTIVITY_NOTIFY: + * - FDB_NOTIFY_BIT - notify on activity/expire for any entry + * - FDB_NOTIFY_INACTIVE_BIT - mark as inactive to avoid multiple notifications + */ +enum { + FDB_NOTIFY_BIT = (1 << 0), + FDB_NOTIFY_INACTIVE_BIT = (1 << 1) +}; + +/* embedded into NDA_FDB_EXT_ATTRS: + * [NDA_FDB_EXT_ATTRS] = { + * [NFEA_ACTIVITY_NOTIFY] + * ... + * } + */ +enum { + NFEA_UNSPEC, + NFEA_ACTIVITY_NOTIFY, + NFEA_DONT_REFRESH, + __NFEA_MAX +}; +#define NFEA_MAX (__NFEA_MAX - 1) + +#endif diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 7600bf62dbdf..48eb49aa03d4 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -77,6 +77,11 @@ enum netdev_qstats_scope { NETDEV_QSTATS_SCOPE_QUEUE = 1, }; +enum netdev_napi_threaded { + NETDEV_NAPI_THREADED_DISABLED, + NETDEV_NAPI_THREADED_ENABLED, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -134,6 +139,7 @@ enum { NETDEV_A_NAPI_DEFER_HARD_IRQS, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + NETDEV_A_NAPI_THREADED, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) @@ -219,6 +225,7 @@ enum { NETDEV_CMD_QSTATS_GET, NETDEV_CMD_BIND_RX, NETDEV_CMD_NAPI_SET, + NETDEV_CMD_BIND_TX, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/include/uapi/linux/netfilter.h b/tools/include/uapi/linux/netfilter.h new file mode 100644 index 000000000000..5a79ccb76701 --- /dev/null +++ b/tools/include/uapi/linux/netfilter.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_NETFILTER_H +#define _UAPI__LINUX_NETFILTER_H + +#include <linux/types.h> +#include <linux/compiler.h> +#include <linux/in.h> +#include <linux/in6.h> + +/* Responses from hook functions. */ +#define NF_DROP 0 +#define NF_ACCEPT 1 +#define NF_STOLEN 2 +#define NF_QUEUE 3 +#define NF_REPEAT 4 +#define NF_STOP 5 /* Deprecated, for userspace nf_queue compatibility. */ +#define NF_MAX_VERDICT NF_STOP + +/* we overload the higher bits for encoding auxiliary data such as the queue + * number or errno values. Not nice, but better than additional function + * arguments. */ +#define NF_VERDICT_MASK 0x000000ff + +/* extra verdict flags have mask 0x0000ff00 */ +#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000 + +/* queue number (NF_QUEUE) or errno (NF_DROP) */ +#define NF_VERDICT_QMASK 0xffff0000 +#define NF_VERDICT_QBITS 16 + +#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE) + +#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP) + +/* only for userspace compatibility */ +#ifndef __KERNEL__ + +/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */ +#define NF_VERDICT_BITS 16 +#endif + +enum nf_inet_hooks { + NF_INET_PRE_ROUTING, + NF_INET_LOCAL_IN, + NF_INET_FORWARD, + NF_INET_LOCAL_OUT, + NF_INET_POST_ROUTING, + NF_INET_NUMHOOKS, + NF_INET_INGRESS = NF_INET_NUMHOOKS, +}; + +enum nf_dev_hooks { + NF_NETDEV_INGRESS, + NF_NETDEV_EGRESS, + NF_NETDEV_NUMHOOKS +}; + +enum { + NFPROTO_UNSPEC = 0, + NFPROTO_INET = 1, + NFPROTO_IPV4 = 2, + NFPROTO_ARP = 3, + NFPROTO_NETDEV = 5, + NFPROTO_BRIDGE = 7, + NFPROTO_IPV6 = 10, +#ifndef __KERNEL__ /* no longer supported by kernel */ + NFPROTO_DECNET = 12, +#endif + NFPROTO_NUMPROTO, +}; + +union nf_inet_addr { + __u32 all[4]; + __be32 ip; + __be32 ip6[4]; + struct in_addr in; + struct in6_addr in6; +}; + +#endif /* _UAPI__LINUX_NETFILTER_H */ diff --git a/tools/include/uapi/linux/netfilter_arp.h b/tools/include/uapi/linux/netfilter_arp.h new file mode 100644 index 000000000000..791dfc5ae907 --- /dev/null +++ b/tools/include/uapi/linux/netfilter_arp.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */ +#ifndef __LINUX_ARP_NETFILTER_H +#define __LINUX_ARP_NETFILTER_H + +/* ARP-specific defines for netfilter. + * (C)2002 Rusty Russell IBM -- This code is GPL. + */ + +#include <linux/netfilter.h> + +/* There is no PF_ARP. */ +#define NF_ARP 0 + +/* ARP Hooks */ +#define NF_ARP_IN 0 +#define NF_ARP_OUT 1 +#define NF_ARP_FORWARD 2 + +#ifndef __KERNEL__ +#define NF_ARP_NUMHOOKS 3 +#endif + +#endif /* __LINUX_ARP_NETFILTER_H */ diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h new file mode 100644 index 000000000000..33c9b578b3b2 --- /dev/null +++ b/tools/include/uapi/linux/nsfs.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_NSFS_H +#define __LINUX_NSFS_H + +#include <linux/ioctl.h> +#include <linux/types.h> + +#define NSIO 0xb7 + +/* Returns a file descriptor that refers to an owning user namespace */ +#define NS_GET_USERNS _IO(NSIO, 0x1) +/* Returns a file descriptor that refers to a parent namespace */ +#define NS_GET_PARENT _IO(NSIO, 0x2) +/* Returns the type of namespace (CLONE_NEW* value) referred to by + file descriptor */ +#define NS_GET_NSTYPE _IO(NSIO, 0x3) +/* Get owner UID (in the caller's user namespace) for a user namespace */ +#define NS_GET_OWNER_UID _IO(NSIO, 0x4) +/* Translate pid from target pid namespace into the caller's pid namespace. */ +#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) +/* Return thread-group leader id of pid in the callers pid namespace. */ +#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int) +/* Translate pid from caller's pid namespace into a target pid namespace. */ +#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int) +/* Return thread-group leader id of pid in the target pid namespace. */ +#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int) + +struct mnt_ns_info { + __u32 size; + __u32 nr_mounts; + __u64 mnt_ns_id; +}; + +#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */ + +/* Get information about namespace. */ +#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info) +/* Get next namespace. */ +#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info) +/* Get previous namespace. */ +#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info) + +/* Retrieve namespace identifiers. */ +#define NS_GET_MNTNS_ID _IOR(NSIO, 5, __u64) +#define NS_GET_ID _IOR(NSIO, 13, __u64) + +enum init_ns_ino { + IPC_NS_INIT_INO = 0xEFFFFFFFU, + UTS_NS_INIT_INO = 0xEFFFFFFEU, + USER_NS_INIT_INO = 0xEFFFFFFDU, + PID_NS_INIT_INO = 0xEFFFFFFCU, + CGROUP_NS_INIT_INO = 0xEFFFFFFBU, + TIME_NS_INIT_INO = 0xEFFFFFFAU, + NET_NS_INIT_INO = 0xEFFFFFF9U, + MNT_NS_INIT_INO = 0xEFFFFFF8U, +}; + +#endif /* __LINUX_NSFS_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 0524d541d4e3..78a362b80027 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -39,18 +39,21 @@ enum perf_type_id { /* * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE + * * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA * AA: hardware event ID * EEEEEEEE: PMU type ID + * * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB * BB: hardware cache ID * CC: hardware cache op ID * DD: hardware cache op result ID * EEEEEEEE: PMU type ID - * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. + * + * If the PMU type ID is 0, PERF_TYPE_RAW will be applied. */ -#define PERF_PMU_TYPE_SHIFT 32 -#define PERF_HW_EVENT_MASK 0xffffffff +#define PERF_PMU_TYPE_SHIFT 32 +#define PERF_HW_EVENT_MASK 0xffffffff /* * Generalized performance event event_id types, used by the @@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id { /* * Special "software" events provided by the kernel, even if the hardware * does not support performance events. These events measure various - * physical and sw events of the kernel (and allow the profiling of them as + * physical and SW events of the kernel (and allow the profiling of them as * well): */ enum perf_sw_ids { @@ -167,8 +170,9 @@ enum perf_event_sample_format { }; #define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) + /* - * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set + * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set. * * If the user does not pass priv level information via branch_sample_type, * the kernel uses the event's priv level. Branch and event priv levels do @@ -178,20 +182,20 @@ enum perf_event_sample_format { * of branches and therefore it supersedes all the other types. */ enum perf_branch_sample_type_shift { - PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ - PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ - PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ - - PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ - PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ - PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ - PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ - PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ - PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ - PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ - PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */ PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */ @@ -210,96 +214,95 @@ enum perf_branch_sample_type_shift { }; enum perf_branch_sample_type { - PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, - PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, - PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, + PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, + PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, + PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, - PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, - PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, - PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, - PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, - PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, - PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, + PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, + PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, - PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, - PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, - PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, + PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, + PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, - PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, - PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, + PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, + PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, - PERF_SAMPLE_BRANCH_TYPE_SAVE = - 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, - PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, - PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; /* - * Common flow change classification + * Common control flow change classifications: */ enum { - PERF_BR_UNKNOWN = 0, /* unknown */ - PERF_BR_COND = 1, /* conditional */ - PERF_BR_UNCOND = 2, /* unconditional */ - PERF_BR_IND = 3, /* indirect */ - PERF_BR_CALL = 4, /* function call */ - PERF_BR_IND_CALL = 5, /* indirect function call */ - PERF_BR_RET = 6, /* function return */ - PERF_BR_SYSCALL = 7, /* syscall */ - PERF_BR_SYSRET = 8, /* syscall return */ - PERF_BR_COND_CALL = 9, /* conditional function call */ - PERF_BR_COND_RET = 10, /* conditional function return */ - PERF_BR_ERET = 11, /* exception return */ - PERF_BR_IRQ = 12, /* irq */ - PERF_BR_SERROR = 13, /* system error */ - PERF_BR_NO_TX = 14, /* not in transaction */ - PERF_BR_EXTEND_ABI = 15, /* extend ABI */ + PERF_BR_UNKNOWN = 0, /* Unknown */ + PERF_BR_COND = 1, /* Conditional */ + PERF_BR_UNCOND = 2, /* Unconditional */ + PERF_BR_IND = 3, /* Indirect */ + PERF_BR_CALL = 4, /* Function call */ + PERF_BR_IND_CALL = 5, /* Indirect function call */ + PERF_BR_RET = 6, /* Function return */ + PERF_BR_SYSCALL = 7, /* Syscall */ + PERF_BR_SYSRET = 8, /* Syscall return */ + PERF_BR_COND_CALL = 9, /* Conditional function call */ + PERF_BR_COND_RET = 10, /* Conditional function return */ + PERF_BR_ERET = 11, /* Exception return */ + PERF_BR_IRQ = 12, /* IRQ */ + PERF_BR_SERROR = 13, /* System error */ + PERF_BR_NO_TX = 14, /* Not in transaction */ + PERF_BR_EXTEND_ABI = 15, /* Extend ABI */ PERF_BR_MAX, }; /* - * Common branch speculation outcome classification + * Common branch speculation outcome classifications: */ enum { - PERF_BR_SPEC_NA = 0, /* Not available */ - PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ - PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ - PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ + PERF_BR_SPEC_NA = 0, /* Not available */ + PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ + PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ + PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ PERF_BR_SPEC_MAX, }; enum { - PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ - PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ - PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ - PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ - PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ - PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ - PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ - PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ + PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ + PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ + PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ + PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ + PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ + PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ + PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ + PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ PERF_BR_NEW_MAX, }; enum { - PERF_BR_PRIV_UNKNOWN = 0, - PERF_BR_PRIV_USER = 1, - PERF_BR_PRIV_KERNEL = 2, - PERF_BR_PRIV_HV = 3, + PERF_BR_PRIV_UNKNOWN = 0, + PERF_BR_PRIV_USER = 1, + PERF_BR_PRIV_KERNEL = 2, + PERF_BR_PRIV_HV = 3, }; -#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 -#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 -#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 -#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 -#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 +#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 +#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 +#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 +#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 +#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ @@ -310,9 +313,9 @@ enum { * Values to determine ABI of the registers dump. */ enum perf_sample_regs_abi { - PERF_SAMPLE_REGS_ABI_NONE = 0, - PERF_SAMPLE_REGS_ABI_32 = 1, - PERF_SAMPLE_REGS_ABI_64 = 2, + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, }; /* @@ -320,21 +323,21 @@ enum perf_sample_regs_abi { * abort events. Multiple bits can be set. */ enum { - PERF_TXN_ELISION = (1 << 0), /* From elision */ - PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ - PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ - PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ - PERF_TXN_RETRY = (1 << 4), /* Retry possible */ - PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ - PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ - PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ + PERF_TXN_ELISION = (1 << 0), /* From elision */ + PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ + PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ + PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */ + PERF_TXN_RETRY = (1 << 4), /* Retry possible */ + PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ + PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ + PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ - PERF_TXN_MAX = (1 << 8), /* non-ABI */ + PERF_TXN_MAX = (1 << 8), /* non-ABI */ - /* bits 32..63 are reserved for the abort code */ + /* Bits 32..63 are reserved for the abort code */ - PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), - PERF_TXN_ABORT_SHIFT = 32, + PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), + PERF_TXN_ABORT_SHIFT = 32, }; /* @@ -369,22 +372,22 @@ enum perf_event_read_format { PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; -#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ -#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ -#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ -#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ - /* add: sample_stack_user */ -#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ -#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ -#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ -#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ -#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */ +#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */ +#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */ +#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */ + /* Add: sample_stack_user */ +#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */ +#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */ +#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */ +#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */ /* - * Hardware event_id to monitor via a performance monitoring event: - * - * @sample_max_stack: Max number of frame pointers in a callchain, - * should be < /proc/sys/kernel/perf_event_max_stack + * 'struct perf_event_attr' contains various attributes that define + * a performance event - most of them hardware related configuration + * details, but also a lot of behavioral switches and values implemented + * by the kernel. */ struct perf_event_attr { @@ -394,7 +397,7 @@ struct perf_event_attr { __u32 type; /* - * Size of the attr structure, for fwd/bwd compat. + * Size of the attr structure, for forward/backwards compatibility. */ __u32 size; @@ -449,21 +452,21 @@ struct perf_event_attr { comm_exec : 1, /* flag comm events that are due to an exec */ use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ - write_backward : 1, /* Write ring buffer from end to beginning */ + write_backward : 1, /* write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ ksymbol : 1, /* include ksymbol events */ - bpf_event : 1, /* include bpf events */ + bpf_event : 1, /* include BPF events */ aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ - build_id : 1, /* use build id in mmap2 events */ + build_id : 1, /* use build ID in mmap2 events */ inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ __reserved_1 : 26; union { - __u32 wakeup_events; /* wakeup every n events */ + __u32 wakeup_events; /* wake up every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; @@ -472,13 +475,13 @@ struct perf_event_attr { __u64 bp_addr; __u64 kprobe_func; /* for perf_kprobe */ __u64 uprobe_path; /* for perf_uprobe */ - __u64 config1; /* extension of config */ + __u64 config1; /* extension of config */ }; union { __u64 bp_len; - __u64 kprobe_addr; /* when kprobe_func == NULL */ + __u64 kprobe_addr; /* when kprobe_func == NULL */ __u64 probe_offset; /* for perf_[k,u]probe */ - __u64 config2; /* extension of config1 */ + __u64 config2; /* extension of config1 */ }; __u64 branch_sample_type; /* enum perf_branch_sample_type */ @@ -508,7 +511,16 @@ struct perf_event_attr { * Wakeup watermark for AUX area */ __u32 aux_watermark; + + /* + * Max number of frame pointers in a callchain, should be + * lower than /proc/sys/kernel/perf_event_max_stack. + * + * Max number of entries of branch stack should be lower + * than the hardware limit. + */ __u16 sample_max_stack; + __u16 __reserved_2; __u32 aux_sample_size; @@ -535,7 +547,7 @@ struct perf_event_attr { /* * Structure used by below PERF_EVENT_IOC_QUERY_BPF command - * to query bpf programs attached to the same perf tracepoint + * to query BPF programs attached to the same perf tracepoint * as the given perf event. */ struct perf_event_query_bpf { @@ -557,21 +569,21 @@ struct perf_event_query_bpf { /* * Ioctls that can be done on a perf event fd: */ -#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) -#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) -#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) -#define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) -#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) -#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) -#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) -#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *) +#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32) +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32) #define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) -#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *) +#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *) enum perf_event_ioc_flags { - PERF_IOC_FLAG_GROUP = 1U << 0, + PERF_IOC_FLAG_GROUP = 1U << 0, }; /* @@ -582,7 +594,7 @@ struct perf_event_mmap_page { __u32 compat_version; /* lowest version this is compat with */ /* - * Bits needed to read the hw events in user-space. + * Bits needed to read the HW events in user-space. * * u32 seq, time_mult, time_shift, index, width; * u64 count, enabled, running; @@ -620,7 +632,7 @@ struct perf_event_mmap_page { __u32 index; /* hardware event identifier */ __s64 offset; /* add to hardware event value */ __u64 time_enabled; /* time event active */ - __u64 time_running; /* time event on cpu */ + __u64 time_running; /* time event on CPU */ union { __u64 capabilities; struct { @@ -648,7 +660,7 @@ struct perf_event_mmap_page { /* * If cap_usr_time the below fields can be used to compute the time - * delta since time_enabled (in ns) using rdtsc or similar. + * delta since time_enabled (in ns) using RDTSC or similar. * * u64 quot, rem; * u64 delta; @@ -721,7 +733,7 @@ struct perf_event_mmap_page { * after reading this value. * * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data, after issueing + * written by user-space to reflect the last read data, after issuing * an smp_mb() to separate the data read from the ->data_tail store. * In this case the kernel will not over-write unread data. * @@ -737,7 +749,7 @@ struct perf_event_mmap_page { /* * AUX area is defined by aux_{offset,size} fields that should be set - * by the userspace, so that + * by the user-space, so that * * aux_offset >= data_offset + data_size * @@ -811,7 +823,7 @@ struct perf_event_mmap_page { * Indicates that thread was preempted in TASK_RUNNING state. * * PERF_RECORD_MISC_MMAP_BUILD_ID: - * Indicates that mmap2 event carries build id data. + * Indicates that mmap2 event carries build ID data. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) @@ -822,26 +834,26 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; + __u32 type; + __u16 misc; + __u16 size; }; struct perf_ns_link_info { - __u64 dev; - __u64 ino; + __u64 dev; + __u64 ino; }; enum { - NET_NS_INDEX = 0, - UTS_NS_INDEX = 1, - IPC_NS_INDEX = 2, - PID_NS_INDEX = 3, - USER_NS_INDEX = 4, - MNT_NS_INDEX = 5, - CGROUP_NS_INDEX = 6, - - NR_NAMESPACES, /* number of available namespaces */ + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ }; enum perf_event_type { @@ -857,11 +869,11 @@ enum perf_event_type { * optional fields being ignored. * * struct sample_id { - * { u32 pid, tid; } && PERF_SAMPLE_TID - * { u64 time; } && PERF_SAMPLE_TIME - * { u64 id; } && PERF_SAMPLE_ID - * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID - * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 id; } && PERF_SAMPLE_IDENTIFIER * } && perf_event_attr::sample_id_all * @@ -872,7 +884,7 @@ enum perf_event_type { /* * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: + * correlate user-space IPs to code. They have the following structure: * * struct { * struct perf_event_header header; @@ -882,7 +894,7 @@ enum perf_event_type { * u64 len; * u64 pgoff; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP = 1, @@ -892,7 +904,7 @@ enum perf_event_type { * struct perf_event_header header; * u64 id; * u64 lost; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_LOST = 2, @@ -903,7 +915,7 @@ enum perf_event_type { * * u32 pid, tid; * char comm[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_COMM = 3, @@ -914,7 +926,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_EXIT = 4, @@ -925,7 +937,7 @@ enum perf_event_type { * u64 time; * u64 id; * u64 stream_id; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_THROTTLE = 5, @@ -937,7 +949,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_FORK = 7, @@ -948,7 +960,7 @@ enum perf_event_type { * u32 pid, tid; * * struct read_format values; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_READ = 8, @@ -1003,12 +1015,12 @@ enum perf_event_type { * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * - * { u64 abi; # enum perf_sample_regs_abi - * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER * - * { u64 size; - * char data[size]; - * u64 dyn_size; } && PERF_SAMPLE_STACK_USER + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * * { union perf_sample_weight * { @@ -1033,10 +1045,11 @@ enum perf_event_type { * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR - * { u64 size; - * char data[size]; } && PERF_SAMPLE_AUX + * { u64 cgroup;} && PERF_SAMPLE_CGROUP * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE + * { u64 size; + * char data[size]; } && PERF_SAMPLE_AUX * }; */ PERF_RECORD_SAMPLE = 9, @@ -1068,7 +1081,7 @@ enum perf_event_type { * }; * u32 prot, flags; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP2 = 10, @@ -1077,12 +1090,12 @@ enum perf_event_type { * Records that new data landed in the AUX buffer part. * * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * u64 aux_offset; - * u64 aux_size; + * u64 aux_offset; + * u64 aux_size; * u64 flags; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_AUX = 11, @@ -1165,7 +1178,7 @@ enum perf_event_type { PERF_RECORD_KSYMBOL = 17, /* - * Record bpf events: + * Record BPF events: * enum perf_bpf_event_type { * PERF_BPF_EVENT_UNKNOWN = 0, * PERF_BPF_EVENT_PROG_LOAD = 1, @@ -1243,181 +1256,181 @@ enum perf_record_ksymbol_type { #define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) enum perf_bpf_event_type { - PERF_BPF_EVENT_UNKNOWN = 0, - PERF_BPF_EVENT_PROG_LOAD = 1, - PERF_BPF_EVENT_PROG_UNLOAD = 2, - PERF_BPF_EVENT_MAX, /* non-ABI */ + PERF_BPF_EVENT_UNKNOWN = 0, + PERF_BPF_EVENT_PROG_LOAD = 1, + PERF_BPF_EVENT_PROG_UNLOAD = 2, + PERF_BPF_EVENT_MAX, /* non-ABI */ }; -#define PERF_MAX_STACK_DEPTH 127 -#define PERF_MAX_CONTEXTS_PER_STACK 8 +#define PERF_MAX_STACK_DEPTH 127 +#define PERF_MAX_CONTEXTS_PER_STACK 8 enum perf_callchain_context { - PERF_CONTEXT_HV = (__u64)-32, - PERF_CONTEXT_KERNEL = (__u64)-128, - PERF_CONTEXT_USER = (__u64)-512, + PERF_CONTEXT_HV = (__u64)-32, + PERF_CONTEXT_KERNEL = (__u64)-128, + PERF_CONTEXT_USER = (__u64)-512, - PERF_CONTEXT_GUEST = (__u64)-2048, - PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, - PERF_CONTEXT_GUEST_USER = (__u64)-2560, + PERF_CONTEXT_GUEST = (__u64)-2048, + PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, + PERF_CONTEXT_GUEST_USER = (__u64)-2560, - PERF_CONTEXT_MAX = (__u64)-4095, + PERF_CONTEXT_MAX = (__u64)-4095, }; /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */ #define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ /* CoreSight PMU AUX buffer formats */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ -#define PERF_FLAG_FD_NO_GROUP (1UL << 0) -#define PERF_FLAG_FD_OUTPUT (1UL << 1) -#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ -#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#define PERF_FLAG_FD_NO_GROUP (1UL << 0) +#define PERF_FLAG_FD_OUTPUT (1UL << 1) +#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */ +#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ #if defined(__LITTLE_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_op:5, /* type of opcode */ - mem_lvl:14, /* memory hierarchy level */ - mem_snoop:5, /* snoop mode */ - mem_lock:2, /* lock instr */ - mem_dtlb:7, /* tlb access */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_remote:1, /* remote */ - mem_snoopx:2, /* snoop mode, ext */ - mem_blk:3, /* access blocked */ - mem_hops:3, /* hop level */ - mem_rsvd:18; + __u64 mem_op : 5, /* Type of opcode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_snoop : 5, /* Snoop mode */ + mem_lock : 2, /* Lock instr */ + mem_dtlb : 7, /* TLB access */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_remote : 1, /* Remote */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_blk : 3, /* Access blocked */ + mem_hops : 3, /* Hop level */ + mem_rsvd : 18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:18, - mem_hops:3, /* hop level */ - mem_blk:3, /* access blocked */ - mem_snoopx:2, /* snoop mode, ext */ - mem_remote:1, /* remote */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_dtlb:7, /* tlb access */ - mem_lock:2, /* lock instr */ - mem_snoop:5, /* snoop mode */ - mem_lvl:14, /* memory hierarchy level */ - mem_op:5; /* type of opcode */ + __u64 mem_rsvd : 18, + mem_hops : 3, /* Hop level */ + mem_blk : 3, /* Access blocked */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_remote : 1, /* Remote */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_dtlb : 7, /* TLB access */ + mem_lock : 2, /* Lock instr */ + mem_snoop : 5, /* Snoop mode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_op : 5; /* Type of opcode */ }; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif -/* type of opcode (load/store/prefetch,code) */ -#define PERF_MEM_OP_NA 0x01 /* not available */ -#define PERF_MEM_OP_LOAD 0x02 /* load instruction */ -#define PERF_MEM_OP_STORE 0x04 /* store instruction */ -#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */ -#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ -#define PERF_MEM_OP_SHIFT 0 +/* Type of memory opcode: */ +#define PERF_MEM_OP_NA 0x0001 /* Not available */ +#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */ +#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */ +#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */ +#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */ +#define PERF_MEM_OP_SHIFT 0 /* - * PERF_MEM_LVL_* namespace being depricated to some extent in the + * The PERF_MEM_LVL_* namespace is being deprecated to some extent in * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. - * Supporting this namespace inorder to not break defined ABIs. + * We support this namespace in order to not break defined ABIs. * - * memory hierarchy (memory level, hit or miss) + * Memory hierarchy (memory level, hit or miss) */ -#define PERF_MEM_LVL_NA 0x01 /* not available */ -#define PERF_MEM_LVL_HIT 0x02 /* hit level */ -#define PERF_MEM_LVL_MISS 0x04 /* miss level */ -#define PERF_MEM_LVL_L1 0x08 /* L1 */ -#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */ -#define PERF_MEM_LVL_L2 0x20 /* L2 */ -#define PERF_MEM_LVL_L3 0x40 /* L3 */ -#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */ -#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */ -#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */ -#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */ -#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */ -#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ -#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ -#define PERF_MEM_LVL_SHIFT 5 - -#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */ -#define PERF_MEM_REMOTE_SHIFT 37 - -#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */ -#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ -#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ -#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ -/* 0x7 available */ -#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ -#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ -#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ -#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ -#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ -#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ - -#define PERF_MEM_LVLNUM_SHIFT 33 - -/* snoop mode */ -#define PERF_MEM_SNOOP_NA 0x01 /* not available */ -#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ -#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ -#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ -#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ -#define PERF_MEM_SNOOP_SHIFT 19 - -#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ -#define PERF_MEM_SNOOPX_SHIFT 38 - -/* locked instruction */ -#define PERF_MEM_LOCK_NA 0x01 /* not available */ -#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ -#define PERF_MEM_LOCK_SHIFT 24 +#define PERF_MEM_LVL_NA 0x0001 /* Not available */ +#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */ +#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */ +#define PERF_MEM_LVL_L1 0x0008 /* L1 */ +#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */ +#define PERF_MEM_LVL_L2 0x0020 /* L2 */ +#define PERF_MEM_LVL_L3 0x0040 /* L3 */ +#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */ +#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */ +#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */ +#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */ +#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */ +#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ +#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ +#define PERF_MEM_LVL_SHIFT 5 + +#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */ +#define PERF_MEM_REMOTE_SHIFT 37 + +#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */ +#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */ +#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */ +#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */ +#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */ +/* 0x007 available */ +#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */ +#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */ +#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */ +#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */ +#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */ +#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */ +#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */ + +#define PERF_MEM_LVLNUM_SHIFT 33 + +/* Snoop mode */ +#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */ +#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */ +#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */ +#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */ +#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */ +#define PERF_MEM_SNOOP_SHIFT 19 + +#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */ +#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */ +#define PERF_MEM_SNOOPX_SHIFT 38 + +/* Locked instruction */ +#define PERF_MEM_LOCK_NA 0x0001 /* Not available */ +#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */ +#define PERF_MEM_LOCK_SHIFT 24 /* TLB access */ -#define PERF_MEM_TLB_NA 0x01 /* not available */ -#define PERF_MEM_TLB_HIT 0x02 /* hit level */ -#define PERF_MEM_TLB_MISS 0x04 /* miss level */ -#define PERF_MEM_TLB_L1 0x08 /* L1 */ -#define PERF_MEM_TLB_L2 0x10 /* L2 */ -#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/ -#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ -#define PERF_MEM_TLB_SHIFT 26 +#define PERF_MEM_TLB_NA 0x0001 /* Not available */ +#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */ +#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */ +#define PERF_MEM_TLB_L1 0x0008 /* L1 */ +#define PERF_MEM_TLB_L2 0x0010 /* L2 */ +#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/ +#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */ +#define PERF_MEM_TLB_SHIFT 26 /* Access blocked */ -#define PERF_MEM_BLK_NA 0x01 /* not available */ -#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ -#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ -#define PERF_MEM_BLK_SHIFT 40 - -/* hop level */ -#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ -#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ -#define PERF_MEM_HOPS_3 0x04 /* remote board */ +#define PERF_MEM_BLK_NA 0x0001 /* Not available */ +#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + +/* Hop level */ +#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */ +#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */ +#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */ +#define PERF_MEM_HOPS_3 0x0004 /* Remote board */ /* 5-7 available */ -#define PERF_MEM_HOPS_SHIFT 43 +#define PERF_MEM_HOPS_SHIFT 43 #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) /* - * single taken branch record layout: + * Layout of single taken branch records: * * from: source instruction (may not always be a branch insn) * to: branch target @@ -1436,37 +1449,37 @@ union perf_mem_data_src { struct perf_branch_entry { __u64 from; __u64 to; - __u64 mispred:1, /* target mispredicted */ - predicted:1,/* target predicted */ - in_tx:1, /* in transaction */ - abort:1, /* transaction abort */ - cycles:16, /* cycle count to last branch */ - type:4, /* branch type */ - spec:2, /* branch speculation info */ - new_type:4, /* additional branch type */ - priv:3, /* privilege level */ - reserved:31; + __u64 mispred : 1, /* target mispredicted */ + predicted : 1, /* target predicted */ + in_tx : 1, /* in transaction */ + abort : 1, /* transaction abort */ + cycles : 16, /* cycle count to last branch */ + type : 4, /* branch type */ + spec : 2, /* branch speculation info */ + new_type : 4, /* additional branch type */ + priv : 3, /* privilege level */ + reserved : 31; }; /* Size of used info bits in struct perf_branch_entry */ #define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 union perf_sample_weight { - __u64 full; + __u64 full; #if defined(__LITTLE_ENDIAN_BITFIELD) struct { - __u32 var1_dw; - __u16 var2_w; - __u16 var3_w; + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; }; #elif defined(__BIG_ENDIAN_BITFIELD) struct { - __u16 var3_w; - __u16 var2_w; - __u32 var1_dw; + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif }; diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 35791791a879..475fc8ca4403 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -230,7 +230,7 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) -/* Tagged user address controls for arm64 */ +/* Tagged user address controls for arm64 and RISC-V */ #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) @@ -244,6 +244,9 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* RISC-V pointer masking tag length */ +# define PR_PMLEN_SHIFT 24 +# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 @@ -252,7 +255,12 @@ struct prctl_mm_map { /* Dispatch syscalls to a userspace handler */ #define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 -# define PR_SYS_DISPATCH_ON 1 +/* Enable dispatch except for the specified range */ +# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1 +/* Enable dispatch for the specified range */ +# define PR_SYS_DISPATCH_INCLUSIVE_ON 2 +/* Legacy name for backwards compatibility */ +# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON /* The control values for the user space selector when dispatch is enabled */ # define SYSCALL_DISPATCH_FILTER_ALLOW 0 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 @@ -328,4 +336,42 @@ struct prctl_mm_map { # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */ # define PR_PPC_DEXCR_CTRL_MASK 0x1f +/* + * Get the current shadow stack configuration for the current thread, + * this will be the value configured via PR_SET_SHADOW_STACK_STATUS. + */ +#define PR_GET_SHADOW_STACK_STATUS 74 + +/* + * Set the current shadow stack configuration. Enabling the shadow + * stack will cause a shadow stack to be allocated for the thread. + */ +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +/* + * Prevent further changes to the specified shadow stack + * configuration. All bits may be locked via this call, including + * undefined bits. + */ +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +/* + * Controls the mode of timer_create() for CRIU restore operations. + * Enabling this allows CRIU to restore timers with explicit IDs. + * + * Don't use for normal operations as the result might be undefined. + */ +#define PR_TIMER_CREATE_RESTORE_IDS 77 +# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0 +# define PR_TIMER_CREATE_RESTORE_IDS_ON 1 +# define PR_TIMER_CREATE_RESTORE_IDS_GET 2 + +/* FUTEX hash management */ +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define PR_FUTEX_HASH_GET_SLOTS 2 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/include/uapi/linux/rtnetlink.h b/tools/include/uapi/linux/rtnetlink.h new file mode 100644 index 000000000000..dab9493c791b --- /dev/null +++ b/tools/include/uapi/linux/rtnetlink.h @@ -0,0 +1,848 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__LINUX_RTNETLINK_H +#define _UAPI__LINUX_RTNETLINK_H + +#include <linux/types.h> +#include <linux/netlink.h> +#include <linux/if_link.h> +#include <linux/if_addr.h> +#include <linux/neighbour.h> + +/* rtnetlink families. Values up to 127 are reserved for real address + * families, values above 128 may be used arbitrarily. + */ +#define RTNL_FAMILY_IPMR 128 +#define RTNL_FAMILY_IP6MR 129 +#define RTNL_FAMILY_MAX 129 + +/**** + * Routing/neighbour discovery messages. + ****/ + +/* Types of messages */ + +enum { + RTM_BASE = 16, +#define RTM_BASE RTM_BASE + + RTM_NEWLINK = 16, +#define RTM_NEWLINK RTM_NEWLINK + RTM_DELLINK, +#define RTM_DELLINK RTM_DELLINK + RTM_GETLINK, +#define RTM_GETLINK RTM_GETLINK + RTM_SETLINK, +#define RTM_SETLINK RTM_SETLINK + + RTM_NEWADDR = 20, +#define RTM_NEWADDR RTM_NEWADDR + RTM_DELADDR, +#define RTM_DELADDR RTM_DELADDR + RTM_GETADDR, +#define RTM_GETADDR RTM_GETADDR + + RTM_NEWROUTE = 24, +#define RTM_NEWROUTE RTM_NEWROUTE + RTM_DELROUTE, +#define RTM_DELROUTE RTM_DELROUTE + RTM_GETROUTE, +#define RTM_GETROUTE RTM_GETROUTE + + RTM_NEWNEIGH = 28, +#define RTM_NEWNEIGH RTM_NEWNEIGH + RTM_DELNEIGH, +#define RTM_DELNEIGH RTM_DELNEIGH + RTM_GETNEIGH, +#define RTM_GETNEIGH RTM_GETNEIGH + + RTM_NEWRULE = 32, +#define RTM_NEWRULE RTM_NEWRULE + RTM_DELRULE, +#define RTM_DELRULE RTM_DELRULE + RTM_GETRULE, +#define RTM_GETRULE RTM_GETRULE + + RTM_NEWQDISC = 36, +#define RTM_NEWQDISC RTM_NEWQDISC + RTM_DELQDISC, +#define RTM_DELQDISC RTM_DELQDISC + RTM_GETQDISC, +#define RTM_GETQDISC RTM_GETQDISC + + RTM_NEWTCLASS = 40, +#define RTM_NEWTCLASS RTM_NEWTCLASS + RTM_DELTCLASS, +#define RTM_DELTCLASS RTM_DELTCLASS + RTM_GETTCLASS, +#define RTM_GETTCLASS RTM_GETTCLASS + + RTM_NEWTFILTER = 44, +#define RTM_NEWTFILTER RTM_NEWTFILTER + RTM_DELTFILTER, +#define RTM_DELTFILTER RTM_DELTFILTER + RTM_GETTFILTER, +#define RTM_GETTFILTER RTM_GETTFILTER + + RTM_NEWACTION = 48, +#define RTM_NEWACTION RTM_NEWACTION + RTM_DELACTION, +#define RTM_DELACTION RTM_DELACTION + RTM_GETACTION, +#define RTM_GETACTION RTM_GETACTION + + RTM_NEWPREFIX = 52, +#define RTM_NEWPREFIX RTM_NEWPREFIX + + RTM_NEWMULTICAST = 56, +#define RTM_NEWMULTICAST RTM_NEWMULTICAST + RTM_DELMULTICAST, +#define RTM_DELMULTICAST RTM_DELMULTICAST + RTM_GETMULTICAST, +#define RTM_GETMULTICAST RTM_GETMULTICAST + + RTM_NEWANYCAST = 60, +#define RTM_NEWANYCAST RTM_NEWANYCAST + RTM_DELANYCAST, +#define RTM_DELANYCAST RTM_DELANYCAST + RTM_GETANYCAST, +#define RTM_GETANYCAST RTM_GETANYCAST + + RTM_NEWNEIGHTBL = 64, +#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL + RTM_GETNEIGHTBL = 66, +#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL + RTM_SETNEIGHTBL, +#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL + + RTM_NEWNDUSEROPT = 68, +#define RTM_NEWNDUSEROPT RTM_NEWNDUSEROPT + + RTM_NEWADDRLABEL = 72, +#define RTM_NEWADDRLABEL RTM_NEWADDRLABEL + RTM_DELADDRLABEL, +#define RTM_DELADDRLABEL RTM_DELADDRLABEL + RTM_GETADDRLABEL, +#define RTM_GETADDRLABEL RTM_GETADDRLABEL + + RTM_GETDCB = 78, +#define RTM_GETDCB RTM_GETDCB + RTM_SETDCB, +#define RTM_SETDCB RTM_SETDCB + + RTM_NEWNETCONF = 80, +#define RTM_NEWNETCONF RTM_NEWNETCONF + RTM_DELNETCONF, +#define RTM_DELNETCONF RTM_DELNETCONF + RTM_GETNETCONF = 82, +#define RTM_GETNETCONF RTM_GETNETCONF + + RTM_NEWMDB = 84, +#define RTM_NEWMDB RTM_NEWMDB + RTM_DELMDB = 85, +#define RTM_DELMDB RTM_DELMDB + RTM_GETMDB = 86, +#define RTM_GETMDB RTM_GETMDB + + RTM_NEWNSID = 88, +#define RTM_NEWNSID RTM_NEWNSID + RTM_DELNSID = 89, +#define RTM_DELNSID RTM_DELNSID + RTM_GETNSID = 90, +#define RTM_GETNSID RTM_GETNSID + + RTM_NEWSTATS = 92, +#define RTM_NEWSTATS RTM_NEWSTATS + RTM_GETSTATS = 94, +#define RTM_GETSTATS RTM_GETSTATS + RTM_SETSTATS, +#define RTM_SETSTATS RTM_SETSTATS + + RTM_NEWCACHEREPORT = 96, +#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT + + RTM_NEWCHAIN = 100, +#define RTM_NEWCHAIN RTM_NEWCHAIN + RTM_DELCHAIN, +#define RTM_DELCHAIN RTM_DELCHAIN + RTM_GETCHAIN, +#define RTM_GETCHAIN RTM_GETCHAIN + + RTM_NEWNEXTHOP = 104, +#define RTM_NEWNEXTHOP RTM_NEWNEXTHOP + RTM_DELNEXTHOP, +#define RTM_DELNEXTHOP RTM_DELNEXTHOP + RTM_GETNEXTHOP, +#define RTM_GETNEXTHOP RTM_GETNEXTHOP + + RTM_NEWLINKPROP = 108, +#define RTM_NEWLINKPROP RTM_NEWLINKPROP + RTM_DELLINKPROP, +#define RTM_DELLINKPROP RTM_DELLINKPROP + RTM_GETLINKPROP, +#define RTM_GETLINKPROP RTM_GETLINKPROP + + RTM_NEWVLAN = 112, +#define RTM_NEWVLAN RTM_NEWVLAN + RTM_DELVLAN, +#define RTM_DELVLAN RTM_DELVLAN + RTM_GETVLAN, +#define RTM_GETVLAN RTM_GETVLAN + + RTM_NEWNEXTHOPBUCKET = 116, +#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET + RTM_DELNEXTHOPBUCKET, +#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET + RTM_GETNEXTHOPBUCKET, +#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET + + RTM_NEWTUNNEL = 120, +#define RTM_NEWTUNNEL RTM_NEWTUNNEL + RTM_DELTUNNEL, +#define RTM_DELTUNNEL RTM_DELTUNNEL + RTM_GETTUNNEL, +#define RTM_GETTUNNEL RTM_GETTUNNEL + + __RTM_MAX, +#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) +}; + +#define RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE) +#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2) +#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2) + +/* + Generic structure for encapsulation of optional route information. + It is reminiscent of sockaddr, but with sa_family replaced + with attribute type. + */ + +struct rtattr { + unsigned short rta_len; + unsigned short rta_type; +}; + +/* Macros to handle rtattributes */ + +#define RTA_ALIGNTO 4U +#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) ) +#define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \ + (rta)->rta_len >= sizeof(struct rtattr) && \ + (rta)->rta_len <= (len)) +#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \ + (struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len))) +#define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len)) +#define RTA_SPACE(len) RTA_ALIGN(RTA_LENGTH(len)) +#define RTA_DATA(rta) ((void*)(((char*)(rta)) + RTA_LENGTH(0))) +#define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0)) + + + + +/****************************************************************************** + * Definitions used in routing table administration. + ****/ + +struct rtmsg { + unsigned char rtm_family; + unsigned char rtm_dst_len; + unsigned char rtm_src_len; + unsigned char rtm_tos; + + unsigned char rtm_table; /* Routing table id */ + unsigned char rtm_protocol; /* Routing protocol; see below */ + unsigned char rtm_scope; /* See below */ + unsigned char rtm_type; /* See below */ + + unsigned rtm_flags; +}; + +/* rtm_type */ + +enum { + RTN_UNSPEC, + RTN_UNICAST, /* Gateway or direct route */ + RTN_LOCAL, /* Accept locally */ + RTN_BROADCAST, /* Accept locally as broadcast, + send as broadcast */ + RTN_ANYCAST, /* Accept locally as broadcast, + but send as unicast */ + RTN_MULTICAST, /* Multicast route */ + RTN_BLACKHOLE, /* Drop */ + RTN_UNREACHABLE, /* Destination is unreachable */ + RTN_PROHIBIT, /* Administratively prohibited */ + RTN_THROW, /* Not in this table */ + RTN_NAT, /* Translate this address */ + RTN_XRESOLVE, /* Use external resolver */ + __RTN_MAX +}; + +#define RTN_MAX (__RTN_MAX - 1) + + +/* rtm_protocol */ + +#define RTPROT_UNSPEC 0 +#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects; + not used by current IPv4 */ +#define RTPROT_KERNEL 2 /* Route installed by kernel */ +#define RTPROT_BOOT 3 /* Route installed during boot */ +#define RTPROT_STATIC 4 /* Route installed by administrator */ + +/* Values of protocol >= RTPROT_STATIC are not interpreted by kernel; + they are just passed from user and back as is. + It will be used by hypothetical multiple routing daemons. + Note that protocol values should be standardized in order to + avoid conflicts. + */ + +#define RTPROT_GATED 8 /* Apparently, GateD */ +#define RTPROT_RA 9 /* RDISC/ND router advertisements */ +#define RTPROT_MRT 10 /* Merit MRT */ +#define RTPROT_ZEBRA 11 /* Zebra */ +#define RTPROT_BIRD 12 /* BIRD */ +#define RTPROT_DNROUTED 13 /* DECnet routing daemon */ +#define RTPROT_XORP 14 /* XORP */ +#define RTPROT_NTK 15 /* Netsukuku */ +#define RTPROT_DHCP 16 /* DHCP client */ +#define RTPROT_MROUTED 17 /* Multicast daemon */ +#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */ +#define RTPROT_BABEL 42 /* Babel daemon */ +#define RTPROT_OVN 84 /* OVN daemon */ +#define RTPROT_OPENR 99 /* Open Routing (Open/R) Routes */ +#define RTPROT_BGP 186 /* BGP Routes */ +#define RTPROT_ISIS 187 /* ISIS Routes */ +#define RTPROT_OSPF 188 /* OSPF Routes */ +#define RTPROT_RIP 189 /* RIP Routes */ +#define RTPROT_EIGRP 192 /* EIGRP Routes */ + +/* rtm_scope + + Really it is not scope, but sort of distance to the destination. + NOWHERE are reserved for not existing destinations, HOST is our + local addresses, LINK are destinations, located on directly attached + link and UNIVERSE is everywhere in the Universe. + + Intermediate values are also possible f.e. interior routes + could be assigned a value between UNIVERSE and LINK. +*/ + +enum rt_scope_t { + RT_SCOPE_UNIVERSE=0, +/* User defined values */ + RT_SCOPE_SITE=200, + RT_SCOPE_LINK=253, + RT_SCOPE_HOST=254, + RT_SCOPE_NOWHERE=255 +}; + +/* rtm_flags */ + +#define RTM_F_NOTIFY 0x100 /* Notify user of route change */ +#define RTM_F_CLONED 0x200 /* This route is cloned */ +#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */ +#define RTM_F_PREFIX 0x800 /* Prefix addresses */ +#define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */ +#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */ +#define RTM_F_OFFLOAD 0x4000 /* route is offloaded */ +#define RTM_F_TRAP 0x8000 /* route is trapping packets */ +#define RTM_F_OFFLOAD_FAILED 0x20000000 /* route offload failed, this value + * is chosen to avoid conflicts with + * other flags defined in + * include/uapi/linux/ipv6_route.h + */ + +/* Reserved table identifiers */ + +enum rt_class_t { + RT_TABLE_UNSPEC=0, +/* User defined values */ + RT_TABLE_COMPAT=252, + RT_TABLE_DEFAULT=253, + RT_TABLE_MAIN=254, + RT_TABLE_LOCAL=255, + RT_TABLE_MAX=0xFFFFFFFF +}; + + +/* Routing message attributes */ + +enum rtattr_type_t { + RTA_UNSPEC, + RTA_DST, + RTA_SRC, + RTA_IIF, + RTA_OIF, + RTA_GATEWAY, + RTA_PRIORITY, + RTA_PREFSRC, + RTA_METRICS, + RTA_MULTIPATH, + RTA_PROTOINFO, /* no longer used */ + RTA_FLOW, + RTA_CACHEINFO, + RTA_SESSION, /* no longer used */ + RTA_MP_ALGO, /* no longer used */ + RTA_TABLE, + RTA_MARK, + RTA_MFC_STATS, + RTA_VIA, + RTA_NEWDST, + RTA_PREF, + RTA_ENCAP_TYPE, + RTA_ENCAP, + RTA_EXPIRES, + RTA_PAD, + RTA_UID, + RTA_TTL_PROPAGATE, + RTA_IP_PROTO, + RTA_SPORT, + RTA_DPORT, + RTA_NH_ID, + RTA_FLOWLABEL, + __RTA_MAX +}; + +#define RTA_MAX (__RTA_MAX - 1) + +#define RTM_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtmsg)))) +#define RTM_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct rtmsg)) + +/* RTM_MULTIPATH --- array of struct rtnexthop. + * + * "struct rtnexthop" describes all necessary nexthop information, + * i.e. parameters of path to a destination via this nexthop. + * + * At the moment it is impossible to set different prefsrc, mtu, window + * and rtt for different paths from multipath. + */ + +struct rtnexthop { + unsigned short rtnh_len; + unsigned char rtnh_flags; + unsigned char rtnh_hops; + int rtnh_ifindex; +}; + +/* rtnh_flags */ + +#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ +#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ +#define RTNH_F_ONLINK 4 /* Gateway is forced on link */ +#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */ +#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */ +#define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */ +#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */ + +#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \ + RTNH_F_OFFLOAD | RTNH_F_TRAP) + +/* Macros to handle hexthops */ + +#define RTNH_ALIGNTO 4 +#define RTNH_ALIGN(len) ( ((len)+RTNH_ALIGNTO-1) & ~(RTNH_ALIGNTO-1) ) +#define RTNH_OK(rtnh,len) ((rtnh)->rtnh_len >= sizeof(struct rtnexthop) && \ + ((int)(rtnh)->rtnh_len) <= (len)) +#define RTNH_NEXT(rtnh) ((struct rtnexthop*)(((char*)(rtnh)) + RTNH_ALIGN((rtnh)->rtnh_len))) +#define RTNH_LENGTH(len) (RTNH_ALIGN(sizeof(struct rtnexthop)) + (len)) +#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len)) +#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0))) + +/* RTA_VIA */ +struct rtvia { + __kernel_sa_family_t rtvia_family; + __u8 rtvia_addr[]; +}; + +/* RTM_CACHEINFO */ + +struct rta_cacheinfo { + __u32 rta_clntref; + __u32 rta_lastuse; + __s32 rta_expires; + __u32 rta_error; + __u32 rta_used; + +#define RTNETLINK_HAVE_PEERINFO 1 + __u32 rta_id; + __u32 rta_ts; + __u32 rta_tsage; +}; + +/* RTM_METRICS --- array of struct rtattr with types of RTAX_* */ + +enum { + RTAX_UNSPEC, +#define RTAX_UNSPEC RTAX_UNSPEC + RTAX_LOCK, +#define RTAX_LOCK RTAX_LOCK + RTAX_MTU, +#define RTAX_MTU RTAX_MTU + RTAX_WINDOW, +#define RTAX_WINDOW RTAX_WINDOW + RTAX_RTT, +#define RTAX_RTT RTAX_RTT + RTAX_RTTVAR, +#define RTAX_RTTVAR RTAX_RTTVAR + RTAX_SSTHRESH, +#define RTAX_SSTHRESH RTAX_SSTHRESH + RTAX_CWND, +#define RTAX_CWND RTAX_CWND + RTAX_ADVMSS, +#define RTAX_ADVMSS RTAX_ADVMSS + RTAX_REORDERING, +#define RTAX_REORDERING RTAX_REORDERING + RTAX_HOPLIMIT, +#define RTAX_HOPLIMIT RTAX_HOPLIMIT + RTAX_INITCWND, +#define RTAX_INITCWND RTAX_INITCWND + RTAX_FEATURES, +#define RTAX_FEATURES RTAX_FEATURES + RTAX_RTO_MIN, +#define RTAX_RTO_MIN RTAX_RTO_MIN + RTAX_INITRWND, +#define RTAX_INITRWND RTAX_INITRWND + RTAX_QUICKACK, +#define RTAX_QUICKACK RTAX_QUICKACK + RTAX_CC_ALGO, +#define RTAX_CC_ALGO RTAX_CC_ALGO + RTAX_FASTOPEN_NO_COOKIE, +#define RTAX_FASTOPEN_NO_COOKIE RTAX_FASTOPEN_NO_COOKIE + __RTAX_MAX +}; + +#define RTAX_MAX (__RTAX_MAX - 1) + +#define RTAX_FEATURE_ECN (1 << 0) +#define RTAX_FEATURE_SACK (1 << 1) /* unused */ +#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */ +#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */ +#define RTAX_FEATURE_TCP_USEC_TS (1 << 4) + +#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \ + RTAX_FEATURE_SACK | \ + RTAX_FEATURE_TIMESTAMP | \ + RTAX_FEATURE_ALLFRAG | \ + RTAX_FEATURE_TCP_USEC_TS) + +struct rta_session { + __u8 proto; + __u8 pad1; + __u16 pad2; + + union { + struct { + __u16 sport; + __u16 dport; + } ports; + + struct { + __u8 type; + __u8 code; + __u16 ident; + } icmpt; + + __u32 spi; + } u; +}; + +struct rta_mfc_stats { + __u64 mfcs_packets; + __u64 mfcs_bytes; + __u64 mfcs_wrong_if; +}; + +/**** + * General form of address family dependent message. + ****/ + +struct rtgenmsg { + unsigned char rtgen_family; +}; + +/***************************************************************** + * Link layer specific messages. + ****/ + +/* struct ifinfomsg + * passes link level specific information, not dependent + * on network protocol. + */ + +struct ifinfomsg { + unsigned char ifi_family; + unsigned char __ifi_pad; + unsigned short ifi_type; /* ARPHRD_* */ + int ifi_index; /* Link index */ + unsigned ifi_flags; /* IFF_* flags */ + unsigned ifi_change; /* IFF_* change mask */ +}; + +/******************************************************************** + * prefix information + ****/ + +struct prefixmsg { + unsigned char prefix_family; + unsigned char prefix_pad1; + unsigned short prefix_pad2; + int prefix_ifindex; + unsigned char prefix_type; + unsigned char prefix_len; + unsigned char prefix_flags; + unsigned char prefix_pad3; +}; + +enum +{ + PREFIX_UNSPEC, + PREFIX_ADDRESS, + PREFIX_CACHEINFO, + __PREFIX_MAX +}; + +#define PREFIX_MAX (__PREFIX_MAX - 1) + +struct prefix_cacheinfo { + __u32 preferred_time; + __u32 valid_time; +}; + + +/***************************************************************** + * Traffic control messages. + ****/ + +struct tcmsg { + unsigned char tcm_family; + unsigned char tcm__pad1; + unsigned short tcm__pad2; + int tcm_ifindex; + __u32 tcm_handle; + __u32 tcm_parent; +/* tcm_block_index is used instead of tcm_parent + * in case tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK + */ +#define tcm_block_index tcm_parent + __u32 tcm_info; +}; + +/* For manipulation of filters in shared block, tcm_ifindex is set to + * TCM_IFINDEX_MAGIC_BLOCK, and tcm_parent is aliased to tcm_block_index + * which is the block index. + */ +#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU) + +enum { + TCA_UNSPEC, + TCA_KIND, + TCA_OPTIONS, + TCA_STATS, + TCA_XSTATS, + TCA_RATE, + TCA_FCNT, + TCA_STATS2, + TCA_STAB, + TCA_PAD, + TCA_DUMP_INVISIBLE, + TCA_CHAIN, + TCA_HW_OFFLOAD, + TCA_INGRESS_BLOCK, + TCA_EGRESS_BLOCK, + TCA_DUMP_FLAGS, + TCA_EXT_WARN_MSG, + __TCA_MAX +}; + +#define TCA_MAX (__TCA_MAX - 1) + +#define TCA_DUMP_FLAGS_TERSE (1 << 0) /* Means that in dump user gets only basic + * data necessary to identify the objects + * (handle, cookie, etc.) and stats. + */ + +#define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) +#define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) + +/******************************************************************** + * Neighbor Discovery userland options + ****/ + +struct nduseroptmsg { + unsigned char nduseropt_family; + unsigned char nduseropt_pad1; + unsigned short nduseropt_opts_len; /* Total length of options */ + int nduseropt_ifindex; + __u8 nduseropt_icmp_type; + __u8 nduseropt_icmp_code; + unsigned short nduseropt_pad2; + unsigned int nduseropt_pad3; + /* Followed by one or more ND options */ +}; + +enum { + NDUSEROPT_UNSPEC, + NDUSEROPT_SRCADDR, + __NDUSEROPT_MAX +}; + +#define NDUSEROPT_MAX (__NDUSEROPT_MAX - 1) + +#ifndef __KERNEL__ +/* RTnetlink multicast groups - backwards compatibility for userspace */ +#define RTMGRP_LINK 1 +#define RTMGRP_NOTIFY 2 +#define RTMGRP_NEIGH 4 +#define RTMGRP_TC 8 + +#define RTMGRP_IPV4_IFADDR 0x10 +#define RTMGRP_IPV4_MROUTE 0x20 +#define RTMGRP_IPV4_ROUTE 0x40 +#define RTMGRP_IPV4_RULE 0x80 + +#define RTMGRP_IPV6_IFADDR 0x100 +#define RTMGRP_IPV6_MROUTE 0x200 +#define RTMGRP_IPV6_ROUTE 0x400 +#define RTMGRP_IPV6_IFINFO 0x800 + +#define RTMGRP_DECnet_IFADDR 0x1000 +#define RTMGRP_DECnet_ROUTE 0x4000 + +#define RTMGRP_IPV6_PREFIX 0x20000 +#endif + +/* RTnetlink multicast groups */ +enum rtnetlink_groups { + RTNLGRP_NONE, +#define RTNLGRP_NONE RTNLGRP_NONE + RTNLGRP_LINK, +#define RTNLGRP_LINK RTNLGRP_LINK + RTNLGRP_NOTIFY, +#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY + RTNLGRP_NEIGH, +#define RTNLGRP_NEIGH RTNLGRP_NEIGH + RTNLGRP_TC, +#define RTNLGRP_TC RTNLGRP_TC + RTNLGRP_IPV4_IFADDR, +#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR + RTNLGRP_IPV4_MROUTE, +#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE + RTNLGRP_IPV4_ROUTE, +#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE + RTNLGRP_IPV4_RULE, +#define RTNLGRP_IPV4_RULE RTNLGRP_IPV4_RULE + RTNLGRP_IPV6_IFADDR, +#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR + RTNLGRP_IPV6_MROUTE, +#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE + RTNLGRP_IPV6_ROUTE, +#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE + RTNLGRP_IPV6_IFINFO, +#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO + RTNLGRP_DECnet_IFADDR, +#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR + RTNLGRP_NOP2, + RTNLGRP_DECnet_ROUTE, +#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE + RTNLGRP_DECnet_RULE, +#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE + RTNLGRP_NOP4, + RTNLGRP_IPV6_PREFIX, +#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + RTNLGRP_IPV6_RULE, +#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE + RTNLGRP_ND_USEROPT, +#define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT + RTNLGRP_PHONET_IFADDR, +#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR + RTNLGRP_PHONET_ROUTE, +#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE + RTNLGRP_DCB, +#define RTNLGRP_DCB RTNLGRP_DCB + RTNLGRP_IPV4_NETCONF, +#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF + RTNLGRP_IPV6_NETCONF, +#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF + RTNLGRP_MDB, +#define RTNLGRP_MDB RTNLGRP_MDB + RTNLGRP_MPLS_ROUTE, +#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE + RTNLGRP_NSID, +#define RTNLGRP_NSID RTNLGRP_NSID + RTNLGRP_MPLS_NETCONF, +#define RTNLGRP_MPLS_NETCONF RTNLGRP_MPLS_NETCONF + RTNLGRP_IPV4_MROUTE_R, +#define RTNLGRP_IPV4_MROUTE_R RTNLGRP_IPV4_MROUTE_R + RTNLGRP_IPV6_MROUTE_R, +#define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_R + RTNLGRP_NEXTHOP, +#define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP + RTNLGRP_BRVLAN, +#define RTNLGRP_BRVLAN RTNLGRP_BRVLAN + RTNLGRP_MCTP_IFADDR, +#define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR + RTNLGRP_TUNNEL, +#define RTNLGRP_TUNNEL RTNLGRP_TUNNEL + RTNLGRP_STATS, +#define RTNLGRP_STATS RTNLGRP_STATS + RTNLGRP_IPV4_MCADDR, +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR + RTNLGRP_IPV6_MCADDR, +#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR + RTNLGRP_IPV6_ACADDR, +#define RTNLGRP_IPV6_ACADDR RTNLGRP_IPV6_ACADDR + __RTNLGRP_MAX +}; +#define RTNLGRP_MAX (__RTNLGRP_MAX - 1) + +/* TC action piece */ +struct tcamsg { + unsigned char tca_family; + unsigned char tca__pad1; + unsigned short tca__pad2; +}; + +enum { + TCA_ROOT_UNSPEC, + TCA_ROOT_TAB, +#define TCA_ACT_TAB TCA_ROOT_TAB +#define TCAA_MAX TCA_ROOT_TAB + TCA_ROOT_FLAGS, + TCA_ROOT_COUNT, + TCA_ROOT_TIME_DELTA, /* in msecs */ + TCA_ROOT_EXT_WARN_MSG, + __TCA_ROOT_MAX, +#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1) +}; + +#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg)))) +#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg)) +/* tcamsg flags stored in attribute TCA_ROOT_FLAGS + * + * TCA_ACT_FLAG_LARGE_DUMP_ON user->kernel to request for larger than + * TCA_ACT_MAX_PRIO actions in a dump. All dump responses will contain the + * number of actions being dumped stored in for user app's consumption in + * TCA_ROOT_COUNT + * + * TCA_ACT_FLAG_TERSE_DUMP user->kernel to request terse (brief) dump that only + * includes essential action info (kind, index, etc.) + * + */ +#define TCA_FLAG_LARGE_DUMP_ON (1 << 0) +#define TCA_ACT_FLAG_LARGE_DUMP_ON TCA_FLAG_LARGE_DUMP_ON +#define TCA_ACT_FLAG_TERSE_DUMP (1 << 1) + +/* New extended info filters for IFLA_EXT_MASK */ +#define RTEXT_FILTER_VF (1 << 0) +#define RTEXT_FILTER_BRVLAN (1 << 1) +#define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2) +#define RTEXT_FILTER_SKIP_STATS (1 << 3) +#define RTEXT_FILTER_MRP (1 << 4) +#define RTEXT_FILTER_CFM_CONFIG (1 << 5) +#define RTEXT_FILTER_CFM_STATUS (1 << 6) +#define RTEXT_FILTER_MST (1 << 7) + +/* End of information exported to user level */ + + + +#endif /* _UAPI__LINUX_RTNETLINK_H */ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 887a25286441..1686861aae20 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -98,43 +98,97 @@ struct statx_timestamp { */ struct statx { /* 0x00 */ - __u32 stx_mask; /* What results were written [uncond] */ - __u32 stx_blksize; /* Preferred general I/O size [uncond] */ - __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* What results were written [uncond] */ + __u32 stx_mask; + + /* Preferred general I/O size [uncond] */ + __u32 stx_blksize; + + /* Flags conveying information about the file [uncond] */ + __u64 stx_attributes; + /* 0x10 */ - __u32 stx_nlink; /* Number of hard links */ - __u32 stx_uid; /* User ID of owner */ - __u32 stx_gid; /* Group ID of owner */ - __u16 stx_mode; /* File mode */ + /* Number of hard links */ + __u32 stx_nlink; + + /* User ID of owner */ + __u32 stx_uid; + + /* Group ID of owner */ + __u32 stx_gid; + + /* File mode */ + __u16 stx_mode; __u16 __spare0[1]; + /* 0x20 */ - __u64 stx_ino; /* Inode number */ - __u64 stx_size; /* File size */ - __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* Inode number */ + __u64 stx_ino; + + /* File size */ + __u64 stx_size; + + /* Number of 512-byte blocks allocated */ + __u64 stx_blocks; + + /* Mask to show what's supported in stx_attributes */ + __u64 stx_attributes_mask; + /* 0x40 */ - struct statx_timestamp stx_atime; /* Last access time */ - struct statx_timestamp stx_btime; /* File creation time */ - struct statx_timestamp stx_ctime; /* Last attribute change time */ - struct statx_timestamp stx_mtime; /* Last data modification time */ + /* Last access time */ + struct statx_timestamp stx_atime; + + /* File creation time */ + struct statx_timestamp stx_btime; + + /* Last attribute change time */ + struct statx_timestamp stx_ctime; + + /* Last data modification time */ + struct statx_timestamp stx_mtime; + /* 0x80 */ - __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_major; __u32 stx_rdev_minor; - __u32 stx_dev_major; /* ID of device containing file [uncond] */ + + /* ID of device containing file [uncond] */ + __u32 stx_dev_major; __u32 stx_dev_minor; + /* 0x90 */ __u64 stx_mnt_id; - __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ - __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ + + /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_mem_align; + + /* File offset alignment for direct I/O */ + __u32 stx_dio_offset_align; + /* 0xa0 */ - __u64 stx_subvol; /* Subvolume identifier */ - __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ - __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* Subvolume identifier */ + __u64 stx_subvol; + + /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_min; + + /* Max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; + /* 0xb0 */ - __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ - __u32 __spare1[1]; - /* 0xb8 */ - __u64 __spare3[9]; /* Spare space for future expansion */ + /* Max atomic write segment count */ + __u32 stx_atomic_write_segments_max; + + /* File offset alignment for direct I/O reads */ + __u32 stx_dio_read_offset_align; + + /* Optimised max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max_opt; + __u32 __spare2[1]; + + /* 0xc0 */ + __u64 __spare3[8]; /* Spare space for future expansion */ + /* 0x100 */ }; @@ -164,6 +218,7 @@ struct statx { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/tools/include/vdso/unaligned.h b/tools/include/vdso/unaligned.h index eee3d2a4dbe4..ff0c06b6513e 100644 --- a/tools/include/vdso/unaligned.h +++ b/tools/include/vdso/unaligned.h @@ -2,14 +2,14 @@ #ifndef __VDSO_UNALIGNED_H #define __VDSO_UNALIGNED_H -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed * __get_pptr = (typeof(__get_pptr))(ptr); \ + __get_pptr->x; \ }) -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed * __put_pptr = (typeof(__put_pptr))(ptr); \ + __put_pptr->x = (val); \ } while (0) #endif /* __VDSO_UNALIGNED_H */ |