From 6ea2987c9a7b6c5f37d08a3eaa664c9ff7467670 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 25 Jul 2024 18:54:18 +0200 Subject: tools/nolibc: include arch.h from string.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit string.h tests for the macros NOLIBC_ARCH_HAS_$FUNC to use the architecture-optimized function variants. However if string.h is included before arch.h header then that check does not work, leading to duplicate function definitions. Fixes: 553845eebd60 ("tools/nolibc: x86-64: Use `rep movsb` for `memcpy()` and `memmove()`") Fixes: 12108aa8c1a1 ("tools/nolibc: x86-64: Use `rep stosb` for `memset()`") Cc: stable@vger.kernel.org Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20240725-arch-has-func-v1-1-5521ed354acd@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/string.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h index f9ab28421e6d..9ec9c24f38c0 100644 --- a/tools/include/nolibc/string.h +++ b/tools/include/nolibc/string.h @@ -7,6 +7,7 @@ #ifndef _NOLIBC_STRING_H #define _NOLIBC_STRING_H +#include "arch.h" #include "std.h" static void *malloc(size_t len); -- cgit v1.2.3 From ae1f550efc11eaf1496c431d9c6e784cb49124c5 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 25 Jul 2024 19:10:44 +0200 Subject: tools/nolibc: add stdbool.h header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stdbool.h is very simple. Provide an implementation for the user convenience. Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20240725-nolibc-stdbool-v1-1-a6ee2c80bcde@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 3 ++- tools/include/nolibc/stdbool.h | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 tools/include/nolibc/stdbool.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index e69c26abe1ea..a1f55fb24bb3 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -35,6 +35,7 @@ all_files := \ stackprotector.h \ std.h \ stdarg.h \ + stdbool.h \ stdint.h \ stdlib.h \ string.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 989e707263a4..92436b1e4441 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -74,7 +74,8 @@ * -I../nolibc -o hello hello.c -lgcc * * The available standard (but limited) include files are: - * ctype.h, errno.h, signal.h, stdarg.h, stdio.h, stdlib.h, string.h, time.h + * ctype.h, errno.h, signal.h, stdarg.h, stdbool.h stdio.h, stdlib.h, + * string.h, time.h * * In addition, the following ones are expected to be provided by the compiler: * float.h, stddef.h diff --git a/tools/include/nolibc/stdbool.h b/tools/include/nolibc/stdbool.h new file mode 100644 index 000000000000..60feece22f17 --- /dev/null +++ b/tools/include/nolibc/stdbool.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Boolean types support for NOLIBC + * Copyright (C) 2024 Thomas Weißschuh + */ + +#ifndef _NOLIBC_STDBOOL_H +#define _NOLIBC_STDBOOL_H + +#define bool _Bool +#define true 1 +#define false 0 + +#define __bool_true_false_are_defined 1 + +#endif /* _NOLIBC_STDBOOL_H */ -- cgit v1.2.3 From 06dd4c5a561c48c66745352bae0b2c04bbe455be Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 18 Jul 2024 14:13:47 +0530 Subject: perf annotate: Add disasm_line__parse() to parse raw instruction for powerpc Currently, the perf tool infrastructure uses the disasm_line__parse function to parse disassembled line. Example snippet from objdump: objdump --start-address=
--stop-address=
-d --no-show-raw-insn -C c0000000010224b4: lwz r10,0(r9) This line "lwz r10,0(r9)" is parsed to extract instruction name, registers names and offset. In powerpc, the approach for data type profiling uses raw instruction instead of result from objdump to identify the instruction category and extract the source/target registers. Example: 38 01 81 e8 ld r4,312(r1) Here "38 01 81 e8" is the raw instruction representation. Add function "disasm_line__parse_powerpc" to handle parsing of raw instruction. Also update "struct disasm_line" to save the binary code/ With the change, function captures: line -> "38 01 81 e8 ld r4,312(r1)" raw instruction "38 01 81 e8" Raw instruction is used later to extract the reg/offset fields. Macros are added to extract opcode and register fields. "struct disasm_line" is updated to carry union of "bytes" and "raw_insn" of 32 bit to carry raw code (raw). Function "disasm_line__parse_powerpc fills the raw instruction hex value and can use macros to get opcode. There is no changes in existing code paths, which parses the disassembled code. The size of raw instruction depends on architecture. In case of powerpc, the parsing the disasm line needs to handle cases for reading binary code directly from DSO as well as parsing the objdump result. Hence adding the logic into separate function instead of updating "disasm_line__parse". The architecture using the instruction name and present approach is not altered. Since this approach targets powerpc, the macro implementation is added for powerpc as of now. Since the disasm_line__parse is used in other cases (perf annotate) and not only data tye profiling, the powerpc callback includes changes to work with binary code as well as mnemonic representation. Also in case if the DSO read fails and libcapstone is not supported, the approach fallback to use objdump as option. Hence as option, patch has changes to ensure objdump option also works well. Reviewed-by: Kajol Jain Reviewed-by: Namhyung Kim Signed-off-by: Athira Rajeev Tested-by: Kajol Jain Cc: Adrian Hunter Cc: Akanksha J N Cc: Christophe Leroy Cc: Disha Goel Cc: Hari Bathini Cc: Ian Rogers Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Segher Boessenkool Link: https://lore.kernel.org/lkml/20240718084358.72242-5-atrajeev@linux.vnet.ibm.com [ Add check for strndup() result ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/string.h | 2 + tools/lib/string.c | 13 +++++++ tools/perf/arch/powerpc/annotate/instructions.c | 1 + tools/perf/arch/powerpc/util/dwarf-regs.c | 9 +++++ tools/perf/util/annotate.h | 5 ++- tools/perf/util/disasm.c | 51 ++++++++++++++++++++++++- 6 files changed, 79 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index db5c99318c79..0acb1fc14e19 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -46,5 +46,7 @@ extern char * __must_check skip_spaces(const char *); extern char *strim(char *); +extern void remove_spaces(char *s); + extern void *memchr_inv(const void *start, int c, size_t bytes); #endif /* _TOOLS_LINUX_STRING_H_ */ diff --git a/tools/lib/string.c b/tools/lib/string.c index 8b6892f959ab..3126d2cff716 100644 --- a/tools/lib/string.c +++ b/tools/lib/string.c @@ -153,6 +153,19 @@ char *strim(char *s) return skip_spaces(s); } +/* + * remove_spaces - Removes whitespaces from @s + */ +void remove_spaces(char *s) +{ + char *d = s; + + do { + while (*d == ' ') + ++d; + } while ((*s++ = *d++)); +} + /** * strreplace - Replace all occurrences of character in string. * @s: The string to operate on. diff --git a/tools/perf/arch/powerpc/annotate/instructions.c b/tools/perf/arch/powerpc/annotate/instructions.c index a3f423c27cae..d57fd023ef9c 100644 --- a/tools/perf/arch/powerpc/annotate/instructions.c +++ b/tools/perf/arch/powerpc/annotate/instructions.c @@ -55,6 +55,7 @@ static int powerpc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->initialized = true; arch->associate_instruction_ops = powerpc__associate_instruction_ops; arch->objdump.comment_char = '#'; + annotate_opts.show_asm_raw = true; } return 0; diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c index 0c4f4caf53ac..430623ca5612 100644 --- a/tools/perf/arch/powerpc/util/dwarf-regs.c +++ b/tools/perf/arch/powerpc/util/dwarf-regs.c @@ -98,3 +98,12 @@ int regs_query_register_offset(const char *name) return roff->ptregs_offset; return -EINVAL; } + +#define PPC_OP(op) (((op) >> 26) & 0x3F) +#define PPC_RA(a) (((a) >> 16) & 0x1f) +#define PPC_RT(t) (((t) >> 21) & 0x1f) +#define PPC_RB(b) (((b) >> 11) & 0x1f) +#define PPC_D(D) ((D) & 0xfffe) +#define PPC_DS(DS) ((DS) & 0xfffc) +#define OP_LD 58 +#define OP_STD 62 diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index d5c821c22f79..9ba772f46270 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -113,7 +113,10 @@ struct annotation_line { struct disasm_line { struct ins ins; struct ins_operands ops; - + union { + u8 bytes[4]; + u32 raw_insn; + } raw; /* This needs to be at the end. */ struct annotation_line al; }; diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index 931cd92dcc40..6d4055b9b966 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -44,6 +44,7 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size, static void ins__sort(struct arch *arch); static int disasm_line__parse(char *line, const char **namep, char **rawp); +static int disasm_line__parse_powerpc(struct disasm_line *dl); static __attribute__((constructor)) void symbol__init_regexpr(void) { @@ -845,6 +846,51 @@ out: return -1; } +/* + * Parses the result captured from symbol__disassemble_* + * Example, line read from DSO file in powerpc: + * line: 38 01 81 e8 + * opcode: fetched from arch specific get_opcode_insn + * rawp_insn: e8810138 + * + * rawp_insn is used later to extract the reg/offset fields + */ +#define PPC_OP(op) (((op) >> 26) & 0x3F) +#define RAW_BYTES 11 + +static int disasm_line__parse_powerpc(struct disasm_line *dl) +{ + char *line = dl->al.line; + const char **namep = &dl->ins.name; + char **rawp = &dl->ops.raw; + char *tmp_raw_insn, *name_raw_insn = skip_spaces(line); + char *name = skip_spaces(name_raw_insn + RAW_BYTES); + int objdump = 0; + + if (strlen(line) > RAW_BYTES) + objdump = 1; + + if (name_raw_insn[0] == '\0') + return -1; + + if (objdump) { + disasm_line__parse(name, namep, rawp); + } else + *namep = ""; + + tmp_raw_insn = strndup(name_raw_insn, 11); + if (tmp_raw_insn == NULL) + return -1; + + remove_spaces(tmp_raw_insn); + + sscanf(tmp_raw_insn, "%x", &dl->raw.raw_insn); + if (objdump) + dl->raw.raw_insn = be32_to_cpu(dl->raw.raw_insn); + + return 0; +} + static void annotation_line__init(struct annotation_line *al, struct annotate_args *args, int nr) @@ -898,7 +944,10 @@ struct disasm_line *disasm_line__new(struct annotate_args *args) goto out_delete; if (args->offset != -1) { - if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) + if (arch__is(args->arch, "powerpc")) { + if (disasm_line__parse_powerpc(dl) < 0) + goto out_free_line; + } else if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; disasm_line__init_ins(dl, args->arch, &args->ms); -- cgit v1.2.3 From d68c08173b70952cd74bb45075b843f4a637a43b Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 12 Jul 2024 03:51:37 +0000 Subject: memblock tests: include export.h in linkage.h as kernel dose In kernel code, linkage.h includes export.h. Let's sync with kernel. This is a preparation for move init.h in common include directory. Signed-off-by: Wei Yang CC: Mike Rapoport Link: https://lore.kernel.org/r/20240712035138.24674-2-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- tools/include/linux/linkage.h | 2 ++ tools/testing/memblock/linux/init.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/linux/linkage.h b/tools/include/linux/linkage.h index bc763d500262..20dee24d7e1b 100644 --- a/tools/include/linux/linkage.h +++ b/tools/include/linux/linkage.h @@ -1,4 +1,6 @@ #ifndef _TOOLS_INCLUDE_LINUX_LINKAGE_H #define _TOOLS_INCLUDE_LINUX_LINKAGE_H +#include + #endif /* _TOOLS_INCLUDE_LINUX_LINKAGE_H */ diff --git a/tools/testing/memblock/linux/init.h b/tools/testing/memblock/linux/init.h index 4aeddce53310..bd74abc5cba6 100644 --- a/tools/testing/memblock/linux/init.h +++ b/tools/testing/memblock/linux/init.h @@ -3,7 +3,6 @@ #define _LINUX_INIT_H #include -#include #define __section(section) __attribute__((__section__(section))) -- cgit v1.2.3 From e2ae9cf39f8806d2d8f5eb0a22ba511804a804ec Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 12 Jul 2024 03:51:38 +0000 Subject: tools/testing: abstract two init.h into common include directory Currently we have two test suits define its own init.h. This is a little redundant. Let's create a init.h in common include directory and merge these two into it. Signed-off-by: Wei Yang CC: Mike Rapoport CC: Liam R. Howlett Link: https://lore.kernel.org/r/20240712035138.24674-3-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- tools/include/linux/compiler.h | 4 ---- tools/include/linux/init.h | 40 +++++++++++++++++++++++++++++++++++ tools/testing/memblock/linux/init.h | 32 ---------------------------- tools/testing/radix-tree/linux/init.h | 2 -- tools/testing/radix-tree/maple.c | 2 +- 5 files changed, 41 insertions(+), 39 deletions(-) create mode 100644 tools/include/linux/init.h delete mode 100644 tools/testing/memblock/linux/init.h delete mode 100644 tools/testing/radix-tree/linux/init.h (limited to 'tools/include') diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 6f7f22ac9da5..4b5a45919ff8 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -126,10 +126,6 @@ # define unlikely(x) __builtin_expect(!!(x), 0) #endif -#ifndef __init -# define __init -#endif - #include /* diff --git a/tools/include/linux/init.h b/tools/include/linux/init.h new file mode 100644 index 000000000000..7ed407976dda --- /dev/null +++ b/tools/include/linux/init.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_INIT_H_ +#define _TOOLS_LINUX_INIT_H_ + +#include + +#ifndef __init +# define __init +#endif + +#ifndef __exit +# define __exit +#endif + +#define __section(section) __attribute__((__section__(section))) + +#define __initconst +#define __meminit +#define __meminitdata +#define __refdata +#define __initdata + +struct obs_kernel_param { + const char *str; + int (*setup_func)(char *st); + int early; +}; + +#define __setup_param(str, unique_id, fn, early) \ + static const char __setup_str_##unique_id[] __initconst \ + __aligned(1) = str; \ + static struct obs_kernel_param __setup_##unique_id \ + __used __section(".init.setup") \ + __aligned(__alignof__(struct obs_kernel_param)) = \ + { __setup_str_##unique_id, fn, early } + +#define early_param(str, fn) \ + __setup_param(str, fn, fn, 1) + +#endif /* _TOOLS_LINUX_INIT_H_ */ diff --git a/tools/testing/memblock/linux/init.h b/tools/testing/memblock/linux/init.h deleted file mode 100644 index bd74abc5cba6..000000000000 --- a/tools/testing/memblock/linux/init.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_INIT_H -#define _LINUX_INIT_H - -#include - -#define __section(section) __attribute__((__section__(section))) - -#define __initconst -#define __meminit -#define __meminitdata -#define __refdata -#define __initdata - -struct obs_kernel_param { - const char *str; - int (*setup_func)(char *st); - int early; -}; - -#define __setup_param(str, unique_id, fn, early) \ - static const char __setup_str_##unique_id[] __initconst \ - __aligned(1) = str; \ - static struct obs_kernel_param __setup_##unique_id \ - __used __section(".init.setup") \ - __aligned(__alignof__(struct obs_kernel_param)) = \ - { __setup_str_##unique_id, fn, early } - -#define early_param(str, fn) \ - __setup_param(str, fn, fn, 1) - -#endif diff --git a/tools/testing/radix-tree/linux/init.h b/tools/testing/radix-tree/linux/init.h deleted file mode 100644 index 81563c3dfce7..000000000000 --- a/tools/testing/radix-tree/linux/init.h +++ /dev/null @@ -1,2 +0,0 @@ -#define __init -#define __exit diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index cd1cf05503b4..3437292babff 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -14,7 +14,7 @@ #include "test.h" #include #include -#include "linux/init.h" +#include #define module_init(x) #define module_exit(x) -- cgit v1.2.3 From 39f64e402f659890a99d415eaf63a01f3b80a9a8 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 6 Aug 2024 01:03:15 +0000 Subject: memblock test: fix implicit declaration of function 'virt_to_phys' Commit 94ff46de4a73 ("memblock: Move late alloc warning down to phys alloc") introduce the usage of virt_to_phys(), which is not defined in memblock tests. Define it in mm.h to fix the build error. Signed-off-by: Wei Yang Link: https://lore.kernel.org/r/20240806010319.29194-1-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- tools/include/linux/mm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools/include') diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h index cad4f2927983..677c37e4a18c 100644 --- a/tools/include/linux/mm.h +++ b/tools/include/linux/mm.h @@ -25,6 +25,12 @@ static inline void *phys_to_virt(unsigned long address) return __va(address); } +#define virt_to_phys virt_to_phys +static inline phys_addr_t virt_to_phys(volatile void *address) +{ + return (phys_addr_t)address; +} + void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid); static inline void totalram_pages_inc(void) -- cgit v1.2.3 From 9f76c2ade323121f9006f6a529e0795317e16b5c Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 6 Aug 2024 01:03:16 +0000 Subject: memblock test: add the definition of __setup() Commit 1e4c64b71c9b ("mm/memblock: Add "reserve_mem" to reserved named memory at boot up") introduce usage of __setup(), which is not defined in memblock test. Define it in init.h to fix the build error. Signed-off-by: Wei Yang Link: https://lore.kernel.org/r/20240806010319.29194-2-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- tools/include/linux/init.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools/include') diff --git a/tools/include/linux/init.h b/tools/include/linux/init.h index 7ed407976dda..51b5cde28639 100644 --- a/tools/include/linux/init.h +++ b/tools/include/linux/init.h @@ -34,6 +34,9 @@ struct obs_kernel_param { __aligned(__alignof__(struct obs_kernel_param)) = \ { __setup_str_##unique_id, fn, early } +#define __setup(str, fn) \ + __setup_param(str, fn, fn, 0) + #define early_param(str, fn) \ __setup_param(str, fn, fn, 1) -- cgit v1.2.3 From a88cde5769d523e4ae6aad61237e4a5f6bd2309a Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 6 Aug 2024 01:03:17 +0000 Subject: memblock test: fix implicit declaration of function 'memparse' Commit 1e4c64b71c9b ("mm/memblock: Add "reserve_mem" to reserved named memory at boot up") introduce the usage of memparse(), which is not defined in memblock test. Add the definition and link it to fix the build. Signed-off-by: Wei Yang Link: https://lore.kernel.org/r/20240806010319.29194-3-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- tools/include/linux/string.h | 1 + tools/lib/cmdline.c | 53 +++++++++++++++++++++++++++++++++++ tools/testing/memblock/Makefile | 2 +- tools/testing/memblock/linux/kernel.h | 1 + 4 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 tools/lib/cmdline.c (limited to 'tools/include') diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index db5c99318c79..fb8eda3019b5 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -47,4 +47,5 @@ extern char * __must_check skip_spaces(const char *); extern char *strim(char *); extern void *memchr_inv(const void *start, int c, size_t bytes); +extern unsigned long long memparse(const char *ptr, char **retptr); #endif /* _TOOLS_LINUX_STRING_H_ */ diff --git a/tools/lib/cmdline.c b/tools/lib/cmdline.c new file mode 100644 index 000000000000..c85f00f43c5e --- /dev/null +++ b/tools/lib/cmdline.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * From lib/cmdline.c + */ +#include + +#if __has_attribute(__fallthrough__) +# define fallthrough __attribute__((__fallthrough__)) +#else +# define fallthrough do {} while (0) /* fallthrough */ +#endif + +unsigned long long memparse(const char *ptr, char **retptr) +{ + char *endptr; /* local pointer to end of parsed string */ + + unsigned long long ret = strtoll(ptr, &endptr, 0); + + switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + fallthrough; + case 'P': + case 'p': + ret <<= 10; + fallthrough; + case 'T': + case 't': + ret <<= 10; + fallthrough; + case 'G': + case 'g': + ret <<= 10; + fallthrough; + case 'M': + case 'm': + ret <<= 10; + fallthrough; + case 'K': + case 'k': + ret <<= 10; + endptr++; + fallthrough; + default: + break; + } + + if (retptr) + *retptr = endptr; + + return ret; +} diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile index 7a1ca694a982..d80982ccdc20 100644 --- a/tools/testing/memblock/Makefile +++ b/tools/testing/memblock/Makefile @@ -8,7 +8,7 @@ LDFLAGS += -fsanitize=address -fsanitize=undefined TARGETS = main TEST_OFILES = tests/alloc_nid_api.o tests/alloc_helpers_api.o tests/alloc_api.o \ tests/basic_api.o tests/common.o tests/alloc_exact_nid_api.o -DEP_OFILES = memblock.o lib/slab.o mmzone.o slab.o +DEP_OFILES = memblock.o lib/slab.o mmzone.o slab.o cmdline.o OFILES = main.o $(DEP_OFILES) $(TEST_OFILES) EXTR_SRC = ../../../mm/memblock.c diff --git a/tools/testing/memblock/linux/kernel.h b/tools/testing/memblock/linux/kernel.h index d2f148bd8902..c16d9cd2d1ae 100644 --- a/tools/testing/memblock/linux/kernel.h +++ b/tools/testing/memblock/linux/kernel.h @@ -8,5 +8,6 @@ #include #include #include +#include #endif -- cgit v1.2.3 From 9e3d665384fca2a1c56283c7a79a968243ef4614 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 6 Aug 2024 01:03:19 +0000 Subject: memblock test: fix implicit declaration of function 'strscpy' Commit 1e4c64b71c9b ("mm/memblock: Add "reserve_mem" to reserved named memory at boot up") introduce the usage of strscpy, which breaks the memblock test. Let's define it as strcpy in userspace to fix it. Signed-off-by: Wei Yang Link: https://lore.kernel.org/r/20240806010319.29194-5-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- tools/include/linux/string.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/include') diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index fb8eda3019b5..41e7fa734922 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -12,6 +12,8 @@ void argv_free(char **argv); int strtobool(const char *s, bool *res); +#define strscpy strcpy + /* * glibc based builds needs the extern while uClibc doesn't. * However uClibc headers also define __GLIBC__ hence the hack below -- cgit v1.2.3 From fbc05142ccdd0061f6d0e489608935943d2984a1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:01:40 -0700 Subject: perf tools: Add tools/include/uapi/README Write down the reason why we keep a copy of headers to the README file instead of adding it to every commit messages. Suggested-by: Jani Nikula Original-by: Arnaldo Carvalho de Melo Original-by: Ingo Molnar Signed-off-by: Namhyung Kim --- tools/include/uapi/README | 73 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 tools/include/uapi/README (limited to 'tools/include') diff --git a/tools/include/uapi/README b/tools/include/uapi/README new file mode 100644 index 000000000000..7147b1b2cb28 --- /dev/null +++ b/tools/include/uapi/README @@ -0,0 +1,73 @@ +Why we want a copy of kernel headers in tools? +============================================== + +There used to be no copies, with tools/ code using kernel headers +directly. From time to time tools/perf/ broke due to legitimate kernel +hacking. At some point Linus complained about such direct usage. Then we +adopted the current model. + +The way these headers are used in perf are not restricted to just +including them to compile something. + +There are sometimes used in scripts that convert defines into string +tables, etc, so some change may break one of these scripts, or new MSRs +may use some different #define pattern, etc. + +E.g.: + + $ ls -1 tools/perf/trace/beauty/*.sh | head -5 + tools/perf/trace/beauty/arch_errno_names.sh + tools/perf/trace/beauty/drm_ioctl.sh + tools/perf/trace/beauty/fadvise.sh + tools/perf/trace/beauty/fsconfig.sh + tools/perf/trace/beauty/fsmount.sh + $ + $ tools/perf/trace/beauty/fadvise.sh + static const char *fadvise_advices[] = { + [0] = "NORMAL", + [1] = "RANDOM", + [2] = "SEQUENTIAL", + [3] = "WILLNEED", + [4] = "DONTNEED", + [5] = "NOREUSE", + }; + $ + +The tools/perf/check-headers.sh script, part of the tools/ build +process, points out changes in the original files. + +So its important not to touch the copies in tools/ when doing changes in +the original kernel headers, that will be done later, when +check-headers.sh inform about the change to the perf tools hackers. + +Another explanation from Ingo Molnar: +It's better than all the alternatives we tried so far: + + - Symbolic links and direct #includes: this was the original approach but + was pushed back on from the kernel side, when tooling modified the + headers and broke them accidentally for kernel builds. + + - Duplicate self-defined ABI headers like glibc: double the maintenance + burden, double the chance for mistakes, plus there's no tech-driven + notification mechanism to look at new kernel side changes. + +What we are doing now is a third option: + + - A software-enforced copy-on-write mechanism of kernel headers to + tooling, driven by non-fatal warnings on the tooling side build when + kernel headers get modified: + + Warning: Kernel ABI header differences: + diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h + diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h + diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h + ... + + The tooling policy is to always pick up the kernel side headers as-is, + and integate them into the tooling build. The warnings above serve as a + notification to tooling maintainers that there's changes on the kernel + side. + +We've been using this for many years now, and it might seem hacky, but +works surprisingly well. + -- cgit v1.2.3 From aef21f6b6a4aae648c890e74c2322d10ab267249 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 09:59:26 -0700 Subject: tools/include: Sync uapi/drm/i915_drm.h with the kernel sources To pick up changes from: 0f1bb41bf396 drm/i915: Support replaying GPU hangs with captured context image This should be used to beautify DRM syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Namhyung Kim --- tools/include/uapi/drm/i915_drm.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'tools/include') diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index d4d86e566e07..535cb68fdb5c 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -2163,6 +2163,15 @@ struct drm_i915_gem_context_param { * supports this per context flag. */ #define I915_CONTEXT_PARAM_LOW_LATENCY 0xe + +/* + * I915_CONTEXT_PARAM_CONTEXT_IMAGE: + * + * Allows userspace to provide own context images. + * + * Note that this is a debug API not available on production kernel builds. + */ +#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf /* Must be kept compact -- no holes and well documented */ /** @value: Context parameter value to be set or queried */ @@ -2564,6 +2573,24 @@ struct i915_context_param_engines { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +struct i915_gem_context_param_context_image { + /** @engine: Engine class & instance to be configured. */ + struct i915_engine_class_instance engine; + + /** @flags: One of the supported flags or zero. */ + __u32 flags; +#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0) + + /** @size: Size of the image blob pointed to by @image. */ + __u32 size; + + /** @mbz: Must be zero. */ + __u32 mbz; + + /** @image: Userspace memory containing the context image. */ + __u64 image; +} __attribute__((packed)); + /** * struct drm_i915_gem_context_create_ext_setparam - Context parameter * to set or query during context creation. -- cgit v1.2.3 From a625df3995c31a5d8cf46f2337b207e93bef9bdd Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:07:50 -0700 Subject: tools/include: Sync uapi/linux/kvm.h with the kernel sources And other arch-specific UAPI headers to pick up changes from: 4b23e0c199b2 KVM: Ensure new code that references immediate_exit gets extra scrutiny 85542adb65ec KVM: x86: Add KVM_RUN_X86_GUEST_MODE kvm_run flag 6fef518594bc KVM: x86: Add a capability to configure bus frequency for APIC timer 34ff65901735 x86/sev: Use kernel provided SVSM Calling Areas 5dcc1e76144f Merge tag 'kvm-x86-misc-6.11' of https://github.com/kvm-x86/linux into HEAD 9a0d2f4995dd KVM: PPC: Book3S HV: Add one-reg interface for HASHPKEYR register e9eb790b2557 KVM: PPC: Book3S HV: Add one-reg interface for HASHKEYR register 1a1e6865f516 KVM: PPC: Book3S HV: Add one-reg interface for DEXCR register This should be used to beautify KVM syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h diff -u tools/arch/powerpc/include/uapi/asm/kvm.h arch/powerpc/include/uapi/asm/kvm.h Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: Paolo Bonzini Cc: kvm@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/arch/powerpc/include/uapi/asm/kvm.h | 3 ++ tools/arch/x86/include/uapi/asm/kvm.h | 49 +++++++++++++++++++++++++++++++ tools/arch/x86/include/uapi/asm/svm.h | 1 + tools/include/uapi/linux/kvm.h | 17 ++++++++++- 4 files changed, 69 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 1691297a766a..eaeda001784e 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -645,6 +645,9 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) #define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) #define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) +#define KVM_REG_PPC_DEXCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc6) +#define KVM_REG_PPC_HASHKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc7) +#define KVM_REG_PPC_HASHPKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc8) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 9fae1b73b529..bf57a824f722 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -106,6 +106,7 @@ struct kvm_ioapic_state { #define KVM_RUN_X86_SMM (1 << 0) #define KVM_RUN_X86_BUS_LOCK (1 << 1) +#define KVM_RUN_X86_GUEST_MODE (1 << 2) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { @@ -697,6 +698,11 @@ enum sev_cmd_id { /* Second time is the charm; improved versions of the above ioctls. */ KVM_SEV_INIT2, + /* SNP-specific commands */ + KVM_SEV_SNP_LAUNCH_START = 100, + KVM_SEV_SNP_LAUNCH_UPDATE, + KVM_SEV_SNP_LAUNCH_FINISH, + KVM_SEV_NR_MAX, }; @@ -824,6 +830,48 @@ struct kvm_sev_receive_update_data { __u32 pad2; }; +struct kvm_sev_snp_launch_start { + __u64 policy; + __u8 gosvw[16]; + __u16 flags; + __u8 pad0[6]; + __u64 pad1[4]; +}; + +/* Kept in sync with firmware values for simplicity. */ +#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 +#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 +#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 +#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 +#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 + +struct kvm_sev_snp_launch_update { + __u64 gfn_start; + __u64 uaddr; + __u64 len; + __u8 type; + __u8 pad0; + __u16 flags; + __u32 pad1; + __u64 pad2[4]; +}; + +#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 +#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 +#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 + +struct kvm_sev_snp_launch_finish { + __u64 id_block_uaddr; + __u64 id_auth_uaddr; + __u8 id_block_en; + __u8 auth_key_en; + __u8 vcek_disabled; + __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; + __u8 pad0[3]; + __u16 flags; + __u64 pad1[4]; +}; + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) @@ -874,5 +922,6 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SW_PROTECTED_VM 1 #define KVM_X86_SEV_VM 2 #define KVM_X86_SEV_ES_VM 3 +#define KVM_X86_SNP_VM 4 #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 80e1df482337..1814b413fd57 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -115,6 +115,7 @@ #define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 +#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index e5af8c692dc0..637efc055145 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -192,11 +192,24 @@ struct kvm_xen_exit { /* Flags that describe what fields in emulation_failure hold valid data. */ #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) +/* + * struct kvm_run can be modified by userspace at any time, so KVM must be + * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM() + * renames fields in struct kvm_run from to __unsafe when + * compiled into the kernel, ensuring that any use within KVM is obvious and + * gets extra scrutiny. + */ +#ifdef __KERNEL__ +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe +#else +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol +#endif + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 immediate_exit; + __u8 HINT_UNSAFE_IN_KVM(immediate_exit); __u8 padding1[6]; /* out */ @@ -918,6 +931,8 @@ struct kvm_enable_cap { #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 #define KVM_CAP_PRE_FAULT_MEMORY 236 +#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 +#define KVM_CAP_X86_GUEST_MODE 238 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3 From 8ec9497d3ef34fab216e277eca5035811f06b421 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:07:50 -0700 Subject: tools/include: Sync uapi/linux/perf.h with the kernel sources To pick up changes from: 608f6976c309 perf/x86/intel: Support new data source for Lunar Lake This should be used to beautify perf syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Mark Rutland Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Ian Rogers Cc: Adrian Hunter Cc: "Liang, Kan" Cc: linux-perf-users@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/perf_event.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 3a64499b0f5d..4842c36fdf80 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1349,12 +1349,14 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0x7 available */ +#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ +/* 0x7 available */ #define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ #define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ +#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ #define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ #define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ -- cgit v1.2.3 From 568901e709d7fa564dfdc75816ea59fec65d20a0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:07:50 -0700 Subject: tools/include: Sync uapi/asm-generic/unistd.h with the kernel sources And arch syscall tables to pick up changes from: b1e31c134a8a powerpc: restore some missing spu syscalls d3882564a77c syscalls: fix compat_sys_io_pgetevents_time64 usage 54233a425403 uretprobe: change syscall number, again 63ded110979b uprobe: Change uretprobe syscall scope and number 9142be9e6443 x86/syscall: Mark exit[_group] syscall handlers __noreturn 9aae1baa1c5d x86, arm: Add missing license tag to syscall tables files 5c28424e9a34 syscalls: Fix to add sys_uretprobe to syscall.tbl 190fec72df4a uprobe: Wire up uretprobe system call This should be used to beautify syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: Arnd Bergmann Cc: linux-arch@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/asm-generic/unistd.h | 2 +- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 6 +++++- tools/perf/arch/s390/entry/syscalls/syscall.tbl | 2 +- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 8 +++++--- 4 files changed, 12 insertions(+), 6 deletions(-) (limited to 'tools/include') diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index a00d53d02723..5bf6148cac2b 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -737,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64) #define __NR_ppoll_time64 414 __SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64) #define __NR_io_pgetevents_time64 416 -__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64) #define __NR_recvmmsg_time64 417 __SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64) #define __NR_mq_timedsend_time64 418 diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 3656f1ca7a21..ebae8415dfbb 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -230,8 +230,10 @@ 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64 179 64 pread64 sys_pread64 +179 spu pread64 sys_pread64 180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64 180 64 pwrite64 sys_pwrite64 +180 spu pwrite64 sys_pwrite64 181 common chown sys_chown 182 common getcwd sys_getcwd 183 common capget sys_capget @@ -246,6 +248,7 @@ 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead 191 64 readahead sys_readahead +191 spu readahead sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64 194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64 @@ -293,6 +296,7 @@ 232 nospu set_tid_address sys_set_tid_address 233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 233 64 fadvise64 sys_fadvise64 +233 spu fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group 235 nospu lookup_dcookie sys_ni_syscall 236 common epoll_create sys_epoll_create @@ -502,7 +506,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index bd0fee24ad10..01071182763e 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -418,7 +418,7 @@ 412 32 utimensat_time64 - sys_utimensat 413 32 pselect6_time64 - compat_sys_pselect6_time64 414 32 ppoll_time64 - compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 - sys_io_pgetevents +416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 - sys_mq_timedsend 419 32 mq_timedreceive_time64 - sys_mq_timedreceive diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index a396f6e6ab5b..7093ee21c0d1 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -1,8 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # # 64-bit system call numbers and entry vectors # # The format is: -# +# [ [noreturn]] # # The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls # @@ -68,7 +69,7 @@ 57 common fork sys_fork 58 common vfork sys_vfork 59 64 execve sys_execve -60 common exit sys_exit +60 common exit sys_exit - noreturn 61 common wait4 sys_wait4 62 common kill sys_kill 63 common uname sys_newuname @@ -239,7 +240,7 @@ 228 common clock_gettime sys_clock_gettime 229 common clock_getres sys_clock_getres 230 common clock_nanosleep sys_clock_nanosleep -231 common exit_group sys_exit_group +231 common exit_group sys_exit_group - noreturn 232 common epoll_wait sys_epoll_wait 233 common epoll_ctl sys_epoll_ctl 234 common tgkill sys_tgkill @@ -343,6 +344,7 @@ 332 common statx sys_statx 333 common io_pgetevents sys_io_pgetevents 334 common rseq sys_rseq +335 common uretprobe sys_uretprobe # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal sys_pidfd_send_signal -- cgit v1.2.3 From ed86525f1f4b738bae75c73e89f25430bd0af1b0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:07:50 -0700 Subject: tools/include: Sync network socket headers with the kernel sources To pick up changes from: d25a92ccae6b net/smc: Introduce IPPROTO_SMC 060f4ba6e403 io_uring/net: move charging socket out of zc io_uring bb6aaf736680 net: Split a __sys_listen helper for io_uring dc2e77979412 net: Split a __sys_bind helper for io_uring This should be used to beautify socket syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/in.h include/uapi/linux/in.h diff -u tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: netdev@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/in.h | 2 ++ tools/perf/trace/beauty/include/linux/socket.h | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index e682ab628dfa..d358add1611c 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -81,6 +81,8 @@ enum { #define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_SMC = 256, /* Shared Memory Communications */ +#define IPPROTO_SMC IPPROTO_SMC IPPROTO_MPTCP = 262, /* Multipath TCP connection */ #define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 89d16b90370b..df9cdb8bbfb8 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -76,7 +76,7 @@ struct msghdr { __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ struct ubuf_info *msg_ubuf; - int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb, + int (*sg_from_iter)(struct sk_buff *skb, struct iov_iter *from, size_t length); }; @@ -442,11 +442,14 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, extern int __sys_socket(int family, int type, int protocol); extern struct file *__sys_socket_file(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); +extern int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address, + int addrlen); extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, int addrlen, int file_flags); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); extern int __sys_listen(int fd, int backlog); +extern int __sys_listen_socket(struct socket *sock, int backlog); extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, -- cgit v1.2.3 From 845295f4004c7e1591bab4bad01b51f37d32272f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 6 Aug 2024 12:07:50 -0700 Subject: tools/include: Sync filesystem headers with the kernel sources To pick up changes from: 0f9ca80fa4f9 fs: Add initial atomic write support info to statx f9af549d1fd3 fs: export mount options via statmount() 0a3deb11858a fs: Allow listmount() in foreign mount namespace 09b31295f833 fs: export the mount ns id via statmount d04bccd8c19d listmount: allow listing in reverse order bfc69fd05ef9 fs/procfs: add build ID fetching to PROCMAP_QUERY API ed5d583a88a9 fs/procfs: implement efficient VMA querying API for /proc//maps This should be used to beautify FS syscall arguments and it addresses these tools/perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/stat.h include/uapi/linux/stat.h diff -u tools/perf/trace/beauty/include/uapi/linux/fs.h include/uapi/linux/fs.h diff -u tools/perf/trace/beauty/include/uapi/linux/mount.h include/uapi/linux/mount.h diff -u tools/perf/trace/beauty/include/uapi/linux/stat.h include/uapi/linux/stat.h Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: Alexander Viro Cc: Christian Brauner Cc: Jan Kara Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/stat.h | 12 +- tools/perf/trace/beauty/include/uapi/linux/fs.h | 163 ++++++++++++++++++++- tools/perf/trace/beauty/include/uapi/linux/mount.h | 10 +- tools/perf/trace/beauty/include/uapi/linux/stat.h | 12 +- 4 files changed, 189 insertions(+), 8 deletions(-) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 67626d535316..887a25286441 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 45e4e64fd664..753971770733 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -329,12 +329,17 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO negation of O_APPEND */ #define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) +/* Atomic Write */ +#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND | RWF_NOAPPEND) + RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC) + +#define PROCFS_IOCTL_MAGIC 'f' /* Pagemap ioctl */ -#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) +#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg) /* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */ #define PAGE_IS_WPALLOWED (1 << 0) @@ -393,4 +398,158 @@ struct pm_scan_arg { __u64 return_mask; }; +/* /proc//maps ioctl */ +#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query) + +enum procmap_query_flags { + /* + * VMA permission flags. + * + * Can be used as part of procmap_query.query_flags field to look up + * only VMAs satisfying specified subset of permissions. E.g., specifying + * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs, + * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only + * return read/write VMAs, though both executable/non-executable and + * private/shared will be ignored. + * + * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags + * field to specify actual VMA permissions. + */ + PROCMAP_QUERY_VMA_READABLE = 0x01, + PROCMAP_QUERY_VMA_WRITABLE = 0x02, + PROCMAP_QUERY_VMA_EXECUTABLE = 0x04, + PROCMAP_QUERY_VMA_SHARED = 0x08, + /* + * Query modifier flags. + * + * By default VMA that covers provided address is returned, or -ENOENT + * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest + * VMA with vma_start > addr will be returned if no covering VMA is + * found. + * + * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that + * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA + * to iterate all VMAs with file backing. + */ + PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10, + PROCMAP_QUERY_FILE_BACKED_VMA = 0x20, +}; + +/* + * Input/output argument structured passed into ioctl() call. It can be used + * to query a set of VMAs (Virtual Memory Areas) of a process. + * + * Each field can be one of three kinds, marked in a short comment to the + * right of the field: + * - "in", input argument, user has to provide this value, kernel doesn't modify it; + * - "out", output argument, kernel sets this field with VMA data; + * - "in/out", input and output argument; user provides initial value (used + * to specify maximum allowable buffer size), and kernel sets it to actual + * amount of data written (or zero, if there is no data). + * + * If matching VMA is found (according to criterias specified by + * query_addr/query_flags, all the out fields are filled out, and ioctl() + * returns 0. If there is no matching VMA, -ENOENT will be returned. + * In case of any other error, negative error code other than -ENOENT is + * returned. + * + * Most of the data is similar to the one returned as text in /proc//maps + * file, but procmap_query provides more querying flexibility. There are no + * consistency guarantees between subsequent ioctl() calls, but data returned + * for matched VMA is self-consistent. + */ +struct procmap_query { + /* Query struct size, for backwards/forward compatibility */ + __u64 size; + /* + * Query flags, a combination of enum procmap_query_flags values. + * Defines query filtering and behavior, see enum procmap_query_flags. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_flags; /* in */ + /* + * Query address. By default, VMA that covers this address will + * be looked up. PROCMAP_QUERY_* flags above modify this default + * behavior further. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_addr; /* in */ + /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */ + __u64 vma_start; /* out */ + __u64 vma_end; /* out */ + /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */ + __u64 vma_flags; /* out */ + /* VMA backing page size granularity. */ + __u64 vma_page_size; /* out */ + /* + * VMA file offset. If VMA has file backing, this specifies offset + * within the file that VMA's start address corresponds to. + * Is set to zero if VMA has no backing file. + */ + __u64 vma_offset; /* out */ + /* Backing file's inode number, or zero, if VMA has no backing file. */ + __u64 inode; /* out */ + /* Backing file's device major/minor number, or zero, if VMA has no backing file. */ + __u32 dev_major; /* out */ + __u32 dev_minor; /* out */ + /* + * If set to non-zero value, signals the request to return VMA name + * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix + * appended, if file was unlinked from FS) for matched VMA. VMA name + * can also be some special name (e.g., "[heap]", "[stack]") or could + * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME). + * + * Kernel will set this field to zero, if VMA has no associated name. + * Otherwise kernel will return actual amount of bytes filled in + * user-supplied buffer (see vma_name_addr field below), including the + * terminating zero. + * + * If VMA name is longer that user-supplied maximum buffer size, + * -E2BIG error is returned. + * + * If this field is set to non-zero value, vma_name_addr should point + * to valid user space memory buffer of at least vma_name_size bytes. + * If set to zero, vma_name_addr should be set to zero as well + */ + __u32 vma_name_size; /* in/out */ + /* + * If set to non-zero value, signals the request to extract and return + * VMA's backing file's build ID, if the backing file is an ELF file + * and it contains embedded build ID. + * + * Kernel will set this field to zero, if VMA has no backing file, + * backing file is not an ELF file, or ELF file has no build ID + * embedded. + * + * Build ID is a binary value (not a string). Kernel will set + * build_id_size field to exact number of bytes used for build ID. + * If build ID is requested and present, but needs more bytes than + * user-supplied maximum buffer size (see build_id_addr field below), + * -E2BIG error will be returned. + * + * If this field is set to non-zero value, build_id_addr should point + * to valid user space memory buffer of at least build_id_size bytes. + * If set to zero, build_id_addr should be set to zero as well + */ + __u32 build_id_size; /* in/out */ + /* + * User-supplied address of a buffer of at least vma_name_size bytes + * for kernel to fill with matched VMA's name (see vma_name_size field + * description above for details). + * + * Should be set to zero if VMA name should not be returned. + */ + __u64 vma_name_addr; /* in */ + /* + * User-supplied address of a buffer of at least build_id_size bytes + * for kernel to fill with matched VMA's ELF build ID, if available + * (see build_id_size field description above for details). + * + * Should be set to zero if build ID should not be returned. + */ + __u64 build_id_addr; /* in */ +}; + #endif /* _UAPI_LINUX_FS_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h index ad5478dbad00..225bc366ffcb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/mount.h +++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h @@ -154,7 +154,7 @@ struct mount_attr { */ struct statmount { __u32 size; /* Total size, including strings */ - __u32 __spare1; + __u32 mnt_opts; /* [str] Mount options of the mount */ __u64 mask; /* What results were written */ __u32 sb_dev_major; /* Device ID */ __u32 sb_dev_minor; @@ -172,7 +172,8 @@ struct statmount { __u64 propagate_from; /* Propagation from in current namespace */ __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ - __u64 __spare2[50]; + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u64 __spare2[49]; char str[]; /* Variable size part containing strings */ }; @@ -188,10 +189,12 @@ struct mnt_id_req { __u32 spare; __u64 mnt_id; __u64 param; + __u64 mnt_ns_id; }; /* List of all mnt_id_req versions. */ #define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ /* * @mask bits for statmount(2) @@ -202,10 +205,13 @@ struct mnt_id_req { #define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ /* * Special @mnt_id values that can be passed to listmount */ #define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h index 67626d535316..887a25286441 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/stat.h +++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ -- cgit v1.2.3 From 3882dccf48f9fbe787b5df3187d708ef348ac860 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Thu, 8 Aug 2024 16:05:57 +0100 Subject: bpf/bpf_get,set_sockopt: add option to set TCP-BPF sock ops flags Currently the only opportunity to set sock ops flags dictating which callbacks fire for a socket is from within a TCP-BPF sockops program. This is problematic if the connection is already set up as there is no further chance to specify callbacks for that socket. Add TCP_BPF_SOCK_OPS_CB_FLAGS to bpf_setsockopt() and bpf_getsockopt() to allow users to specify callbacks later, either via an iterator over sockets or via a socket-specific program triggered by a setsockopt() on the socket. Previous discussion on this here [1]. [1] https://lore.kernel.org/bpf/f42f157b-6e52-dd4d-3d97-9b86c84c0b00@oracle.com/ Signed-off-by: Alan Maguire Link: https://lore.kernel.org/r/20240808150558.1035626-2-alan.maguire@oracle.com Signed-off-by: Martin KaFai Lau --- include/uapi/linux/bpf.h | 3 ++- net/core/filter.c | 16 ++++++++++++++++ tools/include/uapi/linux/bpf.h | 3 ++- 3 files changed, 20 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 35bcf52dbc65..e05b39e39c3f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2851,7 +2851,7 @@ union bpf_attr { * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**, * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**, * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**, - * **TCP_BPF_RTO_MIN**. + * **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports the following *optname*\ s: * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**. @@ -7080,6 +7080,7 @@ enum { TCP_BPF_SYN = 1005, /* Copy the TCP header */ TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ + TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */ }; enum { diff --git a/net/core/filter.c b/net/core/filter.c index f3c72cf86099..d96a50f3f016 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5278,6 +5278,11 @@ static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname, return -EINVAL; inet_csk(sk)->icsk_rto_min = timeout; break; + case TCP_BPF_SOCK_OPS_CB_FLAGS: + if (val & ~(BPF_SOCK_OPS_ALL_CB_FLAGS)) + return -EINVAL; + tp->bpf_sock_ops_cb_flags = val; + break; default: return -EINVAL; } @@ -5366,6 +5371,17 @@ static int sol_tcp_sockopt(struct sock *sk, int optname, if (*optlen < 1) return -EINVAL; break; + case TCP_BPF_SOCK_OPS_CB_FLAGS: + if (*optlen != sizeof(int)) + return -EINVAL; + if (getopt) { + struct tcp_sock *tp = tcp_sk(sk); + int cb_flags = tp->bpf_sock_ops_cb_flags; + + memcpy(optval, &cb_flags, *optlen); + return 0; + } + return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen); default: if (getopt) return -EINVAL; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 35bcf52dbc65..e05b39e39c3f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2851,7 +2851,7 @@ union bpf_attr { * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**, * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**, * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**, - * **TCP_BPF_RTO_MIN**. + * **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports the following *optname*\ s: * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**. @@ -7080,6 +7080,7 @@ enum { TCP_BPF_SYN = 1005, /* Copy the TCP header */ TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ + TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */ }; enum { -- cgit v1.2.3 From 754283ce8326fdce75d589c99cc58456199c123d Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sun, 28 Jul 2024 22:34:11 +0200 Subject: tools/nolibc: pass argc, argv and envp to constructors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 2005 glibc has passed argc, argv, and envp to all constructors. As it is cheap and easy to do so, mirror that behaviour in nolibc. This makes it easier to migrate applications to nolibc. Link: https://lore.kernel.org/r/20240728-nolibc-constructor-args-v1-1-36d0bf5cd4c0@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/crt.h | 23 ++++++++++++----------- tools/testing/selftests/nolibc/nolibc-test.c | 5 +++-- 2 files changed, 15 insertions(+), 13 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h index 43b551468c2a..ac291574f6c0 100644 --- a/tools/include/nolibc/crt.h +++ b/tools/include/nolibc/crt.h @@ -13,11 +13,11 @@ const unsigned long *_auxv __attribute__((weak)); static void __stack_chk_init(void); static void exit(int); -extern void (*const __preinit_array_start[])(void) __attribute__((weak)); -extern void (*const __preinit_array_end[])(void) __attribute__((weak)); +extern void (*const __preinit_array_start[])(int, char **, char**) __attribute__((weak)); +extern void (*const __preinit_array_end[])(int, char **, char**) __attribute__((weak)); -extern void (*const __init_array_start[])(void) __attribute__((weak)); -extern void (*const __init_array_end[])(void) __attribute__((weak)); +extern void (*const __init_array_start[])(int, char **, char**) __attribute__((weak)); +extern void (*const __init_array_end[])(int, char **, char**) __attribute__((weak)); extern void (*const __fini_array_start[])(void) __attribute__((weak)); extern void (*const __fini_array_end[])(void) __attribute__((weak)); @@ -29,7 +29,8 @@ void _start_c(long *sp) char **argv; char **envp; int exitcode; - void (* const *func)(void); + void (* const *ctor_func)(int, char **, char **); + void (* const *dtor_func)(void); const unsigned long *auxv; /* silence potential warning: conflicting types for 'main' */ int _nolibc_main(int, char **, char **) __asm__ ("main"); @@ -66,16 +67,16 @@ void _start_c(long *sp) ; _auxv = auxv; - for (func = __preinit_array_start; func < __preinit_array_end; func++) - (*func)(); - for (func = __init_array_start; func < __init_array_end; func++) - (*func)(); + for (ctor_func = __preinit_array_start; ctor_func < __preinit_array_end; ctor_func++) + (*ctor_func)(argc, argv, envp); + for (ctor_func = __init_array_start; ctor_func < __init_array_end; ctor_func++) + (*ctor_func)(argc, argv, envp); /* go to application */ exitcode = _nolibc_main(argc, argv, envp); - for (func = __fini_array_end; func > __fini_array_start;) - (*--func)(); + for (dtor_func = __fini_array_end; dtor_func > __fini_array_start;) + (*--dtor_func)(); exit(exitcode); } diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 093d0512f4c5..0800b10fc3f7 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -686,9 +686,10 @@ static void constructor1(void) } __attribute__((constructor)) -static void constructor2(void) +static void constructor2(int argc, char **argv, char **envp) { - constructor_test_value *= 2; + if (argc && argv && envp) + constructor_test_value *= 2; } int run_startup(int min, int max) -- cgit v1.2.3 From 55850eb4e582f0696f40b8315ac31a45d6a4955e Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 7 Aug 2024 23:51:37 +0200 Subject: tools/nolibc: arm: use clang-compatible asm syntax MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clang assembler rejects the current syntax. Switch to a syntax accepted by both GCC and clang. Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20240807-nolibc-llvm-v2-1-c20f2f5fc7c2@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/arch-arm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h index cae4afa7c1c7..d1c19d973e55 100644 --- a/tools/include/nolibc/arch-arm.h +++ b/tools/include/nolibc/arch-arm.h @@ -188,8 +188,8 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) { __asm__ volatile ( - "mov %r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */ - "and ip, %r0, #-8\n" /* sp must be 8-byte aligned in the callee */ + "mov r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */ + "and ip, r0, #-8\n" /* sp must be 8-byte aligned in the callee */ "mov sp, ip\n" "bl _start_c\n" /* transfer to c runtime */ ); -- cgit v1.2.3 From 0daf8c86a45163f35b8d4f7795068c2511dd0ad9 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 7 Aug 2024 23:51:38 +0200 Subject: tools/nolibc: mips: load current function to $t9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MIPS calling convention requires the address of the current function to be available in $t9. This was not done so far. For GCC this seems to have worked, but when compiled with clang the executable segfault instantly. Properly load the address of _start_c() into $t9 before calling it. Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20240807-nolibc-llvm-v2-2-c20f2f5fc7c2@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/arch-mips.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 62cc50ef3288..a2ee77ed2fbb 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -194,7 +194,9 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ "li $t0, -8\n" "and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */ "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ - "jal _start_c\n" /* transfer to c runtime */ + "lui $t9, %hi(_start_c)\n" /* ABI requires current function address in $t9 */ + "ori $t9, %lo(_start_c)\n" + "jalr $t9\n" /* transfer to c runtime */ " nop\n" /* delayed slot */ ".set pop\n" ); -- cgit v1.2.3 From 1daea158d0aae0770371f3079305a29fdb66829e Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 7 Aug 2024 23:51:39 +0200 Subject: tools/nolibc: powerpc: limit stack-protector workaround to GCC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As mentioned in the comment, the workaround for __attribute__((no_stack_protector)) is only necessary on GCC. Avoid applying the workaround on clang, as clang does not recognize __attribute__((__optimize__)) and would fail. Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20240807-nolibc-llvm-v2-3-c20f2f5fc7c2@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/arch-powerpc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h index ac212e6185b2..41ebd394b90c 100644 --- a/tools/include/nolibc/arch-powerpc.h +++ b/tools/include/nolibc/arch-powerpc.h @@ -172,7 +172,7 @@ _ret; \ }) -#ifndef __powerpc64__ +#if !defined(__powerpc64__) && !defined(__clang__) /* FIXME: For 32-bit PowerPC, with newer gcc compilers (e.g. gcc 13.1.0), * "omit-frame-pointer" fails with __attribute__((no_stack_protector)) but * works with __attribute__((__optimize__("-fno-stack-protector"))) -- cgit v1.2.3 From 02a62b551cee585f7c2c93f54d1230d5714ff7d4 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 7 Aug 2024 23:51:40 +0200 Subject: tools/nolibc: compiler: introduce __nolibc_has_attribute() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent compilers support __has_attribute() to check if a certain compiler attribute is supported. Unfortunately we have to first check if __has_attribute is supported in the first place and then if a specific attribute is present. These two checks can't be folded into a single condition as that would lead to errors. Nesting the two conditions like below works, but becomes ugly as soon as #else blocks are used as those need to be duplicated for both levels of #if. #if defined __has_attribute # if __has_attribute (nonnull) # define ATTR_NONNULL __attribute__ ((nonnull)) # endif #endif Introduce a new helper which makes the usage of __has_attribute() nicer and migrate the current user to it. Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20240807-nolibc-llvm-v2-4-c20f2f5fc7c2@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/compiler.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h index beddc3665d69..1730d0454a55 100644 --- a/tools/include/nolibc/compiler.h +++ b/tools/include/nolibc/compiler.h @@ -6,20 +6,22 @@ #ifndef _NOLIBC_COMPILER_H #define _NOLIBC_COMPILER_H +#if defined(__has_attribute) +# define __nolibc_has_attribute(attr) __has_attribute(attr) +#else +# define __nolibc_has_attribute(attr) 0 +#endif + #if defined(__SSP__) || defined(__SSP_STRONG__) || defined(__SSP_ALL__) || defined(__SSP_EXPLICIT__) #define _NOLIBC_STACKPROTECTOR #endif /* defined(__SSP__) ... */ -#if defined(__has_attribute) -# if __has_attribute(no_stack_protector) -# define __no_stack_protector __attribute__((no_stack_protector)) -# else -# define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) -# endif +#if __nolibc_has_attribute(no_stack_protector) +# define __no_stack_protector __attribute__((no_stack_protector)) #else # define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) -#endif /* defined(__has_attribute) */ +#endif /* __nolibc_has_attribute(no_stack_protector) */ #endif /* _NOLIBC_COMPILER_H */ -- cgit v1.2.3 From d0f8a8973f265f6a276f99d091af99edfb2b87de Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 8 Aug 2024 00:14:13 +0000 Subject: mm/memblock: introduce a new helper memblock_estimated_nr_free_pages() During bootup, system may need the number of free pages in the whole system to do some calculation before all pages are freed to buddy system. Usually this number is get from totalram_pages(). Since we plan to move the free pages accounting in __free_pages_core(), this value may not represent total free pages at the early stage, especially when CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled. Instead of using raw memblock api, let's introduce a new helper for user to get the estimated number of free pages from memblock point of view. Signed-off-by: Wei Yang CC: David Hildenbrand Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/20240808001415.6298-1-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- include/linux/memblock.h | 1 + mm/memblock.c | 17 +++++++++++++++++ tools/include/linux/pfn.h | 1 + 3 files changed, 19 insertions(+) (limited to 'tools/include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index fc4d75c6cec3..673d5cae7c81 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -467,6 +467,7 @@ static inline __init_memblock bool memblock_bottom_up(void) phys_addr_t memblock_phys_mem_size(void); phys_addr_t memblock_reserved_size(void); +unsigned long memblock_estimated_nr_free_pages(void); phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); void memblock_enforce_memory_limit(phys_addr_t memory_limit); diff --git a/mm/memblock.c b/mm/memblock.c index 3b9dc2d89b8a..213057603b65 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1731,6 +1731,23 @@ phys_addr_t __init_memblock memblock_reserved_size(void) return memblock.reserved.total_size; } +/** + * memblock_estimated_nr_free_pages - return estimated number of free pages + * from memblock point of view + * + * During bootup, subsystems might need a rough estimate of the number of free + * pages in the whole system, before precise numbers are available from the + * buddy. Especially with CONFIG_DEFERRED_STRUCT_PAGE_INIT, the numbers + * obtained from the buddy might be very imprecise during bootup. + * + * Return: + * An estimated number of free pages from memblock point of view. + */ +unsigned long __init memblock_estimated_nr_free_pages(void) +{ + return PHYS_PFN(memblock_phys_mem_size() - memblock_reserved_size()); +} + /* lowest address */ phys_addr_t __init_memblock memblock_start_of_DRAM(void) { diff --git a/tools/include/linux/pfn.h b/tools/include/linux/pfn.h index 7512a58189eb..f77a30d70152 100644 --- a/tools/include/linux/pfn.h +++ b/tools/include/linux/pfn.h @@ -7,4 +7,5 @@ #define PFN_UP(x) (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT) #define PFN_DOWN(x) ((x) >> PAGE_SHIFT) #define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT) +#define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT)) #endif -- cgit v1.2.3 From ef32e9b6a325d1d013b30d898b4dff94082902cd Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 7 Aug 2024 23:51:41 +0200 Subject: tools/nolibc: move entrypoint specifics to compiler.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The specific attributes for the _start entrypoint are duplicated for each architecture. Deduplicate it into a dedicated #define into compiler.h. For clang compatibility, the epilogue will also need to be adapted, so move that one, too. Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20240807-nolibc-llvm-v2-5-c20f2f5fc7c2@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/arch-aarch64.h | 4 ++-- tools/include/nolibc/arch-arm.h | 4 ++-- tools/include/nolibc/arch-i386.h | 4 ++-- tools/include/nolibc/arch-loongarch.h | 4 ++-- tools/include/nolibc/arch-mips.h | 4 ++-- tools/include/nolibc/arch-powerpc.h | 4 ++-- tools/include/nolibc/arch-riscv.h | 4 ++-- tools/include/nolibc/arch-s390.h | 4 ++-- tools/include/nolibc/arch-x86_64.h | 4 ++-- tools/include/nolibc/compiler.h | 3 +++ 10 files changed, 21 insertions(+), 18 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h index b23ac1f04035..06fdef7b291a 100644 --- a/tools/include/nolibc/arch-aarch64.h +++ b/tools/include/nolibc/arch-aarch64.h @@ -142,13 +142,13 @@ }) /* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "mov x0, sp\n" /* save stack pointer to x0, as arg1 of _start_c */ "and sp, x0, -16\n" /* sp must be 16-byte aligned in the callee */ "bl _start_c\n" /* transfer to c runtime */ ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #endif /* _NOLIBC_ARCH_AARCH64_H */ diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h index d1c19d973e55..6180ff99ab43 100644 --- a/tools/include/nolibc/arch-arm.h +++ b/tools/include/nolibc/arch-arm.h @@ -185,7 +185,7 @@ }) /* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "mov r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */ @@ -193,7 +193,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ "mov sp, ip\n" "bl _start_c\n" /* transfer to c runtime */ ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #endif /* _NOLIBC_ARCH_ARM_H */ diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h index 28c26a00a762..ff5afc35bbd8 100644 --- a/tools/include/nolibc/arch-i386.h +++ b/tools/include/nolibc/arch-i386.h @@ -162,7 +162,7 @@ * 2) The deepest stack frame should be set to zero * */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "xor %ebp, %ebp\n" /* zero the stack frame */ @@ -174,7 +174,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ "call _start_c\n" /* transfer to c runtime */ "hlt\n" /* ensure it does not return */ ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #endif /* _NOLIBC_ARCH_I386_H */ diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h index 3f8ef8f86c0f..fb519545959e 100644 --- a/tools/include/nolibc/arch-loongarch.h +++ b/tools/include/nolibc/arch-loongarch.h @@ -149,14 +149,14 @@ #endif /* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ LONG_BSTRINS " $sp, $zero, 3, 0\n" /* $sp must be 16-byte aligned */ "bl _start_c\n" /* transfer to c runtime */ ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #endif /* _NOLIBC_ARCH_LOONGARCH_H */ diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index a2ee77ed2fbb..1791a8ce58da 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -179,7 +179,7 @@ }) /* startup code, note that it's called __start on MIPS */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector __start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void) { __asm__ volatile ( ".set push\n" @@ -200,7 +200,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ " nop\n" /* delayed slot */ ".set pop\n" ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #endif /* _NOLIBC_ARCH_MIPS_H */ diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h index 41ebd394b90c..ee2fdb8d601d 100644 --- a/tools/include/nolibc/arch-powerpc.h +++ b/tools/include/nolibc/arch-powerpc.h @@ -184,7 +184,7 @@ #endif /* !__powerpc64__ */ /* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { #ifdef __powerpc64__ #if _CALL_ELF == 2 @@ -215,7 +215,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ "bl _start_c\n" /* transfer to c runtime */ ); #endif - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #endif /* _NOLIBC_ARCH_POWERPC_H */ diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h index 1927c643c739..8827bf936212 100644 --- a/tools/include/nolibc/arch-riscv.h +++ b/tools/include/nolibc/arch-riscv.h @@ -140,7 +140,7 @@ }) /* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( ".option push\n" @@ -151,7 +151,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ "andi sp, a0, -16\n" /* sp must be 16-byte aligned */ "call _start_c\n" /* transfer to c runtime */ ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #endif /* _NOLIBC_ARCH_RISCV_H */ diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h index 5d60fd43f883..2ec13d8b9a2d 100644 --- a/tools/include/nolibc/arch-s390.h +++ b/tools/include/nolibc/arch-s390.h @@ -139,7 +139,7 @@ }) /* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "lgr %r2, %r15\n" /* save stack pointer to %r2, as arg1 of _start_c */ @@ -147,7 +147,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ "xc 0(8,%r15), 0(%r15)\n" /* clear backchain */ "brasl %r14, _start_c\n" /* transfer to c runtime */ ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } struct s390_mmap_arg_struct { diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h index 68609f421934..65252c005a30 100644 --- a/tools/include/nolibc/arch-x86_64.h +++ b/tools/include/nolibc/arch-x86_64.h @@ -161,7 +161,7 @@ * 2) The deepest stack frame should be zero (the %rbp). * */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "xor %ebp, %ebp\n" /* zero the stack frame */ @@ -170,7 +170,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ "call _start_c\n" /* transfer to c runtime */ "hlt\n" /* ensure it does not return */ ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #define NOLIBC_ARCH_HAS_MEMMOVE diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h index 1730d0454a55..3a1d5b8d4fb9 100644 --- a/tools/include/nolibc/compiler.h +++ b/tools/include/nolibc/compiler.h @@ -12,6 +12,9 @@ # define __nolibc_has_attribute(attr) 0 #endif +#define __nolibc_entrypoint __attribute__((optimize("Os", "omit-frame-pointer"))) +#define __nolibc_entrypoint_epilogue() __builtin_unreachable() + #if defined(__SSP__) || defined(__SSP_STRONG__) || defined(__SSP_ALL__) || defined(__SSP_EXPLICIT__) #define _NOLIBC_STACKPROTECTOR -- cgit v1.2.3 From e098eebb63cb1c03813559b5db9da4451ba3a318 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 7 Aug 2024 23:51:42 +0200 Subject: tools/nolibc: compiler: use attribute((naked)) if available MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current entrypoint attributes optimize("Os", "omit-frame-pointer") are intended to avoid all compiler generated code, like function porologue and epilogue. This is the exact usecase implemented by the attribute "naked". Unfortunately this is not implemented by GCC for all targets, so only use it where available. This also provides compatibility with clang, which recognizes the "naked" attribute but not the previously used attribute "optimized". Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20240807-nolibc-llvm-v2-6-c20f2f5fc7c2@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/compiler.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h index 3a1d5b8d4fb9..9bc6a706a332 100644 --- a/tools/include/nolibc/compiler.h +++ b/tools/include/nolibc/compiler.h @@ -12,8 +12,13 @@ # define __nolibc_has_attribute(attr) 0 #endif -#define __nolibc_entrypoint __attribute__((optimize("Os", "omit-frame-pointer"))) -#define __nolibc_entrypoint_epilogue() __builtin_unreachable() +#if __nolibc_has_attribute(naked) +# define __nolibc_entrypoint __attribute__((naked)) +# define __nolibc_entrypoint_epilogue() +#else +# define __nolibc_entrypoint __attribute__((optimize("Os", "omit-frame-pointer"))) +# define __nolibc_entrypoint_epilogue() __builtin_unreachable() +#endif /* __nolibc_has_attribute(naked) */ #if defined(__SSP__) || defined(__SSP_STRONG__) || defined(__SSP_ALL__) || defined(__SSP_EXPLICIT__) -- cgit v1.2.3 From 0021d6670d1a997549a39bf629da9940bf4068ed Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 12 Aug 2024 22:50:17 +0200 Subject: tools/nolibc: crt: mark _start_c() as used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During LTO the reference from the asm startup code to the _start_c() function is not visible and _start_c() is removed. This will then lead to errors during linking. As _start_c() is indeed always used, mark it as such. Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20240812-nolibc-lto-v2-1-736af7bbefa8@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/crt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h index ac291574f6c0..bbcd5fd09806 100644 --- a/tools/include/nolibc/crt.h +++ b/tools/include/nolibc/crt.h @@ -22,7 +22,7 @@ extern void (*const __init_array_end[])(int, char **, char**) __attribute__((wea extern void (*const __fini_array_start[])(void) __attribute__((weak)); extern void (*const __fini_array_end[])(void) __attribute__((weak)); -__attribute__((weak)) +__attribute__((weak,used)) void _start_c(long *sp) { long argc; -- cgit v1.2.3 From ff7b9abbfce985b92f71c855246508edb0980cd6 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 12 Aug 2024 22:50:18 +0200 Subject: tools/nolibc: stackprotector: mark implicitly used symbols as used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During LTO the references from the compiler-generated prologue and epilogues to the stack protector symbols are not visible and the symbols are removed. This will then lead to errors during linking. As those symbols are already #ifdeffed-out if unused mark them as "used" to prevent their removal. Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20240812-nolibc-lto-v2-2-736af7bbefa8@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/stackprotector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h index 13f1d0e60387..1d0d5259ec41 100644 --- a/tools/include/nolibc/stackprotector.h +++ b/tools/include/nolibc/stackprotector.h @@ -18,7 +18,7 @@ * triggering stack protector errors themselves */ -__attribute__((weak,noreturn,section(".text.nolibc_stack_chk"))) +__attribute__((weak,used,noreturn,section(".text.nolibc_stack_chk"))) void __stack_chk_fail(void) { pid_t pid; @@ -34,7 +34,7 @@ void __stack_chk_fail_local(void) __stack_chk_fail(); } -__attribute__((weak,section(".data.nolibc_stack_chk"))) +__attribute__((weak,used,section(".data.nolibc_stack_chk"))) uintptr_t __stack_chk_guard; static __no_stack_protector void __stack_chk_init(void) -- cgit v1.2.3 From 25fb329a23c78d59a055a7b1329d18f30a2be92d Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 12 Aug 2024 22:50:19 +0200 Subject: tools/nolibc: x86_64: use local label in memcpy/memmove MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling arch-x86_64.h with clang and binutils LD yields duplicate label errors: .../gcc-13.2.0-nolibc/x86_64-linux/bin/x86_64-linux-ld: error: LLVM gold plugin: :44:1: symbol '.Lbackward_copy' is already defined .Lbackward_copy:leaq -1(%rdi, %rcx, 1), %rdi Instead of a local symbol use a local label which can be defined multiple times and therefore avoids the error. Reviewed-by: Ammar Faizi Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20240812-nolibc-lto-v2-3-736af7bbefa8@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/arch-x86_64.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h index 65252c005a30..1e40620a2b33 100644 --- a/tools/include/nolibc/arch-x86_64.h +++ b/tools/include/nolibc/arch-x86_64.h @@ -193,10 +193,10 @@ __asm__ ( "movq %rdi, %rdx\n\t" "subq %rsi, %rdx\n\t" "cmpq %rcx, %rdx\n\t" - "jb .Lbackward_copy\n\t" + "jb 1f\n\t" "rep movsb\n\t" "retq\n" -".Lbackward_copy:" +"1:" /* backward copy */ "leaq -1(%rdi, %rcx, 1), %rdi\n\t" "leaq -1(%rsi, %rcx, 1), %rsi\n\t" "std\n\t" -- cgit v1.2.3 From 65ab5ac4df012388481d0414fcac1d5ac1721fb3 Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Fri, 23 Aug 2024 12:51:00 -0700 Subject: bpf: Add bpf_copy_from_user_str kfunc This adds a kfunc wrapper around strncpy_from_user, which can be called from sleepable BPF programs. This matches the non-sleepable 'bpf_probe_read_user_str' helper except it includes an additional 'flags' param, which allows consumers to clear the entire destination buffer on success or failure. Signed-off-by: Jordan Rome Link: https://lore.kernel.org/r/20240823195101.3621028-1-linux@jordanrome.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 9 +++++++++ kernel/bpf/helpers.c | 42 ++++++++++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 9 +++++++++ 3 files changed, 60 insertions(+) (limited to 'tools/include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e2629457d72d..c3a5728db115 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7513,4 +7513,13 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +/* + * Flags to control BPF kfunc behaviour. + * - BPF_F_PAD_ZEROS: Pad destination buffer with zeros. (See the respective + * helper documentation for details.) + */ +enum bpf_kfunc_flags { + BPF_F_PAD_ZEROS = (1ULL << 0), +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index f12f075b70c5..7e7761c537f8 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2962,6 +2962,47 @@ __bpf_kfunc void bpf_iter_bits_destroy(struct bpf_iter_bits *it) bpf_mem_free(&bpf_global_ma, kit->bits); } +/** + * bpf_copy_from_user_str() - Copy a string from an unsafe user address + * @dst: Destination address, in kernel space. This buffer must be + * at least @dst__sz bytes long. + * @dst__sz: Maximum number of bytes to copy, includes the trailing NUL. + * @unsafe_ptr__ign: Source address, in user space. + * @flags: The only supported flag is BPF_F_PAD_ZEROS + * + * Copies a NUL-terminated string from userspace to BPF space. If user string is + * too long this will still ensure zero termination in the dst buffer unless + * buffer size is 0. + * + * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success and + * memset all of @dst on failure. + */ +__bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user *unsafe_ptr__ign, u64 flags) +{ + int ret; + + if (unlikely(flags & ~BPF_F_PAD_ZEROS)) + return -EINVAL; + + if (unlikely(!dst__sz)) + return 0; + + ret = strncpy_from_user(dst, unsafe_ptr__ign, dst__sz - 1); + if (ret < 0) { + if (flags & BPF_F_PAD_ZEROS) + memset((char *)dst, 0, dst__sz); + + return ret; + } + + if (flags & BPF_F_PAD_ZEROS) + memset((char *)dst + ret, 0, dst__sz - ret); + else + ((char *)dst)[ret] = '\0'; + + return ret + 1; +} + __bpf_kfunc_end_defs(); BTF_KFUNCS_START(generic_btf_ids) @@ -3047,6 +3088,7 @@ BTF_ID_FLAGS(func, bpf_preempt_enable) BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY) +BTF_ID_FLAGS(func, bpf_copy_from_user_str, KF_SLEEPABLE) BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 35bcf52dbc65..f329ee44627a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7512,4 +7512,13 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +/* + * Flags to control BPF kfunc behaviour. + * - BPF_F_PAD_ZEROS: Pad destination buffer with zeros. (See the respective + * helper documentation for details.) + */ +enum bpf_kfunc_flags { + BPF_F_PAD_ZEROS = (1ULL << 0), +}; + #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From 1506af6db8c4abbe3d5dd573ec72b90f81abfcf7 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 22 Jul 2024 11:11:48 +0100 Subject: perf: cs-etm: Support version 0.1 of HW_ID packets v0.1 HW_ID packets have a new field that describes which sink each CPU writes to. Use the sink ID to link trace ID maps to each other so that mappings are shared wherever the sink is shared. Also update the error message to show that overlapping IDs aren't an error in per-thread mode, just not supported. In the future we can use the CPU ID from the AUX records, or watch for changing sink IDs on HW_ID packets to use the correct decoders. Reviewed-by: Mike Leach Signed-off-by: James Clark Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexandre Torgue Cc: Anshuman Khandual Cc: Ganapatrao Kulkarni Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: John Garry Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Maxime Coquelin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: Will Deacon Link: https://lore.kernel.org/r/20240722101202.26915-7-james.clark@linaro.org Signed-off-by: James Clark Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/coresight-pmu.h | 17 ++++-- tools/perf/util/cs-etm.c | 100 ++++++++++++++++++++++++++++++++---- 2 files changed, 103 insertions(+), 14 deletions(-) (limited to 'tools/include') diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index 51ac441a37c3..89b0ac0014b0 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -49,12 +49,21 @@ * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload. * Used to associate a CPU with the CoreSight Trace ID. * [07:00] - Trace ID - uses 8 bits to make value easy to read in file. - * [59:08] - Unused (SBZ) - * [63:60] - Version + * [39:08] - Sink ID - as reported in /sys/bus/event_source/devices/cs_etm/sinks/ + * Added in minor version 1. + * [55:40] - Unused (SBZ) + * [59:56] - Minor Version - previously existing fields are compatible with + * all minor versions. + * [63:60] - Major Version - previously existing fields mean different things + * in new major versions. */ #define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0) -#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60) +#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8) -#define CS_AUX_HW_ID_CURR_VERSION 0 +#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56) +#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60) + +#define CS_AUX_HW_ID_MAJOR_VERSION 0 +#define CS_AUX_HW_ID_MINOR_VERSION 1 #endif diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 01c60a660fb3..775d4f35c270 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -118,6 +118,12 @@ struct cs_etm_queue { struct cs_etm_traceid_queue **traceid_queues; /* Conversion between traceID and metadata pointers */ struct intlist *traceid_list; + /* + * Same as traceid_list, but traceid_list may be a reference to another + * queue's which has a matching sink ID. + */ + struct intlist *own_traceid_list; + u32 sink_id; }; static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm); @@ -142,6 +148,7 @@ static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata); (queue_nr << 16 | trace_chan_id) #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16) #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff) +#define SINK_UNSET ((u32) -1) static u32 cs_etm__get_v7_protocol_version(u32 etmidr) { @@ -241,7 +248,16 @@ static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq, int err; if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) { - pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n"); + /* + * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs + * are expected (but not supported) in per-thread mode, + * rather than signifying an error. + */ + if (etmq->etm->per_thread_decoding) + pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n"); + else + pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n"); + return -EINVAL; } @@ -326,6 +342,64 @@ static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu, return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data); } +static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu, + u64 hw_id) +{ + struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu); + int ret; + u64 *cpu_data; + u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id); + u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id); + + /* + * Check sink id hasn't changed in per-cpu mode. In per-thread mode, + * let it pass for now until an actual overlapping trace ID is hit. In + * most cases IDs won't overlap even if the sink changes. + */ + if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET && + etmq->sink_id != sink_id) { + pr_err("CS_ETM: mismatch between sink IDs\n"); + return -EINVAL; + } + + etmq->sink_id = sink_id; + + /* Find which other queues use this sink and link their ID maps */ + for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) { + struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv; + + /* Different sinks, skip */ + if (other_etmq->sink_id != etmq->sink_id) + continue; + + /* Already linked, skip */ + if (other_etmq->traceid_list == etmq->traceid_list) + continue; + + /* At the point of first linking, this one should be empty */ + if (!intlist__empty(etmq->traceid_list)) { + pr_err("CS_ETM: Can't link populated trace ID lists\n"); + return -EINVAL; + } + + etmq->own_traceid_list = NULL; + intlist__delete(etmq->traceid_list); + etmq->traceid_list = other_etmq->traceid_list; + break; + } + + cpu_data = get_cpu_data(etm, cpu); + ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data); + if (ret) + return ret; + + ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data); + if (ret) + return ret; + + return 0; +} + static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata) { u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC]; @@ -414,10 +488,10 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session, /* extract and parse the HW ID */ hw_id = event->aux_output_hw_id.hw_id; - version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id); + version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id); /* check that we can handle this version */ - if (version > CS_AUX_HW_ID_CURR_VERSION) { + if (version > CS_AUX_HW_ID_MAJOR_VERSION) { pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n", version); return -EINVAL; @@ -442,7 +516,10 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session, return -EINVAL; } - return cs_etm__process_trace_id_v0(etm, cpu, hw_id); + if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0) + return cs_etm__process_trace_id_v0(etm, cpu, hw_id); + + return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id); } void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, @@ -882,12 +959,14 @@ static void cs_etm__free_queue(void *priv) cs_etm_decoder__free(etmq->decoder); cs_etm__free_traceid_queues(etmq); - /* First remove all traceID/metadata nodes for the RB tree */ - intlist__for_each_entry_safe(inode, tmp, etmq->traceid_list) - intlist__remove(etmq->traceid_list, inode); + if (etmq->own_traceid_list) { + /* First remove all traceID/metadata nodes for the RB tree */ + intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list) + intlist__remove(etmq->own_traceid_list, inode); - /* Then the RB tree itself */ - intlist__delete(etmq->traceid_list); + /* Then the RB tree itself */ + intlist__delete(etmq->own_traceid_list); + } free(etmq); } @@ -1081,7 +1160,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(void) * has to be made for each packet that gets decoded, optimizing access * in anything other than a sequential array is worth doing. */ - etmq->traceid_list = intlist__new(NULL); + etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL); if (!etmq->traceid_list) goto out_free; @@ -1113,6 +1192,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, etmq->queue_nr = queue_nr; queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */ etmq->offset = 0; + etmq->sink_id = SINK_UNSET; return 0; } -- cgit v1.2.3 From f8d92fc527ff0388b2e94c101afb0e5c7496199f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Aug 2024 09:13:23 +0200 Subject: selftests: vDSO: fix include order in build of test_vdso_chacha Building test_vdso_chacha currently leads to following issue: In file included from /home/chleroy/linux-powerpc/include/linux/limits.h:7, from /opt/powerpc64-e5500--glibc--stable-2024.02-1/powerpc64-buildroot-linux-gnu/sysroot/usr/include/bits/local_lim.h:38, from /opt/powerpc64-e5500--glibc--stable-2024.02-1/powerpc64-buildroot-linux-gnu/sysroot/usr/include/bits/posix1_lim.h:161, from /opt/powerpc64-e5500--glibc--stable-2024.02-1/powerpc64-buildroot-linux-gnu/sysroot/usr/include/limits.h:195, from /opt/powerpc64-e5500--glibc--stable-2024.02-1/lib/gcc/powerpc64-buildroot-linux-gnu/12.3.0/include-fixed/limits.h:203, from /opt/powerpc64-e5500--glibc--stable-2024.02-1/lib/gcc/powerpc64-buildroot-linux-gnu/12.3.0/include-fixed/syslimits.h:7, from /opt/powerpc64-e5500--glibc--stable-2024.02-1/lib/gcc/powerpc64-buildroot-linux-gnu/12.3.0/include-fixed/limits.h:34, from /tmp/sodium/usr/local/include/sodium/export.h:7, from /tmp/sodium/usr/local/include/sodium/crypto_stream_chacha20.h:14, from vdso_test_chacha.c:6: /opt/powerpc64-e5500--glibc--stable-2024.02-1/powerpc64-buildroot-linux-gnu/sysroot/usr/include/bits/xopen_lim.h:99:6: error: missing binary operator before token "(" 99 | # if INT_MAX == 32767 | ^~~~~~~ /opt/powerpc64-e5500--glibc--stable-2024.02-1/powerpc64-buildroot-linux-gnu/sysroot/usr/include/bits/xopen_lim.h:102:7: error: missing binary operator before token "(" 102 | # if INT_MAX == 2147483647 | ^~~~~~~ /opt/powerpc64-e5500--glibc--stable-2024.02-1/powerpc64-buildroot-linux-gnu/sysroot/usr/include/bits/xopen_lim.h:126:6: error: missing binary operator before token "(" 126 | # if LONG_MAX == 2147483647 | ^~~~~~~~ This is due to kernel include/linux/limits.h being included instead of libc's limits.h. This is because directory include/ is added through option -isystem so it goes prior to glibc's include directory. Replace -isystem by -idirafter. But this implies that now tools/include/linux/linkage.h is included instead of include/linux/linkage.h, so define a stub for SYM_FUNC_START() and SYM_FUNC_END(). Signed-off-by: Christophe Leroy Signed-off-by: Jason A. Donenfeld --- tools/include/linux/linkage.h | 4 ++++ tools/testing/selftests/vDSO/Makefile | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/tools/include/linux/linkage.h b/tools/include/linux/linkage.h index bc763d500262..a48ff086899c 100644 --- a/tools/include/linux/linkage.h +++ b/tools/include/linux/linkage.h @@ -1,4 +1,8 @@ #ifndef _TOOLS_INCLUDE_LINUX_LINKAGE_H #define _TOOLS_INCLUDE_LINUX_LINKAGE_H +#define SYM_FUNC_START(x) .globl x; x: + +#define SYM_FUNC_END(x) + #endif /* _TOOLS_INCLUDE_LINUX_LINKAGE_H */ diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index c9a819cacbf2..10ffdda3f2fa 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -42,7 +42,7 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \ $(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(ARCH)/vdso/vgetrandom-chacha.S $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ - -isystem $(top_srcdir)/arch/$(ARCH)/include \ - -isystem $(top_srcdir)/include \ + -idirafter $(top_srcdir)/arch/$(ARCH)/include \ + -idirafter $(top_srcdir)/include \ -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \ -Wa,--noexecstack $(SODIUM) -- cgit v1.2.3 From 3efd7ab46d0aebc3e567a9846e79a98dbad3291c Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:46 +0000 Subject: net: netdev netlink api to bind dma-buf to a net device API takes the dma-buf fd as input, and binds it to the netdevice. The user can specify the rx queues to bind the dma-buf to. Suggested-by: Stanislav Fomichev Signed-off-by: Mina Almasry Reviewed-by: Donald Hunter Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-3-almasrymina@google.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 47 +++++++++++++++++++++++++++++++++ include/uapi/linux/netdev.h | 11 ++++++++ net/core/netdev-genl-gen.c | 19 +++++++++++++ net/core/netdev-genl-gen.h | 2 ++ net/core/netdev-genl.c | 6 +++++ tools/include/uapi/linux/netdev.h | 11 ++++++++ 6 files changed, 96 insertions(+) (limited to 'tools/include') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 959755be4d7f..4930e8142aa6 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -457,6 +457,39 @@ attribute-sets: Number of times driver re-started accepting send requests to this queue from the stack. type: uint + - + name: queue-id + subset-of: queue + attributes: + - + name: id + - + name: type + - + name: dmabuf + attributes: + - + name: ifindex + doc: netdev ifindex to bind the dmabuf to. + type: u32 + checks: + min: 1 + - + name: queues + doc: receive queues to bind the dmabuf to. + type: nest + nested-attributes: queue-id + multi-attr: true + - + name: fd + doc: dmabuf file descriptor to bind. + type: u32 + - + name: id + doc: id of the dmabuf binding + type: u32 + checks: + min: 1 operations: list: @@ -619,6 +652,20 @@ operations: - rx-bytes - tx-packets - tx-bytes + - + name: bind-rx + doc: Bind dmabuf to netdev + attribute-set: dmabuf + flags: [ admin-perm ] + do: + request: + attributes: + - ifindex + - fd + - queues + reply: + attributes: + - id mcast-groups: list: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 43742ac5b00d..91bf3ecc5f1d 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -173,6 +173,16 @@ enum { NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) }; +enum { + NETDEV_A_DMABUF_IFINDEX = 1, + NETDEV_A_DMABUF_QUEUES, + NETDEV_A_DMABUF_FD, + NETDEV_A_DMABUF_ID, + + __NETDEV_A_DMABUF_MAX, + NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -186,6 +196,7 @@ enum { NETDEV_CMD_QUEUE_GET, NETDEV_CMD_NAPI_GET, NETDEV_CMD_QSTATS_GET, + NETDEV_CMD_BIND_RX, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 8350a0afa9ec..6b7fe6035067 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -27,6 +27,11 @@ const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFIND [NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range), }; +const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1] = { + [NETDEV_A_QUEUE_ID] = { .type = NLA_U32, }, + [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1), +}; + /* NETDEV_CMD_DEV_GET - do */ static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = { [NETDEV_A_DEV_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), @@ -74,6 +79,13 @@ static const struct nla_policy netdev_qstats_get_nl_policy[NETDEV_A_QSTATS_SCOPE [NETDEV_A_QSTATS_SCOPE] = NLA_POLICY_MASK(NLA_UINT, 0x1), }; +/* NETDEV_CMD_BIND_RX - do */ +static const struct nla_policy netdev_bind_rx_nl_policy[NETDEV_A_DMABUF_FD + 1] = { + [NETDEV_A_DMABUF_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), + [NETDEV_A_DMABUF_FD] = { .type = NLA_U32, }, + [NETDEV_A_DMABUF_QUEUES] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy), +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -151,6 +163,13 @@ static const struct genl_split_ops netdev_nl_ops[] = { .maxattr = NETDEV_A_QSTATS_SCOPE, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = NETDEV_CMD_BIND_RX, + .doit = netdev_nl_bind_rx_doit, + .policy = netdev_bind_rx_nl_policy, + .maxattr = NETDEV_A_DMABUF_FD, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 4db40fd5b4a9..67c34005750c 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -13,6 +13,7 @@ /* Common nested types */ extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1]; +extern const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1]; int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); @@ -30,6 +31,7 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index a17d7eaeb001..699c34b9b03c 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -723,6 +723,12 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, return err; } +/* Stub */ +int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info) +{ + return 0; +} + static int netdev_genl_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 43742ac5b00d..91bf3ecc5f1d 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -173,6 +173,16 @@ enum { NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1) }; +enum { + NETDEV_A_DMABUF_IFINDEX = 1, + NETDEV_A_DMABUF_QUEUES, + NETDEV_A_DMABUF_FD, + NETDEV_A_DMABUF_ID, + + __NETDEV_A_DMABUF_MAX, + NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -186,6 +196,7 @@ enum { NETDEV_CMD_QUEUE_GET, NETDEV_CMD_NAPI_GET, NETDEV_CMD_QSTATS_GET, + NETDEV_CMD_BIND_RX, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) -- cgit v1.2.3 From d0caf9876a1c9f844307effb598ad1312d9e0025 Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 10 Sep 2024 17:14:57 +0000 Subject: netdev: add dmabuf introspection Add dmabuf information to page_pool stats: $ ./cli.py --spec ../netlink/specs/netdev.yaml --dump page-pool-get ... {'dmabuf': 10, 'id': 456, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 455, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 454, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 453, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 452, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 451, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 450, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, {'dmabuf': 10, 'id': 449, 'ifindex': 3, 'inflight': 1023, 'inflight-mem': 4190208}, And queue stats: $ ./cli.py --spec ../netlink/specs/netdev.yaml --dump queue-get ... {'dmabuf': 10, 'id': 8, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 9, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 10, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 11, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 12, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 13, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 14, 'ifindex': 3, 'type': 'rx'}, {'dmabuf': 10, 'id': 15, 'ifindex': 3, 'type': 'rx'}, Suggested-by: Jakub Kicinski Signed-off-by: Mina Almasry Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20240910171458.219195-14-almasrymina@google.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 10 ++++++++++ include/uapi/linux/netdev.h | 2 ++ net/core/netdev-genl.c | 7 +++++++ net/core/page_pool_user.c | 5 +++++ tools/include/uapi/linux/netdev.h | 2 ++ 5 files changed, 26 insertions(+) (limited to 'tools/include') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 0c747530c275..08412c279297 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -167,6 +167,10 @@ attribute-sets: "re-attached", they are just waiting to disappear. Attribute is absent if Page Pool has not been detached, and can still be used to allocate new memory. + - + name: dmabuf + doc: ID of the dmabuf this page-pool is attached to. + type: u32 - name: page-pool-info subset-of: page-pool @@ -268,6 +272,10 @@ attribute-sets: name: napi-id doc: ID of the NAPI instance which services this queue. type: u32 + - + name: dmabuf + doc: ID of the dmabuf attached to this queue, if any. + type: u32 - name: qstats @@ -543,6 +551,7 @@ operations: - inflight - inflight-mem - detach-time + - dmabuf dump: reply: *pp-reply config-cond: page-pool @@ -607,6 +616,7 @@ operations: - type - napi-id - ifindex + - dmabuf dump: request: attributes: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 91bf3ecc5f1d..7c308f04e7a0 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -93,6 +93,7 @@ enum { NETDEV_A_PAGE_POOL_INFLIGHT, NETDEV_A_PAGE_POOL_INFLIGHT_MEM, NETDEV_A_PAGE_POOL_DETACH_TIME, + NETDEV_A_PAGE_POOL_DMABUF, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) @@ -131,6 +132,7 @@ enum { NETDEV_A_QUEUE_IFINDEX, NETDEV_A_QUEUE_TYPE, NETDEV_A_QUEUE_NAPI_ID, + NETDEV_A_QUEUE_DMABUF, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 9153a8ab0cf8..1cb954f2d39e 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -295,6 +295,7 @@ static int netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { + struct net_devmem_dmabuf_binding *binding; struct netdev_rx_queue *rxq; struct netdev_queue *txq; void *hdr; @@ -314,6 +315,12 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID, rxq->napi->napi_id)) goto nla_put_failure; + + binding = rxq->mp_params.mp_priv; + if (binding && + nla_put_u32(rsp, NETDEV_A_QUEUE_DMABUF, binding->id)) + goto nla_put_failure; + break; case NETDEV_QUEUE_TYPE_TX: txq = netdev_get_tx_queue(netdev, q_idx); diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index cd6267ba6fa3..48335766c1bf 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -9,6 +9,7 @@ #include #include +#include "devmem.h" #include "page_pool_priv.h" #include "netdev-genl-gen.h" @@ -213,6 +214,7 @@ static int page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, const struct genl_info *info) { + struct net_devmem_dmabuf_binding *binding = pool->mp_priv; size_t inflight, refsz; void *hdr; @@ -242,6 +244,9 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, pool->user.detach_time)) goto err_cancel; + if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id)) + goto err_cancel; + genlmsg_end(rsp, hdr); return 0; diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 91bf3ecc5f1d..7c308f04e7a0 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -93,6 +93,7 @@ enum { NETDEV_A_PAGE_POOL_INFLIGHT, NETDEV_A_PAGE_POOL_INFLIGHT_MEM, NETDEV_A_PAGE_POOL_DETACH_TIME, + NETDEV_A_PAGE_POOL_DMABUF, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) @@ -131,6 +132,7 @@ enum { NETDEV_A_QUEUE_IFINDEX, NETDEV_A_QUEUE_TYPE, NETDEV_A_QUEUE_NAPI_ID, + NETDEV_A_QUEUE_DMABUF, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) -- cgit v1.2.3 From 712676ea2bb3882a852bcf49862c4247317fc9b2 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Tue, 3 Sep 2024 12:09:17 +0000 Subject: arm64: vDSO: Wire up getrandom() vDSO implementation Hook up the generic vDSO implementation to the aarch64 vDSO data page. The _vdso_rng_data required data is placed within the _vdso_data vvar page, by using a offset larger than the vdso_data. The vDSO function requires a ChaCha20 implementation that does not write to the stack, and that can do an entire ChaCha20 permutation. The one provided uses NEON on the permute operation, with a fallback to the syscall for chips that do not support AdvSIMD. This also passes the vdso_test_chacha test along with vdso_test_getrandom. The vdso_test_getrandom bench-single result on Neoverse-N1 shows: vdso: 25000000 times in 0.783884250 seconds libc: 25000000 times in 8.780275399 seconds syscall: 25000000 times in 8.786581518 seconds A small fixup to arch/arm64/include/asm/mman.h was required to avoid pulling kernel code into the vDSO, similar to what's already done in arch/arm64/include/asm/rwonce.h. Signed-off-by: Adhemerval Zanella Reviewed-by: Ard Biesheuvel Acked-by: Will Deacon Signed-off-by: Jason A. Donenfeld --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/mman.h | 6 +- arch/arm64/include/asm/vdso/getrandom.h | 50 +++++++++ arch/arm64/include/asm/vdso/vsyscall.h | 15 +++ arch/arm64/kernel/vdso.c | 6 - arch/arm64/kernel/vdso/Makefile | 25 +++-- arch/arm64/kernel/vdso/vdso.lds.S | 4 + arch/arm64/kernel/vdso/vgetrandom-chacha.S | 172 +++++++++++++++++++++++++++++ arch/arm64/kernel/vdso/vgetrandom.c | 15 +++ tools/arch/arm64/vdso | 1 + tools/include/linux/compiler.h | 4 + tools/testing/selftests/vDSO/Makefile | 3 +- 12 files changed, 286 insertions(+), 16 deletions(-) create mode 100644 arch/arm64/include/asm/vdso/getrandom.h create mode 100644 arch/arm64/kernel/vdso/vgetrandom-chacha.S create mode 100644 arch/arm64/kernel/vdso/vgetrandom.c create mode 120000 tools/arch/arm64/vdso (limited to 'tools/include') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a2f8ff354ca6..7f7424d1b3b8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -262,6 +262,7 @@ config ARM64 select TRACE_IRQFLAGS_NMI_SUPPORT select HAVE_SOFTIRQ_ON_OWN_STACK select USER_STACKTRACE_SUPPORT + select VDSO_GETRANDOM help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h index 5966ee4a6154..ceb4370a69c5 100644 --- a/arch/arm64/include/asm/mman.h +++ b/arch/arm64/include/asm/mman.h @@ -2,9 +2,11 @@ #ifndef __ASM_MMAN_H__ #define __ASM_MMAN_H__ +#include + +#ifndef BUILD_VDSO #include #include -#include static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, unsigned long pkey __always_unused) @@ -60,4 +62,6 @@ static inline bool arch_validate_flags(unsigned long vm_flags) } #define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags) +#endif /* !BUILD_VDSO */ + #endif /* ! __ASM_MMAN_H__ */ diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h new file mode 100644 index 000000000000..342f807e2044 --- /dev/null +++ b/arch/arm64/include/asm/vdso/getrandom.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_VDSO_GETRANDOM_H +#define __ASM_VDSO_GETRANDOM_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +/** + * getrandom_syscall - Invoke the getrandom() syscall. + * @buffer: Destination buffer to fill with random bytes. + * @len: Size of @buffer in bytes. + * @flags: Zero or more GRND_* flags. + * Returns: The number of random bytes written to @buffer, or a negative value indicating an error. + */ +static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags) +{ + register void *buffer asm ("x0") = _buffer; + register size_t len asm ("x1") = _len; + register unsigned int flags asm ("x2") = _flags; + register long ret asm ("x0"); + register long nr asm ("x8") = __NR_getrandom; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (buffer), "r" (len), "r" (flags), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) +{ + /* + * The RNG data is in the real VVAR data page, but if a task belongs to a time namespace + * then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_ + * PAGE_OFFSET points to the real VVAR page. + */ + if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS) + return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * (1UL << CONFIG_PAGE_SHIFT); + return &_vdso_rng_data; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index f94b1457c117..5b6d0dd3cef5 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -2,11 +2,19 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H +#define __VDSO_RND_DATA_OFFSET 480 + #ifndef __ASSEMBLY__ #include #include +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_NR_PAGES, +}; + #define VDSO_PRECISION_MASK ~(0xFF00ULL<<48) extern struct vdso_data *vdso_data; @@ -21,6 +29,13 @@ struct vdso_data *__arm64_get_k_vdso_data(void) } #define __arch_get_k_vdso_data __arm64_get_k_vdso_data +static __always_inline +struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void) +{ + return (void *)vdso_data + __VDSO_RND_DATA_OFFSET; +} +#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data + static __always_inline void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) { diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 89b6e7840002..706c9c3a7a50 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -34,12 +34,6 @@ enum vdso_abi { VDSO_ABI_AA32, }; -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES, -}; - struct vdso_abi_info { const char *name; const char *vdso_code_start; diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index d11da6461278..35685c036044 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -9,7 +9,7 @@ # Include the generic Makefile to check the built vdso. include $(srctree)/lib/vdso/Makefile -obj-vdso := vgettimeofday.o note.o sigreturn.o +obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o # Build rules targets := $(obj-vdso) vdso.so vdso.so.dbg @@ -34,19 +34,28 @@ ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO # -Wmissing-prototypes and -Wmissing-declarations are removed from -# the CFLAGS of vgettimeofday.c to make possible to build the -# kernel with CONFIG_WERROR enabled. -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ - $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ - $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ - -Wmissing-prototypes -Wmissing-declarations +# the CFLAGS to make possible to build the kernel with CONFIG_WERROR enabled. +CC_FLAGS_REMOVE_VDSO := $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ + $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ + $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ + -Wmissing-prototypes -Wmissing-declarations -CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables +CC_FLAGS_ADD_VDSO := -O2 -mcmodel=tiny -fasynchronous-unwind-tables + +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_REMOVE_VDSO) +CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_REMOVE_VDSO) + +CFLAGS_vgettimeofday.o = $(CC_FLAGS_ADD_VDSO) +CFLAGS_vgetrandom.o = $(CC_FLAGS_ADD_VDSO) ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) endif +ifneq ($(c-getrandom-y),) + CFLAGS_vgetrandom.o += -include $(c-getrandom-y) +endif + targets += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 45354f2ddf70..f204a9ddc833 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -11,7 +11,9 @@ #include #include #include +#include #include +#include OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") OUTPUT_ARCH(aarch64) @@ -19,6 +21,7 @@ OUTPUT_ARCH(aarch64) SECTIONS { PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); + PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); #ifdef CONFIG_TIME_NS PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); #endif @@ -102,6 +105,7 @@ VERSION __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; + __kernel_getrandom; local: *; }; } diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S new file mode 100644 index 000000000000..67890b445309 --- /dev/null +++ b/arch/arm64/kernel/vdso/vgetrandom-chacha.S @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + + .text + +#define state0 v0 +#define state1 v1 +#define state2 v2 +#define state3 v3 +#define copy0 v4 +#define copy0_q q4 +#define copy1 v5 +#define copy2 v6 +#define copy3 v7 +#define copy3_d d7 +#define one_d d16 +#define one_q q16 +#define one_v v16 +#define tmp v17 +#define rot8 v18 + +/* + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive + * number of blocks of output with nonce 0, taking an input key and 8-bytes + * counter. Importantly does not spill to the stack. + * + * This implementation avoids d8-d15 because they are callee-save in user + * space. + * + * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, + * const uint8_t *key, + * uint32_t *counter, + * size_t nblocks) + * + * x0: output bytes + * x1: 32-byte key input + * x2: 8-byte counter input/output + * x3: number of 64-byte block to write to output + */ +SYM_FUNC_START(__arch_chacha20_blocks_nostack) + + /* copy0 = "expand 32-byte k" */ + mov_q x8, 0x3320646e61707865 + mov_q x9, 0x6b20657479622d32 + mov copy0.d[0], x8 + mov copy0.d[1], x9 + + /* copy1,copy2 = key */ + ld1 { copy1.4s, copy2.4s }, [x1] + /* copy3 = counter || zero nonce */ + ld1 { copy3.2s }, [x2] + + movi one_v.2s, #1 + uzp1 one_v.4s, one_v.4s, one_v.4s + +.Lblock: + /* copy state to auxiliary vectors for the final add after the permute. */ + mov state0.16b, copy0.16b + mov state1.16b, copy1.16b + mov state2.16b, copy2.16b + mov state3.16b, copy3.16b + + mov w4, 20 +.Lpermute: + /* + * Permute one 64-byte block where the state matrix is stored in the four NEON + * registers state0-state3. It performs matrix operations on four words in parallel, + * but requires shuffling to rearrange the words after each round. + */ + +.Ldoubleround: + /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */ + add state0.4s, state0.4s, state1.4s + eor state3.16b, state3.16b, state0.16b + rev32 state3.8h, state3.8h + + /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */ + add state2.4s, state2.4s, state3.4s + eor tmp.16b, state1.16b, state2.16b + shl state1.4s, tmp.4s, #12 + sri state1.4s, tmp.4s, #20 + + /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */ + add state0.4s, state0.4s, state1.4s + eor tmp.16b, state3.16b, state0.16b + shl state3.4s, tmp.4s, #8 + sri state3.4s, tmp.4s, #24 + + /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */ + add state2.4s, state2.4s, state3.4s + eor tmp.16b, state1.16b, state2.16b + shl state1.4s, tmp.4s, #7 + sri state1.4s, tmp.4s, #25 + + /* state1[0,1,2,3] = state1[1,2,3,0] */ + ext state1.16b, state1.16b, state1.16b, #4 + /* state2[0,1,2,3] = state2[2,3,0,1] */ + ext state2.16b, state2.16b, state2.16b, #8 + /* state3[0,1,2,3] = state3[1,2,3,0] */ + ext state3.16b, state3.16b, state3.16b, #12 + + /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */ + add state0.4s, state0.4s, state1.4s + eor state3.16b, state3.16b, state0.16b + rev32 state3.8h, state3.8h + + /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */ + add state2.4s, state2.4s, state3.4s + eor tmp.16b, state1.16b, state2.16b + shl state1.4s, tmp.4s, #12 + sri state1.4s, tmp.4s, #20 + + /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */ + add state0.4s, state0.4s, state1.4s + eor tmp.16b, state3.16b, state0.16b + shl state3.4s, tmp.4s, #8 + sri state3.4s, tmp.4s, #24 + + /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */ + add state2.4s, state2.4s, state3.4s + eor tmp.16b, state1.16b, state2.16b + shl state1.4s, tmp.4s, #7 + sri state1.4s, tmp.4s, #25 + + /* state1[0,1,2,3] = state1[3,0,1,2] */ + ext state1.16b, state1.16b, state1.16b, #12 + /* state2[0,1,2,3] = state2[2,3,0,1] */ + ext state2.16b, state2.16b, state2.16b, #8 + /* state3[0,1,2,3] = state3[1,2,3,0] */ + ext state3.16b, state3.16b, state3.16b, #4 + + subs w4, w4, #2 + b.ne .Ldoubleround + + /* output0 = state0 + state0 */ + add state0.4s, state0.4s, copy0.4s + /* output1 = state1 + state1 */ + add state1.4s, state1.4s, copy1.4s + /* output2 = state2 + state2 */ + add state2.4s, state2.4s, copy2.4s + /* output2 = state3 + state3 */ + add state3.4s, state3.4s, copy3.4s + st1 { state0.16b - state3.16b }, [x0] + + /* + * ++copy3.counter, the 'add' clears the upper half of the SIMD register + * which is the expected behaviour here. + */ + add copy3_d, copy3_d, one_d + + /* output += 64, --nblocks */ + add x0, x0, 64 + subs x3, x3, #1 + b.ne .Lblock + + /* counter = copy3.counter */ + st1 { copy3.2s }, [x2] + + /* Zero out the potentially sensitive regs, in case nothing uses these again. */ + movi state0.16b, #0 + movi state1.16b, #0 + movi state2.16b, #0 + movi state3.16b, #0 + movi copy1.16b, #0 + movi copy2.16b, #0 + ret +SYM_FUNC_END(__arch_chacha20_blocks_nostack) + +emit_aarch64_feature_1_and diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c new file mode 100644 index 000000000000..832fe195292b --- /dev/null +++ b/arch/arm64/kernel/vdso/vgetrandom.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +typeof(__cvdso_getrandom) __kernel_getrandom; + +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) +{ + if (alternative_has_cap_likely(ARM64_HAS_FPSIMD)) + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); + + if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags)) + return -ENOSYS; + return getrandom_syscall(buffer, len, flags); +} diff --git a/tools/arch/arm64/vdso b/tools/arch/arm64/vdso new file mode 120000 index 000000000000..233c7a26f6e5 --- /dev/null +++ b/tools/arch/arm64/vdso @@ -0,0 +1 @@ +../../../arch/arm64/kernel/vdso \ No newline at end of file diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 6f7f22ac9da5..4366da278033 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -2,6 +2,8 @@ #ifndef _TOOLS_LINUX_COMPILER_H_ #define _TOOLS_LINUX_COMPILER_H_ +#ifndef __ASSEMBLY__ + #include #ifndef __compiletime_error @@ -224,4 +226,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __asm__ ("" : "=r" (var) : "0" (var)) #endif +#endif /* __ASSEMBLY__ */ + #endif /* _TOOLS_LINUX_COMPILER_H */ diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 04930125035e..3c6fafbd83a6 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -9,7 +9,7 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) TEST_GEN_PROGS += vdso_standalone_test_x86 endif TEST_GEN_PROGS += vdso_test_correctness -ifeq ($(ARCH)$(CONFIG_X86_32),$(filter $(ARCH)$(CONFIG_X86_32),x86 x86_64 loongarch)) +ifeq ($(ARCH)$(CONFIG_X86_32),$(filter $(ARCH)$(CONFIG_X86_32),x86 x86_64 loongarch arm64)) TEST_GEN_PROGS += vdso_test_getrandom TEST_GEN_PROGS += vdso_test_chacha endif @@ -40,5 +40,6 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \ $(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(SRCARCH)/vdso/vgetrandom-chacha.S $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ -idirafter $(top_srcdir)/arch/$(SRCARCH)/include \ + -idirafter $(top_srcdir)/arch/$(SRCARCH)/include/generated \ -idirafter $(top_srcdir)/include \ -D__ASSEMBLY__ -Wa,--noexecstack -- cgit v1.2.3 From 8bc7c5e525584903ea83332e18a2118ed3b1985e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 3 Sep 2024 14:52:45 +0200 Subject: selftests: vDSO: don't include generated headers for chacha test It's not correct to use $(top_srcdir) for generated header files, for builds that are done out of tree via O=, and $(objtree) isn't valid in the selftests context. Instead, just obviate the need for these generated header files by defining empty stubs in tools/include, which is the same thing that's done for rwlock.h. Reviewed-by: Adhemerval Zanella Signed-off-by: Jason A. Donenfeld --- tools/include/generated/asm-offsets.h | 0 tools/include/generated/asm/cpucap-defs.h | 0 tools/include/generated/asm/sysreg-defs.h | 0 tools/testing/selftests/vDSO/Makefile | 2 +- 4 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 tools/include/generated/asm-offsets.h create mode 100644 tools/include/generated/asm/cpucap-defs.h create mode 100644 tools/include/generated/asm/sysreg-defs.h (limited to 'tools/include') diff --git a/tools/include/generated/asm-offsets.h b/tools/include/generated/asm-offsets.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/include/generated/asm/cpucap-defs.h b/tools/include/generated/asm/cpucap-defs.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/include/generated/asm/sysreg-defs.h b/tools/include/generated/asm/sysreg-defs.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 3c6fafbd83a6..610056acc1b7 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -39,7 +39,7 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \ $(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(SRCARCH)/vdso/vgetrandom-chacha.S $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ + -idirafter $(top_srcdir)/tools/include/generated \ -idirafter $(top_srcdir)/arch/$(SRCARCH)/include \ - -idirafter $(top_srcdir)/arch/$(SRCARCH)/include/generated \ -idirafter $(top_srcdir)/include \ -D__ASSEMBLY__ -Wa,--noexecstack -- cgit v1.2.3 From 210860e7f7337e47e77577fa5bb168767e2d8a1e Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 15 Sep 2024 01:07:27 +0200 Subject: selftests: vDSO: check cpu caps before running chacha test Some archs -- arm64 and s390x -- implemented chacha using instructions that are available most places, but aren't always available. The kernel handles this just fine, but the selftest does not. Check the hwcaps before running, and skip the test if the cpu doesn't support it. As well, on s390x, always emit the fallback instructions of an alternative block, to ensure maximum compatibility. Co-developed-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Jason A. Donenfeld --- tools/include/asm/alternative.h | 10 ++++++++++ tools/testing/selftests/vDSO/vdso_test_chacha.c | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'tools/include') diff --git a/tools/include/asm/alternative.h b/tools/include/asm/alternative.h index 7ce02a223732..8e548ac8f740 100644 --- a/tools/include/asm/alternative.h +++ b/tools/include/asm/alternative.h @@ -2,8 +2,18 @@ #ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H #define _TOOLS_ASM_ALTERNATIVE_ASM_H +#if defined(__s390x__) +#ifdef __ASSEMBLY__ +.macro ALTERNATIVE oldinstr, newinstr, feature + \oldinstr +.endm +#endif +#else + /* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ #define ALTERNATIVE # #endif + +#endif diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c index e81d72c9882e..b1ea532c5996 100644 --- a/tools/testing/selftests/vDSO/vdso_test_chacha.c +++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c @@ -5,11 +5,29 @@ #include #include +#include #include #include #include #include "../kselftest.h" +#if defined(__aarch64__) +static bool cpu_has_capabilities(void) +{ + return getauxval(AT_HWCAP) & HWCAP_ASIMD; +} +#elif defined(__s390x__) +static bool cpu_has_capabilities(void) +{ + return getauxval(AT_HWCAP) & HWCAP_S390_VXRS; +} +#else +static bool cpu_has_capabilities(void) +{ + return true; +} +#endif + static uint32_t rol32(uint32_t word, unsigned int shift) { return (word << (shift & 31)) | (word >> ((-shift) & 31)); @@ -67,6 +85,8 @@ int main(int argc, char *argv[]) uint8_t output1[BLOCK_SIZE * BLOCKS], output2[BLOCK_SIZE * BLOCKS]; ksft_print_header(); + if (!cpu_has_capabilities()) + ksft_exit_skip("Required CPU capabilities missing\n"); ksft_set_plan(1); for (unsigned int trial = 0; trial < TRIALS; ++trial) { -- cgit v1.2.3 From 6d74d178fe6eaf61e384f3be6ba64150bddce8a6 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Tue, 6 Aug 2024 15:01:23 -0700 Subject: tools: Add riscv barrier implementation Many of the other architectures use their custom barrier implementations. Use the barrier code from the kernel sources to optimize barriers in tools. Signed-off-by: Charlie Jenkins Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20240806-optimize_ring_buffer_read_riscv-v2-1-ca7e193ae198@rivosinc.com Signed-off-by: Palmer Dabbelt --- tools/arch/riscv/include/asm/barrier.h | 39 ++++++++++++++++++++++++++++++++++ tools/arch/riscv/include/asm/fence.h | 13 ++++++++++++ tools/include/asm/barrier.h | 2 ++ 3 files changed, 54 insertions(+) create mode 100644 tools/arch/riscv/include/asm/barrier.h create mode 100644 tools/arch/riscv/include/asm/fence.h (limited to 'tools/include') diff --git a/tools/arch/riscv/include/asm/barrier.h b/tools/arch/riscv/include/asm/barrier.h new file mode 100644 index 000000000000..6997f197086d --- /dev/null +++ b/tools/arch/riscv/include/asm/barrier.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copied from the kernel sources to tools/arch/riscv: + * + * Copyright (C) 2012 ARM Ltd. + * Copyright (C) 2013 Regents of the University of California + * Copyright (C) 2017 SiFive + */ + +#ifndef _TOOLS_LINUX_ASM_RISCV_BARRIER_H +#define _TOOLS_LINUX_ASM_RISCV_BARRIER_H + +#include +#include + +/* These barriers need to enforce ordering on both devices and memory. */ +#define mb() RISCV_FENCE(iorw, iorw) +#define rmb() RISCV_FENCE(ir, ir) +#define wmb() RISCV_FENCE(ow, ow) + +/* These barriers do not need to enforce ordering on devices, just memory. */ +#define smp_mb() RISCV_FENCE(rw, rw) +#define smp_rmb() RISCV_FENCE(r, r) +#define smp_wmb() RISCV_FENCE(w, w) + +#define smp_store_release(p, v) \ +do { \ + RISCV_FENCE(rw, w); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + RISCV_FENCE(r, rw); \ + ___p1; \ +}) + +#endif /* _TOOLS_LINUX_ASM_RISCV_BARRIER_H */ diff --git a/tools/arch/riscv/include/asm/fence.h b/tools/arch/riscv/include/asm/fence.h new file mode 100644 index 000000000000..37860e86771d --- /dev/null +++ b/tools/arch/riscv/include/asm/fence.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copied from the kernel sources to tools/arch/riscv: + */ + +#ifndef _ASM_RISCV_FENCE_H +#define _ASM_RISCV_FENCE_H + +#define RISCV_FENCE_ASM(p, s) "\tfence " #p "," #s "\n" +#define RISCV_FENCE(p, s) \ + ({ __asm__ __volatile__ (RISCV_FENCE_ASM(p, s) : : : "memory"); }) + +#endif /* _ASM_RISCV_FENCE_H */ diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h index 8d378c57cb01..0c21678ac5e6 100644 --- a/tools/include/asm/barrier.h +++ b/tools/include/asm/barrier.h @@ -8,6 +8,8 @@ #include "../../arch/arm64/include/asm/barrier.h" #elif defined(__powerpc__) #include "../../arch/powerpc/include/asm/barrier.h" +#elif defined(__riscv) +#include "../../arch/riscv/include/asm/barrier.h" #elif defined(__s390__) #include "../../arch/s390/include/asm/barrier.h" #elif defined(__sh__) -- cgit v1.2.3 From aa5736dc7aa4d6f0e5e4e4147d9aef42bb82deab Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Tue, 6 Aug 2024 15:01:24 -0700 Subject: tools: Optimize ring buffer for riscv Now that the riscv tools tree supports optimized barriers, use them in the ring buffer. Signed-off-by: Charlie Jenkins Reviewed-by: Andrea Parri Link: https://lore.kernel.org/r/20240806-optimize_ring_buffer_read_riscv-v2-2-ca7e193ae198@rivosinc.com Signed-off-by: Palmer Dabbelt --- tools/include/linux/ring_buffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/linux/ring_buffer.h b/tools/include/linux/ring_buffer.h index 6c02617377c2..a74c397359c7 100644 --- a/tools/include/linux/ring_buffer.h +++ b/tools/include/linux/ring_buffer.h @@ -55,7 +55,7 @@ static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base) * READ_ONCE() + smp_mb() pair. */ #if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \ - defined(__ia64__) || defined(__sparc__) && defined(__arch64__) + defined(__ia64__) || defined(__sparc__) && defined(__arch64__) || defined(__riscv) return smp_load_acquire(&base->data_head); #else u64 head = READ_ONCE(base->data_head); -- cgit v1.2.3 From e08ec26928554c36e34e089f663dc9114d77b68c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 19 Sep 2024 14:40:08 +0200 Subject: tools: Add additional SYM_*() stubs to linkage.h Similar to commit f8d92fc527ff ("selftests: vDSO: fix include order in build of test_vdso_chacha") add SYM_DATA_START, SYM_DATA_START_LOCAL, and SYM_DATA_END stubs to tools/include/linux/linkage.h so that the proper macros can be used within the kernel's vdso code as well as in the vdso_test_chacha selftest. Signed-off-by: Heiko Carstens Reviewed-by: Jens Remus Signed-off-by: Vasily Gorbik --- tools/include/linux/linkage.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/linux/linkage.h b/tools/include/linux/linkage.h index a48ff086899c..a89620c550ed 100644 --- a/tools/include/linux/linkage.h +++ b/tools/include/linux/linkage.h @@ -2,7 +2,9 @@ #define _TOOLS_INCLUDE_LINUX_LINKAGE_H #define SYM_FUNC_START(x) .globl x; x: - #define SYM_FUNC_END(x) +#define SYM_DATA_START(x) .globl x; x: +#define SYM_DATA_START_LOCAL(x) x: +#define SYM_DATA_END(x) #endif /* _TOOLS_INCLUDE_LINUX_LINKAGE_H */ -- cgit v1.2.3