From 2cb16181a1d1f93a88f2b4640e7638fc0549da93 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 21 Mar 2024 14:05:06 -0400 Subject: x86/boot: Simplify boot stack setup Define the symbol __top_init_kernel_stack instead of duplicating the offset from __end_init_task in multiple places. Signed-off-by: Brian Gerst Signed-off-by: Ingo Molnar Cc: Kees Cook Cc: Uros Bizjak Cc: Linus Torvalds Cc: Andy Lutomirski Link: https://lore.kernel.org/r/20240321180506.89030-1-brgerst@gmail.com --- arch/x86/include/asm/processor.h | 6 ++---- arch/x86/kernel/head_32.S | 11 +---------- arch/x86/kernel/head_64.S | 2 +- arch/x86/kernel/vmlinux.lds.S | 3 +++ arch/x86/xen/xen-head.S | 2 +- 5 files changed, 8 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 811548f131f4..7fa01d951365 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -636,12 +636,10 @@ static __always_inline void prefetchw(const void *x) #define KSTK_ESP(task) (task_pt_regs(task)->sp) #else -extern unsigned long __end_init_task[]; +extern unsigned long __top_init_kernel_stack[]; #define INIT_THREAD { \ - .sp = (unsigned long)&__end_init_task - \ - TOP_OF_KERNEL_STACK_PADDING - \ - sizeof(struct pt_regs), \ + .sp = (unsigned long)&__top_init_kernel_stack, \ } extern unsigned long KSTK_ESP(struct task_struct *task); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index b50f3641c4d6..a9de527ba5c4 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -44,9 +44,6 @@ #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id - -#define SIZEOF_PTREGS 17*4 - /* * Worst-case size of the kernel mapping we need to make: * a relocatable kernel can live anywhere in lowmem, so we need to be able @@ -488,13 +485,7 @@ SYM_DATA_END(initial_page_table) .data .balign 4 -/* - * The SIZEOF_PTREGS gap is a convention which helps the in-kernel unwinder - * reliably detect the end of the stack. - */ -SYM_DATA(initial_stack, - .long init_thread_union + THREAD_SIZE - - SIZEOF_PTREGS - TOP_OF_KERNEL_STACK_PADDING) +SYM_DATA(initial_stack, .long __top_init_kernel_stack) __INITRODATA int_msg: diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index d8198fbd70e5..b11526869a40 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -66,7 +66,7 @@ SYM_CODE_START_NOALIGN(startup_64) mov %rsi, %r15 /* Set up the stack for verify_cpu() */ - leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp + leaq __top_init_kernel_stack(%rip), %rsp /* Setup GSBASE to allow stack canary access for C code */ movl $MSR_GS_BASE, %ecx diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 56451fd2099e..91085c3430de 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -172,6 +172,9 @@ SECTIONS /* init_task */ INIT_TASK_DATA(THREAD_SIZE) + /* equivalent to task_pt_regs(&init_task) */ + __top_init_kernel_stack = __end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE; + #ifdef CONFIG_X86_32 /* 32 bit has nosave before _edata */ NOSAVE_DATA diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 04101b984f24..758bcd47b72d 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -49,7 +49,7 @@ SYM_CODE_START(startup_xen) ANNOTATE_NOENDBR cld - leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp + leaq __top_init_kernel_stack(%rip), %rsp /* Set up %gs. * -- cgit v1.2.3 From 8f69cba096b5cfd09be2d06c15d08dbd4a58c9d7 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Fri, 22 Mar 2024 01:16:16 -0700 Subject: x86: Rename __{start,end}_init_task to __{start,end}_init_stack The stack of a task has been separated from the memory of a task_struct struture for a long time on x86, as a result __{start,end}_init_task no longer mark the start and end of the init_task structure, but its stack only. Rename __{start,end}_init_task to __{start,end}_init_stack. Note other architectures are not affected because __{start,end}_init_task are used on x86 only. Signed-off-by: Xin Li (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20240322081616.3346181-1-xin@zytor.com --- arch/x86/kernel/vmlinux.lds.S | 2 +- include/asm-generic/vmlinux.lds.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 91085c3430de..4c04a366f07d 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -173,7 +173,7 @@ SECTIONS INIT_TASK_DATA(THREAD_SIZE) /* equivalent to task_pt_regs(&init_task) */ - __top_init_kernel_stack = __end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE; + __top_init_kernel_stack = __end_init_stack - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE; #ifdef CONFIG_X86_32 /* 32 bit has nosave before _edata */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f7749d0f2562..87082434e244 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -399,13 +399,13 @@ #define INIT_TASK_DATA(align) \ . = ALIGN(align); \ - __start_init_task = .; \ + __start_init_stack = .; \ init_thread_union = .; \ init_stack = .; \ KEEP(*(.data..init_task)) \ KEEP(*(.data..init_thread_info)) \ - . = __start_init_task + THREAD_SIZE; \ - __end_init_task = .; + . = __start_init_stack + THREAD_SIZE; \ + __end_init_stack = .; #define JUMP_TABLE_DATA \ . = ALIGN(8); \ -- cgit v1.2.3 From 76e9762d66373354b45c33b60e9a53ef2a3c5ff2 Mon Sep 17 00:00:00 2001 From: Guixiong Wei Date: Sun, 17 Mar 2024 23:05:47 +0800 Subject: x86/boot: Ignore relocations in .notes sections in walk_relocs() too Commit: aaa8736370db ("x86, relocs: Ignore relocations in .notes section") ... only started ignoring the .notes sections in print_absolute_relocs(), but the same logic should also by applied in walk_relocs() to avoid such relocations. [ mingo: Fixed various typos in the changelog, removed extra curly braces from the code. ] Fixes: aaa8736370db ("x86, relocs: Ignore relocations in .notes section") Fixes: 5ead97c84fa7 ("xen: Core Xen implementation") Fixes: da1a679cde9b ("Add /sys/kernel/notes") Signed-off-by: Guixiong Wei Signed-off-by: Ingo Molnar Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20240317150547.24910-1-weiguixiong@bytedance.com --- arch/x86/tools/relocs.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index b029fb81ebee..e7a44a7f617f 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -746,6 +746,15 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { continue; } + + /* + * Do not perform relocations in .notes sections; any + * values there are meant for pre-boot consumption (e.g. + * startup_xen). + */ + if (sec_applies->shdr.sh_type == SHT_NOTE) + continue; + sh_symtab = sec_symtab->symtab; sym_strtab = sec_symtab->link->strtab; for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { -- cgit v1.2.3 From 17608373cf907afe97654998663cc2ab8070e90b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 24 Mar 2024 04:46:10 +0100 Subject: x86/build: Clean up arch/x86/tools/relocs.c a bit So: - Follow Documentation/CodingStyle for: - curly braces - variable definitions - return statements - etc. - Fix unnecessary linebreaks - Don't split user-visible error strings over multiple lines ... - It's fine to use vertical alignment to make code more readable, but it should be internally consistent for definitions visible on a single page ... - There's 40+ die() statements that are basically asserts and never trigger. Make them single-line, to move them out of sight as much as possible. Signed-off-by: Ingo Molnar Cc: linux-kernel@vger.kernel.org --- arch/x86/tools/relocs.c | 362 ++++++++++++++++++++++++------------------------ 1 file changed, 178 insertions(+), 184 deletions(-) (limited to 'arch') diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index e7a44a7f617f..c101bed61940 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -11,41 +11,42 @@ #define Elf_Shdr ElfW(Shdr) #define Elf_Sym ElfW(Sym) -static Elf_Ehdr ehdr; -static unsigned long shnum; -static unsigned int shstrndx; -static unsigned int shsymtabndx; -static unsigned int shxsymtabndx; +static Elf_Ehdr ehdr; +static unsigned long shnum; +static unsigned int shstrndx; +static unsigned int shsymtabndx; +static unsigned int shxsymtabndx; static int sym_index(Elf_Sym *sym); struct relocs { - uint32_t *offset; - unsigned long count; - unsigned long size; + uint32_t *offset; + unsigned long count; + unsigned long size; }; -static struct relocs relocs16; -static struct relocs relocs32; +static struct relocs relocs16; +static struct relocs relocs32; + #if ELF_BITS == 64 -static struct relocs relocs32neg; -static struct relocs relocs64; -#define FMT PRIu64 +static struct relocs relocs32neg; +static struct relocs relocs64; +# define FMT PRIu64 #else -#define FMT PRIu32 +# define FMT PRIu32 #endif struct section { - Elf_Shdr shdr; - struct section *link; - Elf_Sym *symtab; - Elf32_Word *xsymtab; - Elf_Rel *reltab; - char *strtab; + Elf_Shdr shdr; + struct section *link; + Elf_Sym *symtab; + Elf32_Word *xsymtab; + Elf_Rel *reltab; + char *strtab; }; -static struct section *secs; +static struct section *secs; -static const char * const sym_regex_kernel[S_NSYMTYPES] = { +static const char * const sym_regex_kernel[S_NSYMTYPES] = { /* * Following symbols have been audited. There values are constant and do * not change if bzImage is loaded at a different physical address than @@ -115,13 +116,13 @@ static const char * const sym_regex_realmode[S_NSYMTYPES] = { "^pa_", }; -static const char * const *sym_regex; +static const char * const *sym_regex; + +static regex_t sym_regex_c[S_NSYMTYPES]; -static regex_t sym_regex_c[S_NSYMTYPES]; static int is_reloc(enum symtype type, const char *sym_name) { - return sym_regex[type] && - !regexec(&sym_regex_c[type], sym_name, 0, NULL, 0); + return sym_regex[type] && !regexec(&sym_regex_c[type], sym_name, 0, NULL, 0); } static void regex_init(int use_real_mode) @@ -139,8 +140,7 @@ static void regex_init(int use_real_mode) if (!sym_regex[i]) continue; - err = regcomp(&sym_regex_c[i], sym_regex[i], - REG_EXTENDED|REG_NOSUB); + err = regcomp(&sym_regex_c[i], sym_regex[i], REG_EXTENDED|REG_NOSUB); if (err) { regerror(err, &sym_regex_c[i], errbuf, sizeof(errbuf)); @@ -163,9 +163,10 @@ static const char *sym_type(unsigned type) #undef SYM_TYPE }; const char *name = "unknown sym type name"; - if (type < ARRAY_SIZE(type_name)) { + + if (type < ARRAY_SIZE(type_name)) name = type_name[type]; - } + return name; } @@ -179,9 +180,10 @@ static const char *sym_bind(unsigned bind) #undef SYM_BIND }; const char *name = "unknown sym bind name"; - if (bind < ARRAY_SIZE(bind_name)) { + + if (bind < ARRAY_SIZE(bind_name)) name = bind_name[bind]; - } + return name; } @@ -196,9 +198,10 @@ static const char *sym_visibility(unsigned visibility) #undef SYM_VISIBILITY }; const char *name = "unknown sym visibility name"; - if (visibility < ARRAY_SIZE(visibility_name)) { + + if (visibility < ARRAY_SIZE(visibility_name)) name = visibility_name[visibility]; - } + return name; } @@ -244,9 +247,10 @@ static const char *rel_type(unsigned type) #undef REL_TYPE }; const char *name = "unknown type rel type name"; - if (type < ARRAY_SIZE(type_name) && type_name[type]) { + + if (type < ARRAY_SIZE(type_name) && type_name[type]) name = type_name[type]; - } + return name; } @@ -256,15 +260,14 @@ static const char *sec_name(unsigned shndx) const char *name; sec_strtab = secs[shstrndx].strtab; name = ""; - if (shndx < shnum) { + + if (shndx < shnum) name = sec_strtab + secs[shndx].shdr.sh_name; - } - else if (shndx == SHN_ABS) { + else if (shndx == SHN_ABS) name = "ABSOLUTE"; - } - else if (shndx == SHN_COMMON) { + else if (shndx == SHN_COMMON) name = "COMMON"; - } + return name; } @@ -272,18 +275,19 @@ static const char *sym_name(const char *sym_strtab, Elf_Sym *sym) { const char *name; name = ""; - if (sym->st_name) { + + if (sym->st_name) name = sym_strtab + sym->st_name; - } - else { + else name = sec_name(sym_index(sym)); - } + return name; } static Elf_Sym *sym_lookup(const char *symname) { int i; + for (i = 0; i < shnum; i++) { struct section *sec = &secs[i]; long nsyms; @@ -309,14 +313,15 @@ static Elf_Sym *sym_lookup(const char *symname) } #if BYTE_ORDER == LITTLE_ENDIAN -#define le16_to_cpu(val) (val) -#define le32_to_cpu(val) (val) -#define le64_to_cpu(val) (val) +# define le16_to_cpu(val) (val) +# define le32_to_cpu(val) (val) +# define le64_to_cpu(val) (val) #endif + #if BYTE_ORDER == BIG_ENDIAN -#define le16_to_cpu(val) bswap_16(val) -#define le32_to_cpu(val) bswap_32(val) -#define le64_to_cpu(val) bswap_64(val) +# define le16_to_cpu(val) bswap_16(val) +# define le32_to_cpu(val) bswap_32(val) +# define le64_to_cpu(val) bswap_64(val) #endif static uint16_t elf16_to_cpu(uint16_t val) @@ -337,13 +342,13 @@ static uint64_t elf64_to_cpu(uint64_t val) { return le64_to_cpu(val); } -#define elf_addr_to_cpu(x) elf64_to_cpu(x) -#define elf_off_to_cpu(x) elf64_to_cpu(x) -#define elf_xword_to_cpu(x) elf64_to_cpu(x) +# define elf_addr_to_cpu(x) elf64_to_cpu(x) +# define elf_off_to_cpu(x) elf64_to_cpu(x) +# define elf_xword_to_cpu(x) elf64_to_cpu(x) #else -#define elf_addr_to_cpu(x) elf32_to_cpu(x) -#define elf_off_to_cpu(x) elf32_to_cpu(x) -#define elf_xword_to_cpu(x) elf32_to_cpu(x) +# define elf_addr_to_cpu(x) elf32_to_cpu(x) +# define elf_off_to_cpu(x) elf32_to_cpu(x) +# define elf_xword_to_cpu(x) elf32_to_cpu(x) #endif static int sym_index(Elf_Sym *sym) @@ -365,22 +370,17 @@ static int sym_index(Elf_Sym *sym) static void read_ehdr(FILE *fp) { - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) { - die("Cannot read ELF header: %s\n", - strerror(errno)); - } - if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) { + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) + die("Cannot read ELF header: %s\n", strerror(errno)); + if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) die("No ELF magic\n"); - } - if (ehdr.e_ident[EI_CLASS] != ELF_CLASS) { + if (ehdr.e_ident[EI_CLASS] != ELF_CLASS) die("Not a %d bit executable\n", ELF_BITS); - } - if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) { + if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB) die("Not a LSB ELF executable\n"); - } - if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) { + if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) die("Unknown ELF version\n"); - } + /* Convert the fields to native endian */ ehdr.e_type = elf_half_to_cpu(ehdr.e_type); ehdr.e_machine = elf_half_to_cpu(ehdr.e_machine); @@ -439,19 +439,18 @@ static void read_shdrs(FILE *fp) Elf_Shdr shdr; secs = calloc(shnum, sizeof(struct section)); - if (!secs) { - die("Unable to allocate %ld section headers\n", - shnum); - } - if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) { - die("Seek to %" FMT " failed: %s\n", - ehdr.e_shoff, strerror(errno)); - } + if (!secs) + die("Unable to allocate %ld section headers\n", shnum); + + if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) + die("Seek to %" FMT " failed: %s\n", ehdr.e_shoff, strerror(errno)); + for (i = 0; i < shnum; i++) { struct section *sec = &secs[i]; + if (fread(&shdr, sizeof(shdr), 1, fp) != 1) - die("Cannot read ELF section headers %d/%ld: %s\n", - i, shnum, strerror(errno)); + die("Cannot read ELF section headers %d/%ld: %s\n", i, shnum, strerror(errno)); + sec->shdr.sh_name = elf_word_to_cpu(shdr.sh_name); sec->shdr.sh_type = elf_word_to_cpu(shdr.sh_type); sec->shdr.sh_flags = elf_xword_to_cpu(shdr.sh_flags); @@ -471,31 +470,28 @@ static void read_shdrs(FILE *fp) static void read_strtabs(FILE *fp) { int i; + for (i = 0; i < shnum; i++) { struct section *sec = &secs[i]; - if (sec->shdr.sh_type != SHT_STRTAB) { + + if (sec->shdr.sh_type != SHT_STRTAB) continue; - } + sec->strtab = malloc(sec->shdr.sh_size); - if (!sec->strtab) { - die("malloc of %" FMT " bytes for strtab failed\n", - sec->shdr.sh_size); - } - if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { - die("Seek to %" FMT " failed: %s\n", - sec->shdr.sh_offset, strerror(errno)); - } - if (fread(sec->strtab, 1, sec->shdr.sh_size, fp) - != sec->shdr.sh_size) { - die("Cannot read symbol table: %s\n", - strerror(errno)); - } + if (!sec->strtab) + die("malloc of %" FMT " bytes for strtab failed\n", sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %" FMT " failed: %s\n", sec->shdr.sh_offset, strerror(errno)); + + if (fread(sec->strtab, 1, sec->shdr.sh_size, fp) != sec->shdr.sh_size) + die("Cannot read symbol table: %s\n", strerror(errno)); } } static void read_symtabs(FILE *fp) { - int i,j; + int i, j; for (i = 0; i < shnum; i++) { struct section *sec = &secs[i]; @@ -504,19 +500,15 @@ static void read_symtabs(FILE *fp) switch (sec->shdr.sh_type) { case SHT_SYMTAB_SHNDX: sec->xsymtab = malloc(sec->shdr.sh_size); - if (!sec->xsymtab) { - die("malloc of %" FMT " bytes for xsymtab failed\n", - sec->shdr.sh_size); - } - if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { - die("Seek to %" FMT " failed: %s\n", - sec->shdr.sh_offset, strerror(errno)); - } - if (fread(sec->xsymtab, 1, sec->shdr.sh_size, fp) - != sec->shdr.sh_size) { - die("Cannot read extended symbol table: %s\n", - strerror(errno)); - } + if (!sec->xsymtab) + die("malloc of %" FMT " bytes for xsymtab failed\n", sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %" FMT " failed: %s\n", sec->shdr.sh_offset, strerror(errno)); + + if (fread(sec->xsymtab, 1, sec->shdr.sh_size, fp) != sec->shdr.sh_size) + die("Cannot read extended symbol table: %s\n", strerror(errno)); + shxsymtabndx = i; continue; @@ -524,19 +516,15 @@ static void read_symtabs(FILE *fp) num_syms = sec->shdr.sh_size / sizeof(Elf_Sym); sec->symtab = malloc(sec->shdr.sh_size); - if (!sec->symtab) { - die("malloc of %" FMT " bytes for symtab failed\n", - sec->shdr.sh_size); - } - if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { - die("Seek to %" FMT " failed: %s\n", - sec->shdr.sh_offset, strerror(errno)); - } - if (fread(sec->symtab, 1, sec->shdr.sh_size, fp) - != sec->shdr.sh_size) { - die("Cannot read symbol table: %s\n", - strerror(errno)); - } + if (!sec->symtab) + die("malloc of %" FMT " bytes for symtab failed\n", sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %" FMT " failed: %s\n", sec->shdr.sh_offset, strerror(errno)); + + if (fread(sec->symtab, 1, sec->shdr.sh_size, fp) != sec->shdr.sh_size) + die("Cannot read symbol table: %s\n", strerror(errno)); + for (j = 0; j < num_syms; j++) { Elf_Sym *sym = &sec->symtab[j]; @@ -557,28 +545,27 @@ static void read_symtabs(FILE *fp) static void read_relocs(FILE *fp) { - int i,j; + int i, j; + for (i = 0; i < shnum; i++) { struct section *sec = &secs[i]; - if (sec->shdr.sh_type != SHT_REL_TYPE) { + + if (sec->shdr.sh_type != SHT_REL_TYPE) continue; - } + sec->reltab = malloc(sec->shdr.sh_size); - if (!sec->reltab) { - die("malloc of %" FMT " bytes for relocs failed\n", - sec->shdr.sh_size); - } - if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) { - die("Seek to %" FMT " failed: %s\n", - sec->shdr.sh_offset, strerror(errno)); - } - if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) - != sec->shdr.sh_size) { - die("Cannot read symbol table: %s\n", - strerror(errno)); - } + if (!sec->reltab) + die("malloc of %" FMT " bytes for relocs failed\n", sec->shdr.sh_size); + + if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) + die("Seek to %" FMT " failed: %s\n", sec->shdr.sh_offset, strerror(errno)); + + if (fread(sec->reltab, 1, sec->shdr.sh_size, fp) != sec->shdr.sh_size) + die("Cannot read symbol table: %s\n", strerror(errno)); + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { Elf_Rel *rel = &sec->reltab[j]; + rel->r_offset = elf_addr_to_cpu(rel->r_offset); rel->r_info = elf_xword_to_cpu(rel->r_info); #if (SHT_REL_TYPE == SHT_RELA) @@ -601,23 +588,27 @@ static void print_absolute_symbols(void) printf("Absolute symbols\n"); printf(" Num: Value Size Type Bind Visibility Name\n"); + for (i = 0; i < shnum; i++) { struct section *sec = &secs[i]; char *sym_strtab; int j; - if (sec->shdr.sh_type != SHT_SYMTAB) { + if (sec->shdr.sh_type != SHT_SYMTAB) continue; - } + sym_strtab = sec->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Sym); j++) { Elf_Sym *sym; const char *name; + sym = &sec->symtab[j]; name = sym_name(sym_strtab, sym); - if (sym->st_shndx != SHN_ABS) { + + if (sym->st_shndx != SHN_ABS) continue; - } + printf(format, j, sym->st_value, sym->st_size, sym_type(ELF_ST_TYPE(sym->st_info)), @@ -645,34 +636,37 @@ static void print_absolute_relocs(void) char *sym_strtab; Elf_Sym *sh_symtab; int j; - if (sec->shdr.sh_type != SHT_REL_TYPE) { + + if (sec->shdr.sh_type != SHT_REL_TYPE) continue; - } + sec_symtab = sec->link; sec_applies = &secs[sec->shdr.sh_info]; - if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { + if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) continue; - } + /* * Do not perform relocations in .notes section; any * values there are meant for pre-boot consumption (e.g. * startup_xen). */ - if (sec_applies->shdr.sh_type == SHT_NOTE) { + if (sec_applies->shdr.sh_type == SHT_NOTE) continue; - } + sh_symtab = sec_symtab->symtab; sym_strtab = sec_symtab->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { Elf_Rel *rel; Elf_Sym *sym; const char *name; + rel = &sec->reltab[j]; sym = &sh_symtab[ELF_R_SYM(rel->r_info)]; name = sym_name(sym_strtab, sym); - if (sym->st_shndx != SHN_ABS) { + + if (sym->st_shndx != SHN_ABS) continue; - } /* Absolute symbols are not relocated if bzImage is * loaded at a non-compiled address. Display a warning @@ -691,10 +685,8 @@ static void print_absolute_relocs(void) continue; if (!printed) { - printf("WARNING: Absolute relocations" - " present\n"); - printf("Offset Info Type Sym.Value " - "Sym.Name\n"); + printf("WARNING: Absolute relocations present\n"); + printf("Offset Info Type Sym.Value Sym.Name\n"); printed = 1; } @@ -718,8 +710,8 @@ static void add_reloc(struct relocs *r, uint32_t offset) void *mem = realloc(r->offset, newsize * sizeof(r->offset[0])); if (!mem) - die("realloc of %ld entries for relocs failed\n", - newsize); + die("realloc of %ld entries for relocs failed\n", newsize); + r->offset = mem; r->size = newsize; } @@ -730,6 +722,7 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, const char *symname)) { int i; + /* Walk through the relocations */ for (i = 0; i < shnum; i++) { char *sym_strtab; @@ -738,14 +731,13 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, int j; struct section *sec = &secs[i]; - if (sec->shdr.sh_type != SHT_REL_TYPE) { + if (sec->shdr.sh_type != SHT_REL_TYPE) continue; - } + sec_symtab = sec->link; sec_applies = &secs[sec->shdr.sh_info]; - if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) { + if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) continue; - } /* * Do not perform relocations in .notes sections; any @@ -757,6 +749,7 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, sh_symtab = sec_symtab->symtab; sym_strtab = sec_symtab->link->strtab; + for (j = 0; j < sec->shdr.sh_size/sizeof(Elf_Rel); j++) { Elf_Rel *rel = &sec->reltab[j]; Elf_Sym *sym = &sh_symtab[ELF_R_SYM(rel->r_info)]; @@ -790,14 +783,16 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, * kernel data and does not require special treatment. * */ -static int per_cpu_shndx = -1; +static int per_cpu_shndx = -1; static Elf_Addr per_cpu_load_addr; static void percpu_init(void) { int i; + for (i = 0; i < shnum; i++) { ElfW(Sym) *sym; + if (strcmp(sec_name(i), ".data..percpu")) continue; @@ -810,6 +805,7 @@ static void percpu_init(void) per_cpu_shndx = i; per_cpu_load_addr = sym->st_value; + return; } } @@ -880,8 +876,7 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, * Only used by jump labels */ if (is_percpu_sym(sym, symname)) - die("Invalid R_X86_64_PC64 relocation against per-CPU symbol %s\n", - symname); + die("Invalid R_X86_64_PC64 relocation against per-CPU symbol %s\n", symname); break; case R_X86_64_32: @@ -901,8 +896,7 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, if (is_reloc(S_ABS, symname)) break; - die("Invalid absolute %s relocation: %s\n", - rel_type(r_type), symname); + die("Invalid absolute %s relocation: %s\n", rel_type(r_type), symname); break; } @@ -922,8 +916,7 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, break; default: - die("Unsupported relocation type: %s (%d)\n", - rel_type(r_type), r_type); + die("Unsupported relocation type: %s (%d)\n", rel_type(r_type), r_type); break; } @@ -960,8 +953,7 @@ static int do_reloc32(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, if (is_reloc(S_ABS, symname)) break; - die("Invalid absolute %s relocation: %s\n", - rel_type(r_type), symname); + die("Invalid absolute %s relocation: %s\n", rel_type(r_type), symname); break; } @@ -969,16 +961,14 @@ static int do_reloc32(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, break; default: - die("Unsupported relocation type: %s (%d)\n", - rel_type(r_type), r_type); + die("Unsupported relocation type: %s (%d)\n", rel_type(r_type), r_type); break; } return 0; } -static int do_reloc_real(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, - const char *symname) +static int do_reloc_real(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, const char *symname) { unsigned r_type = ELF32_R_TYPE(rel->r_info); int shn_abs = (sym->st_shndx == SHN_ABS) && !is_reloc(S_REL, symname); @@ -1013,9 +1003,7 @@ static int do_reloc_real(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, if (!is_reloc(S_LIN, symname)) break; } - die("Invalid %s %s relocation: %s\n", - shn_abs ? "absolute" : "relative", - rel_type(r_type), symname); + die("Invalid %s %s relocation: %s\n", shn_abs ? "absolute" : "relative", rel_type(r_type), symname); break; case R_386_32: @@ -1036,14 +1024,11 @@ static int do_reloc_real(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, add_reloc(&relocs32, rel->r_offset); break; } - die("Invalid %s %s relocation: %s\n", - shn_abs ? "absolute" : "relative", - rel_type(r_type), symname); + die("Invalid %s %s relocation: %s\n", shn_abs ? "absolute" : "relative", rel_type(r_type), symname); break; default: - die("Unsupported relocation type: %s (%d)\n", - rel_type(r_type), r_type); + die("Unsupported relocation type: %s (%d)\n", rel_type(r_type), r_type); break; } @@ -1055,7 +1040,10 @@ static int do_reloc_real(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, static int cmp_relocs(const void *va, const void *vb) { const uint32_t *a, *b; - a = va; b = vb; + + a = va; + b = vb; + return (*a == *b)? 0 : (*a > *b)? 1 : -1; } @@ -1069,6 +1057,7 @@ static int write32(uint32_t v, FILE *f) unsigned char buf[4]; put_unaligned_le32(v, buf); + return fwrite(buf, 1, 4, f) == 4 ? 0 : -1; } @@ -1081,8 +1070,7 @@ static void emit_relocs(int as_text, int use_real_mode) { int i; int (*write_reloc)(uint32_t, FILE *) = write32; - int (*do_reloc)(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, - const char *symname); + int (*do_reloc)(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, const char *symname); #if ELF_BITS == 64 if (!use_real_mode) @@ -1169,6 +1157,7 @@ static int do_reloc_info(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, rel_type(ELF_R_TYPE(rel->r_info)), symname, sec_name(sym_index(sym))); + return 0; } @@ -1194,19 +1183,24 @@ void process(FILE *fp, int use_real_mode, int as_text, read_strtabs(fp); read_symtabs(fp); read_relocs(fp); + if (ELF_BITS == 64) percpu_init(); + if (show_absolute_syms) { print_absolute_symbols(); return; } + if (show_absolute_relocs) { print_absolute_relocs(); return; } + if (show_reloc_info) { print_reloc_info(); return; } + emit_relocs(as_text, use_real_mode); } -- cgit v1.2.3 From 4faa0e5d6d79fc4c6e1943e8b62a65744d8439a0 Mon Sep 17 00:00:00 2001 From: Julian Stecklina Date: Thu, 28 Mar 2024 16:42:12 +0100 Subject: x86/boot: Move kernel cmdline setup earlier in the boot process (again) When split_lock_detect=off (or similar) is specified in CONFIG_CMDLINE, its effect is lost. The flow is currently this: setup_arch(): -> early_cpu_init() -> early_identify_cpu() -> sld_setup() -> sld_state_setup() -> Looks for split_lock_detect in boot_command_line -> e820__memory_setup() -> Assemble final command line: boot_command_line = builtin_cmdline + boot_cmdline -> parse_early_param() There were earlier attempts at fixing this in: 8d48bf8206f7 ("x86/boot: Pull up cmdline preparation and early param parsing") later reverted in: fbe618399854 ("Revert "x86/boot: Pull up cmdline preparation and early param parsing"") ... because parse_early_param() can't be called before e820__memory_setup(). In this patch, we just move the command line concatenation to the beginning of early_cpu_init(). This should fix sld_state_setup(), while not running in the same issues as the earlier attempt. The order is now: setup_arch(): -> Assemble final command line: boot_command_line = builtin_cmdline + boot_cmdline -> early_cpu_init() -> early_identify_cpu() -> sld_setup() -> sld_state_setup() -> Looks for split_lock_detect in boot_command_line -> e820__memory_setup() -> parse_early_param() Signed-off-by: Julian Stecklina Signed-off-by: Ingo Molnar Cc: Kees Cook Cc: linux-kernel@vger.kernel.org --- arch/x86/kernel/setup.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3e1e96efadfe..4c35f1b2a0f4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -753,6 +753,22 @@ void __init setup_arch(char **cmdline_p) boot_cpu_data.x86_phys_bits = MAX_PHYSMEM_BITS; #endif +#ifdef CONFIG_CMDLINE_BOOL +#ifdef CONFIG_CMDLINE_OVERRIDE + strscpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); +#else + if (builtin_cmdline[0]) { + /* append boot loader cmdline to builtin */ + strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); + strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); + strscpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + } +#endif +#endif + + strscpy(command_line, boot_command_line, COMMAND_LINE_SIZE); + *cmdline_p = command_line; + /* * If we have OLPC OFW, we might end up relocating the fixmap due to * reserve_top(), so do this before touching the ioremap area. @@ -832,22 +848,6 @@ void __init setup_arch(char **cmdline_p) bss_resource.start = __pa_symbol(__bss_start); bss_resource.end = __pa_symbol(__bss_stop)-1; -#ifdef CONFIG_CMDLINE_BOOL -#ifdef CONFIG_CMDLINE_OVERRIDE - strscpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); -#else - if (builtin_cmdline[0]) { - /* append boot loader cmdline to builtin */ - strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); - strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); - strscpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); - } -#endif -#endif - - strscpy(command_line, boot_command_line, COMMAND_LINE_SIZE); - *cmdline_p = command_line; - /* * x86_configure_nx() is called before parse_early_param() to detect * whether hardware doesn't support NX (so that the early EHCI debug -- cgit v1.2.3 From a0025f587c685e5ff842fb0194036f2ca0b6eaf4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 10 Apr 2024 17:13:55 +0200 Subject: x86/boot/64: Clear most of CR4 in startup_64(), except PAE, MCE and LA57 The early 64-bit boot code must be entered with a 1:1 mapping of the bootable image, but it cannot operate without a 1:1 mapping of all the assets in memory that it accesses, and therefore, it creates such mappings for all known assets upfront, and additional ones on demand when a page fault happens on a memory address. These mappings are created with the global bit G set, as the flags used to create page table descriptors are based on __PAGE_KERNEL_LARGE_EXEC defined by the core kernel, even though the context where these mappings are used is very different. This means that the TLB maintenance carried out by the decompressor is not sufficient if it is entered with CR4.PGE enabled, which has been observed to happen with the stage0 bootloader of project Oak. While this is a dubious practice if no global mappings are being used to begin with, the decompressor is clearly at fault here for creating global mappings and not performing the appropriate TLB maintenance. Since commit: f97b67a773cd84b ("x86/decompressor: Only call the trampoline when changing paging levels") CR4 is no longer modified by the decompressor if no change in the number of paging levels is needed. Before that, CR4 would always be set to a consistent value with PGE cleared. So let's reinstate a simplified version of the original logic to put CR4 into a known state, and preserve the PAE, MCE and LA57 bits, none of which can be modified freely at this point (PAE and LA57 cannot be changed while running in long mode, and MCE cannot be cleared when running under some hypervisors). This effectively clears PGE and works around the project Oak bug. Fixes: f97b67a773cd84b ("x86/decompressor: Only call the trampoline when ...") Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: "H. Peter Anvin" Link: https://lore.kernel.org/r/20240410151354.506098-2-ardb+git@google.com --- arch/x86/boot/compressed/head_64.S | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index bf4a10a5794f..1dcb794c5479 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -398,6 +398,11 @@ SYM_CODE_START(startup_64) call sev_enable #endif + /* Preserve only the CR4 bits that must be preserved, and clear the rest */ + movq %cr4, %rax + andl $(X86_CR4_PAE | X86_CR4_MCE | X86_CR4_LA57), %eax + movq %rax, %cr4 + /* * configure_5level_paging() updates the number of paging levels using * a trampoline in 32-bit addressable memory if the current number does -- cgit v1.2.3