From 974f221c84b05b1dc2f5ea50dc16d2a9d1e95eda Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Apr 2016 17:09:04 -0700 Subject: x86/boot: Move compressed kernel to the end of the decompression buffer This change makes later calculations about where the kernel is located easier to reason about. To better understand this change, we must first clarify what 'VO' and 'ZO' are. These values were introduced in commits by hpa: 77d1a4999502 ("x86, boot: make symbols from the main vmlinux available") 37ba7ab5e33c ("x86, boot: make kernel_alignment adjustable; new bzImage fields") Specifically: All names prefixed with 'VO_': - relate to the uncompressed kernel image - the size of the VO image is: VO__end-VO__text ("VO_INIT_SIZE" define) All names prefixed with 'ZO_': - relate to the bootable compressed kernel image (boot/compressed/vmlinux), which is composed of the following memory areas: - head text - compressed kernel (VO image and relocs table) - decompressor code - the size of the ZO image is: ZO__end - ZO_startup_32 ("ZO_INIT_SIZE" define, though see below) The 'INIT_SIZE' value is used to find the larger of the two image sizes: #define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset) #define VO_INIT_SIZE (VO__end - VO__text) #if ZO_INIT_SIZE > VO_INIT_SIZE # define INIT_SIZE ZO_INIT_SIZE #else # define INIT_SIZE VO_INIT_SIZE #endif The current code uses extract_offset to decide where to position the copied ZO (i.e. ZO starts at extract_offset). (This is why ZO_INIT_SIZE currently includes the extract_offset.) Why does z_extract_offset exist? It's needed because we are trying to minimize the amount of RAM used for the whole act of creating an uncompressed, executable, properly relocation-linked kernel image in system memory. We do this so that kernels can be booted on even very small systems. To achieve the goal of minimal memory consumption we have implemented an in-place decompression strategy: instead of cleanly separating the VO and ZO images and also allocating some memory for the decompression code's runtime needs, we instead create this elaborate layout of memory buffers where the output (decompressed) stream, as it progresses, overlaps with and destroys the input (compressed) stream. This can only be done safely if the ZO image is placed to the end of the VO range, plus a certain amount of safety distance to make sure that when the last bytes of the VO range are decompressed, the compressed stream pointer is safely beyond the end of the VO range. z_extract_offset is calculated in arch/x86/boot/compressed/mkpiggy.c during the build process, at a point when we know the exact compressed and uncompressed size of the kernel images and can calculate this safe minimum offset value. (Note that the mkpiggy.c calculation is not perfect, because we don't know the decompressor used at that stage, so the z_extract_offset calculation is necessarily imprecise and is mostly based on gzip internals - we'll improve that in the next patch.) When INIT_SIZE is bigger than VO_INIT_SIZE (uncommon but possible), the copied ZO occupies the memory from extract_offset to the end of decompression buffer. It overlaps with the soon-to-be-uncompressed kernel like this: |-----compressed kernel image------| V V 0 extract_offset +INIT_SIZE |-----------|---------------|-------------------------|--------| | | | | VO__text startup_32 of ZO VO__end ZO__end ^ ^ |-------uncompressed kernel image---------| When INIT_SIZE is equal to VO_INIT_SIZE (likely) there's still space left from end of ZO to the end of decompressing buffer, like below. |-compressed kernel image-| V V 0 extract_offset +INIT_SIZE |-----------|---------------|-------------------------|--------| | | | | VO__text startup_32 of ZO ZO__end VO__end ^ ^ |------------uncompressed kernel image-------------| To simplify calculations and avoid special cases, it is cleaner to always place the compressed kernel image in memory so that ZO__end is at the end of the decompression buffer, instead of placing t at the start of extract_offset as is currently done. This patch adds BP_init_size (which is the INIT_SIZE as passed in from the boot_params) into asm-offsets.c to make it visible to the assembly code. Then when moving the ZO, it calculates the starting position of the copied ZO (via BP_init_size and the ZO run size) so that the VO__end will be at the end of the decompression buffer. To make the position calculation safe, the end of ZO is page aligned (and a comment is added to the existing VO alignment for good measure). Signed-off-by: Yinghai Lu [ Rewrote changelog and comments. ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1461888548-32439-3-git-send-email-keescook@chromium.org [ Rewrote the changelog some more. ] Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/mkpiggy.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/boot/compressed/mkpiggy.c') diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c index d8222f213182..b980046c3329 100644 --- a/arch/x86/boot/compressed/mkpiggy.c +++ b/arch/x86/boot/compressed/mkpiggy.c @@ -85,9 +85,6 @@ int main(int argc, char *argv[]) printf("z_output_len = %lu\n", (unsigned long)olen); printf(".globl z_extract_offset\n"); printf("z_extract_offset = 0x%lx\n", offs); - /* z_extract_offset_negative allows simplification of head_32.S */ - printf(".globl z_extract_offset_negative\n"); - printf("z_extract_offset_negative = -0x%lx\n", offs); printf(".globl z_run_size\n"); printf("z_run_size = %lu\n", run_size); -- cgit v1.2.3 From d607251ba9acc0b5faeaa08818f60d041dd19472 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Apr 2016 17:09:05 -0700 Subject: x86/boot: Calculate decompression size during boot not build Currently z_extract_offset is calculated in boot/compressed/mkpiggy.c. This doesn't work well because mkpiggy.c doesn't know the details of the decompressor in use. As a result, it can only make an estimation, which has risks: - output + output_len (VO) could be much bigger than input + input_len (ZO). In this case, the decompressed kernel plus relocs could overwrite the decompression code while it is running. - The head code of ZO could be bigger than z_extract_offset. In this case an overwrite could happen when the head code is running to move ZO to the end of buffer. Though currently the size of the head code is very small it's still a potential risk. Since there is no rule to limit the size of the head code of ZO, it runs the risk of suddenly becoming a (hard to find) bug. Instead, this moves the z_extract_offset calculation into header.S, and makes adjustments to be sure that the above two cases can never happen, and further corrects the comments describing the calculations. Since we have (in the previous patch) made ZO always be located against the end of decompression buffer, z_extract_offset is only used here to calculate an appropriate buffer size (INIT_SIZE), and is not longer used elsewhere. As such, it can be removed from voffset.h. Additionally clean up #if/#else #define to improve readability. Signed-off-by: Yinghai Lu Signed-off-by: Baoquan He [ Rewrote the changelog and comments. ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1461888548-32439-4-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/Makefile | 2 +- arch/x86/boot/compressed/mkpiggy.c | 21 ++++----------------- arch/x86/boot/header.S | 35 +++++++++++++++++++++++++---------- 3 files changed, 30 insertions(+), 28 deletions(-) (limited to 'arch/x86/boot/compressed/mkpiggy.c') diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index b1ef9e489084..942f7dabfb1e 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -95,7 +95,7 @@ targets += voffset.h $(obj)/voffset.h: vmlinux FORCE $(call if_changed,voffset) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c index b980046c3329..f095ed9e7d3c 100644 --- a/arch/x86/boot/compressed/mkpiggy.c +++ b/arch/x86/boot/compressed/mkpiggy.c @@ -18,11 +18,10 @@ * * H. Peter Anvin * - * ----------------------------------------------------------------------- */ - -/* - * Compute the desired load offset from a compressed program; outputs - * a small assembly wrapper with the appropriate symbols defined. + * ----------------------------------------------------------------------- + * + * Outputs a small assembly wrapper with the appropriate symbols defined. + * */ #include @@ -35,7 +34,6 @@ int main(int argc, char *argv[]) { uint32_t olen; long ilen; - unsigned long offs; unsigned long run_size; FILE *f = NULL; int retval = 1; @@ -67,15 +65,6 @@ int main(int argc, char *argv[]) ilen = ftell(f); olen = get_unaligned_le32(&olen); - /* - * Now we have the input (compressed) and output (uncompressed) - * sizes, compute the necessary decompression offset... - */ - - offs = (olen > ilen) ? olen - ilen : 0; - offs += olen >> 12; /* Add 8 bytes for each 32K block */ - offs += 64*1024 + 128; /* Add 64K + 128 bytes slack */ - offs = (offs+4095) & ~4095; /* Round to a 4K boundary */ run_size = atoi(argv[2]); printf(".section \".rodata..compressed\",\"a\",@progbits\n"); @@ -83,8 +72,6 @@ int main(int argc, char *argv[]) printf("z_input_len = %lu\n", ilen); printf(".globl z_output_len\n"); printf("z_output_len = %lu\n", (unsigned long)olen); - printf(".globl z_extract_offset\n"); - printf("z_extract_offset = 0x%lx\n", offs); printf(".globl z_run_size\n"); printf("z_run_size = %lu\n", run_size); diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index fd85b9e4e953..3dd5be33aaa7 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -508,13 +508,10 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr # To avoid problems with the compressed data's meta information an extra 18 # bytes are needed. Leading to the formula: # -# extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size +# extra_bytes = (uncompressed_size >> 12) + 32768 + 18 # # Adding 8 bytes per 32K is a bit excessive but much easier to calculate. # Adding 32768 instead of 32767 just makes for round numbers. -# Adding the decompressor_size is necessary as it musht live after all -# of the data as well. Last I measured the decompressor is about 14K. -# 10K of actual data and 4K of bss. # # Above analysis is for decompressing gzip compressed kernel only. Up to # now 6 different decompressor are supported all together. And among them @@ -524,17 +521,35 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr # the description in lib/decompressor_xxx.c for specific information. # # extra_bytes = (uncompressed_size >> 12) + 65536 + 128 -# -# Note that this calculation, which results in z_extract_offset (below), -# is currently generated in compressed/mkpiggy.c -#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset) +#define ZO_z_extra_bytes ((ZO_z_output_len >> 12) + 65536 + 128) +#if ZO_z_output_len > ZO_z_input_len +# define ZO_z_extract_offset (ZO_z_output_len + ZO_z_extra_bytes - \ + ZO_z_input_len) +#else +# define ZO_z_extract_offset ZO_z_extra_bytes +#endif + +/* + * The extract_offset has to be bigger than ZO head section. Otherwise when + * the head code is running to move ZO to the end of the buffer, it will + * overwrite the head code itself. + */ +#if (ZO__ehead - ZO_startup_32) > ZO_z_extract_offset +# define ZO_z_min_extract_offset ((ZO__ehead - ZO_startup_32 + 4095) & ~4095) +#else +# define ZO_z_min_extract_offset ((ZO_z_extract_offset + 4095) & ~4095) +#endif + +#define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_min_extract_offset) + #define VO_INIT_SIZE (VO__end - VO__text) #if ZO_INIT_SIZE > VO_INIT_SIZE -#define INIT_SIZE ZO_INIT_SIZE +# define INIT_SIZE ZO_INIT_SIZE #else -#define INIT_SIZE VO_INIT_SIZE +# define INIT_SIZE VO_INIT_SIZE #endif + init_size: .long INIT_SIZE # kernel initialization size handover_offset: .long 0 # Filled in by build.c -- cgit v1.2.3 From 4d2d542482205d3df1a0852751f5b004cc6390cc Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 Apr 2016 17:09:07 -0700 Subject: x86/KASLR: Clean up unused code from old 'run_size' and rename it to 'kernel_total_size' Since 'run_size' is now calculated in misc.c, the old script and associated argument passing is no longer needed. This patch removes them, and renames 'run_size' to the more descriptive 'kernel_total_size'. Signed-off-by: Yinghai Lu Signed-off-by: Baoquan He [ Rewrote the changelog, renamed 'run_size' to 'kernel_total_size' ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Triplett Cc: Junjie Mao Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vivek Goyal Cc: lasse.collin@tukaani.org Link: http://lkml.kernel.org/r/1461888548-32439-6-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/Makefile | 4 +--- arch/x86/boot/compressed/head_32.S | 3 +-- arch/x86/boot/compressed/head_64.S | 3 --- arch/x86/boot/compressed/misc.c | 11 ++++------ arch/x86/boot/compressed/mkpiggy.c | 10 ++------- arch/x86/tools/calc_run_size.sh | 42 -------------------------------------- 6 files changed, 8 insertions(+), 65 deletions(-) delete mode 100644 arch/x86/tools/calc_run_size.sh (limited to 'arch/x86/boot/compressed/mkpiggy.c') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 45997805adad..adef26d22224 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -121,10 +121,8 @@ suffix-$(CONFIG_KERNEL_XZ) := xz suffix-$(CONFIG_KERNEL_LZO) := lzo suffix-$(CONFIG_KERNEL_LZ4) := lz4 -RUN_SIZE = $(shell $(OBJDUMP) -h vmlinux | \ - $(CONFIG_SHELL) $(srctree)/arch/x86/tools/calc_run_size.sh) quiet_cmd_mkpiggy = MKPIGGY $@ - cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false ) + cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false ) targets += piggy.S $(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 8a95e2f845aa..1038524270e7 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -238,7 +238,6 @@ relocated: * Do the extraction, and jump to the new kernel.. */ /* push arguments for extract_kernel: */ - pushl $z_run_size /* size of kernel with .bss and .brk */ pushl $z_output_len /* decompressed length, end of relocs */ movl BP_init_size(%esi), %eax @@ -254,7 +253,7 @@ relocated: pushl %eax /* heap area */ pushl %esi /* real mode pointer */ call extract_kernel /* returns kernel location in %eax */ - addl $28, %esp + addl $24, %esp /* * Jump to the extracted kernel. diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 09cdc0c3ee7e..7c047002950c 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -415,8 +415,6 @@ relocated: * Do the extraction, and jump to the new kernel.. */ pushq %rsi /* Save the real mode argument */ - movq $z_run_size, %r9 /* size of kernel with .bss and .brk */ - pushq %r9 movq %rsi, %rdi /* real mode address */ leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ leaq input_data(%rip), %rdx /* input_data */ @@ -424,7 +422,6 @@ relocated: movq %rbp, %r8 /* output target address */ movq $z_output_len, %r9 /* decompressed length, end of relocs */ call extract_kernel /* returns kernel location in %rax */ - popq %r9 popq %rsi /* diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index cda93d16ad4d..bee6238e7cfc 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -340,9 +340,9 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, unsigned char *input_data, unsigned long input_len, unsigned char *output, - unsigned long output_len, - unsigned long run_size) + unsigned long output_len) { + const unsigned long kernel_total_size = VO__end - VO__text; unsigned char *output_orig = output; /* Retain x86 boot parameters pointer passed from startup_32/64. */ @@ -364,8 +364,6 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, lines = boot_params->screen_info.orig_video_lines; cols = boot_params->screen_info.orig_video_cols; - run_size = VO__end - VO__text; - console_init(); debug_putstr("early console in extract_kernel\n"); @@ -377,7 +375,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, debug_putaddr(input_len); debug_putaddr(output); debug_putaddr(output_len); - debug_putaddr(run_size); + debug_putaddr(kernel_total_size); /* * The memory hole needed for the kernel is the larger of either @@ -385,8 +383,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, * entire decompressed kernel plus .bss and .brk sections. */ output = choose_random_location(input_data, input_len, output, - output_len > run_size ? output_len - : run_size); + max(output_len, kernel_total_size)); /* Validate memory location choices. */ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c index f095ed9e7d3c..72bad2c8debe 100644 --- a/arch/x86/boot/compressed/mkpiggy.c +++ b/arch/x86/boot/compressed/mkpiggy.c @@ -34,13 +34,11 @@ int main(int argc, char *argv[]) { uint32_t olen; long ilen; - unsigned long run_size; FILE *f = NULL; int retval = 1; - if (argc < 3) { - fprintf(stderr, "Usage: %s compressed_file run_size\n", - argv[0]); + if (argc < 2) { + fprintf(stderr, "Usage: %s compressed_file\n", argv[0]); goto bail; } @@ -65,15 +63,11 @@ int main(int argc, char *argv[]) ilen = ftell(f); olen = get_unaligned_le32(&olen); - run_size = atoi(argv[2]); - printf(".section \".rodata..compressed\",\"a\",@progbits\n"); printf(".globl z_input_len\n"); printf("z_input_len = %lu\n", ilen); printf(".globl z_output_len\n"); printf("z_output_len = %lu\n", (unsigned long)olen); - printf(".globl z_run_size\n"); - printf("z_run_size = %lu\n", run_size); printf(".globl input_data, input_data_end\n"); printf("input_data:\n"); diff --git a/arch/x86/tools/calc_run_size.sh b/arch/x86/tools/calc_run_size.sh deleted file mode 100644 index 1a4c17bb3910..000000000000 --- a/arch/x86/tools/calc_run_size.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/sh -# -# Calculate the amount of space needed to run the kernel, including room for -# the .bss and .brk sections. -# -# Usage: -# objdump -h a.out | sh calc_run_size.sh - -NUM='\([0-9a-fA-F]*[ \t]*\)' -OUT=$(sed -n 's/^[ \t0-9]*.b[sr][sk][ \t]*'"$NUM$NUM$NUM$NUM"'.*/\1\4/p') -if [ -z "$OUT" ] ; then - echo "Never found .bss or .brk file offset" >&2 - exit 1 -fi - -OUT=$(echo ${OUT# }) -sizeA=$(printf "%d" 0x${OUT%% *}) -OUT=${OUT#* } -offsetA=$(printf "%d" 0x${OUT%% *}) -OUT=${OUT#* } -sizeB=$(printf "%d" 0x${OUT%% *}) -OUT=${OUT#* } -offsetB=$(printf "%d" 0x${OUT%% *}) - -run_size=$(( $offsetA + $sizeA + $sizeB )) - -# BFD linker shows the same file offset in ELF. -if [ "$offsetA" -ne "$offsetB" ] ; then - # Gold linker shows them as consecutive. - endB=$(( $offsetB + $sizeB )) - if [ "$endB" != "$run_size" ] ; then - printf "sizeA: 0x%x\n" $sizeA >&2 - printf "offsetA: 0x%x\n" $offsetA >&2 - printf "sizeB: 0x%x\n" $sizeB >&2 - printf "offsetB: 0x%x\n" $offsetB >&2 - echo ".bss and .brk are non-contiguous" >&2 - exit 1 - fi -fi - -printf "%d\n" $run_size -exit 0 -- cgit v1.2.3