From 67e3f828bd4bf5e4eb4214dc4eb227d8f1c8a877 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 12:28:01 +0300 Subject: ARM: efistub: replace adrl pseudo-op with adr_l macro invocation The ARM 'adrl' pseudo instruction is a bit problematic, as it does not exist in Thumb mode, and it is not implemented by Clang either. Since the Thumb variant of 'adr' has a slightly bigger range, it is sometimes necessary to emit the 'adrl' variant in ARM mode where Thumb mode can use 'adr' just fine. However, that still leaves the Clang issue, and Clang does not appear to be gaining 'adrl' support any time soon. So let's switch to the adr_l macro, which works for both ARM and Thumb, and has unlimited range. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/boot/compressed/head.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 2e04ec5b5446..5b591dacbaaf 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1440,8 +1440,7 @@ ENTRY(efi_enter_kernel) mov r4, r0 @ preserve image base mov r8, r1 @ preserve DT pointer - ARM( adrl r0, call_cache_fn ) - THUMB( adr r0, call_cache_fn ) + adr_l r0, call_cache_fn adr r1, 0f @ clean the region of code we bl cache_clean_flush @ may run with the MMU off -- cgit v1.2.3
From 62c4a2e202b18e1d7176875b7e7af240f340596b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:06 +0300 Subject: ARM: head-common.S: use PC-relative insn sequence for __proc_info Replace the open coded PC relative offset calculations with a pair of adr_l invocations. This removes some open coded arithmetic involving virtual addresses, avoids literal pools on v7+, and slightly reduces the footprint of the code. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head-common.S | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 4a3982812a40..9a5ab6c19568 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -170,11 +170,12 @@ ENDPROC(lookup_processor_type) * r9 = cpuid (preserved) */ __lookup_processor_type: - adr r3, __lookup_processor_type_data - ldmia r3, {r4 - r6} - sub r3, r3, r4 @ get offset between virt&phys - add r5, r5, r3 @ convert virt addresses to - add r6, r6, r3 @ physical address space + /* + * Look in <asm/procinfo.h> for information about the __proc_info + * structure. + */ + adr_l r5, __proc_info_begin + adr_l r6, __proc_info_end 1: ldmia r5, {r3, r4} @ value, mask and r4, r4, r9 @ mask wanted bits teq r3, r4 @@ -186,17 +187,6 @@ __lookup_processor_type: 2: ret lr ENDPROC(__lookup_processor_type) -/* - * Look in <asm/procinfo.h> for information about the __proc_info structure. - */ - .align 2 - .type __lookup_processor_type_data, %object -__lookup_processor_type_data: - .long . - .long __proc_info_begin - .long __proc_info_end - .size __lookup_processor_type_data, . - __lookup_processor_type_data - __error_lpae: #ifdef CONFIG_DEBUG_LL adr r0, str_lpae -- cgit v1.2.3
From 172c34c9ff0144c3e1d96a9b54d6fecfe5d17c3c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:16 +0300 Subject: ARM: head-common.S: use PC-relative insn sequence for idmap creation Replace the open coded PC relative offset calculations involving __turn_mmu_on and __turn_mmu_on_end with a pair of adr_l invocations. 
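For reference, on v7 and later each adr_l invocation expands to a movw/movt pair plus an add of the program counter. Below is a rough ARM-mode sketch of the expansion, not the verbatim macro from arch/arm/include/asm/assembler.h (the real one also covers pre-v7 cores, Thumb-2, and conditional execution); the labels are invented for the example:

	@ sketch of what "adr_l r0, sym" assembles to in ARM mode on v7+
	movw	r0, #:lower16:sym - .Lpc0	@ low half of the PC-relative offset
	movt	r0, #:upper16:sym - .Lpc0	@ high half: full 32-bit range
	.set	.Lpc0, . + 8			@ reading pc yields '.' + 8 in ARM mode
	add	r0, r0, pc			@ r0 = runtime address of sym

Since the offset is fixed at link time while the final add uses the runtime pc, the sequence yields the correct address whether or not the code is running at its link-time virtual address.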
This removes some open coded arithmetic involving virtual addresses, avoids literal pools on v7+, and slightly reduces the footprint of the code. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head.S | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 7e3f36809011..f5a636fee9d0 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -224,11 +224,8 @@ __create_page_tables: * Create identity mapping to cater for __enable_mmu. * This identity mapping will be removed by paging_init(). */ - adr r0, __turn_mmu_on_loc - ldmia r0, {r3, r5, r6} - sub r0, r0, r3 @ virt->phys offset - add r5, r5, r0 @ phys __turn_mmu_on - add r6, r6, r0 @ phys __turn_mmu_on_end + adr_l r5, __turn_mmu_on @ _pa(__turn_mmu_on) + adr_l r6, __turn_mmu_on_end @ _pa(__turn_mmu_on_end) mov r5, r5, lsr #SECTION_SHIFT mov r6, r6, lsr #SECTION_SHIFT @@ -351,11 +348,6 @@ __create_page_tables: ret lr ENDPROC(__create_page_tables) .ltorg - .align -__turn_mmu_on_loc: - .long . - .long __turn_mmu_on - .long __turn_mmu_on_end #if defined(CONFIG_SMP) .text -- cgit v1.2.3 From 91580f0dbf24c6d616091526a900213bc7aa48fe Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:23 +0300 Subject: ARM: head.S: use PC-relative insn sequence for secondary_data Replace the open coded PC relative offset calculations with adr_l and ldr_l invocations. This removes some open coded arithmetic involving virtual addresses, avoids literal pools on v7+, and slightly reduces the footprint of the code. Note that it also removes a stale comment about the contents of r6. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head.S | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index f5a636fee9d0..478506b2d51f 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -383,10 +383,8 @@ ENTRY(secondary_startup) /* * Use the page tables supplied from __cpu_up. */ - adr r4, __secondary_data - ldmia r4, {r5, r7, r12} @ address to jump to after - sub lr, r4, r5 @ mmu has been enabled - add r3, r7, lr + adr_l r3, secondary_data + mov_l r12, __secondary_switched ldrd r4, r5, [r3, #0] @ get secondary_data.pgdir ARM_BE8(eor r4, r4, r5) @ Swap r5 and r4 in BE: ARM_BE8(eor r5, r4, r5) @ it can be done in 3 steps @@ -401,22 +399,13 @@ ARM_BE8(eor r4, r4, r5) @ without using a temp reg. ENDPROC(secondary_startup) ENDPROC(secondary_startup_arm) - /* - * r6 = &secondary_data - */ ENTRY(__secondary_switched) - ldr sp, [r7, #12] @ get secondary_data.stack + ldr_l r7, secondary_data + 12 @ get secondary_data.stack + mov sp, r7 mov fp, #0 b secondary_start_kernel ENDPROC(__secondary_switched) - .align - - .type __secondary_data, %object -__secondary_data: - .long . - .long secondary_data - .long __secondary_switched #endif /* defined(CONFIG_SMP) */ -- cgit v1.2.3 From 450abd38fe6c6313ce9bdd9dce81c1dd604f6fb0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:48:20 +0300 Subject: ARM: kernel: use relative references for UP/SMP alternatives Currently, the .alt.smp.init section contains the virtual addresses of the patch sites. Since patching may occur both before and after switching into virtual mode, this requires some manual handling of the address when applying the UP alternative. 
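Concretely, an absolute entry cannot be used as-is when the fixup loop runs with the MMU off; a toy version of the old scheme (labels invented for the example) looks like this:

	@ a patch site and its table entry, old style
site:	nop				@ SMP instruction to be patched for UP
	.pushsection ".alt.smp.init", "a"
	.long	site			@ absolute, link-time (virtual) address
	.popsection

	@ applying the alternative: r4 = table entry, r6 = UP replacement,
	@ r3 = virt->phys offset the caller had to compute and pass in
	@ (zero once the MMU is on)
	ldr	r0, [r4]		@ r0 = link-time address of site
	str	r6, [r0, r3]		@ patch at link-time address + offset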
Let's simplify this by using relative offsets in the table entries: this allows us to simply add each entry's address to its contents, regardless of whether we are running in virtual mode or not. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/assembler.h | 4 ++-- arch/arm/include/asm/processor.h | 2 +- arch/arm/kernel/head.S | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 72627c5fb3b2..6ed30421f697 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -259,7 +259,7 @@ */ #define ALT_UP(instr...) \ .pushsection ".alt.smp.init", "a" ;\ - .long 9998b ;\ + .long 9998b - . ;\ 9997: instr ;\ .if . - 9997b == 2 ;\ nop ;\ @@ -270,7 +270,7 @@ .popsection #define ALT_UP_B(label) \ .pushsection ".alt.smp.init", "a" ;\ - .long 9998b ;\ + .long 9998b - . ;\ W(b) . + (label - 9998b) ;\ .popsection #else diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index b9241051e5cb..9e6b97286307 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -96,7 +96,7 @@ unsigned long get_wchan(struct task_struct *p); #define __ALT_SMP_ASM(smp, up) \ "9998: " smp "\n" \ " .pushsection \".alt.smp.init\", \"a\"\n" \ - " .long 9998b\n" \ + " .long 9998b - .\n" \ " " up "\n" \ " .popsection\n" #else diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 478506b2d51f..cdc79fcee43e 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -546,14 +546,15 @@ smp_on_up: __do_fixup_smp_on_up: cmp r4, r5 reths lr - ldmia r4!, {r0, r6} - ARM( str r6, [r0, r3] ) - THUMB( add r0, r0, r3 ) + ldmia r4, {r0, r6} + ARM( str r6, [r0, r4] ) + THUMB( add r0, r0, r4 ) + add r4, r4, #8 #ifdef __ARMEB__ THUMB( mov r6, r6, ror #16 ) @ Convert word order for big-endian. #endif THUMB( strh r6, [r0], #2 ) @ For Thumb-2, store as two halfwords - THUMB( mov r6, r6, lsr #16 ) @ to be robust against misaligned r3. + THUMB( mov r6, r6, lsr #16 ) @ to be robust against misaligned r0. THUMB( strh r6, [r0] ) b __do_fixup_smp_on_up ENDPROC(__do_fixup_smp_on_up) @@ -562,7 +563,6 @@ ENTRY(fixup_smp) stmfd sp!, {r4 - r6, lr} mov r4, r0 add r5, r0, r1 - mov r3, #0 bl __do_fixup_smp_on_up ldmfd sp!, {r4 - r6, pc} ENDPROC(fixup_smp) -- cgit v1.2.3 From 59d2f2827dfdccf8911d5e51465136b52ba623c4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:29 +0300 Subject: ARM: head: use PC-relative insn sequence for __smp_alt Now that calling __do_fixup_smp_on_up() can be done without passing the physical-to-virtual offset in r3, we can replace the open coded PC relative offset calculations with a pair of adr_l invocations. This removes some open coded arithmetic involving virtual addresses, avoids literal pools on v7+, and slightly reduces the footprint of the code. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head.S | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index cdc79fcee43e..5e031a0bf9a9 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -520,19 +520,11 @@ ARM_BE8(rev r0, r0) @ byteswap if big endian retne lr __fixup_smp_on_up: - adr r0, 1f - ldmia r0, {r3 - r5} - sub r3, r0, r3 - add r4, r4, r3 - add r5, r5, r3 + adr_l r4, __smpalt_begin + adr_l r5, __smpalt_end b __do_fixup_smp_on_up ENDPROC(__fixup_smp) - .align -1: .word . 
- .word __smpalt_begin - .word __smpalt_end - .pushsection .data .align 2 .globl smp_on_up -- cgit v1.2.3 From d74d2b225018baa0e04e080ee9e80b21667ba3a2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:34 +0300 Subject: ARM: sleep.S: use PC-relative insn sequence for sleep_save_sp/mpidr_hash Replace the open coded PC relative offset calculations with adr_l and ldr_l invocations. This removes some open coded PC relative arithmetic, avoids literal pools on v7+, and slightly reduces the footprint of the code. Note that ALT_SMP() expects a single instruction so move the macro invocation after it. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/sleep.S | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 5dc8b80bb693..43077e11dafd 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -72,8 +72,9 @@ ENTRY(__cpu_suspend) ldr r3, =sleep_save_sp stmfd sp!, {r0, r1} @ save suspend func arg and pointer ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] - ALT_SMP(ldr r0, =mpidr_hash) + ALT_SMP(W(nop)) @ don't use adr_l inside ALT_SMP() ALT_UP_B(1f) + adr_l r0, mpidr_hash /* This ldmia relies on the memory layout of the mpidr_hash struct */ ldmia r0, {r1, r6-r8} @ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts compute_mpidr_hash r0, r6, r7, r8, r2, r1 @@ -147,9 +148,8 @@ no_hyp: mov r1, #0 ALT_SMP(mrc p15, 0, r0, c0, c0, 5) ALT_UP_B(1f) - adr r2, mpidr_hash_ptr - ldr r3, [r2] - add r2, r2, r3 @ r2 = struct mpidr_hash phys address + adr_l r2, mpidr_hash @ r2 = struct mpidr_hash phys address + /* * This ldmia relies on the memory layout of the mpidr_hash * struct mpidr_hash. @@ -157,10 +157,7 @@ no_hyp: ldmia r2, { r3-r6 } @ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts compute_mpidr_hash r1, r4, r5, r6, r0, r3 1: - adr r0, _sleep_save_sp - ldr r2, [r0] - add r0, r0, r2 - ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] + ldr_l r0, sleep_save_sp + SLEEP_SAVE_SP_PHYS ldr r0, [r0, r1, lsl #2] @ load phys pgd, stack, resume fn @@ -177,12 +174,6 @@ ENDPROC(cpu_resume_arm) ENDPROC(cpu_resume_no_hyp) #endif - .align 2 -_sleep_save_sp: - .long sleep_save_sp - . -mpidr_hash_ptr: - .long mpidr_hash - . @ mpidr_hash struct offset - .data .align 2 .type sleep_save_sp, #object -- cgit v1.2.3 From 3bcf906b194cebb6817cbb2f07b69e12aa5d7f51 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:46 +0300 Subject: ARM: head.S: use PC relative insn sequence to calculate PHYS_OFFSET Replace the open coded arithmetic with a simple adr_l/sub pair. This removes some open coded arithmetic involving virtual addresses, avoids literal pools on v7+, and slightly reduces the footprint of the code. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head.S | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 5e031a0bf9a9..ae0b08b47f52 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -103,10 +103,8 @@ ENTRY(stext) #endif #ifndef CONFIG_XIP_KERNEL - adr r3, 2f - ldmia r3, {r4, r8} - sub r4, r3, r4 @ (PHYS_OFFSET - PAGE_OFFSET) - add r8, r8, r4 @ PHYS_OFFSET + adr_l r8, _text @ __pa(_text) + sub r8, r8, #TEXT_OFFSET @ PHYS_OFFSET #else ldr r8, =PLAT_PHYS_OFFSET @ always constant in this case #endif @@ -158,10 +156,6 @@ ENTRY(stext) 1: b __enable_mmu ENDPROC(stext) .ltorg -#ifndef CONFIG_XIP_KERNEL -2: .long . - .long PAGE_OFFSET -#endif /* * Setup the initial page tables. 
We only setup the barest -- cgit v1.2.3 From aaac3733171fca948c4fb66b78257620e3885339 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:52 +0300 Subject: ARM: kvm: replace open coded VA->PA calculations with adr_l call Replace the open coded calculations of the actual physical address of the KVM stub vector table with a single adr_l invocation. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/boot/compressed/head.S | 15 ++------------- arch/arm/kernel/hyp-stub.S | 27 ++++++++++++--------------- 2 files changed, 14 insertions(+), 28 deletions(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 5b591dacbaaf..9905fb7560df 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -468,15 +468,10 @@ dtb_check_done: /* * Compute the address of the hyp vectors after relocation. - * This requires some arithmetic since we cannot directly - * reference __hyp_stub_vectors in a PC-relative way. * Call __hyp_set_vectors with the new address so that we * can HVC again after the copy. */ -0: adr r0, 0b - movw r1, #:lower16:__hyp_stub_vectors - 0b - movt r1, #:upper16:__hyp_stub_vectors - 0b - add r0, r0, r1 + adr_l r0, __hyp_stub_vectors sub r0, r0, r5 add r0, r0, r10 bl __hyp_set_vectors @@ -627,17 +622,11 @@ not_relocated: mov r0, #0 cmp r0, #HYP_MODE @ if not booted in HYP mode... bne __enter_kernel @ boot kernel directly - adr r12, .L__hyp_reentry_vectors_offset - ldr r0, [r12] - add r0, r0, r12 - + adr_l r0, __hyp_reentry_vectors bl __hyp_set_vectors __HVC(0) @ otherwise bounce to hyp mode b . @ should never be reached - - .align 2 -.L__hyp_reentry_vectors_offset: .long __hyp_reentry_vectors - . #else b __enter_kernel #endif diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 26d8e03b1dd3..103d0bdb2b7e 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -24,41 +24,38 @@ ENTRY(__boot_cpu_mode) .text /* - * Save the primary CPU boot mode. Requires 3 scratch registers. + * Save the primary CPU boot mode. Requires 2 scratch registers. */ - .macro store_primary_cpu_mode reg1, reg2, reg3 + .macro store_primary_cpu_mode reg1, reg2 mrs \reg1, cpsr and \reg1, \reg1, #MODE_MASK - adr \reg2, .L__boot_cpu_mode_offset - ldr \reg3, [\reg2] - str \reg1, [\reg2, \reg3] + str_l \reg1, __boot_cpu_mode, \reg2 .endm /* * Compare the current mode with the one saved on the primary CPU. * If they don't match, record that fact. The Z bit indicates * if there's a match or not. - * Requires 3 additionnal scratch registers. + * Requires 2 additional scratch registers. */ - .macro compare_cpu_mode_with_primary mode, reg1, reg2, reg3 - adr \reg2, .L__boot_cpu_mode_offset - ldr \reg3, [\reg2] - ldr \reg1, [\reg2, \reg3] + .macro compare_cpu_mode_with_primary mode, reg1, reg2 + adr_l \reg2, __boot_cpu_mode + ldr \reg1, [\reg2] cmp \mode, \reg1 @ matches primary CPU boot mode? 
orrne \reg1, \reg1, #BOOT_CPU_MODE_MISMATCH - strne \reg1, [\reg2, \reg3] @ record what happened and give up + strne \reg1, [\reg2] @ record what happened and give up .endm #else /* ZIMAGE */ - .macro store_primary_cpu_mode reg1:req, reg2:req, reg3:req + .macro store_primary_cpu_mode reg1:req, reg2:req .endm /* * The zImage loader only runs on one CPU, so we don't bother with mult-CPU * consistency checking: */ - .macro compare_cpu_mode_with_primary mode, reg1, reg2, reg3 + .macro compare_cpu_mode_with_primary mode, reg1, reg2 cmp \mode, \mode .endm @@ -73,7 +70,7 @@ ENTRY(__boot_cpu_mode) */ @ Call this from the primary CPU ENTRY(__hyp_stub_install) - store_primary_cpu_mode r4, r5, r6 + store_primary_cpu_mode r4, r5 ENDPROC(__hyp_stub_install) @ fall through... @@ -87,7 +84,7 @@ ENTRY(__hyp_stub_install_secondary) * If the secondary has booted with a different mode, give up * immediately. */ - compare_cpu_mode_with_primary r4, r5, r6, r7 + compare_cpu_mode_with_primary r4, r5, r6 retne lr /* -- cgit v1.2.3
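Taken together, the series retires one recurring idiom. A distilled before-and-after, with an invented symbol name, reads roughly as follows:

	@ BEFORE: data words hold link-time (virtual) addresses; the code
	@ measures its own displacement to convert them at runtime.
	adr	r0, 1f			@ r0 = runtime address of 1f
	ldmia	r0, {r3, r4}		@ r3 = link-time 1f, r4 = link-time sym
	sub	r3, r0, r3		@ r3 = runtime - link-time delta
	add	r4, r4, r3		@ r4 = runtime address of sym
	b	2f			@ skip over the data words
	.align	2
1:	.long	.			@ link-time address of this word
	.long	sym
2:
	@ AFTER: one pseudo-instruction, no data words, no literal pool.
	adr_l	r4, sym			@ r4 = runtime address of sym

The before form costs a load and three arithmetic instructions, keeps two data words next to the code, and works only because the '.long .' anchor records the link-time address; adr_l encodes the same information directly in the instruction stream, which is why it behaves identically before and after the MMU is enabled.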