diff options
Diffstat (limited to 'arch/arm64/kernel/head.S')
-rw-r--r-- | arch/arm64/kernel/head.S | 261 |
1 files changed, 92 insertions, 169 deletions
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 07f930540f4a..19f915e8f6e0 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -36,7 +36,7 @@ #include <asm/page.h> #include <asm/virt.h> -#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) +#define __PHYS_OFFSET (KERNEL_START - TEXT_OFFSET) #if (TEXT_OFFSET & 0xfff) != 0 #error TEXT_OFFSET must be at least 4KB aligned @@ -46,13 +46,6 @@ #error TEXT_OFFSET must be less than 2MB #endif - .macro pgtbl, ttb0, ttb1, virt_to_phys - ldr \ttb1, =swapper_pg_dir - ldr \ttb0, =idmap_pg_dir - add \ttb1, \ttb1, \virt_to_phys - add \ttb0, \ttb0, \virt_to_phys - .endm - #ifdef CONFIG_ARM64_64K_PAGES #define BLOCK_SHIFT PAGE_SHIFT #define BLOCK_SIZE PAGE_SIZE @@ -63,7 +56,7 @@ #define TABLE_SHIFT PUD_SHIFT #endif -#define KERNEL_START KERNEL_RAM_VADDR +#define KERNEL_START _text #define KERNEL_END _end /* @@ -240,40 +233,43 @@ section_table: #endif ENTRY(stext) - mov x21, x0 // x21=FDT + bl preserve_boot_args bl el2_setup // Drop to EL1, w20=cpu_boot_mode - bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET + adrp x24, __PHYS_OFFSET bl set_cpu_boot_mode_flag - mrs x22, midr_el1 // x22=cpuid - mov x0, x22 - bl lookup_processor_type - mov x23, x0 // x23=current cpu_table - /* - * __error_p may end up out of range for cbz if text areas are - * aligned up to section sizes. - */ - cbnz x23, 1f // invalid processor (x23=0)? - b __error_p -1: + bl __vet_fdt bl __create_page_tables // x25=TTBR0, x26=TTBR1 /* - * The following calls CPU specific code in a position independent - * manner. See arch/arm64/mm/proc.S for details. x23 = base of - * cpu_info structure selected by lookup_processor_type above. + * The following calls CPU setup code, see arch/arm64/mm/proc.S for + * details. * On return, the CPU will be ready for the MMU to be turned on and * the TCR will have been set. */ - ldr x27, __switch_data // address to jump to after + ldr x27, =__mmap_switched // address to jump to after // MMU has been enabled - adrp lr, __enable_mmu // return (PIC) address - add lr, lr, #:lo12:__enable_mmu - ldr x12, [x23, #CPU_INFO_SETUP] - add x12, x12, x28 // __virt_to_phys - br x12 // initialise processor + adr_l lr, __enable_mmu // return (PIC) address + b __cpu_setup // initialise processor ENDPROC(stext) /* + * Preserve the arguments passed by the bootloader in x0 .. x3 + */ +preserve_boot_args: + mov x21, x0 // x21=FDT + + adr_l x0, boot_args // record the contents of + stp x21, x1, [x0] // x0 .. x3 at kernel entry + stp x2, x3, [x0, #16] + + dmb sy // needed before dc ivac with + // MMU off + + add x1, x0, #0x20 // 4 x 8 bytes + b __inval_cache_range // tail call +ENDPROC(preserve_boot_args) + +/* * Determine validity of the x21 FDT pointer. * The dtb must be 8-byte aligned and live in the first 512M of memory. */ @@ -356,7 +352,8 @@ ENDPROC(__vet_fdt) * - pgd entry for fixed mappings (TTBR1) */ __create_page_tables: - pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses + adrp x25, idmap_pg_dir + adrp x26, swapper_pg_dir mov x27, lr /* @@ -385,12 +382,50 @@ __create_page_tables: * Create the identity mapping. */ mov x0, x25 // idmap_pg_dir - ldr x3, =KERNEL_START - add x3, x3, x28 // __pa(KERNEL_START) + adrp x3, KERNEL_START // __pa(KERNEL_START) + +#ifndef CONFIG_ARM64_VA_BITS_48 +#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) +#define EXTRA_PTRS (1 << (48 - EXTRA_SHIFT)) + + /* + * If VA_BITS < 48, it may be too small to allow for an ID mapping to be + * created that covers system RAM if that is located sufficiently high + * in the physical address space. So for the ID map, use an extended + * virtual range in that case, by configuring an additional translation + * level. + * First, we have to verify our assumption that the current value of + * VA_BITS was chosen such that all translation levels are fully + * utilised, and that lowering T0SZ will always result in an additional + * translation level to be configured. + */ +#if VA_BITS != EXTRA_SHIFT +#error "Mismatch between VA_BITS and page size/number of translation levels" +#endif + + /* + * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the + * entire kernel image can be ID mapped. As T0SZ == (64 - #bits used), + * this number conveniently equals the number of leading zeroes in + * the physical address of KERNEL_END. + */ + adrp x5, KERNEL_END + clz x5, x5 + cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? + b.ge 1f // .. then skip additional level + + adr_l x6, idmap_t0sz + str x5, [x6] + dmb sy + dc ivac, x6 // Invalidate potentially stale cache line + + create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6 +1: +#endif + create_pgd_entry x0, x3, x5, x6 - ldr x6, =KERNEL_END mov x5, x3 // __pa(KERNEL_START) - add x6, x6, x28 // __pa(KERNEL_END) + adr_l x6, KERNEL_END // __pa(KERNEL_END) create_block_map x0, x7, x3, x5, x6 /* @@ -399,7 +434,7 @@ __create_page_tables: mov x0, x26 // swapper_pg_dir mov x5, #PAGE_OFFSET create_pgd_entry x0, x5, x3, x6 - ldr x6, =KERNEL_END + ldr x6, =KERNEL_END // __va(KERNEL_END) mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 @@ -426,6 +461,7 @@ __create_page_tables: */ mov x0, x25 add x1, x26, #SWAPPER_DIR_SIZE + dmb sy bl __inval_cache_range mov lr, x27 @@ -433,37 +469,22 @@ __create_page_tables: ENDPROC(__create_page_tables) .ltorg - .align 3 - .type __switch_data, %object -__switch_data: - .quad __mmap_switched - .quad __bss_start // x6 - .quad __bss_stop // x7 - .quad processor_id // x4 - .quad __fdt_pointer // x5 - .quad memstart_addr // x6 - .quad init_thread_union + THREAD_START_SP // sp - /* - * The following fragment of code is executed with the MMU on in MMU mode, and - * uses absolute addresses; this is not position independent. + * The following fragment of code is executed with the MMU enabled. */ + .set initial_sp, init_thread_union + THREAD_START_SP __mmap_switched: - adr x3, __switch_data + 8 + adr_l x6, __bss_start + adr_l x7, __bss_stop - ldp x6, x7, [x3], #16 1: cmp x6, x7 b.hs 2f str xzr, [x6], #8 // Clear BSS b 1b 2: - ldp x4, x5, [x3], #16 - ldr x6, [x3], #8 - ldr x16, [x3] - mov sp, x16 - str x22, [x4] // Save processor ID - str x21, [x5] // Save FDT pointer - str x24, [x6] // Save PHYS_OFFSET + adr_l sp, initial_sp, x4 + str_l x21, __fdt_pointer, x5 // Save FDT pointer + str_l x24, memstart_addr, x6 // Save PHYS_OFFSET mov x29, #0 b start_kernel ENDPROC(__mmap_switched) @@ -566,8 +587,7 @@ ENDPROC(el2_setup) * in x20. See arch/arm64/include/asm/virt.h for more info. */ ENTRY(set_cpu_boot_mode_flag) - ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode - add x1, x1, x28 + adr_l x1, __boot_cpu_mode cmp w20, #BOOT_CPU_MODE_EL2 b.ne 1f add x1, x1, #4 @@ -588,29 +608,21 @@ ENDPROC(set_cpu_boot_mode_flag) .align L1_CACHE_SHIFT ENTRY(__boot_cpu_mode) .long BOOT_CPU_MODE_EL2 - .long 0 + .long BOOT_CPU_MODE_EL1 .popsection #ifdef CONFIG_SMP - .align 3 -1: .quad . - .quad secondary_holding_pen_release - /* * This provides a "holding pen" for platforms to hold all secondary * cores are held until we're ready for them to initialise. */ ENTRY(secondary_holding_pen) bl el2_setup // Drop to EL1, w20=cpu_boot_mode - bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 ldr x1, =MPIDR_HWID_BITMASK and x0, x0, x1 - adr x1, 1b - ldp x2, x3, [x1] - sub x1, x1, x2 - add x3, x3, x1 + adr_l x3, secondary_holding_pen_release pen: ldr x4, [x3] cmp x4, x0 b.eq secondary_startup @@ -624,7 +636,6 @@ ENDPROC(secondary_holding_pen) */ ENTRY(secondary_entry) bl el2_setup // Drop to EL1 - bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET bl set_cpu_boot_mode_flag b secondary_startup ENDPROC(secondary_entry) @@ -633,16 +644,9 @@ ENTRY(secondary_startup) /* * Common entry point for secondary CPUs. */ - mrs x22, midr_el1 // x22=cpuid - mov x0, x22 - bl lookup_processor_type - mov x23, x0 // x23=current cpu_table - cbz x23, __error_p // invalid processor (x23=0)? - - pgtbl x25, x26, x28 // x25=TTBR0, x26=TTBR1 - ldr x12, [x23, #CPU_INFO_SETUP] - add x12, x12, x28 // __virt_to_phys - blr x12 // initialise processor + adrp x25, idmap_pg_dir + adrp x26, swapper_pg_dir + bl __cpu_setup // initialise processor ldr x21, =secondary_data ldr x27, =__secondary_switched // address to jump to after enabling the MMU @@ -658,11 +662,12 @@ ENDPROC(__secondary_switched) #endif /* CONFIG_SMP */ /* - * Setup common bits before finally enabling the MMU. Essentially this is just - * loading the page table pointer and vector base registers. + * Enable the MMU. * - * On entry to this code, x0 must contain the SCTLR_EL1 value for turning on - * the MMU. + * x0 = SCTLR_EL1 value for turning on the MMU. + * x27 = *virtual* address to jump to upon completion + * + * other registers depend on the function called upon completion */ __enable_mmu: ldr x5, =vectors @@ -670,89 +675,7 @@ __enable_mmu: msr ttbr0_el1, x25 // load TTBR0 msr ttbr1_el1, x26 // load TTBR1 isb - b __turn_mmu_on -ENDPROC(__enable_mmu) - -/* - * Enable the MMU. This completely changes the structure of the visible memory - * space. You will not be able to trace execution through this. - * - * x0 = system control register - * x27 = *virtual* address to jump to upon completion - * - * other registers depend on the function called upon completion - * - * We align the entire function to the smallest power of two larger than it to - * ensure it fits within a single block map entry. Otherwise were PHYS_OFFSET - * close to the end of a 512MB or 1GB block we might require an additional - * table to map the entire function. - */ - .align 4 -__turn_mmu_on: msr sctlr_el1, x0 isb br x27 -ENDPROC(__turn_mmu_on) - -/* - * Calculate the start of physical memory. - */ -__calc_phys_offset: - adr x0, 1f - ldp x1, x2, [x0] - sub x28, x0, x1 // x28 = PHYS_OFFSET - PAGE_OFFSET - add x24, x2, x28 // x24 = PHYS_OFFSET - ret -ENDPROC(__calc_phys_offset) - - .align 3 -1: .quad . - .quad PAGE_OFFSET - -/* - * Exception handling. Something went wrong and we can't proceed. We ought to - * tell the user, but since we don't have any guarantee that we're even - * running on the right architecture, we do virtually nothing. - */ -__error_p: -ENDPROC(__error_p) - -__error: -1: nop - b 1b -ENDPROC(__error) - -/* - * This function gets the processor ID in w0 and searches the cpu_table[] for - * a match. It returns a pointer to the struct cpu_info it found. The - * cpu_table[] must end with an empty (all zeros) structure. - * - * This routine can be called via C code and it needs to work with the MMU - * both disabled and enabled (the offset is calculated automatically). - */ -ENTRY(lookup_processor_type) - adr x1, __lookup_processor_type_data - ldp x2, x3, [x1] - sub x1, x1, x2 // get offset between VA and PA - add x3, x3, x1 // convert VA to PA -1: - ldp w5, w6, [x3] // load cpu_id_val and cpu_id_mask - cbz w5, 2f // end of list? - and w6, w6, w0 - cmp w5, w6 - b.eq 3f - add x3, x3, #CPU_INFO_SZ - b 1b -2: - mov x3, #0 // unknown processor -3: - mov x0, x3 - ret -ENDPROC(lookup_processor_type) - - .align 3 - .type __lookup_processor_type_data, %object -__lookup_processor_type_data: - .quad . - .quad cpu_table - .size __lookup_processor_type_data, . - __lookup_processor_type_data +ENDPROC(__enable_mmu) |