diff options
Diffstat (limited to 'arch/arm64/kernel/head.S')
-rw-r--r-- | arch/arm64/kernel/head.S | 434 |
1 files changed, 228 insertions, 206 deletions
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 0a6e4f924df8..8ce88e08c030 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -132,6 +132,8 @@ efi_head: #endif #ifdef CONFIG_EFI + .globl stext_offset + .set stext_offset, stext - efi_head .align 3 pe_header: .ascii "PE" @@ -155,12 +157,12 @@ optional_header: .long 0 // SizeOfInitializedData .long 0 // SizeOfUninitializedData .long efi_stub_entry - efi_head // AddressOfEntryPoint - .long stext - efi_head // BaseOfCode + .long stext_offset // BaseOfCode extra_header_fields: .quad 0 // ImageBase - .long 0x20 // SectionAlignment - .long 0x8 // FileAlignment + .long 0x1000 // SectionAlignment + .long PECOFF_FILE_ALIGNMENT // FileAlignment .short 0 // MajorOperatingSystemVersion .short 0 // MinorOperatingSystemVersion .short 0 // MajorImageVersion @@ -172,7 +174,7 @@ extra_header_fields: .long _end - efi_head // SizeOfImage // Everything before the kernel image is considered part of the header - .long stext - efi_head // SizeOfHeaders + .long stext_offset // SizeOfHeaders .long 0 // CheckSum .short 0xa // Subsystem (EFI application) .short 0 // DllCharacteristics @@ -217,16 +219,24 @@ section_table: .byte 0 .byte 0 // end of 0 padding of section name .long _end - stext // VirtualSize - .long stext - efi_head // VirtualAddress + .long stext_offset // VirtualAddress .long _edata - stext // SizeOfRawData - .long stext - efi_head // PointerToRawData + .long stext_offset // PointerToRawData .long 0 // PointerToRelocations (0 for executables) .long 0 // PointerToLineNumbers (0 for executables) .short 0 // NumberOfRelocations (0 for executables) .short 0 // NumberOfLineNumbers (0 for executables) .long 0xe0500020 // Characteristics (section flags) - .align 5 + + /* + * EFI will load stext onwards at the 4k section alignment + * described in the PE/COFF header. To ensure that instruction + * sequences using an adrp and a :lo12: immediate will function + * correctly at this alignment, we must ensure that stext is + * placed at a 4k boundary in the Image to begin with. + */ + .align 12 #endif ENTRY(stext) @@ -238,7 +248,13 @@ ENTRY(stext) mov x0, x22 bl lookup_processor_type mov x23, x0 // x23=current cpu_table - cbz x23, __error_p // invalid processor (x23=0)? + /* + * __error_p may end up out of range for cbz if text areas are + * aligned up to section sizes. + */ + cbnz x23, 1f // invalid processor (x23=0)? + b __error_p +1: bl __vet_fdt bl __create_page_tables // x25=TTBR0, x26=TTBR1 /* @@ -250,13 +266,214 @@ ENTRY(stext) */ ldr x27, __switch_data // address to jump to after // MMU has been enabled - adr lr, __enable_mmu // return (PIC) address + adrp lr, __enable_mmu // return (PIC) address + add lr, lr, #:lo12:__enable_mmu ldr x12, [x23, #CPU_INFO_SETUP] add x12, x12, x28 // __virt_to_phys br x12 // initialise processor ENDPROC(stext) /* + * Determine validity of the x21 FDT pointer. + * The dtb must be 8-byte aligned and live in the first 512M of memory. + */ +__vet_fdt: + tst x21, #0x7 + b.ne 1f + cmp x21, x24 + b.lt 1f + mov x0, #(1 << 29) + add x0, x0, x24 + cmp x21, x0 + b.ge 1f + ret +1: + mov x21, #0 + ret +ENDPROC(__vet_fdt) +/* + * Macro to create a table entry to the next page. + * + * tbl: page table address + * virt: virtual address + * shift: #imm page table shift + * ptrs: #imm pointers per table page + * + * Preserves: virt + * Corrupts: tmp1, tmp2 + * Returns: tbl -> next level table page address + */ + .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 + lsr \tmp1, \virt, #\shift + and \tmp1, \tmp1, #\ptrs - 1 // table index + add \tmp2, \tbl, #PAGE_SIZE + orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type + str \tmp2, [\tbl, \tmp1, lsl #3] + add \tbl, \tbl, #PAGE_SIZE // next level table page + .endm + +/* + * Macro to populate the PGD (and possibily PUD) for the corresponding + * block entry in the next level (tbl) for the given virtual address. + * + * Preserves: tbl, next, virt + * Corrupts: tmp1, tmp2 + */ + .macro create_pgd_entry, tbl, virt, tmp1, tmp2 + create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 +#if SWAPPER_PGTABLE_LEVELS == 3 + create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 +#endif + .endm + +/* + * Macro to populate block entries in the page table for the start..end + * virtual range (inclusive). + * + * Preserves: tbl, flags + * Corrupts: phys, start, end, pstate + */ + .macro create_block_map, tbl, flags, phys, start, end + lsr \phys, \phys, #BLOCK_SHIFT + lsr \start, \start, #BLOCK_SHIFT + and \start, \start, #PTRS_PER_PTE - 1 // table index + orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry + lsr \end, \end, #BLOCK_SHIFT + and \end, \end, #PTRS_PER_PTE - 1 // table end index +9999: str \phys, [\tbl, \start, lsl #3] // store the entry + add \start, \start, #1 // next entry + add \phys, \phys, #BLOCK_SIZE // next block + cmp \start, \end + b.ls 9999b + .endm + +/* + * Setup the initial page tables. We only setup the barest amount which is + * required to get the kernel running. The following sections are required: + * - identity mapping to enable the MMU (low address, TTBR0) + * - first few MB of the kernel linear mapping to jump to once the MMU has + * been enabled, including the FDT blob (TTBR1) + * - pgd entry for fixed mappings (TTBR1) + */ +__create_page_tables: + pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses + mov x27, lr + + /* + * Invalidate the idmap and swapper page tables to avoid potential + * dirty cache lines being evicted. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + /* + * Clear the idmap and swapper page tables. + */ + mov x0, x25 + add x6, x26, #SWAPPER_DIR_SIZE +1: stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + stp xzr, xzr, [x0], #16 + cmp x0, x6 + b.lo 1b + + ldr x7, =MM_MMUFLAGS + + /* + * Create the identity mapping. + */ + mov x0, x25 // idmap_pg_dir + ldr x3, =KERNEL_START + add x3, x3, x28 // __pa(KERNEL_START) + create_pgd_entry x0, x3, x5, x6 + ldr x6, =KERNEL_END + mov x5, x3 // __pa(KERNEL_START) + add x6, x6, x28 // __pa(KERNEL_END) + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the kernel image (starting with PHYS_OFFSET). + */ + mov x0, x26 // swapper_pg_dir + mov x5, #PAGE_OFFSET + create_pgd_entry x0, x5, x3, x6 + ldr x6, =KERNEL_END + mov x3, x24 // phys offset + create_block_map x0, x7, x3, x5, x6 + + /* + * Map the FDT blob (maximum 2MB; must be within 512MB of + * PHYS_OFFSET). + */ + mov x3, x21 // FDT phys address + and x3, x3, #~((1 << 21) - 1) // 2MB aligned + mov x6, #PAGE_OFFSET + sub x5, x3, x24 // subtract PHYS_OFFSET + tst x5, #~((1 << 29) - 1) // within 512MB? + csel x21, xzr, x21, ne // zero the FDT pointer + b.ne 1f + add x5, x5, x6 // __va(FDT blob) + add x6, x5, #1 << 21 // 2MB for the FDT blob + sub x6, x6, #1 // inclusive range + create_block_map x0, x7, x3, x5, x6 +1: + /* + * Since the page tables have been populated with non-cacheable + * accesses (MMU disabled), invalidate the idmap and swapper page + * tables again to remove any speculatively loaded cache lines. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + mov lr, x27 + ret +ENDPROC(__create_page_tables) + .ltorg + + .align 3 + .type __switch_data, %object +__switch_data: + .quad __mmap_switched + .quad __bss_start // x6 + .quad __bss_stop // x7 + .quad processor_id // x4 + .quad __fdt_pointer // x5 + .quad memstart_addr // x6 + .quad init_thread_union + THREAD_START_SP // sp + +/* + * The following fragment of code is executed with the MMU on in MMU mode, and + * uses absolute addresses; this is not position independent. + */ +__mmap_switched: + adr x3, __switch_data + 8 + + ldp x6, x7, [x3], #16 +1: cmp x6, x7 + b.hs 2f + str xzr, [x6], #8 // Clear BSS + b 1b +2: + ldp x4, x5, [x3], #16 + ldr x6, [x3], #8 + ldr x16, [x3] + mov sp, x16 + str x22, [x4] // Save processor ID + str x21, [x5] // Save FDT pointer + str x24, [x6] // Save PHYS_OFFSET + mov x29, #0 + b start_kernel +ENDPROC(__mmap_switched) + +/* + * end early head section, begin head code that is also used for + * hotplug and needs to have the same protections as the text region + */ + .section ".text","ax" +/* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. * @@ -331,7 +548,8 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr vttbr_el2, xzr /* Hypervisor stub */ - adr x0, __hyp_stub_vectors + adrp x0, __hyp_stub_vectors + add x0, x0, #:lo12:__hyp_stub_vectors msr vbar_el2, x0 /* spsr */ @@ -492,183 +710,6 @@ ENDPROC(__calc_phys_offset) .quad PAGE_OFFSET /* - * Macro to create a table entry to the next page. - * - * tbl: page table address - * virt: virtual address - * shift: #imm page table shift - * ptrs: #imm pointers per table page - * - * Preserves: virt - * Corrupts: tmp1, tmp2 - * Returns: tbl -> next level table page address - */ - .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 - lsr \tmp1, \virt, #\shift - and \tmp1, \tmp1, #\ptrs - 1 // table index - add \tmp2, \tbl, #PAGE_SIZE - orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type - str \tmp2, [\tbl, \tmp1, lsl #3] - add \tbl, \tbl, #PAGE_SIZE // next level table page - .endm - -/* - * Macro to populate the PGD (and possibily PUD) for the corresponding - * block entry in the next level (tbl) for the given virtual address. - * - * Preserves: tbl, next, virt - * Corrupts: tmp1, tmp2 - */ - .macro create_pgd_entry, tbl, virt, tmp1, tmp2 - create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 -#if SWAPPER_PGTABLE_LEVELS == 3 - create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 -#endif - .endm - -/* - * Macro to populate block entries in the page table for the start..end - * virtual range (inclusive). - * - * Preserves: tbl, flags - * Corrupts: phys, start, end, pstate - */ - .macro create_block_map, tbl, flags, phys, start, end - lsr \phys, \phys, #BLOCK_SHIFT - lsr \start, \start, #BLOCK_SHIFT - and \start, \start, #PTRS_PER_PTE - 1 // table index - orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - lsr \end, \end, #BLOCK_SHIFT - and \end, \end, #PTRS_PER_PTE - 1 // table end index -9999: str \phys, [\tbl, \start, lsl #3] // store the entry - add \start, \start, #1 // next entry - add \phys, \phys, #BLOCK_SIZE // next block - cmp \start, \end - b.ls 9999b - .endm - -/* - * Setup the initial page tables. We only setup the barest amount which is - * required to get the kernel running. The following sections are required: - * - identity mapping to enable the MMU (low address, TTBR0) - * - first few MB of the kernel linear mapping to jump to once the MMU has - * been enabled, including the FDT blob (TTBR1) - * - pgd entry for fixed mappings (TTBR1) - */ -__create_page_tables: - pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses - mov x27, lr - - /* - * Invalidate the idmap and swapper page tables to avoid potential - * dirty cache lines being evicted. - */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE - bl __inval_cache_range - - /* - * Clear the idmap and swapper page tables. - */ - mov x0, x25 - add x6, x26, #SWAPPER_DIR_SIZE -1: stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - stp xzr, xzr, [x0], #16 - cmp x0, x6 - b.lo 1b - - ldr x7, =MM_MMUFLAGS - - /* - * Create the identity mapping. - */ - mov x0, x25 // idmap_pg_dir - ldr x3, =KERNEL_START - add x3, x3, x28 // __pa(KERNEL_START) - create_pgd_entry x0, x3, x5, x6 - ldr x6, =KERNEL_END - mov x5, x3 // __pa(KERNEL_START) - add x6, x6, x28 // __pa(KERNEL_END) - create_block_map x0, x7, x3, x5, x6 - - /* - * Map the kernel image (starting with PHYS_OFFSET). - */ - mov x0, x26 // swapper_pg_dir - mov x5, #PAGE_OFFSET - create_pgd_entry x0, x5, x3, x6 - ldr x6, =KERNEL_END - mov x3, x24 // phys offset - create_block_map x0, x7, x3, x5, x6 - - /* - * Map the FDT blob (maximum 2MB; must be within 512MB of - * PHYS_OFFSET). - */ - mov x3, x21 // FDT phys address - and x3, x3, #~((1 << 21) - 1) // 2MB aligned - mov x6, #PAGE_OFFSET - sub x5, x3, x24 // subtract PHYS_OFFSET - tst x5, #~((1 << 29) - 1) // within 512MB? - csel x21, xzr, x21, ne // zero the FDT pointer - b.ne 1f - add x5, x5, x6 // __va(FDT blob) - add x6, x5, #1 << 21 // 2MB for the FDT blob - sub x6, x6, #1 // inclusive range - create_block_map x0, x7, x3, x5, x6 -1: - /* - * Since the page tables have been populated with non-cacheable - * accesses (MMU disabled), invalidate the idmap and swapper page - * tables again to remove any speculatively loaded cache lines. - */ - mov x0, x25 - add x1, x26, #SWAPPER_DIR_SIZE - bl __inval_cache_range - - mov lr, x27 - ret -ENDPROC(__create_page_tables) - .ltorg - - .align 3 - .type __switch_data, %object -__switch_data: - .quad __mmap_switched - .quad __bss_start // x6 - .quad __bss_stop // x7 - .quad processor_id // x4 - .quad __fdt_pointer // x5 - .quad memstart_addr // x6 - .quad init_thread_union + THREAD_START_SP // sp - -/* - * The following fragment of code is executed with the MMU on in MMU mode, and - * uses absolute addresses; this is not position independent. - */ -__mmap_switched: - adr x3, __switch_data + 8 - - ldp x6, x7, [x3], #16 -1: cmp x6, x7 - b.hs 2f - str xzr, [x6], #8 // Clear BSS - b 1b -2: - ldp x4, x5, [x3], #16 - ldr x6, [x3], #8 - ldr x16, [x3] - mov sp, x16 - str x22, [x4] // Save processor ID - str x21, [x5] // Save FDT pointer - str x24, [x6] // Save PHYS_OFFSET - mov x29, #0 - b start_kernel -ENDPROC(__mmap_switched) - -/* * Exception handling. Something went wrong and we can't proceed. We ought to * tell the user, but since we don't have any guarantee that we're even * running on the right architecture, we do virtually nothing. @@ -715,22 +756,3 @@ __lookup_processor_type_data: .quad . .quad cpu_table .size __lookup_processor_type_data, . - __lookup_processor_type_data - -/* - * Determine validity of the x21 FDT pointer. - * The dtb must be 8-byte aligned and live in the first 512M of memory. - */ -__vet_fdt: - tst x21, #0x7 - b.ne 1f - cmp x21, x24 - b.lt 1f - mov x0, #(1 << 29) - add x0, x0, x24 - cmp x21, x0 - b.ge 1f - ret -1: - mov x21, #0 - ret -ENDPROC(__vet_fdt) |