From f4693c2716b35d0846fd45a4ad7db78bfb25efc8 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Thu, 8 Oct 2020 17:36:00 +0200
Subject: arm64: mm: extend linear region for 52-bit VA configurations

For historical reasons, the arm64 kernel VA space is configured as two
equally sized halves, i.e., on a 48-bit VA build, the VA space is split
into a 47-bit vmalloc region and a 47-bit linear region.

When support for 52-bit virtual addressing was added, this equal split
was kept, resulting in a substantial waste of virtual address space in
the linear region:

                             48-bit VA                      52-bit VA
  0xffff_ffff_ffff_ffff +-------------+                +-------------+
                        |   vmalloc   |                |   vmalloc   |
  0xffff_8000_0000_0000 +-------------+  _PAGE_END(48) +-------------+
                        |   linear    |                :             :
  0xffff_0000_0000_0000 +-------------+                :             :
                        :             :                :             :
                        :             :                :             :
                        :             :                :  currently  :
                        :  unusable   :                :   unused    :
                        :             :                :             :
                        :     by      :                :             :
                        :             :                :             :
                        :  hardware   :                :             :
                        :             :                :             :
  0xfff8_0000_0000_0000 :             :  _PAGE_END(52) +-------------+
                        :             :                |             |
                        :             :                |             |
                        :             :                |             |
                        :             :                |             |
                        :  unusable   :                |             |
                        :             :                |   linear    |
                        :     by      :                |             |
                        :             :                |   region    |
                        :  hardware   :                |             |
                        :             :                |             |
                        :             :                |             |
                        :             :                |             |
  0xfff0_0000_0000_0000 +-------------+    PAGE_OFFSET +-------------+

As illustrated above, the 52-bit VA kernel uses 47 bits for the vmalloc
space (as before), to ensure that a single 64k granule kernel image can
support any 64k granule capable system, regardless of whether it supports
the 52-bit virtual addressing extension. However, due to the fact that
the VA space is still split in equal halves, the linear region is only
2^51 bytes in size, wasting almost half of the 52-bit VA space.

Let's fix this, by abandoning the equal split, and simply assigning all
VA space outside of the vmalloc region to the linear region.

The KASAN shadow region is reconfigured so that it ends at the start of
the vmalloc region, and grows downwards. That way, the arrangement of
the vmalloc space (which contains kernel mappings, modules, BPF region,
the vmemmap array etc) is identical between non-KASAN and KASAN builds,
which aids debugging.
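The size difference is easy to check with plain arithmetic. The sketch below
is stand-alone user-space C, not kernel code; it only mirrors the shape of the
_PAGE_OFFSET()/_PAGE_END() expressions used by asm/memory.h (an assumption of
the example) to show how much linear VA a 52-bit configuration gains once the
equal split is dropped and only the 47-bit vmalloc half is reserved.

  /*
   * Illustrative sketch only: reproduces the address arithmetic, assuming
   * VA_BITS = 52 and a 47-bit vmalloc half, i.e. PAGE_END = _PAGE_END(48).
   */
  #include <stdio.h>
  #include <stdint.h>

  #define _PAGE_OFFSET(va)  (-(1ULL << (va)))        /* start of linear map */
  #define _PAGE_END(va)     (-(1ULL << ((va) - 1)))  /* end of linear map   */

  int main(void)
  {
          uint64_t page_offset = _PAGE_OFFSET(52);   /* 0xfff0_0000_0000_0000 */
          uint64_t old_end     = _PAGE_END(52);      /* equal split: 2^51 bytes of linear map */
          uint64_t new_end     = _PAGE_END(48);      /* new split: everything below vmalloc   */

          printf("PAGE_OFFSET       = 0x%016llx\n", (unsigned long long)page_offset);
          printf("old linear region = %llu TiB\n",
                 (unsigned long long)((old_end - page_offset) >> 40));  /* 2048 TiB */
          printf("new linear region = %llu TiB\n",
                 (unsigned long long)((new_end - page_offset) >> 40));  /* 3968 TiB */
          return 0;
  }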
Signed-off-by: Ard Biesheuvel
Reviewed-by: Steve Capper
Link: https://lore.kernel.org/r/20201008153602.9467-3-ardb@kernel.org
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/memory.h | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'arch/arm64/include/asm/memory.h')

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index cd61239bae8c..8e89f9b9091e 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -44,7 +44,7 @@
 #define _PAGE_OFFSET(va)	(-(UL(1) << (va)))
 #define PAGE_OFFSET		(_PAGE_OFFSET(VA_BITS))
 #define KIMAGE_VADDR		(MODULES_END)
-#define BPF_JIT_REGION_START	(KASAN_SHADOW_END)
+#define BPF_JIT_REGION_START	(_PAGE_END(VA_BITS_MIN))
 #define BPF_JIT_REGION_SIZE	(SZ_128M)
 #define BPF_JIT_REGION_END	(BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
 #define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
@@ -76,10 +76,11 @@
 #define KASAN_SHADOW_OFFSET	_AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
 #define KASAN_SHADOW_END	((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) \
 					+ KASAN_SHADOW_OFFSET)
+#define PAGE_END		(KASAN_SHADOW_END - (1UL << (vabits_actual - KASAN_SHADOW_SCALE_SHIFT)))
 #define KASAN_THREAD_SHIFT	1
 #else
 #define KASAN_THREAD_SHIFT	0
-#define KASAN_SHADOW_END	(_PAGE_END(VA_BITS_MIN))
+#define PAGE_END		(_PAGE_END(VA_BITS_MIN))
 #endif /* CONFIG_KASAN */

 #define MIN_THREAD_SHIFT	(14 + KASAN_THREAD_SHIFT)
@@ -167,7 +168,6 @@
 #include

 extern u64			vabits_actual;
-#define PAGE_END		(_PAGE_END(vabits_actual))

 extern s64			memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
@@ -238,11 +238,9 @@ static inline const void *__tag_set(const void *addr, u8 tag)

 /*
- * The linear kernel range starts at the bottom of the virtual address
- * space. Testing the top bit for the start of the region is a
- * sufficient check and avoids having to worry about the tag.
+ * The linear kernel range starts at the bottom of the virtual address space.
  */
-#define __is_lm_address(addr)	(!(((u64)addr) & BIT(vabits_actual - 1)))
+#define __is_lm_address(addr)	(((u64)(addr) & ~PAGE_OFFSET) < (PAGE_END - PAGE_OFFSET))

 #define __lm_to_phys(addr)	(((addr) & ~PAGE_OFFSET) + PHYS_OFFSET)
 #define __kimg_to_phys(addr)	((addr) - kimage_voffset)
--
cgit v1.2.3


From 8c96400d6a39be763130a5c493647c57726f7013 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Thu, 8 Oct 2020 17:36:01 +0200
Subject: arm64: mm: make vmemmap region a projection of the linear region

Now that we have reverted the introduction of the vmemmap struct page
pointer and the separate physvirt_offset, we can simplify things further,
and place the vmemmap region in the VA space in such a way that virtual
to page translations and vice versa can be implemented using a single
arithmetic shift.

One happy coincidence resulting from this is that the 48-bit/4k and
52-bit/64k configurations (which are assumed to be the two most
prevalent) end up with the same placement of the vmemmap region. In
a subsequent patch, we will take advantage of this, and unify the
memory maps even more.
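The "projection" property can be checked in isolation. The sketch below is
plain user-space C, not kernel code; it assumes 4K pages (PAGE_SHIFT = 12), a
64-byte struct page (so VMEMMAP_SHIFT = 6) and VA_BITS = 48, and shows that
with VMEMMAP_START placed at -(1 << (VA_BITS - VMEMMAP_SHIFT)), a single
arithmetic shift converts between a linear-map address and its vmemmap entry
and back. (As the last patch in this series notes, the shift form is only
valid while struct page is exactly 64 bytes.)

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  #define VA_BITS                48
  #define PAGE_SHIFT             12
  #define STRUCT_PAGE_SHIFT      6                    /* log2(sizeof(struct page)) */
  #define VMEMMAP_SHIFT          (PAGE_SHIFT - STRUCT_PAGE_SHIFT)

  #define PAGE_OFFSET            (-(1ULL << VA_BITS))
  #define VMEMMAP_START          (-(1ULL << (VA_BITS - VMEMMAP_SHIFT)))

  int main(void)
  {
          /* An arbitrary page-aligned linear map address. */
          uint64_t va = PAGE_OFFSET + 0x123456000ULL;

          /* virt_to_page(): one sign-preserving shift right. */
          uint64_t page = (uint64_t)((int64_t)va >> VMEMMAP_SHIFT);

          /* page_to_virt(): one shift left. */
          uint64_t back = page << VMEMMAP_SHIFT;

          assert(page >= VMEMMAP_START);  /* lands inside the vmemmap region */
          assert(back == va);             /* round trip is exact */

          printf("va   = 0x%016llx\n", (unsigned long long)va);
          printf("page = 0x%016llx (vmemmap starts at 0x%016llx)\n",
                 (unsigned long long)page, (unsigned long long)VMEMMAP_START);
          return 0;
  }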
Signed-off-by: Ard Biesheuvel
Reviewed-by: Steve Capper
Link: https://lore.kernel.org/r/20201008153602.9467-4-ardb@kernel.org
Signed-off-by: Catalin Marinas
---
 Documentation/arm64/memory.rst  | 30 +++++++++++++++---------------
 arch/arm64/include/asm/memory.h | 14 ++++++--------
 arch/arm64/mm/init.c            |  2 ++
 3 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'arch/arm64/include/asm/memory.h')

diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
index ee51eb66a578..476edb6015b2 100644
--- a/Documentation/arm64/memory.rst
+++ b/Documentation/arm64/memory.rst
@@ -35,14 +35,14 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
   [ ffff600000000000	ffff7fffffffffff ]	  32TB	[ kasan shadow region ]
   ffff800000000000	ffff800007ffffff	 128MB	bpf jit region
   ffff800008000000	ffff80000fffffff	 128MB	modules
-  ffff800010000000	fffffdffbffeffff	 125TB	vmalloc
-  fffffdffbfff0000	fffffdfffe5f8fff	~998MB	[guard region]
-  fffffdfffe5f9000	fffffdfffe9fffff	4124KB	fixed mappings
-  fffffdfffea00000	fffffdfffebfffff	   2MB	[guard region]
-  fffffdfffec00000	fffffdffffbfffff	  16MB	PCI I/O space
-  fffffdffffc00000	fffffdffffdfffff	   2MB	[guard region]
-  fffffdffffe00000	ffffffffffdfffff	   2TB	vmemmap
-  ffffffffffe00000	ffffffffffffffff	   2MB	[guard region]
+  ffff800010000000	fffffbffbffeffff	 123TB	vmalloc
+  fffffbffbfff0000	fffffbfffe7f8fff	~998MB	[guard region]
+  fffffbfffe7f9000	fffffbfffebfffff	4124KB	fixed mappings
+  fffffbfffec00000	fffffbfffedfffff	   2MB	[guard region]
+  fffffbfffee00000	fffffbffffdfffff	  16MB	PCI I/O space
+  fffffbffffe00000	fffffbffffffffff	   2MB	[guard region]
+  fffffc0000000000	fffffdffffffffff	   2TB	vmemmap
+  fffffe0000000000	ffffffffffffffff	   2TB	[guard region]

 AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support)::

@@ -55,13 +55,13 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
   ffff800000000000	ffff800007ffffff	 128MB	bpf jit region
   ffff800008000000	ffff80000fffffff	 128MB	modules
   ffff800010000000	fffff81ffffeffff	 120TB	vmalloc
-  fffff81fffff0000	fffffc1ffe58ffff	  ~3TB	[guard region]
-  fffffc1ffe590000	fffffc1ffe9fffff	4544KB	fixed mappings
-  fffffc1ffea00000	fffffc1ffebfffff	   2MB	[guard region]
-  fffffc1ffec00000	fffffc1fffbfffff	  16MB	PCI I/O space
-  fffffc1fffc00000	fffffc1fffdfffff	   2MB	[guard region]
-  fffffc1fffe00000	ffffffffffdfffff	3968GB	vmemmap
-  ffffffffffe00000	ffffffffffffffff	   2MB	[guard region]
+  fffff81fffff0000	fffffbfffe38ffff	  ~3TB	[guard region]
+  fffffbfffe390000	fffffbfffebfffff	4544KB	fixed mappings
+  fffffbfffec00000	fffffbfffedfffff	   2MB	[guard region]
+  fffffbfffee00000	fffffbffffdfffff	  16MB	PCI I/O space
+  fffffbffffe00000	fffffbffffffffff	   2MB	[guard region]
+  fffffc0000000000	ffffffdfffffffff	  ~4TB	vmemmap
+  ffffffe000000000	ffffffffffffffff	 128GB	[guard region]

 Translation table lookup with 4KB pages::

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 8e89f9b9091e..ecd6342e27ca 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -30,8 +30,8 @@
  * keep a constant PAGE_OFFSET and "fallback" to using the higher end
  * of the VMEMMAP where 52-bit support is not available in hardware.
  */
-#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) \
-			>> (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT))
+#define VMEMMAP_SHIFT	(PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT)
+#define VMEMMAP_SIZE	((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) >> VMEMMAP_SHIFT)

 /*
  * PAGE_OFFSET - the virtual address of the start of the linear map, at the
@@ -50,7 +50,7 @@
 #define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
 #define MODULES_VADDR		(BPF_JIT_REGION_END)
 #define MODULES_VSIZE		(SZ_128M)
-#define VMEMMAP_START		(-VMEMMAP_SIZE - SZ_2M)
+#define VMEMMAP_START		(-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
 #define VMEMMAP_END		(VMEMMAP_START + VMEMMAP_SIZE)
 #define PCI_IO_END		(VMEMMAP_START - SZ_2M)
 #define PCI_IO_START		(PCI_IO_END - PCI_IO_SIZE)
@@ -308,15 +308,13 @@ static inline void *phys_to_virt(phys_addr_t x)
 #else
 #define page_to_virt(x)	({					\
 	__typeof__(x) __page = x;				\
-	u64 __idx = ((u64)__page - VMEMMAP_START) / sizeof(struct page);\
-	u64 __addr = PAGE_OFFSET + (__idx * PAGE_SIZE);		\
+	u64 __addr = (u64)__page << VMEMMAP_SHIFT;		\
 	(void *)__tag_set((const void *)__addr, page_kasan_tag(__page));\
 })

 #define virt_to_page(x)	({					\
-	u64 __idx = (__tag_reset((u64)x) - PAGE_OFFSET) / PAGE_SIZE;	\
-	u64 __addr = VMEMMAP_START + (__idx * sizeof(struct page));	\
-	(struct page *)__addr;					\
+	u64 __addr = __tag_reset((u64)(x)) & PAGE_MASK;		\
+	(struct page *)((s64)__addr >> VMEMMAP_SHIFT);		\
 })
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP || CONFIG_DEBUG_VIRTUAL */

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 7e15d92836d8..3a5e9f9298e9 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -502,6 +502,8 @@ static void __init free_unused_memmap(void)
  */
 void __init mem_init(void)
 {
+	BUILD_BUG_ON(!is_power_of_2(sizeof(struct page)));
+
 	if (swiotlb_force == SWIOTLB_FORCE ||
 	    max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit))
 		swiotlb_init(1);
--
cgit v1.2.3


From 9ad7c6d5e75b160c9ce5775db610d964af45b83f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Thu, 8 Oct 2020 17:36:02 +0200
Subject: arm64: mm: tidy up top of kernel VA space

Tidy up the way the top of the kernel VA space is organized, by mirroring
the 256 MB region we have below the vmalloc space, and populating it top
down with the PCI I/O space, some guard regions, and the fixmap region.
The latter region is itself populated top down, and today only covers
about 4 MB, and so 224 MB is ample, and no guard region is therefore
required.

The resulting layout is identical between 48-bit/4k and 52-bit/64k
configurations.
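The carve-up of that 256 MB window is simple arithmetic on the new macro
values. The sketch below is user-space C, not kernel code; the VMEMMAP_START
value is taken from the 48-bit/4K table in Documentation/arm64/memory.rst,
and the SZ_* constants are spelled out locally for the example.

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  #define SZ_8M           0x00800000ULL
  #define SZ_16M          0x01000000ULL
  #define SZ_32M          0x02000000ULL
  #define SZ_256M         0x10000000ULL

  #define VMEMMAP_START   0xfffffc0000000000ULL
  #define PCI_IO_SIZE     SZ_16M

  #define PCI_IO_END      (VMEMMAP_START - SZ_8M)    /* 8 MB guard above PCI I/O   */
  #define PCI_IO_START    (PCI_IO_END - PCI_IO_SIZE) /* 16 MB PCI I/O space        */
  #define FIXADDR_TOP     (VMEMMAP_START - SZ_32M)   /* 8 MB guard above the fixmap */
  #define VMALLOC_END     (VMEMMAP_START - SZ_256M)  /* bottom of the 256 MB window */

  int main(void)
  {
          /* guard / PCI I/O / guard stacked directly below the vmemmap region */
          assert(VMEMMAP_START - PCI_IO_END   == SZ_8M);
          assert(PCI_IO_END    - PCI_IO_START == SZ_16M);
          assert(PCI_IO_START  - FIXADDR_TOP  == SZ_8M);

          /* everything left over is the top-down fixmap budget: 224 MB */
          printf("fixmap budget = %llu MB\n",
                 (unsigned long long)((FIXADDR_TOP - VMALLOC_END) >> 20));
          return 0;
  }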
Signed-off-by: Ard Biesheuvel
Reviewed-by: Steve Capper
Link: https://lore.kernel.org/r/20201008153602.9467-5-ardb@kernel.org
Signed-off-by: Catalin Marinas
---
 Documentation/arm64/memory.rst   | 22 ++++++++++------------
 arch/arm64/include/asm/memory.h  |  4 ++--
 arch/arm64/include/asm/pgtable.h |  2 +-
 3 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'arch/arm64/include/asm/memory.h')

diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
index 476edb6015b2..3d62604fa7bd 100644
--- a/Documentation/arm64/memory.rst
+++ b/Documentation/arm64/memory.rst
@@ -35,12 +35,11 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
   [ ffff600000000000	ffff7fffffffffff ]	  32TB	[ kasan shadow region ]
   ffff800000000000	ffff800007ffffff	 128MB	bpf jit region
   ffff800008000000	ffff80000fffffff	 128MB	modules
-  ffff800010000000	fffffbffbffeffff	 123TB	vmalloc
-  fffffbffbfff0000	fffffbfffe7f8fff	~998MB	[guard region]
-  fffffbfffe7f9000	fffffbfffebfffff	4124KB	fixed mappings
-  fffffbfffec00000	fffffbfffedfffff	   2MB	[guard region]
-  fffffbfffee00000	fffffbffffdfffff	  16MB	PCI I/O space
-  fffffbffffe00000	fffffbffffffffff	   2MB	[guard region]
+  ffff800010000000	fffffbffefffffff	 124TB	vmalloc
+  fffffbfff0000000	fffffbfffdffffff	 224MB	fixed mappings (top down)
+  fffffbfffe000000	fffffbfffe7fffff	   8MB	[guard region]
+  fffffbfffe800000	fffffbffff7fffff	  16MB	PCI I/O space
+  fffffbffff800000	fffffbffffffffff	   8MB	[guard region]
   fffffc0000000000	fffffdffffffffff	   2TB	vmemmap
   fffffe0000000000	ffffffffffffffff	   2TB	[guard region]

@@ -54,12 +53,11 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
   [ fffd800000000000	ffff7fffffffffff ]	 512TB	[ kasan shadow region ]
   ffff800000000000	ffff800007ffffff	 128MB	bpf jit region
   ffff800008000000	ffff80000fffffff	 128MB	modules
-  ffff800010000000	fffff81ffffeffff	 120TB	vmalloc
-  fffff81fffff0000	fffffbfffe38ffff	  ~3TB	[guard region]
-  fffffbfffe390000	fffffbfffebfffff	4544KB	fixed mappings
-  fffffbfffec00000	fffffbfffedfffff	   2MB	[guard region]
-  fffffbfffee00000	fffffbffffdfffff	  16MB	PCI I/O space
-  fffffbffffe00000	fffffbffffffffff	   2MB	[guard region]
+  ffff800010000000	fffffbffefffffff	 124TB	vmalloc
+  fffffbfff0000000	fffffbfffdffffff	 224MB	fixed mappings (top down)
+  fffffbfffe000000	fffffbfffe7fffff	   8MB	[guard region]
+  fffffbfffe800000	fffffbffff7fffff	  16MB	PCI I/O space
+  fffffbffff800000	fffffbffffffffff	   8MB	[guard region]
   fffffc0000000000	ffffffdfffffffff	  ~4TB	vmemmap
   ffffffe000000000	ffffffffffffffff	 128GB	[guard region]

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index ecd6342e27ca..03e9b112bd94 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -52,9 +52,9 @@
 #define MODULES_VSIZE		(SZ_128M)
 #define VMEMMAP_START		(-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
 #define VMEMMAP_END		(VMEMMAP_START + VMEMMAP_SIZE)
-#define PCI_IO_END		(VMEMMAP_START - SZ_2M)
+#define PCI_IO_END		(VMEMMAP_START - SZ_8M)
 #define PCI_IO_START		(PCI_IO_END - PCI_IO_SIZE)
-#define FIXADDR_TOP		(PCI_IO_START - SZ_2M)
+#define FIXADDR_TOP		(VMEMMAP_START - SZ_32M)

 #if VA_BITS > 48
 #define VA_BITS_MIN		(48)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 4ff12a7adcfd..ec307b8bcb15 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -22,7 +22,7 @@
  * and fixed mappings
  */
 #define VMALLOC_START		(MODULES_END)
-#define VMALLOC_END		(- PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
+#define VMALLOC_END		(VMEMMAP_START - SZ_256M)

 #define vmemmap			((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
--
cgit v1.2.3


From c1090bb10d5e15906d296936e64317e35c43f21d Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Tue, 10 Nov 2020 19:05:11 +0100
Subject: arm64: mm: don't assume struct page is always 64 bytes

Commit 8c96400d6a39be7 simplified the page-to-virt and virt-to-page
conversions, based on the assumption that struct page is always 64
bytes in size, in which case we can use a single signed shift to
perform the conversion (provided that the vmemmap array is placed
appropriately in the kernel VA space).

Unfortunately, this assumption turns out not to hold, and so we need
to revert part of this commit, and go back to an affine transformation.
Given that all the quantities involved are compile time constants, this
should not make any practical difference.

Fixes: 8c96400d6a39 ("arm64: mm: make vmemmap region a projection of the linear region")
Reported-by: Geert Uytterhoeven
Signed-off-by: Ard Biesheuvel
Link: https://lore.kernel.org/r/20201110180511.29083-1-ardb@kernel.org
Tested-by: Geert Uytterhoeven
Signed-off-by: Catalin Marinas
---
 arch/arm64/include/asm/memory.h | 8 +++++---
 arch/arm64/mm/init.c            | 2 --
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/arm64/include/asm/memory.h')

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 03e9b112bd94..556cb2d62b5b 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -308,13 +308,15 @@ static inline void *phys_to_virt(phys_addr_t x)
 #else
 #define page_to_virt(x)	({					\
 	__typeof__(x) __page = x;				\
-	u64 __addr = (u64)__page << VMEMMAP_SHIFT;		\
+	u64 __idx = ((u64)__page - VMEMMAP_START) / sizeof(struct page);\
+	u64 __addr = PAGE_OFFSET + (__idx * PAGE_SIZE);		\
 	(void *)__tag_set((const void *)__addr, page_kasan_tag(__page));\
 })

 #define virt_to_page(x)	({					\
-	u64 __addr = __tag_reset((u64)(x)) & PAGE_MASK;		\
-	(struct page *)((s64)__addr >> VMEMMAP_SHIFT);		\
+	u64 __idx = (__tag_reset((u64)x) - PAGE_OFFSET) / PAGE_SIZE;	\
+	u64 __addr = VMEMMAP_START + (__idx * sizeof(struct page));	\
+	(struct page *)__addr;					\
 })
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP || CONFIG_DEBUG_VIRTUAL */

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 3a5e9f9298e9..7e15d92836d8 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -502,8 +502,6 @@ static void __init free_unused_memmap(void)
  */
 void __init mem_init(void)
 {
-	BUILD_BUG_ON(!is_power_of_2(sizeof(struct page)));
-
 	if (swiotlb_force == SWIOTLB_FORCE ||
 	    max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit))
 		swiotlb_init(1);
--
cgit v1.2.3
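The difference between the two conversion schemes can be demonstrated in a
few lines. The sketch below is user-space C, not kernel code; the constants
match the 48-bit/4K layout documented above, and the 72-byte figure is an
arbitrary stand-in for a configuration whose struct page is not 64 bytes.
It shows that the affine form restored by this patch round-trips for any
struct page size, while a fixed ">> 6" shift is only its inverse when the
size is exactly 64 bytes.

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  #define PAGE_SHIFT      12
  #define PAGE_SIZE       (1ULL << PAGE_SHIFT)
  #define PAGE_OFFSET     0xffff000000000000ULL   /* 48-bit VA linear map base */
  #define VMEMMAP_START   0xfffffc0000000000ULL

  /* affine virt_to_page()/page_to_virt(), as re-instated by this patch */
  static uint64_t virt_to_page(uint64_t va, uint64_t page_sz)
  {
          uint64_t idx = (va - PAGE_OFFSET) / PAGE_SIZE;

          return VMEMMAP_START + idx * page_sz;
  }

  static uint64_t page_to_virt(uint64_t page, uint64_t page_sz)
  {
          uint64_t idx = (page - VMEMMAP_START) / page_sz;

          return PAGE_OFFSET + idx * PAGE_SIZE;
  }

  int main(void)
  {
          uint64_t va = PAGE_OFFSET + 0x123456000ULL;

          /* the round trip holds whether struct page is 64 bytes or not ... */
          assert(page_to_virt(virt_to_page(va, 64), 64) == va);
          assert(page_to_virt(virt_to_page(va, 72), 72) == va);

          /* ... whereas a fixed shift only matches the 64-byte case */
          assert(virt_to_page(va, 64) == (uint64_t)((int64_t)va >> 6));
          assert(virt_to_page(va, 72) != (uint64_t)((int64_t)va >> 6));

          printf("affine conversion round-trips for any struct page size\n");
          return 0;
  }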