summaryrefslogtreecommitdiff
path: root/arch/riscv/include
diff options
context:
space:
mode:
authorPalmer Dabbelt <palmer@rivosinc.com>2022-01-20 06:23:41 +0300
committerPalmer Dabbelt <palmer@rivosinc.com>2022-01-20 06:37:44 +0300
commit0c34e79e52bb13881c08f1a2c2390b7b88ff10c7 (patch)
treee8caa54b625f2364cee088c37c088de1d28a32b8 /arch/riscv/include
parentfc839c6d33c8828514f595822f457e51328507e5 (diff)
parentc774de22c430733487f70d755067d9ea55dbe6de (diff)
downloadlinux-0c34e79e52bb13881c08f1a2c2390b7b88ff10c7.tar.xz
RISC-V: Introduce sv48 support without relocatable kernel
This patchset allows to have a single kernel for sv39 and sv48 without being relocatable. The idea comes from Arnd Bergmann who suggested to do the same as x86, that is mapping the kernel to the end of the address space, which allows the kernel to be linked at the same address for both sv39 and sv48 and then does not require to be relocated at runtime. This implements sv48 support at runtime. The kernel will try to boot with 4-level page table and will fallback to 3-level if the HW does not support it. Folding the 4th level into a 3-level page table has almost no cost at runtime. Note that kasan region had to be moved to the end of the address space since its location must be known at compile-time and then be valid for both sv39 and sv48 (and sv57 that is coming). * riscv-sv48-v3: riscv: Explicit comment about user virtual address space size riscv: Use pgtable_l4_enabled to output mmu_type in cpuinfo riscv: Implement sv48 support asm-generic: Prepare for riscv use of pud_alloc_one and pud_free riscv: Allow to dynamically define VA_BITS riscv: Introduce functions to switch pt_ops riscv: Split early kasan mapping to prepare sv48 introduction riscv: Move KASAN mapping next to the kernel mapping riscv: Get rid of MAXPHYSMEM configs Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Diffstat (limited to 'arch/riscv/include')
-rw-r--r--arch/riscv/include/asm/csr.h3
-rw-r--r--arch/riscv/include/asm/fixmap.h1
-rw-r--r--arch/riscv/include/asm/kasan.h11
-rw-r--r--arch/riscv/include/asm/page.h16
-rw-r--r--arch/riscv/include/asm/pgalloc.h40
-rw-r--r--arch/riscv/include/asm/pgtable-64.h108
-rw-r--r--arch/riscv/include/asm/pgtable.h65
-rw-r--r--arch/riscv/include/asm/sparsemem.h6
8 files changed, 228 insertions, 22 deletions
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 5046f431645c..ae711692eec9 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -40,14 +40,13 @@
#ifndef CONFIG_64BIT
#define SATP_PPN _AC(0x003FFFFF, UL)
#define SATP_MODE_32 _AC(0x80000000, UL)
-#define SATP_MODE SATP_MODE_32
#define SATP_ASID_BITS 9
#define SATP_ASID_SHIFT 22
#define SATP_ASID_MASK _AC(0x1FF, UL)
#else
#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
-#define SATP_MODE SATP_MODE_39
+#define SATP_MODE_48 _AC(0x9000000000000000, UL)
#define SATP_ASID_BITS 16
#define SATP_ASID_SHIFT 44
#define SATP_ASID_MASK _AC(0xFFFF, UL)
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 54cbf07fb4e9..58a718573ad6 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -24,6 +24,7 @@ enum fixed_addresses {
FIX_HOLE,
FIX_PTE,
FIX_PMD,
+ FIX_PUD,
FIX_TEXT_POKE1,
FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,
diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h
index b00f503ec124..0b85e363e778 100644
--- a/arch/riscv/include/asm/kasan.h
+++ b/arch/riscv/include/asm/kasan.h
@@ -27,13 +27,18 @@
*/
#define KASAN_SHADOW_SCALE_SHIFT 3
-#define KASAN_SHADOW_SIZE (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
-#define KASAN_SHADOW_START KERN_VIRT_START
-#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
+#define KASAN_SHADOW_SIZE (UL(1) << ((VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
+/*
+ * Depending on the size of the virtual address space, the region may not be
+ * aligned on PGDIR_SIZE, so force its alignment to ease its population.
+ */
+#define KASAN_SHADOW_START ((KASAN_SHADOW_END - KASAN_SHADOW_SIZE) & PGDIR_MASK)
+#define KASAN_SHADOW_END MODULES_LOWEST_VADDR
#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
void kasan_init(void);
asmlinkage void kasan_early_init(void);
+void kasan_swapper_init(void);
#endif
#endif
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index b3e5ff0125fe..160e3a1e8f8b 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -31,9 +31,20 @@
* When not using MMU this corresponds to the first free page in
* physical memory (aligned on a page boundary).
*/
+#ifdef CONFIG_64BIT
+#ifdef CONFIG_MMU
+#define PAGE_OFFSET kernel_map.page_offset
+#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
-
-#define KERN_VIRT_SIZE (-PAGE_OFFSET)
+#endif
+/*
+ * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
+ * define the PAGE_OFFSET value for SV39.
+ */
+#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL)
+#else
+#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
+#endif /* CONFIG_64BIT */
#ifndef __ASSEMBLY__
@@ -86,6 +97,7 @@ extern unsigned long riscv_pfn_base;
#endif /* CONFIG_MMU */
struct kernel_mapping {
+ unsigned long page_offset;
unsigned long virt_addr;
uintptr_t phys_addr;
uintptr_t size;
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 0af6933a7100..11823004b87a 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -11,6 +11,8 @@
#include <asm/tlb.h>
#ifdef CONFIG_MMU
+#define __HAVE_ARCH_PUD_ALLOC_ONE
+#define __HAVE_ARCH_PUD_FREE
#include <asm-generic/pgalloc.h>
static inline void pmd_populate_kernel(struct mm_struct *mm,
@@ -36,6 +38,44 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
+
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
+{
+ if (pgtable_l4_enabled) {
+ unsigned long pfn = virt_to_pfn(pud);
+
+ set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
+ pud_t *pud)
+{
+ if (pgtable_l4_enabled) {
+ unsigned long pfn = virt_to_pfn(pud);
+
+ set_p4d_safe(p4d,
+ __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+#define pud_alloc_one pud_alloc_one
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ if (pgtable_l4_enabled)
+ return __pud_alloc_one(mm, addr);
+
+ return NULL;
+}
+
+#define pud_free pud_free
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ if (pgtable_l4_enabled)
+ __pud_free(mm, pud);
+}
+
+#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
#endif /* __PAGETABLE_PMD_FOLDED */
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 228261aa9628..bbbdd66e5e2f 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -8,16 +8,36 @@
#include <linux/const.h>
-#define PGDIR_SHIFT 30
+extern bool pgtable_l4_enabled;
+
+#define PGDIR_SHIFT_L3 30
+#define PGDIR_SHIFT_L4 39
+#define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3)
+
+#define PGDIR_SHIFT (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
/* Size of region mapped by a page global directory */
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+/* pud is folded into pgd in case of 3-level page table */
+#define PUD_SHIFT 30
+#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE - 1))
+
#define PMD_SHIFT 21
/* Size of region mapped by a page middle directory */
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE - 1))
+/* Page Upper Directory entry */
+typedef struct {
+ unsigned long pud;
+} pud_t;
+
+#define pud_val(x) ((x).pud)
+#define __pud(x) ((pud_t) { (x) })
+#define PTRS_PER_PUD (PAGE_SIZE / sizeof(pud_t))
+
/* Page Middle Directory entry */
typedef struct {
unsigned long pmd;
@@ -59,6 +79,16 @@ static inline void pud_clear(pud_t *pudp)
set_pud(pudp, __pud(0));
}
+static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot)
+{
+ return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
+}
+
+static inline unsigned long _pud_pfn(pud_t pud)
+{
+ return pud_val(pud) >> _PAGE_PFN_SHIFT;
+}
+
static inline pmd_t *pud_pgtable(pud_t pud)
{
return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
@@ -69,6 +99,17 @@ static inline struct page *pud_page(pud_t pud)
return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
}
+#define mm_pud_folded mm_pud_folded
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+ if (pgtable_l4_enabled)
+ return false;
+
+ return true;
+}
+
+#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+
static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot)
{
return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
@@ -84,4 +125,69 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+ pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
+
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ *p4dp = p4d;
+ else
+ set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
+}
+
+static inline int p4d_none(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (p4d_val(p4d) == 0);
+
+ return 0;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (p4d_val(p4d) & _PAGE_PRESENT);
+
+ return 1;
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return !p4d_present(p4d);
+
+ return 0;
+}
+
+static inline void p4d_clear(p4d_t *p4d)
+{
+ if (pgtable_l4_enabled)
+ set_p4d(p4d, __p4d(0));
+}
+
+static inline pud_t *p4d_pgtable(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+
+ return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
+}
+
+static inline struct page *p4d_page(p4d_t p4d)
+{
+ return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+}
+
+#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+#define pud_offset pud_offset
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ if (pgtable_l4_enabled)
+ return p4d_pgtable(*p4d) + pud_index(address);
+
+ return (pud_t *)p4d;
+}
+
#endif /* _ASM_RISCV_PGTABLE_64_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 67f687aee673..7e949f25c933 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -24,6 +24,17 @@
#define KERNEL_LINK_ADDR PAGE_OFFSET
#endif
+/* Number of entries in the page global directory */
+#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
+/* Number of entries in the page table */
+#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
+
+/*
+ * Half of the kernel address space (half of the entries of the page global
+ * directory) is for the direct mapping.
+ */
+#define KERN_VIRT_SIZE ((PTRS_PER_PGD / 2 * PGDIR_SIZE) / 2)
+
#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
#define VMALLOC_END PAGE_OFFSET
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
@@ -39,8 +50,10 @@
/* Modules always live before the kernel */
#ifdef CONFIG_64BIT
-#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
-#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
+/* This is used to define the end of the KASAN shadow region */
+#define MODULES_LOWEST_VADDR (KERNEL_LINK_ADDR - SZ_2G)
+#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
+#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
#endif
/*
@@ -48,8 +61,14 @@
* struct pages to map half the virtual address space. Then
* position vmemmap directly below the VMALLOC region.
*/
+#ifdef CONFIG_64BIT
+#define VA_BITS (pgtable_l4_enabled ? 48 : 39)
+#else
+#define VA_BITS 32
+#endif
+
#define VMEMMAP_SHIFT \
- (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
+ (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
#define VMEMMAP_END VMALLOC_START
#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
@@ -83,8 +102,7 @@
#ifndef __ASSEMBLY__
-/* Page Upper Directory not used in RISC-V */
-#include <asm-generic/pgtable-nopud.h>
+#include <asm-generic/pgtable-nop4d.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <linux/mm_types.h>
@@ -107,12 +125,20 @@
#define XIP_FIXUP(addr) (addr)
#endif /* CONFIG_XIP_KERNEL */
-#ifdef CONFIG_MMU
-/* Number of entries in the page global directory */
-#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
-/* Number of entries in the page table */
-#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t))
+struct pt_alloc_ops {
+ pte_t *(*get_pte_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pte)(uintptr_t va);
+#ifndef __PAGETABLE_PMD_FOLDED
+ pmd_t *(*get_pmd_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pmd)(uintptr_t va);
+ pud_t *(*get_pud_virt)(phys_addr_t pa);
+ phys_addr_t (*alloc_pud)(uintptr_t va);
+#endif
+};
+
+extern struct pt_alloc_ops pt_ops __initdata;
+#ifdef CONFIG_MMU
/* Number of PGD entries that a user-mode program can use */
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
@@ -659,7 +685,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
* and give the kernel the other (upper) half.
*/
#ifdef CONFIG_64BIT
-#define KERN_VIRT_START (-(BIT(CONFIG_VA_BITS)) + TASK_SIZE)
+#define KERN_VIRT_START (-(BIT(VA_BITS)) + TASK_SIZE)
#else
#define KERN_VIRT_START FIXADDR_START
#endif
@@ -667,11 +693,22 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
/*
* Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
* Note that PGDIR_SIZE must evenly divide TASK_SIZE.
+ * Task size is:
+ * - 0x9fc00000 (~2.5GB) for RV32.
+ * - 0x4000000000 ( 256GB) for RV64 using SV39 mmu
+ * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu
+ *
+ * Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V
+ * Instruction Set Manual Volume II: Privileged Architecture" states that
+ * "load and store effective addresses, which are 64bits, must have bits
+ * 63–48 all equal to bit 47, or else a page-fault exception will occur."
*/
#ifdef CONFIG_64BIT
-#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2)
+#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2)
+#define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)
#else
-#define TASK_SIZE FIXADDR_START
+#define TASK_SIZE FIXADDR_START
+#define TASK_SIZE_MIN TASK_SIZE
#endif
#else /* CONFIG_MMU */
@@ -697,6 +734,8 @@ extern uintptr_t _dtb_early_pa;
#define dtb_early_va _dtb_early_va
#define dtb_early_pa _dtb_early_pa
#endif /* CONFIG_XIP_KERNEL */
+extern u64 satp_mode;
+extern bool pgtable_l4_enabled;
void paging_init(void);
void misc_mem_init(void);
diff --git a/arch/riscv/include/asm/sparsemem.h b/arch/riscv/include/asm/sparsemem.h
index 45a7018a8118..63acaecc3374 100644
--- a/arch/riscv/include/asm/sparsemem.h
+++ b/arch/riscv/include/asm/sparsemem.h
@@ -4,7 +4,11 @@
#define _ASM_RISCV_SPARSEMEM_H
#ifdef CONFIG_SPARSEMEM
-#define MAX_PHYSMEM_BITS CONFIG_PA_BITS
+#ifdef CONFIG_64BIT
+#define MAX_PHYSMEM_BITS 56
+#else
+#define MAX_PHYSMEM_BITS 34
+#endif /* CONFIG_64BIT */
#define SECTION_SIZE_BITS 27
#endif /* CONFIG_SPARSEMEM */