From 1baa82f48030f38d1895301f1ec93acbcb3d15db Mon Sep 17 00:00:00 2001
From: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Date: Mon, 2 Mar 2015 19:19:14 +0000
Subject: arm64: Implement cpu_relax as yield

ARM64 has the yield nop hint which has the intended semantics of
cpu_relax. Implement.

The immediate application is ARM CPU emulators. An emulator can take
advantage of the yield hint to de-prioritise an emulated CPU in favor
of other emulation tasks. QEMU A64 SMP emulation has yield awareness,
and sees a significant boot time performance increase with this change.

Signed-off-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/processor.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 20e9591a60cf..d2c37a1df0eb 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -127,7 +127,11 @@ extern void release_thread(struct task_struct *);
 
 unsigned long get_wchan(struct task_struct *p);
 
-#define cpu_relax()			barrier()
+static inline void cpu_relax(void)
+{
+	asm volatile("yield" ::: "memory");
+}
+
 #define cpu_relax_lowlatency()                cpu_relax()
 
 /* Thread switching */
-- 
cgit v1.2.3


From 137650aad96c9594683445e41afa8ac5a2097520 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 13 Mar 2015 16:14:34 +0000
Subject: arm64: apply alternatives for !SMP kernels

Currently we only perform alternative patching for kernels built with
CONFIG_SMP, as we call apply_alternatives_all() in smp.c, which is only
built for CONFIG_SMP. Thus !SMP kernels may not have necessary
alternatives patched in.

This patch ensures that we call apply_alternatives_all() once all CPUs
are booted, even for !SMP kernels, by having the smp_init_cpus() stub
call this for !SMP kernels via up_late_init. A new wrapper,
do_post_cpus_up_work, is added so we can hook other calls here later
(e.g. boot mode logging).

Cc: Andre Przywara <andre.przywara@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Fixes: e039ee4ee3fcf174 ("arm64: add alternative runtime patching")
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/Kconfig                |  4 ++++
 arch/arm64/include/asm/smp_plat.h |  2 ++
 arch/arm64/kernel/setup.c         | 12 ++++++++++++
 arch/arm64/kernel/smp.c           |  2 +-
 4 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1b8e97331ffb..0d46debbc9a7 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -470,6 +470,10 @@ config HOTPLUG_CPU
 
 source kernel/Kconfig.preempt
 
+config UP_LATE_INIT
+       def_bool y
+       depends on !SMP
+
 config HZ
 	int
 	default 100
diff --git a/arch/arm64/include/asm/smp_plat.h b/arch/arm64/include/asm/smp_plat.h
index 59e282311b58..8dcd61e32176 100644
--- a/arch/arm64/include/asm/smp_plat.h
+++ b/arch/arm64/include/asm/smp_plat.h
@@ -40,4 +40,6 @@ static inline u32 mpidr_hash_size(void)
 extern u64 __cpu_logical_map[NR_CPUS];
 #define cpu_logical_map(cpu)    __cpu_logical_map[cpu]
 
+void __init do_post_cpus_up_work(void);
+
 #endif /* __ASM_SMP_PLAT_H */
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index e8420f635bd4..781f4697dc26 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -207,6 +207,18 @@ static void __init smp_build_mpidr_hash(void)
 }
 #endif
 
+void __init do_post_cpus_up_work(void)
+{
+	apply_alternatives_all();
+}
+
+#ifdef CONFIG_UP_LATE_INIT
+void __init up_late_init(void)
+{
+	do_post_cpus_up_work();
+}
+#endif /* CONFIG_UP_LATE_INIT */
+
 static void __init setup_processor(void)
 {
 	struct cpu_info *cpu_info;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 328b8ce4b007..4257369341e4 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -309,7 +309,7 @@ void cpu_die(void)
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	pr_info("SMP: Total of %d processors activated.\n", num_online_cpus());
-	apply_alternatives_all();
+	do_post_cpus_up_work();
 }
 
 void __init smp_prepare_boot_cpu(void)
-- 
cgit v1.2.3


From 18ccb0cab49ef7868eaf9504f257e1a84683dbbd Mon Sep 17 00:00:00 2001
From: Andreas Schwab <schwab@suse.de>
Date: Mon, 16 Mar 2015 16:32:22 +0000
Subject: arm64: fix implementation of mmap2 compat syscall

The arm mmap2 syscall takes the offset in units of 4K, thus with 64K pages
the offset needs to be scaled to units of pages.

Signed-off-by: Andreas Schwab <schwab@suse.de>
Signed-off-by: Alexander Graf <agraf@suse.de>
[will: removed redundant lr parameter, localised PAGE_SHIFT #if check]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/unistd32.h |  2 +-
 arch/arm64/kernel/entry32.S       | 18 ++++++++++++++++++
 arch/arm64/kernel/sys32.c         |  1 +
 3 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 27224426e0bf..cef934a90f17 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -406,7 +406,7 @@ __SYSCALL(__NR_vfork, sys_vfork)
 #define __NR_ugetrlimit 191	/* SuS compliant getrlimit */
 __SYSCALL(__NR_ugetrlimit, compat_sys_getrlimit)		/* SuS compliant getrlimit */
 #define __NR_mmap2 192
-__SYSCALL(__NR_mmap2, sys_mmap_pgoff)
+__SYSCALL(__NR_mmap2, compat_sys_mmap2_wrapper)
 #define __NR_truncate64 193
 __SYSCALL(__NR_truncate64, compat_sys_truncate64_wrapper)
 #define __NR_ftruncate64 194
diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S
index 9a8f6ae2530e..bd9bfaa9269b 100644
--- a/arch/arm64/kernel/entry32.S
+++ b/arch/arm64/kernel/entry32.S
@@ -19,9 +19,12 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/const.h>
 
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
+#include <asm/errno.h>
+#include <asm/page.h>
 
 /*
  * System call wrappers for the AArch32 compatibility layer.
@@ -53,6 +56,21 @@ ENTRY(compat_sys_fstatfs64_wrapper)
 	b	compat_sys_fstatfs64
 ENDPROC(compat_sys_fstatfs64_wrapper)
 
+/*
+ * Note: off_4k (w5) is always in units of 4K. If we can't do the
+ * requested offset because it is not page-aligned, we return -EINVAL.
+ */
+ENTRY(compat_sys_mmap2_wrapper)
+#if PAGE_SHIFT > 12
+	tst	w5, #~PAGE_MASK >> 12
+	b.ne	1f
+	lsr	w5, w5, #PAGE_SHIFT - 12
+#endif
+	b	sys_mmap_pgoff
+1:	mov	x0, #-EINVAL
+	ret
+ENDPROC(compat_sys_mmap2_wrapper)
+
 /*
  * Wrappers for AArch32 syscalls that either take 64-bit parameters
  * in registers or that take 32-bit parameters which require sign
diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c
index 2d5ab3c90b82..a40b1343b819 100644
--- a/arch/arm64/kernel/sys32.c
+++ b/arch/arm64/kernel/sys32.c
@@ -37,6 +37,7 @@ asmlinkage long compat_sys_readahead_wrapper(void);
 asmlinkage long compat_sys_fadvise64_64_wrapper(void);
 asmlinkage long compat_sys_sync_file_range2_wrapper(void);
 asmlinkage long compat_sys_fallocate_wrapper(void);
+asmlinkage long compat_sys_mmap2_wrapper(void);
 
 #undef __SYSCALL
 #define __SYSCALL(nr, sym)	[nr] = sym,
-- 
cgit v1.2.3


From 19fc577579c86fec6e2523baeb457b02e939796f Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 4 Mar 2015 13:27:34 +0000
Subject: arm64: fixmap: make FIX_TEXT_POKE0 permanent

The FIX_TEXT_POKE0 is currently at the end of the temporary fixmap
slots, despite the fact that it can be used at any point during runtime
(e.g. for poking the text of loaded modules), and thus should be a
permanent fixmap slot (as is the case on arm and x86).

This patch moves FIX_TEXT_POKE0 into the set of permanent fixmap slots.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Kees Cook <keescook@chromium.org>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Laura Abbott <lauraa@codeaurora.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/fixmap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index defa0ff98250..926495686554 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -33,6 +33,7 @@
 enum fixed_addresses {
 	FIX_HOLE,
 	FIX_EARLYCON_MEM_BASE,
+	FIX_TEXT_POKE0,
 	__end_of_permanent_fixed_addresses,
 
 	/*
@@ -49,7 +50,6 @@ enum fixed_addresses {
 
 	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
 	FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
-	FIX_TEXT_POKE0,
 	__end_of_fixed_addresses
 };
 
-- 
cgit v1.2.3


From a591ede4cd1cac02d3398a9ad332bd0bba460efe Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 18 Mar 2015 14:55:20 +0000
Subject: arm64: Get rid of struct cpu_table

struct cpu_table is an artifact left from the (very) early days of
the arm64 port, and its only real use is to allow the most beautiful
"AArch64 Processor" string to be displayed at boot time.

Really? Yes, really.

Let's get rid of it. In order to avoid another BogoMips-gate, the
aforementioned string is preserved.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/cputable.h | 30 ----------------
 arch/arm64/kernel/Makefile        |  2 +-
 arch/arm64/kernel/asm-offsets.c   |  4 ---
 arch/arm64/kernel/cputable.c      | 33 -----------------
 arch/arm64/kernel/head.S          | 76 +++------------------------------------
 arch/arm64/kernel/setup.c         | 16 ++-------
 6 files changed, 8 insertions(+), 153 deletions(-)
 delete mode 100644 arch/arm64/include/asm/cputable.h
 delete mode 100644 arch/arm64/kernel/cputable.c

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/cputable.h b/arch/arm64/include/asm/cputable.h
deleted file mode 100644
index e3bd983d3661..000000000000
--- a/arch/arm64/include/asm/cputable.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * arch/arm64/include/asm/cputable.h
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#ifndef __ASM_CPUTABLE_H
-#define __ASM_CPUTABLE_H
-
-struct cpu_info {
-	unsigned int	cpu_id_val;
-	unsigned int	cpu_id_mask;
-	const char	*cpu_name;
-	unsigned long	(*cpu_setup)(void);
-};
-
-extern struct cpu_info *lookup_processor_type(unsigned int);
-
-#endif
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 5ee07eee80c2..d5e70747c7a2 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -12,7 +12,7 @@ CFLAGS_REMOVE_insn.o = -pg
 CFLAGS_REMOVE_return_address.o = -pg
 
 # Object file lists.
-arm64-obj-y		:= cputable.o debug-monitors.o entry.o irq.o fpsimd.o	\
+arm64-obj-y		:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   entry-fpsimd.o process.o ptrace.o setup.o signal.o	\
 			   sys.o stacktrace.o time.o traps.o io.o vdso.o	\
 			   hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o	\
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index f7fa65d4c352..14dd3d1afa57 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -24,7 +24,6 @@
 #include <linux/kvm_host.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
-#include <asm/cputable.h>
 #include <asm/smp_plat.h>
 #include <asm/suspend.h>
 #include <asm/vdso_datapage.h>
@@ -71,9 +70,6 @@ int main(void)
   BLANK();
   DEFINE(PAGE_SZ,	       	PAGE_SIZE);
   BLANK();
-  DEFINE(CPU_INFO_SZ,		sizeof(struct cpu_info));
-  DEFINE(CPU_INFO_SETUP,	offsetof(struct cpu_info, cpu_setup));
-  BLANK();
   DEFINE(DMA_BIDIRECTIONAL,	DMA_BIDIRECTIONAL);
   DEFINE(DMA_TO_DEVICE,		DMA_TO_DEVICE);
   DEFINE(DMA_FROM_DEVICE,	DMA_FROM_DEVICE);
diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c
deleted file mode 100644
index fd3993cb060f..000000000000
--- a/arch/arm64/kernel/cputable.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * arch/arm64/kernel/cputable.c
- *
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/init.h>
-
-#include <asm/cputable.h>
-
-extern unsigned long __cpu_setup(void);
-
-struct cpu_info cpu_table[] = {
-	{
-		.cpu_id_val	= 0x000f0000,
-		.cpu_id_mask	= 0x000f0000,
-		.cpu_name	= "AArch64 Processor",
-		.cpu_setup	= __cpu_setup,
-	},
-	{ /* Empty */ },
-};
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index d17649d39392..ebb9e630230a 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -245,22 +245,12 @@ ENTRY(stext)
 	bl	__calc_phys_offset		// x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET
 	bl	set_cpu_boot_mode_flag
 	mrs	x22, midr_el1			// x22=cpuid
-	mov	x0, x22
-	bl	lookup_processor_type
-	mov	x23, x0				// x23=current cpu_table
-	/*
-	 * __error_p may end up out of range for cbz if text areas are
-	 * aligned up to section sizes.
-	 */
-	cbnz	x23, 1f				// invalid processor (x23=0)?
-	b	__error_p
-1:
+
 	bl	__vet_fdt
 	bl	__create_page_tables		// x25=TTBR0, x26=TTBR1
 	/*
-	 * The following calls CPU specific code in a position independent
-	 * manner. See arch/arm64/mm/proc.S for details. x23 = base of
-	 * cpu_info structure selected by lookup_processor_type above.
+	 * The following calls CPU setup code, see arch/arm64/mm/proc.S for
+	 * details.
 	 * On return, the CPU will be ready for the MMU to be turned on and
 	 * the TCR will have been set.
 	 */
@@ -268,9 +258,7 @@ ENTRY(stext)
 						// MMU has been enabled
 	adrp	lr, __enable_mmu		// return (PIC) address
 	add	lr, lr, #:lo12:__enable_mmu
-	ldr	x12, [x23, #CPU_INFO_SETUP]
-	add	x12, x12, x28			// __virt_to_phys
-	br	x12				// initialise processor
+	b	__cpu_setup			// initialise processor
 ENDPROC(stext)
 
 /*
@@ -634,15 +622,9 @@ ENTRY(secondary_startup)
 	 * Common entry point for secondary CPUs.
 	 */
 	mrs	x22, midr_el1			// x22=cpuid
-	mov	x0, x22
-	bl	lookup_processor_type
-	mov	x23, x0				// x23=current cpu_table
-	cbz	x23, __error_p			// invalid processor (x23=0)?
 
 	pgtbl	x25, x26, x28			// x25=TTBR0, x26=TTBR1
-	ldr	x12, [x23, #CPU_INFO_SETUP]
-	add	x12, x12, x28			// __virt_to_phys
-	blr	x12				// initialise processor
+	bl	__cpu_setup			// initialise processor
 
 	ldr	x21, =secondary_data
 	ldr	x27, =__secondary_switched	// address to jump to after enabling the MMU
@@ -708,51 +690,3 @@ ENDPROC(__calc_phys_offset)
 	.align 3
 1:	.quad	.
 	.quad	PAGE_OFFSET
-
-/*
- * Exception handling. Something went wrong and we can't proceed. We ought to
- * tell the user, but since we don't have any guarantee that we're even
- * running on the right architecture, we do virtually nothing.
- */
-__error_p:
-ENDPROC(__error_p)
-
-__error:
-1:	nop
-	b	1b
-ENDPROC(__error)
-
-/*
- * This function gets the processor ID in w0 and searches the cpu_table[] for
- * a match. It returns a pointer to the struct cpu_info it found. The
- * cpu_table[] must end with an empty (all zeros) structure.
- *
- * This routine can be called via C code and it needs to work with the MMU
- * both disabled and enabled (the offset is calculated automatically).
- */
-ENTRY(lookup_processor_type)
-	adr	x1, __lookup_processor_type_data
-	ldp	x2, x3, [x1]
-	sub	x1, x1, x2			// get offset between VA and PA
-	add	x3, x3, x1			// convert VA to PA
-1:
-	ldp	w5, w6, [x3]			// load cpu_id_val and cpu_id_mask
-	cbz	w5, 2f				// end of list?
-	and	w6, w6, w0
-	cmp	w5, w6
-	b.eq	3f
-	add	x3, x3, #CPU_INFO_SZ
-	b	1b
-2:
-	mov	x3, #0				// unknown processor
-3:
-	mov	x0, x3
-	ret
-ENDPROC(lookup_processor_type)
-
-	.align	3
-	.type	__lookup_processor_type_data, %object
-__lookup_processor_type_data:
-	.quad	.
-	.quad	cpu_table
-	.size	__lookup_processor_type_data, . - __lookup_processor_type_data
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 14808947bf46..3852405d70b5 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -50,7 +50,6 @@
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/elf.h>
-#include <asm/cputable.h>
 #include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
 #include <asm/sections.h>
@@ -84,7 +83,6 @@ unsigned int compat_elf_hwcap2 __read_mostly;
 
 DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 
-static const char *cpu_name;
 phys_addr_t __fdt_pointer __initdata;
 
 /*
@@ -234,22 +232,12 @@ void __init up_late_init(void)
 
 static void __init setup_processor(void)
 {
-	struct cpu_info *cpu_info;
 	u64 features, block;
 	u32 cwg;
 	int cls;
 
-	cpu_info = lookup_processor_type(read_cpuid_id());
-	if (!cpu_info) {
-		printk("CPU configuration botched (ID %08x), unable to continue.\n",
-		       read_cpuid_id());
-		while (1);
-	}
-
-	cpu_name = cpu_info->cpu_name;
-
-	printk("CPU: %s [%08x] revision %d\n",
-	       cpu_name, read_cpuid_id(), read_cpuid_id() & 15);
+	printk("CPU: AArch64 Processor [%08x] revision %d\n",
+	       read_cpuid_id(), read_cpuid_id() & 15);
 
 	sprintf(init_utsname()->machine, ELF_PLATFORM);
 	elf_hwcap = 0;
-- 
cgit v1.2.3


From b784a5d97d0af4835dd0125a3e0e5d0fd48128d6 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 4 Mar 2015 19:45:38 +0100
Subject: arm64: add macros for common adrp usages

The adrp instruction is mostly used in combination with either
an add, a ldr or a str instruction with the low bits of the
referenced symbol in the 12-bit immediate of the followup
instruction.

Introduce the macros adr_l, ldr_l and str_l that encapsulate
these common patterns.

Tested-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/assembler.h | 48 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 750bac4e637e..144b64ad96c3 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -159,4 +159,52 @@ lr	.req	x30		// link register
 	orr	\rd, \lbits, \hbits, lsl #32
 	.endm
 
+/*
+ * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
+ * <symbol> is within the range +/- 4 GB of the PC.
+ */
+	/*
+	 * @dst: destination register (64 bit wide)
+	 * @sym: name of the symbol
+	 * @tmp: optional scratch register to be used if <dst> == sp, which
+	 *       is not allowed in an adrp instruction
+	 */
+	.macro	adr_l, dst, sym, tmp=
+	.ifb	\tmp
+	adrp	\dst, \sym
+	add	\dst, \dst, :lo12:\sym
+	.else
+	adrp	\tmp, \sym
+	add	\dst, \tmp, :lo12:\sym
+	.endif
+	.endm
+
+	/*
+	 * @dst: destination register (32 or 64 bit wide)
+	 * @sym: name of the symbol
+	 * @tmp: optional 64-bit scratch register to be used if <dst> is a
+	 *       32-bit wide register, in which case it cannot be used to hold
+	 *       the address
+	 */
+	.macro	ldr_l, dst, sym, tmp=
+	.ifb	\tmp
+	adrp	\dst, \sym
+	ldr	\dst, [\dst, :lo12:\sym]
+	.else
+	adrp	\tmp, \sym
+	ldr	\dst, [\tmp, :lo12:\sym]
+	.endif
+	.endm
+
+	/*
+	 * @src: source register (32 or 64 bit wide)
+	 * @sym: name of the symbol
+	 * @tmp: mandatory 64-bit scratch register to calculate the address
+	 *       while <src> needs to be preserved.
+	 */
+	.macro	str_l, src, sym, tmp
+	adrp	\tmp, \sym
+	str	\src, [\tmp, :lo12:\sym]
+	.endm
+
 #endif	/* __ASM_ASSEMBLER_H */
-- 
cgit v1.2.3


From ce47fbb7c8956742a6de06b9706b1c6236339f51 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 19 Mar 2015 19:19:40 +0000
Subject: arm64: proc: remove unused cpu_get_pgd macro

cpu_get_pgd isn't used anywhere and is Probably Not What You Want.
Remove it before anybody decides to use it.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/proc-fns.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 9a8fd84f8fb2..4d9ede7b6361 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -41,15 +41,6 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
 
 #define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm)
 
-#define cpu_get_pgd()					\
-({							\
-	unsigned long pg;				\
-	asm("mrs	%0, ttbr0_el1\n"		\
-	    : "=r" (pg));				\
-	pg &= ~0xffff000000003ffful;			\
-	(pgd_t *)phys_to_virt(pg);			\
-})
-
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* __ASM_PROCFNS_H */
-- 
cgit v1.2.3


From dd006da21646f1c86f0242eb8f527d093303127a Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 19 Mar 2015 16:42:27 +0000
Subject: arm64: mm: increase VA range of identity map

The page size and the number of translation levels, and hence the supported
virtual address range, are build-time configurables on arm64 whose optimal
values are use case dependent. However, in the current implementation, if
the system's RAM is located at a very high offset, the virtual address range
needs to reflect that merely because the identity mapping, which is only used
to enable or disable the MMU, requires the extended virtual range to map the
physical memory at an equal virtual offset.

This patch relaxes that requirement, by increasing the number of translation
levels for the identity mapping only, and only when actually needed, i.e.,
when system RAM's offset is found to be out of reach at runtime.

Tested-by: Laura Abbott <lauraa@codeaurora.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/mmu_context.h   | 43 ++++++++++++++++++++++++++++++++++
 arch/arm64/include/asm/page.h          |  6 +++--
 arch/arm64/include/asm/pgtable-hwdef.h |  7 +++++-
 arch/arm64/kernel/head.S               | 37 +++++++++++++++++++++++++++++
 arch/arm64/kernel/smp.c                |  1 +
 arch/arm64/mm/mmu.c                    |  7 +++++-
 arch/arm64/mm/proc-macros.S            | 10 ++++++++
 arch/arm64/mm/proc.S                   |  3 +++
 8 files changed, 110 insertions(+), 4 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index a9eee33dfa62..ecf2d060036b 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -64,6 +64,49 @@ static inline void cpu_set_reserved_ttbr0(void)
 	: "r" (ttbr));
 }
 
+/*
+ * TCR.T0SZ value to use when the ID map is active. Usually equals
+ * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
+ * physical memory, in which case it will be smaller.
+ */
+extern u64 idmap_t0sz;
+
+static inline bool __cpu_uses_extended_idmap(void)
+{
+	return (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48) &&
+		unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
+}
+
+static inline void __cpu_set_tcr_t0sz(u64 t0sz)
+{
+	unsigned long tcr;
+
+	if (__cpu_uses_extended_idmap())
+		asm volatile (
+		"	mrs	%0, tcr_el1	;"
+		"	bfi	%0, %1, %2, %3	;"
+		"	msr	tcr_el1, %0	;"
+		"	isb"
+		: "=&r" (tcr)
+		: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+}
+
+/*
+ * Set TCR.T0SZ to the value appropriate for activating the identity map.
+ */
+static inline void cpu_set_idmap_tcr_t0sz(void)
+{
+	__cpu_set_tcr_t0sz(idmap_t0sz);
+}
+
+/*
+ * Set TCR.T0SZ to its default value (based on VA_BITS)
+ */
+static inline void cpu_set_default_tcr_t0sz(void)
+{
+	__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS));
+}
+
 static inline void switch_new_context(struct mm_struct *mm)
 {
 	unsigned long flags;
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 22b16232bd60..3d02b1869eb8 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -33,7 +33,9 @@
  * image. Both require pgd, pud (4 levels only) and pmd tables to (section)
  * map the kernel. With the 64K page configuration, swapper and idmap need to
  * map to pte level. The swapper also maps the FDT (see __create_page_tables
- * for more information).
+ * for more information). Note that the number of ID map translation levels
+ * could be increased on the fly if system RAM is out of reach for the default
+ * VA range, so 3 pages are reserved in all cases.
  */
 #ifdef CONFIG_ARM64_64K_PAGES
 #define SWAPPER_PGTABLE_LEVELS	(CONFIG_ARM64_PGTABLE_LEVELS)
@@ -42,7 +44,7 @@
 #endif
 
 #define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
-#define IDMAP_DIR_SIZE		(SWAPPER_DIR_SIZE)
+#define IDMAP_DIR_SIZE		(3 * PAGE_SIZE)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 5f930cc9ea83..847e864202cc 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -143,7 +143,12 @@
 /*
  * TCR flags.
  */
-#define TCR_TxSZ(x)		(((UL(64) - (x)) << 16) | ((UL(64) - (x)) << 0))
+#define TCR_T0SZ_OFFSET		0
+#define TCR_T1SZ_OFFSET		16
+#define TCR_T0SZ(x)		((UL(64) - (x)) << TCR_T0SZ_OFFSET)
+#define TCR_T1SZ(x)		((UL(64) - (x)) << TCR_T1SZ_OFFSET)
+#define TCR_TxSZ(x)		(TCR_T0SZ(x) | TCR_T1SZ(x))
+#define TCR_TxSZ_WIDTH		6
 #define TCR_IRGN_NC		((UL(0) << 8) | (UL(0) << 24))
 #define TCR_IRGN_WBWA		((UL(1) << 8) | (UL(1) << 24))
 #define TCR_IRGN_WT		((UL(2) << 8) | (UL(2) << 24))
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 1fdf42041f42..51c9811e683c 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -383,6 +383,43 @@ __create_page_tables:
 	 */
 	mov	x0, x25				// idmap_pg_dir
 	adrp	x3, KERNEL_START		// __pa(KERNEL_START)
+
+#ifndef CONFIG_ARM64_VA_BITS_48
+#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
+#define EXTRA_PTRS	(1 << (48 - EXTRA_SHIFT))
+
+	/*
+	 * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
+	 * created that covers system RAM if that is located sufficiently high
+	 * in the physical address space. So for the ID map, use an extended
+	 * virtual range in that case, by configuring an additional translation
+	 * level.
+	 * First, we have to verify our assumption that the current value of
+	 * VA_BITS was chosen such that all translation levels are fully
+	 * utilised, and that lowering T0SZ will always result in an additional
+	 * translation level to be configured.
+	 */
+#if VA_BITS != EXTRA_SHIFT
+#error "Mismatch between VA_BITS and page size/number of translation levels"
+#endif
+
+	/*
+	 * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
+	 * entire kernel image can be ID mapped. As T0SZ == (64 - #bits used),
+	 * this number conveniently equals the number of leading zeroes in
+	 * the physical address of KERNEL_END.
+	 */
+	adrp	x5, KERNEL_END
+	clz	x5, x5
+	cmp	x5, TCR_T0SZ(VA_BITS)	// default T0SZ small enough?
+	b.ge	1f			// .. then skip additional level
+
+	str_l	x5, idmap_t0sz, x6
+
+	create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6
+1:
+#endif
+
 	create_pgd_entry x0, x3, x5, x6
 	mov	x5, x3				// __pa(KERNEL_START)
 	adr_l	x6, KERNEL_END			// __pa(KERNEL_END)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 4257369341e4..ffe8e1b814e0 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -151,6 +151,7 @@ asmlinkage void secondary_start_kernel(void)
 	 */
 	cpu_set_reserved_ttbr0();
 	flush_tlb_all();
+	cpu_set_default_tcr_t0sz();
 
 	preempt_disable();
 	trace_hardirqs_off();
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index c9267acb699c..428aaf86c95b 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -40,6 +40,8 @@
 
 #include "mm.h"
 
+u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
+
 /*
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.
@@ -454,6 +456,7 @@ void __init paging_init(void)
 	 */
 	cpu_set_reserved_ttbr0();
 	flush_tlb_all();
+	cpu_set_default_tcr_t0sz();
 }
 
 /*
@@ -461,8 +464,10 @@ void __init paging_init(void)
  */
 void setup_mm_for_reboot(void)
 {
-	cpu_switch_mm(idmap_pg_dir, &init_mm);
+	cpu_set_reserved_ttbr0();
 	flush_tlb_all();
+	cpu_set_idmap_tcr_t0sz();
+	cpu_switch_mm(idmap_pg_dir, &init_mm);
 }
 
 /*
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
index 005d29e2977d..4c4d93c4bf65 100644
--- a/arch/arm64/mm/proc-macros.S
+++ b/arch/arm64/mm/proc-macros.S
@@ -52,3 +52,13 @@
 	mov	\reg, #4			// bytes per word
 	lsl	\reg, \reg, \tmp		// actual cache line size
 	.endm
+
+/*
+ * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
+ */
+	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
+#ifndef CONFIG_ARM64_VA_BITS_48
+	ldr_l	\tmpreg, idmap_t0sz
+	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+#endif
+	.endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 28eebfb6af76..cdd754e19b9b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -156,6 +156,7 @@ ENTRY(cpu_do_resume)
 	msr	cpacr_el1, x6
 	msr	ttbr0_el1, x1
 	msr	ttbr1_el1, x7
+	tcr_set_idmap_t0sz x8, x7
 	msr	tcr_el1, x8
 	msr	vbar_el1, x9
 	msr	mdscr_el1, x10
@@ -233,6 +234,8 @@ ENTRY(__cpu_setup)
 	 */
 	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
 			TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
+	tcr_set_idmap_t0sz	x10, x9
+
 	/*
 	 * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
 	 * TCR_EL1.
-- 
cgit v1.2.3


From e4c5a6851058386c9e109ad529717a23173918bc Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 19 Mar 2015 16:42:28 +0000
Subject: arm64: KVM: use ID map with increased VA range if required

This patch modifies the HYP init code so it can deal with system
RAM residing at an offset which exceeds the reach of VA_BITS.

Like for EL1, this involves configuring an additional level of
translation for the ID map. However, in case of EL2, this implies
that all translations use the extra level, as we cannot seamlessly
switch between translation tables with different numbers of
translation levels.

So add an extra translation table at the root level. Since the
ID map and the runtime HYP map are guaranteed not to overlap, they
can share this root level, and we can essentially merge these two
tables into one.

Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/kvm_mmu.h   | 10 ++++++++++
 arch/arm/kvm/mmu.c               | 27 +++++++++++++++++++++++++--
 arch/arm64/include/asm/kvm_mmu.h | 33 +++++++++++++++++++++++++++++++++
 arch/arm64/kvm/hyp-init.S        | 25 +++++++++++++++++++++++++
 4 files changed, 93 insertions(+), 2 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index bf0fe99e8ca9..018760675b56 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -270,6 +270,16 @@ static inline void __kvm_flush_dcache_pud(pud_t pud)
 void kvm_set_way_flush(struct kvm_vcpu *vcpu);
 void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
 
+static inline bool __kvm_cpu_uses_extended_idmap(void)
+{
+	return false;
+}
+
+static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
+				       pgd_t *hyp_pgd,
+				       pgd_t *merged_hyp_pgd,
+				       unsigned long hyp_idmap_start) { }
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 42a24d6b003b..69c2b4ce6160 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -35,6 +35,7 @@ extern char  __hyp_idmap_text_start[], __hyp_idmap_text_end[];
 
 static pgd_t *boot_hyp_pgd;
 static pgd_t *hyp_pgd;
+static pgd_t *merged_hyp_pgd;
 static DEFINE_MUTEX(kvm_hyp_pgd_mutex);
 
 static unsigned long hyp_idmap_start;
@@ -434,6 +435,11 @@ void free_hyp_pgds(void)
 		free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
 		hyp_pgd = NULL;
 	}
+	if (merged_hyp_pgd) {
+		clear_page(merged_hyp_pgd);
+		free_page((unsigned long)merged_hyp_pgd);
+		merged_hyp_pgd = NULL;
+	}
 
 	mutex_unlock(&kvm_hyp_pgd_mutex);
 }
@@ -1473,12 +1479,18 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 
 phys_addr_t kvm_mmu_get_httbr(void)
 {
-	return virt_to_phys(hyp_pgd);
+	if (__kvm_cpu_uses_extended_idmap())
+		return virt_to_phys(merged_hyp_pgd);
+	else
+		return virt_to_phys(hyp_pgd);
 }
 
 phys_addr_t kvm_mmu_get_boot_httbr(void)
 {
-	return virt_to_phys(boot_hyp_pgd);
+	if (__kvm_cpu_uses_extended_idmap())
+		return virt_to_phys(merged_hyp_pgd);
+	else
+		return virt_to_phys(boot_hyp_pgd);
 }
 
 phys_addr_t kvm_get_idmap_vector(void)
@@ -1521,6 +1533,17 @@ int kvm_mmu_init(void)
 		goto out;
 	}
 
+	if (__kvm_cpu_uses_extended_idmap()) {
+		merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+		if (!merged_hyp_pgd) {
+			kvm_err("Failed to allocate extra HYP pgd\n");
+			goto out;
+		}
+		__kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd,
+				    hyp_idmap_start);
+		return 0;
+	}
+
 	/* Map the very same page at the trampoline VA */
 	err = 	__create_hyp_mappings(boot_hyp_pgd,
 				      TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE,
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6458b5373142..edfe6864bc28 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -68,6 +68,8 @@
 #include <asm/pgalloc.h>
 #include <asm/cachetype.h>
 #include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
 
 #define KERN_TO_HYP(kva)	((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET)
 
@@ -305,5 +307,36 @@ static inline void __kvm_flush_dcache_pud(pud_t pud)
 void kvm_set_way_flush(struct kvm_vcpu *vcpu);
 void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
 
+static inline bool __kvm_cpu_uses_extended_idmap(void)
+{
+	return __cpu_uses_extended_idmap();
+}
+
+static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
+				       pgd_t *hyp_pgd,
+				       pgd_t *merged_hyp_pgd,
+				       unsigned long hyp_idmap_start)
+{
+	int idmap_idx;
+
+	/*
+	 * Use the first entry to access the HYP mappings. It is
+	 * guaranteed to be free, otherwise we wouldn't use an
+	 * extended idmap.
+	 */
+	VM_BUG_ON(pgd_val(merged_hyp_pgd[0]));
+	merged_hyp_pgd[0] = __pgd(__pa(hyp_pgd) | PMD_TYPE_TABLE);
+
+	/*
+	 * Create another extended level entry that points to the boot HYP map,
+	 * which contains an ID mapping of the HYP init code. We essentially
+	 * merge the boot and runtime HYP maps by doing so, but they don't
+	 * overlap anyway, so this is fine.
+	 */
+	idmap_idx = hyp_idmap_start >> VA_BITS;
+	VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx]));
+	merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE);
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index c3191168a994..178ba2248a98 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -20,6 +20,7 @@
 #include <asm/assembler.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/pgtable-hwdef.h>
 
 	.text
 	.pushsection	.hyp.idmap.text, "ax"
@@ -65,6 +66,25 @@ __do_hyp_init:
 	and	x4, x4, x5
 	ldr	x5, =TCR_EL2_FLAGS
 	orr	x4, x4, x5
+
+#ifndef CONFIG_ARM64_VA_BITS_48
+	/*
+	 * If we are running with VA_BITS < 48, we may be running with an extra
+	 * level of translation in the ID map. This is only the case if system
+	 * RAM is out of range for the currently configured page size and number
+	 * of translation levels, in which case we will also need the extra
+	 * level for the HYP ID map, or we won't be able to enable the EL2 MMU.
+	 *
+	 * However, at EL2, there is only one TTBR register, and we can't switch
+	 * between translation tables *and* update TCR_EL2.T0SZ at the same
+	 * time. Bottom line: we need the extra level in *both* our translation
+	 * tables.
+	 *
+	 * So use the same T0SZ value we use for the ID map.
+	 */
+	ldr_l	x5, idmap_t0sz
+	bfi	x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+#endif
 	msr	tcr_el2, x4
 
 	ldr	x4, =VTCR_EL2_FLAGS
@@ -91,6 +111,10 @@ __do_hyp_init:
 	msr	sctlr_el2, x4
 	isb
 
+	/* Skip the trampoline dance if we merged the boot and runtime PGDs */
+	cmp	x0, x1
+	b.eq	merged
+
 	/* MMU is now enabled. Get ready for the trampoline dance */
 	ldr	x4, =TRAMPOLINE_VA
 	adr	x5, target
@@ -105,6 +129,7 @@ target: /* We're now in the trampoline code, switch page tables */
 	tlbi	alle2
 	dsb	sy
 
+merged:
 	/* Set the stack and new vectors */
 	kern_hyp_va	x2
 	mov	sp, x2
-- 
cgit v1.2.3


From d5efd9cc9cf2e422d064c912c7d5d985f52c1b2c Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 6 Mar 2015 11:54:10 +0000
Subject: arm64: pmu: add support for interrupt-affinity property

Historically, the PMU devicetree bindings have expected SPIs to be
listed in order of *logical* CPU number. This is problematic for
bootloaders, especially when the boot CPU (logical ID 0) isn't listed
first in the devicetree.

This patch adds a new optional property, interrupt-affinity, to the
PMU node which allows the interrupt affinity to be described using
a list of phandled to CPU nodes, with each entry in the list
corresponding to the SPI at the same index in the interrupts property.

Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/pmu.h   |  1 +
 arch/arm64/kernel/perf_event.c | 57 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 54 insertions(+), 4 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/pmu.h b/arch/arm64/include/asm/pmu.h
index e6f087806aaf..b7710a59672c 100644
--- a/arch/arm64/include/asm/pmu.h
+++ b/arch/arm64/include/asm/pmu.h
@@ -44,6 +44,7 @@ struct pmu_hw_events {
 struct arm_pmu {
 	struct pmu		pmu;
 	cpumask_t		active_irqs;
+	int			*irq_affinity;
 	const char		*name;
 	irqreturn_t		(*handle_irq)(int irq_num, void *dev);
 	void			(*enable)(struct hw_perf_event *evt, int idx);
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 68a74151fa6c..195991dadc37 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -25,8 +25,10 @@
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
+#include <linux/of.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/uaccess.h>
 
@@ -405,7 +407,12 @@ armpmu_release_hardware(struct arm_pmu *armpmu)
 		free_percpu_irq(irq, &cpu_hw_events);
 	} else {
 		for (i = 0; i < irqs; ++i) {
-			if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs))
+			int cpu = i;
+
+			if (armpmu->irq_affinity)
+				cpu = armpmu->irq_affinity[i];
+
+			if (!cpumask_test_and_clear_cpu(cpu, &armpmu->active_irqs))
 				continue;
 			irq = platform_get_irq(pmu_device, i);
 			if (irq > 0)
@@ -459,19 +466,24 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu)
 		on_each_cpu(armpmu_enable_percpu_irq, &irq, 1);
 	} else {
 		for (i = 0; i < irqs; ++i) {
+			int cpu = i;
+
 			err = 0;
 			irq = platform_get_irq(pmu_device, i);
 			if (irq <= 0)
 				continue;
 
+			if (armpmu->irq_affinity)
+				cpu = armpmu->irq_affinity[i];
+
 			/*
 			 * If we have a single PMU interrupt that we can't shift,
 			 * assume that we're running on a uniprocessor machine and
 			 * continue. Otherwise, continue without this interrupt.
 			 */
-			if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
+			if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) {
 				pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
-						irq, i);
+						irq, cpu);
 				continue;
 			}
 
@@ -485,7 +497,7 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu)
 				return err;
 			}
 
-			cpumask_set_cpu(i, &armpmu->active_irqs);
+			cpumask_set_cpu(cpu, &armpmu->active_irqs);
 		}
 	}
 
@@ -1298,9 +1310,46 @@ static const struct of_device_id armpmu_of_device_ids[] = {
 
 static int armpmu_device_probe(struct platform_device *pdev)
 {
+	int i, *irqs;
+
 	if (!cpu_pmu)
 		return -ENODEV;
 
+	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+	if (!irqs)
+		return -ENOMEM;
+
+	for (i = 0; i < pdev->num_resources; ++i) {
+		struct device_node *dn;
+		int cpu;
+
+		dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity",
+				      i);
+		if (!dn) {
+			pr_warn("Failed to parse %s/interrupt-affinity[%d]\n",
+				of_node_full_name(dn), i);
+			break;
+		}
+
+		for_each_possible_cpu(cpu)
+			if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL))
+				break;
+
+		of_node_put(dn);
+		if (cpu >= nr_cpu_ids) {
+			pr_warn("Failed to find logical CPU for %s\n",
+				dn->name);
+			break;
+		}
+
+		irqs[i] = cpu;
+	}
+
+	if (i == pdev->num_resources)
+		cpu_pmu->irq_affinity = irqs;
+	else
+		kfree(irqs);
+
 	cpu_pmu->plat_device = pdev;
 	return 0;
 }
-- 
cgit v1.2.3


From 0978fb25f86b7595821cee6955679250d47c6438 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 27 Mar 2015 13:09:21 +0000
Subject: arm64: insn: Add aarch64_insn_decode_immediate

Patching an instruction sometimes requires extracting the immediate
field from this instruction. To facilitate this, and avoid
potential duplication of code, add aarch64_insn_decode_immediate
as the reciprocal to aarch64_insn_encode_immediate.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/insn.h |  1 +
 arch/arm64/kernel/insn.c      | 81 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 66 insertions(+), 16 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index d2f49423c5dc..f81b328d9cf4 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -285,6 +285,7 @@ bool aarch64_insn_is_nop(u32 insn);
 int aarch64_insn_read(void *addr, u32 *insnp);
 int aarch64_insn_write(void *addr, u32 insn);
 enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
+u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn);
 u32 aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
 				  u32 insn, u64 imm);
 u32 aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr,
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index c8eca88f12e6..924902083e47 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -265,23 +265,13 @@ int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
 	return aarch64_insn_patch_text_sync(addrs, insns, cnt);
 }
 
-u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
-				  u32 insn, u64 imm)
+static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type,
+						u32 *maskp, int *shiftp)
 {
-	u32 immlo, immhi, lomask, himask, mask;
+	u32 mask;
 	int shift;
 
 	switch (type) {
-	case AARCH64_INSN_IMM_ADR:
-		lomask = 0x3;
-		himask = 0x7ffff;
-		immlo = imm & lomask;
-		imm >>= 2;
-		immhi = imm & himask;
-		imm = (immlo << 24) | (immhi);
-		mask = (lomask << 24) | (himask);
-		shift = 5;
-		break;
 	case AARCH64_INSN_IMM_26:
 		mask = BIT(26) - 1;
 		shift = 0;
@@ -320,9 +310,68 @@ u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
 		shift = 16;
 		break;
 	default:
-		pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n",
-			type);
-		return 0;
+		return -EINVAL;
+	}
+
+	*maskp = mask;
+	*shiftp = shift;
+
+	return 0;
+}
+
+#define ADR_IMM_HILOSPLIT	2
+#define ADR_IMM_SIZE		SZ_2M
+#define ADR_IMM_LOMASK		((1 << ADR_IMM_HILOSPLIT) - 1)
+#define ADR_IMM_HIMASK		((ADR_IMM_SIZE >> ADR_IMM_HILOSPLIT) - 1)
+#define ADR_IMM_LOSHIFT		29
+#define ADR_IMM_HISHIFT		5
+
+u64 aarch64_insn_decode_immediate(enum aarch64_insn_imm_type type, u32 insn)
+{
+	u32 immlo, immhi, mask;
+	int shift;
+
+	switch (type) {
+	case AARCH64_INSN_IMM_ADR:
+		shift = 0;
+		immlo = (insn >> ADR_IMM_LOSHIFT) & ADR_IMM_LOMASK;
+		immhi = (insn >> ADR_IMM_HISHIFT) & ADR_IMM_HIMASK;
+		insn = (immhi << ADR_IMM_HILOSPLIT) | immlo;
+		mask = ADR_IMM_SIZE - 1;
+		break;
+	default:
+		if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
+			pr_err("aarch64_insn_decode_immediate: unknown immediate encoding %d\n",
+			       type);
+			return 0;
+		}
+	}
+
+	return (insn >> shift) & mask;
+}
+
+u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type,
+				  u32 insn, u64 imm)
+{
+	u32 immlo, immhi, mask;
+	int shift;
+
+	switch (type) {
+	case AARCH64_INSN_IMM_ADR:
+		shift = 0;
+		immlo = (imm & ADR_IMM_LOMASK) << ADR_IMM_LOSHIFT;
+		imm >>= ADR_IMM_HILOSPLIT;
+		immhi = (imm & ADR_IMM_HIMASK) << ADR_IMM_HISHIFT;
+		imm = immlo | immhi;
+		mask = ((ADR_IMM_LOMASK << ADR_IMM_LOSHIFT) |
+			(ADR_IMM_HIMASK << ADR_IMM_HISHIFT));
+		break;
+	default:
+		if (aarch64_get_imm_shift_mask(type, &mask, &shift) < 0) {
+			pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n",
+			       type);
+			return 0;
+		}
 	}
 
 	/* Update the immediate field. */
-- 
cgit v1.2.3


From 359b706473b47da3c93bd99fd10d798fe411ab67 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 27 Mar 2015 13:09:23 +0000
Subject: arm64: Extract feature parsing code from cpu_errata.c

As we detect more architectural features at runtime, it makes
sense to reuse the existing framework whilst avoiding to call
a feature an erratum...

This patch extract the core capability parsing, moves it into
a new file (cpufeature.c), and let the CPU errata detection code
use it.

Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/cpufeature.h | 15 ++++++++++++
 arch/arm64/kernel/Makefile          |  2 +-
 arch/arm64/kernel/cpu_errata.c      | 36 ++++------------------------
 arch/arm64/kernel/cpufeature.c      | 47 +++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/cpuinfo.c         |  1 +
 5 files changed, 68 insertions(+), 33 deletions(-)
 create mode 100644 arch/arm64/kernel/cpufeature.c

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index b6c16d5f622f..6ae35d160464 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -28,6 +28,18 @@
 
 #ifndef __ASSEMBLY__
 
+struct arm64_cpu_capabilities {
+	const char *desc;
+	u16 capability;
+	bool (*matches)(const struct arm64_cpu_capabilities *);
+	union {
+		struct {	/* To be used for erratum handling only */
+			u32 midr_model;
+			u32 midr_range_min, midr_range_max;
+		};
+	};
+};
+
 extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 
 static inline bool cpu_have_feature(unsigned int num)
@@ -51,7 +63,10 @@ static inline void cpus_set_cap(unsigned int num)
 		__set_bit(num, cpu_hwcaps);
 }
 
+void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+			    const char *info);
 void check_local_cpu_errata(void);
+void check_local_cpu_features(void);
 bool cpu_supports_mixed_endian_el0(void);
 bool system_supports_mixed_endian_el0(void);
 
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index d5e70747c7a2..b12e15b80516 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -17,7 +17,7 @@ arm64-obj-y		:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   sys.o stacktrace.o time.o traps.o io.o vdso.o	\
 			   hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o	\
 			   return_address.o cpuinfo.o cpu_errata.o		\
-			   alternative.o cacheinfo.o
+			   cpufeature.o alternative.o cacheinfo.o
 
 arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\
 					   sys_compat.o entry32.o		\
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index fa62637e63a8..a66f4fa4d541 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -16,8 +16,6 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#define pr_fmt(fmt) "alternatives: " fmt
-
 #include <linux/types.h>
 #include <asm/cpu.h>
 #include <asm/cputype.h>
@@ -26,27 +24,11 @@
 #define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
 #define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
 
-/*
- * Add a struct or another datatype to the union below if you need
- * different means to detect an affected CPU.
- */
-struct arm64_cpu_capabilities {
-	const char *desc;
-	u16 capability;
-	bool (*is_affected)(struct arm64_cpu_capabilities *);
-	union {
-		struct {
-			u32 midr_model;
-			u32 midr_range_min, midr_range_max;
-		};
-	};
-};
-
 #define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
 			MIDR_ARCHITECTURE_MASK)
 
 static bool __maybe_unused
-is_affected_midr_range(struct arm64_cpu_capabilities *entry)
+is_affected_midr_range(const struct arm64_cpu_capabilities *entry)
 {
 	u32 midr = read_cpuid_id();
 
@@ -59,12 +41,12 @@ is_affected_midr_range(struct arm64_cpu_capabilities *entry)
 }
 
 #define MIDR_RANGE(model, min, max) \
-	.is_affected = is_affected_midr_range, \
+	.matches = is_affected_midr_range, \
 	.midr_model = model, \
 	.midr_range_min = min, \
 	.midr_range_max = max
 
-struct arm64_cpu_capabilities arm64_errata[] = {
+const struct arm64_cpu_capabilities arm64_errata[] = {
 #if	defined(CONFIG_ARM64_ERRATUM_826319) || \
 	defined(CONFIG_ARM64_ERRATUM_827319) || \
 	defined(CONFIG_ARM64_ERRATUM_824069)
@@ -97,15 +79,5 @@ struct arm64_cpu_capabilities arm64_errata[] = {
 
 void check_local_cpu_errata(void)
 {
-	struct arm64_cpu_capabilities *cpus = arm64_errata;
-	int i;
-
-	for (i = 0; cpus[i].desc; i++) {
-		if (!cpus[i].is_affected(&cpus[i]))
-			continue;
-
-		if (!cpus_have_cap(cpus[i].capability))
-			pr_info("enabling workaround for %s\n", cpus[i].desc);
-		cpus_set_cap(cpus[i].capability);
-	}
+	check_cpu_capabilities(arm64_errata, "enabling workaround for");
 }
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
new file mode 100644
index 000000000000..3d9967e43d89
--- /dev/null
+++ b/arch/arm64/kernel/cpufeature.c
@@ -0,0 +1,47 @@
+/*
+ * Contains CPU feature definitions
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) "alternatives: " fmt
+
+#include <linux/types.h>
+#include <asm/cpu.h>
+#include <asm/cpufeature.h>
+
+static const struct arm64_cpu_capabilities arm64_features[] = {
+	{},
+};
+
+void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
+			    const char *info)
+{
+	int i;
+
+	for (i = 0; caps[i].desc; i++) {
+		if (!caps[i].matches(&caps[i]))
+			continue;
+
+		if (!cpus_have_cap(caps[i].capability))
+			pr_info("%s %s\n", info, caps[i].desc);
+		cpus_set_cap(caps[i].capability);
+	}
+}
+
+void check_local_cpu_features(void)
+{
+	check_cpu_capabilities(arm64_features, "detected feature");
+}
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 929855691dae..75d5a867e7fb 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -236,6 +236,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	cpuinfo_detect_icache_policy(info);
 
 	check_local_cpu_errata();
+	check_local_cpu_features();
 	update_cpu_features(info);
 }
 
-- 
cgit v1.2.3


From cc3979b54d5f1d5b5059b404892888c304d28080 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 31 Mar 2015 00:46:00 +0100
Subject: arm64: Use bool function return values of true/false not 1/0

Use the normal return values for bool functions

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/dma-mapping.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 6932bb57dba0..9437e3dc5833 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -97,7 +97,7 @@ static inline int dma_set_mask(struct device *dev, u64 mask)
 static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 {
 	if (!dev->dma_mask)
-		return 0;
+		return false;
 
 	return addr + size - 1 <= *dev->dma_mask;
 }
-- 
cgit v1.2.3


From 905e8c5dcaa147163672b06fe9dcb5abaacbc711 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 23 Mar 2015 19:07:02 +0000
Subject: arm64: errata: add workaround for cortex-a53 erratum #845719

When running a compat (AArch32) userspace on Cortex-A53, a load at EL0
from a virtual address that matches the bottom 32 bits of the virtual
address used by a recent load at (AArch64) EL1 might return incorrect
data.

This patch works around the issue by writing to the contextidr_el1
register on the exception return path when returning to a 32-bit task.
This workaround is patched in at runtime based on the MIDR value of the
processor.

Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/Kconfig                  | 21 +++++++++++++++++++++
 arch/arm64/include/asm/cpufeature.h |  3 ++-
 arch/arm64/kernel/cpu_errata.c      |  8 ++++++++
 arch/arm64/kernel/entry.S           | 20 ++++++++++++++++++++
 4 files changed, 51 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 02f67a9d7426..23d51be7d60a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -361,6 +361,27 @@ config ARM64_ERRATUM_832075
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_845719
+	bool "Cortex-A53: 845719: a load might read incorrect data"
+	depends on COMPAT
+	default y
+	help
+	  This option adds an alternative code sequence to work around ARM
+	  erratum 845719 on Cortex-A53 parts up to r0p4.
+
+	  When running a compat (AArch32) userspace on an affected Cortex-A53
+	  part, a load at EL0 from a virtual address that matches the bottom 32
+	  bits of the virtual address used by a recent load at (AArch64) EL1
+	  might return incorrect data.
+
+	  The workaround is to write the contextidr_el1 register on exception
+	  return to a 32-bit task.
+	  Please note that this does not necessarily enable the workaround,
+	  as it depends on the alternative framework, which will only patch
+	  the kernel if an affected CPU is detected.
+
+	  If unsure, say Y.
+
 endmenu
 
 
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 6ae35d160464..82cb9f98ba1a 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -23,8 +23,9 @@
 
 #define ARM64_WORKAROUND_CLEAN_CACHE		0
 #define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE	1
+#define ARM64_WORKAROUND_845719			2
 
-#define ARM64_NCAPS				2
+#define ARM64_NCAPS				3
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index a66f4fa4d541..4672860def1f 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -72,6 +72,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		.capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE,
 		MIDR_RANGE(MIDR_CORTEX_A57, 0x00, 0x12),
 	},
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_845719
+	{
+	/* Cortex-A53 r0p[01234] */
+		.desc = "ARM erratum 845719",
+		.capability = ARM64_WORKAROUND_845719,
+		MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04),
+	},
 #endif
 	{
 	}
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index cf21bb3bf752..959fe8733560 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -21,8 +21,10 @@
 #include <linux/init.h>
 #include <linux/linkage.h>
 
+#include <asm/alternative-asm.h>
 #include <asm/assembler.h>
 #include <asm/asm-offsets.h>
+#include <asm/cpufeature.h>
 #include <asm/errno.h>
 #include <asm/esr.h>
 #include <asm/thread_info.h>
@@ -120,6 +122,24 @@
 	ct_user_enter
 	ldr	x23, [sp, #S_SP]		// load return stack pointer
 	msr	sp_el0, x23
+
+#ifdef CONFIG_ARM64_ERRATUM_845719
+	alternative_insn						\
+	"nop",								\
+	"tbz x22, #4, 1f",						\
+	ARM64_WORKAROUND_845719
+#ifdef CONFIG_PID_IN_CONTEXTIDR
+	alternative_insn						\
+	"nop; nop",							\
+	"mrs x29, contextidr_el1; msr contextidr_el1, x29; 1:",		\
+	ARM64_WORKAROUND_845719
+#else
+	alternative_insn						\
+	"nop",								\
+	"msr contextidr_el1, xzr; 1:",					\
+	ARM64_WORKAROUND_845719
+#endif
+#endif
 	.endif
 	msr	elr_el1, x21			// set up the return data
 	msr	spsr_el1, x22
-- 
cgit v1.2.3