summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNicolas Pitre <nicolas.pitre@linaro.org>2015-12-12 04:49:21 +0300
committerRussell King <rmk+kernel@arm.linux.org.uk>2015-12-17 13:29:01 +0300
commit42f25bddd0a226d2431e057b9e01c5cc61067e12 (patch)
tree33e3bf9c4669caba06f72303e5d333e2acd15e78
parent38fc2f6c98262913388de338d5b0cda67e3f78cd (diff)
downloadlinux-42f25bddd0a226d2431e057b9e01c5cc61067e12.tar.xz
ARM: 8477/1: runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()
The ARM compiler inserts calls to __aeabi_idiv() and __aeabi_uidiv() when it needs to perform division on signed and unsigned integers. If a processor has support for the sdiv and udiv instructions, the kernel may overwrite the beginning of those functions with those instructions and a "bx lr" to get better performance. To ensure that those functions are aligned to a 32-bit word for easier patching (which might not always be the case in Thumb mode) and that the two patched instructions end up in the same cache line, a 8-byte alignment is enforced when ARM_PATCH_IDIV is selected. This was heavily inspired by a previous patch from Stephen Boyd. Signed-off-by: Nicolas Pitre <nico@linaro.org> Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
-rw-r--r--arch/arm/Kconfig18
-rw-r--r--arch/arm/kernel/setup.c67
-rw-r--r--arch/arm/lib/lib1funcs.S8
3 files changed, 93 insertions, 0 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9f1eeb2e7d75..6e644fd68ad2 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1603,6 +1603,24 @@ config THUMB2_AVOID_R_ARM_THM_JUMP11
config ARM_ASM_UNIFIED
bool
+config ARM_PATCH_IDIV
+ bool "Runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()"
+ depends on CPU_32v7 && !XIP_KERNEL
+ default y
+ help
+ The ARM compiler inserts calls to __aeabi_idiv() and
+ __aeabi_uidiv() when it needs to perform division on signed
+ and unsigned integers. Some v7 CPUs have support for the sdiv
+ and udiv instructions that can be used to implement those
+ functions.
+
+ Enabling this option allows the kernel to modify itself to
+ replace the first two instructions of these library functions
+ with the sdiv or udiv plus "bx lr" instructions when the CPU
+ it is running on supports them. Typically this will be faster
+ and less power intensive than running the original library
+ code to do integer division.
+
config AEABI
bool "Use the ARM EABI to compile the kernel"
help
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 20edd349d379..e07f567487cd 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -375,6 +375,72 @@ void __init early_print(const char *str, ...)
printk("%s", buf);
}
+#ifdef CONFIG_ARM_PATCH_IDIV
+
+static inline u32 __attribute_const__ sdiv_instruction(void)
+{
+ if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
+ /* "sdiv r0, r0, r1" */
+ u32 insn = __opcode_thumb32_compose(0xfb90, 0xf0f1);
+ return __opcode_to_mem_thumb32(insn);
+ }
+
+ /* "sdiv r0, r0, r1" */
+ return __opcode_to_mem_arm(0xe710f110);
+}
+
+static inline u32 __attribute_const__ udiv_instruction(void)
+{
+ if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
+ /* "udiv r0, r0, r1" */
+ u32 insn = __opcode_thumb32_compose(0xfbb0, 0xf0f1);
+ return __opcode_to_mem_thumb32(insn);
+ }
+
+ /* "udiv r0, r0, r1" */
+ return __opcode_to_mem_arm(0xe730f110);
+}
+
+static inline u32 __attribute_const__ bx_lr_instruction(void)
+{
+ if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) {
+ /* "bx lr; nop" */
+ u32 insn = __opcode_thumb32_compose(0x4770, 0x46c0);
+ return __opcode_to_mem_thumb32(insn);
+ }
+
+ /* "bx lr" */
+ return __opcode_to_mem_arm(0xe12fff1e);
+}
+
+static void __init patch_aeabi_idiv(void)
+{
+ extern void __aeabi_uidiv(void);
+ extern void __aeabi_idiv(void);
+ uintptr_t fn_addr;
+ unsigned int mask;
+
+ mask = IS_ENABLED(CONFIG_THUMB2_KERNEL) ? HWCAP_IDIVT : HWCAP_IDIVA;
+ if (!(elf_hwcap & mask))
+ return;
+
+ pr_info("CPU: div instructions available: patching division code\n");
+
+ fn_addr = ((uintptr_t)&__aeabi_uidiv) & ~1;
+ ((u32 *)fn_addr)[0] = udiv_instruction();
+ ((u32 *)fn_addr)[1] = bx_lr_instruction();
+ flush_icache_range(fn_addr, fn_addr + 8);
+
+ fn_addr = ((uintptr_t)&__aeabi_idiv) & ~1;
+ ((u32 *)fn_addr)[0] = sdiv_instruction();
+ ((u32 *)fn_addr)[1] = bx_lr_instruction();
+ flush_icache_range(fn_addr, fn_addr + 8);
+}
+
+#else
+static inline void patch_aeabi_idiv(void) { }
+#endif
+
static void __init cpuid_init_hwcaps(void)
{
int block;
@@ -642,6 +708,7 @@ static void __init setup_processor(void)
elf_hwcap = list->elf_hwcap;
cpuid_init_hwcaps();
+ patch_aeabi_idiv();
#ifndef CONFIG_ARM_THUMB
elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT);
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index af2267f6a529..9397b2e532af 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -205,6 +205,10 @@ Boston, MA 02111-1307, USA. */
.endm
+#ifdef CONFIG_ARM_PATCH_IDIV
+ .align 3
+#endif
+
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)
@@ -253,6 +257,10 @@ UNWIND(.fnstart)
UNWIND(.fnend)
ENDPROC(__umodsi3)
+#ifdef CONFIG_ARM_PATCH_IDIV
+ .align 3
+#endif
+
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)