summaryrefslogtreecommitdiff
path: root/arch/powerpc/lib
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-06-07 20:23:33 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2018-06-07 20:23:33 +0300
commitc90fca951e90ba470a3dc6087667edffcf8db21b (patch)
treeb131279a9290826e78884ee706cc0a4f1d82be4e /arch/powerpc/lib
parentc0ab85267e25e34ce8b7e4429f0ef01fa0795b80 (diff)
parentff5bc793e47b537bf3e904fada585e102c54dd8b (diff)
downloadlinux-c90fca951e90ba470a3dc6087667edffcf8db21b.tar.xz
Merge tag 'powerpc-4.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman: "Notable changes: - Support for split PMD page table lock on 64-bit Book3S (Power8/9). - Add support for HAVE_RELIABLE_STACKTRACE, so we properly support live patching again. - Add support for patching barrier_nospec in copy_from_user() and syscall entry. - A couple of fixes for our data breakpoints on Book3S. - A series from Nick optimising TLB/mm handling with the Radix MMU. - Numerous small cleanups to squash sparse/gcc warnings from Mathieu Malaterre. - Several series optimising various parts of the 32-bit code from Christophe Leroy. - Removal of support for two old machines, "SBC834xE" and "C2K" ("GEFanuc,C2K"), which is why the diffstat has so many deletions. And many other small improvements & fixes. There's a few out-of-area changes. Some minor ftrace changes OK'ed by Steve, and a fix to our powernv cpuidle driver. Then there's a series touching mm, x86 and fs/proc/task_mmu.c, which cleans up some details around pkey support. It was ack'ed/reviewed by Ingo & Dave and has been in next for several weeks. Thanks to: Akshay Adiga, Alastair D'Silva, Alexey Kardashevskiy, Al Viro, Andrew Donnellan, Aneesh Kumar K.V, Anju T Sudhakar, Arnd Bergmann, Balbir Singh, Cédric Le Goater, Christophe Leroy, Christophe Lombard, Colin Ian King, Dave Hansen, Fabio Estevam, Finn Thain, Frederic Barrat, Gautham R. Shenoy, Haren Myneni, Hari Bathini, Ingo Molnar, Jonathan Neuschäfer, Josh Poimboeuf, Kamalesh Babulal, Madhavan Srinivasan, Mahesh Salgaonkar, Mark Greer, Mathieu Malaterre, Matthew Wilcox, Michael Neuling, Michal Suchanek, Naveen N. Rao, Nicholas Piggin, Nicolai Stange, Olof Johansson, Paul Gortmaker, Paul Mackerras, Peter Rosin, Pridhiviraj Paidipeddi, Ram Pai, Rashmica Gupta, Ravi Bangoria, Russell Currey, Sam Bobroff, Samuel Mendoza-Jonas, Segher Boessenkool, Shilpasri G Bhat, Simon Guo, Souptick Joarder, Stewart Smith, Thiago Jung Bauermann, Torsten Duwe, Vaibhav Jain, Wei Yongjun, Wolfram Sang, Yisheng Xie, YueHaibing" * tag 'powerpc-4.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (251 commits) powerpc/64s/radix: Fix missing ptesync in flush_cache_vmap cpuidle: powernv: Fix promotion from snooze if next state disabled powerpc: fix build failure by disabling attribute-alias warning in pci_32 ocxl: Fix missing unlock on error in afu_ioctl_enable_p9_wait() powerpc-opal: fix spelling mistake "Uniterrupted" -> "Uninterrupted" powerpc: fix spelling mistake: "Usupported" -> "Unsupported" powerpc/pkeys: Detach execute_only key on !PROT_EXEC powerpc/powernv: copy/paste - Mask SO bit in CR powerpc: Remove core support for Marvell mv64x60 hostbridges powerpc/boot: Remove core support for Marvell mv64x60 hostbridges powerpc/boot: Remove support for Marvell mv64x60 i2c controller powerpc/boot: Remove support for Marvell MPSC serial controller powerpc/embedded6xx: Remove C2K board support powerpc/lib: optimise PPC32 memcmp powerpc/lib: optimise 32 bits __clear_user() powerpc/time: inline arch_vtime_task_switch() powerpc/Makefile: set -mcpu=860 flag for the 8xx powerpc: Implement csum_ipv6_magic in assembly powerpc/32: Optimise __csum_partial() powerpc/lib: Adjust .balign inside string functions for PPC32 ...
Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r--arch/powerpc/lib/Makefile5
-rw-r--r--arch/powerpc/lib/checksum_32.S46
-rw-r--r--arch/powerpc/lib/checksum_64.S28
-rw-r--r--arch/powerpc/lib/feature-fixups-test.S42
-rw-r--r--arch/powerpc/lib/feature-fixups.c60
-rw-r--r--arch/powerpc/lib/memcmp_32.S45
-rw-r--r--arch/powerpc/lib/sstep.c26
-rw-r--r--arch/powerpc/lib/string.S70
-rw-r--r--arch/powerpc/lib/string_32.S90
-rw-r--r--arch/powerpc/lib/test_emulate_step.c21
-rw-r--r--arch/powerpc/lib/xor_vmx_glue.c1
11 files changed, 341 insertions, 93 deletions
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 653901042ad7..d0ca13ad8231 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -26,13 +26,14 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
memcpy_power7.o
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
- string_64.o memcpy_64.o memcmp_64.o pmem.o
+ memcpy_64.o pmem.o
obj64-$(CONFIG_SMP) += locks.o
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
-obj-y += checksum_$(BITS).o checksum_wrappers.o
+obj-y += checksum_$(BITS).o checksum_wrappers.o \
+ string_$(BITS).o memcmp_$(BITS).o
obj-y += sstep.o ldstfp.o quad.o
obj64-y += quad.o
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index 9a671c774b22..aa224069f93a 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -47,16 +47,25 @@ _GLOBAL(__csum_partial)
bdnz 2b
21: srwi. r6,r4,4 /* # blocks of 4 words to do */
beq 3f
+ lwz r0,4(r3)
mtctr r6
-22: lwz r0,4(r3)
lwz r6,8(r3)
+ adde r5,r5,r0
lwz r7,12(r3)
+ adde r5,r5,r6
lwzu r8,16(r3)
+ adde r5,r5,r7
+ bdz 23f
+22: lwz r0,4(r3)
+ adde r5,r5,r8
+ lwz r6,8(r3)
adde r5,r5,r0
+ lwz r7,12(r3)
adde r5,r5,r6
+ lwzu r8,16(r3)
adde r5,r5,r7
- adde r5,r5,r8
bdnz 22b
+23: adde r5,r5,r8
3: andi. r0,r4,2
beq+ 4f
lhz r0,4(r3)
@@ -293,3 +302,36 @@ dst_error:
EX_TABLE(51b, dst_error);
EXPORT_SYMBOL(csum_partial_copy_generic)
+
+/*
+ * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ * const struct in6_addr *daddr,
+ * __u32 len, __u8 proto, __wsum sum)
+ */
+
+_GLOBAL(csum_ipv6_magic)
+ lwz r8, 0(r3)
+ lwz r9, 4(r3)
+ addc r0, r7, r8
+ lwz r10, 8(r3)
+ adde r0, r0, r9
+ lwz r11, 12(r3)
+ adde r0, r0, r10
+ lwz r8, 0(r4)
+ adde r0, r0, r11
+ lwz r9, 4(r4)
+ adde r0, r0, r8
+ lwz r10, 8(r4)
+ adde r0, r0, r9
+ lwz r11, 12(r4)
+ adde r0, r0, r10
+ add r5, r5, r6 /* assumption: len + proto doesn't carry */
+ adde r0, r0, r11
+ adde r0, r0, r5
+ addze r0, r0
+ rotlwi r3, r0, 16
+ add r3, r0, r3
+ not r3, r3
+ rlwinm r3, r3, 16, 16, 31
+ blr
+EXPORT_SYMBOL(csum_ipv6_magic)
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index d7f1a966136e..886ed94b9c13 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -429,3 +429,31 @@ dstnr; stb r6,0(r4)
stw r6,0(r8)
blr
EXPORT_SYMBOL(csum_partial_copy_generic)
+
+/*
+ * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ * const struct in6_addr *daddr,
+ * __u32 len, __u8 proto, __wsum sum)
+ */
+
+_GLOBAL(csum_ipv6_magic)
+ ld r8, 0(r3)
+ ld r9, 8(r3)
+ add r5, r5, r6
+ addc r0, r8, r9
+ ld r10, 0(r4)
+ ld r11, 8(r4)
+ adde r0, r0, r10
+ add r5, r5, r7
+ adde r0, r0, r11
+ adde r0, r0, r5
+ addze r0, r0
+ rotldi r3, r0, 32 /* fold two 32 bit halves together */
+ add r3, r0, r3
+ srdi r0, r3, 32
+ rotlwi r3, r0, 16 /* fold two 16 bit halves together */
+ add r3, r0, r3
+ not r3, r3
+ rlwinm r3, r3, 16, 16, 31
+ blr
+EXPORT_SYMBOL(csum_ipv6_magic)
diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S
index f4613118132e..f16cec989506 100644
--- a/arch/powerpc/lib/feature-fixups-test.S
+++ b/arch/powerpc/lib/feature-fixups-test.S
@@ -167,16 +167,52 @@ globl(ftr_fixup_test6_expected)
blt 2b
b 3f
b 1b
-2: or 1,1,1
+3: or 1,1,1
+ or 2,2,2
+ or 3,3,3
+
+globl(ftr_fixup_test7)
+ or 1,1,1
+BEGIN_FTR_SECTION
+ or 2,2,2
+ or 2,2,2
or 2,2,2
-3: or 3,3,3
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+FTR_SECTION_ELSE
+2: b 3f
+3: or 5,5,5
+ beq 3b
+ b 1f
+ or 6,6,6
+ b 2b
+ bdnz 3b
+1:
+ALT_FTR_SECTION_END(0, 1)
+ or 1,1,1
+ or 1,1,1
+
+globl(end_ftr_fixup_test7)
+ nop
+globl(ftr_fixup_test7_expected)
+ or 1,1,1
+2: b 3f
+3: or 5,5,5
+ beq 3b
+ b 1f
+ or 6,6,6
+ b 2b
+ bdnz 3b
+1: or 1,1,1
#if 0
/* Test that if we have a larger else case the assembler spots it and
* reports an error. #if 0'ed so as not to break the build normally.
*/
-ftr_fixup_test7:
+ftr_fixup_test_too_big:
or 1,1,1
BEGIN_FTR_SECTION
or 2,2,2
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index e1bcdc32a851..8b69f868298c 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -277,6 +277,43 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
(types & L1D_FLUSH_MTTRIG) ? "mttrig type"
: "unknown");
}
+
+void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
+{
+ unsigned int instr, *dest;
+ long *start, *end;
+ int i;
+
+ start = fixup_start;
+ end = fixup_end;
+
+ instr = 0x60000000; /* nop */
+
+ if (enable) {
+ pr_info("barrier-nospec: using ORI speculation barrier\n");
+ instr = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+ }
+
+ for (i = 0; start < end; start++, i++) {
+ dest = (void *)start + *start;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+ patch_instruction(dest, instr);
+ }
+
+ printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
+}
+
+void do_barrier_nospec_fixups(bool enable)
+{
+ void *start, *end;
+
+ start = PTRRELOC(&__start___barrier_nospec_fixup),
+ end = PTRRELOC(&__stop___barrier_nospec_fixup);
+
+ do_barrier_nospec_fixups_range(enable, start, end);
+}
+
#endif /* CONFIG_PPC_BOOK3S_64 */
void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
@@ -400,7 +437,7 @@ static void test_basic_patching(void)
extern unsigned int end_ftr_fixup_test1[];
extern unsigned int ftr_fixup_test1_orig[];
extern unsigned int ftr_fixup_test1_expected[];
- int size = end_ftr_fixup_test1 - ftr_fixup_test1;
+ int size = 4 * (end_ftr_fixup_test1 - ftr_fixup_test1);
fixup.value = fixup.mask = 8;
fixup.start_off = calc_offset(&fixup, ftr_fixup_test1 + 1);
@@ -432,7 +469,7 @@ static void test_alternative_patching(void)
extern unsigned int ftr_fixup_test2_orig[];
extern unsigned int ftr_fixup_test2_alt[];
extern unsigned int ftr_fixup_test2_expected[];
- int size = end_ftr_fixup_test2 - ftr_fixup_test2;
+ int size = 4 * (end_ftr_fixup_test2 - ftr_fixup_test2);
fixup.value = fixup.mask = 0xF;
fixup.start_off = calc_offset(&fixup, ftr_fixup_test2 + 1);
@@ -464,7 +501,7 @@ static void test_alternative_case_too_big(void)
extern unsigned int end_ftr_fixup_test3[];
extern unsigned int ftr_fixup_test3_orig[];
extern unsigned int ftr_fixup_test3_alt[];
- int size = end_ftr_fixup_test3 - ftr_fixup_test3;
+ int size = 4 * (end_ftr_fixup_test3 - ftr_fixup_test3);
fixup.value = fixup.mask = 0xC;
fixup.start_off = calc_offset(&fixup, ftr_fixup_test3 + 1);
@@ -491,7 +528,7 @@ static void test_alternative_case_too_small(void)
extern unsigned int ftr_fixup_test4_orig[];
extern unsigned int ftr_fixup_test4_alt[];
extern unsigned int ftr_fixup_test4_expected[];
- int size = end_ftr_fixup_test4 - ftr_fixup_test4;
+ int size = 4 * (end_ftr_fixup_test4 - ftr_fixup_test4);
unsigned long flag;
/* Check a high-bit flag */
@@ -525,7 +562,7 @@ static void test_alternative_case_with_branch(void)
extern unsigned int ftr_fixup_test5[];
extern unsigned int end_ftr_fixup_test5[];
extern unsigned int ftr_fixup_test5_expected[];
- int size = end_ftr_fixup_test5 - ftr_fixup_test5;
+ int size = 4 * (end_ftr_fixup_test5 - ftr_fixup_test5);
check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0);
}
@@ -535,11 +572,21 @@ static void test_alternative_case_with_external_branch(void)
extern unsigned int ftr_fixup_test6[];
extern unsigned int end_ftr_fixup_test6[];
extern unsigned int ftr_fixup_test6_expected[];
- int size = end_ftr_fixup_test6 - ftr_fixup_test6;
+ int size = 4 * (end_ftr_fixup_test6 - ftr_fixup_test6);
check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0);
}
+static void test_alternative_case_with_branch_to_end(void)
+{
+ extern unsigned int ftr_fixup_test7[];
+ extern unsigned int end_ftr_fixup_test7[];
+ extern unsigned int ftr_fixup_test7_expected[];
+ int size = 4 * (end_ftr_fixup_test7 - ftr_fixup_test7);
+
+ check(memcmp(ftr_fixup_test7, ftr_fixup_test7_expected, size) == 0);
+}
+
static void test_cpu_macros(void)
{
extern u8 ftr_fixup_test_FTR_macros[];
@@ -595,6 +642,7 @@ static int __init test_feature_fixups(void)
test_alternative_case_too_small();
test_alternative_case_with_branch();
test_alternative_case_with_external_branch();
+ test_alternative_case_with_branch_to_end();
test_cpu_macros();
test_fw_macros();
test_lwsync_macros();
diff --git a/arch/powerpc/lib/memcmp_32.S b/arch/powerpc/lib/memcmp_32.S
new file mode 100644
index 000000000000..5010e376f7b8
--- /dev/null
+++ b/arch/powerpc/lib/memcmp_32.S
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * memcmp for PowerPC32
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/export.h>
+
+ .text
+
+_GLOBAL(memcmp)
+ srawi. r7, r5, 2 /* Divide len by 4 */
+ mr r6, r3
+ beq- 3f
+ mtctr r7
+ li r7, 0
+1: lwzx r3, r6, r7
+ lwzx r0, r4, r7
+ addi r7, r7, 4
+ cmplw cr0, r3, r0
+ bdnzt eq, 1b
+ bne 5f
+3: andi. r3, r5, 3
+ beqlr
+ cmplwi cr1, r3, 2
+ blt- cr1, 4f
+ lhzx r3, r6, r7
+ lhzx r0, r4, r7
+ addi r7, r7, 2
+ subf. r3, r0, r3
+ beqlr cr1
+ bnelr
+4: lbzx r3, r6, r7
+ lbzx r0, r4, r7
+ subf. r3, r0, r3
+ blr
+5: li r3, 1
+ bgtlr
+ li r3, -1
+ blr
+EXPORT_SYMBOL(memcmp)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 34d68f1b1b40..d81568f783e5 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1065,9 +1065,10 @@ static nokprobe_inline void do_popcnt(const struct pt_regs *regs,
{
unsigned long long out = v1;
- out -= (out >> 1) & 0x5555555555555555;
- out = (0x3333333333333333 & out) + (0x3333333333333333 & (out >> 2));
- out = (out + (out >> 4)) & 0x0f0f0f0f0f0f0f0f;
+ out -= (out >> 1) & 0x5555555555555555ULL;
+ out = (0x3333333333333333ULL & out) +
+ (0x3333333333333333ULL & (out >> 2));
+ out = (out + (out >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
if (size == 8) { /* popcntb */
op->val = out;
@@ -1076,7 +1077,7 @@ static nokprobe_inline void do_popcnt(const struct pt_regs *regs,
out += out >> 8;
out += out >> 16;
if (size == 32) { /* popcntw */
- op->val = out & 0x0000003f0000003f;
+ op->val = out & 0x0000003f0000003fULL;
return;
}
@@ -1114,7 +1115,7 @@ static nokprobe_inline void do_prty(const struct pt_regs *regs,
res ^= res >> 16;
if (size == 32) { /* prtyw */
- op->val = res & 0x0000000100000001;
+ op->val = res & 0x0000000100000001ULL;
return;
}
@@ -2544,6 +2545,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
#endif /* __powerpc64__ */
}
+
+#ifdef CONFIG_VSX
+ if ((GETTYPE(op->type) == LOAD_VSX ||
+ GETTYPE(op->type) == STORE_VSX) &&
+ !cpu_has_feature(CPU_FTR_VSX)) {
+ return -1;
+ }
+#endif /* CONFIG_VSX */
+
return 0;
logical_done:
@@ -2641,7 +2651,7 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
unsigned long next_pc;
next_pc = truncate_if_32bit(regs->msr, regs->nip + 4);
- switch (op->type & INSTR_TYPE_MASK) {
+ switch (GETTYPE(op->type)) {
case COMPUTE:
if (op->type & SETREG)
regs->gpr[op->reg] = op->val;
@@ -2739,7 +2749,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
err = 0;
size = GETSIZE(op->type);
- type = op->type & INSTR_TYPE_MASK;
+ type = GETTYPE(op->type);
cross_endian = (regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
ea = truncate_if_32bit(regs->msr, op->ea);
@@ -3001,7 +3011,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr)
}
err = 0;
- type = op.type & INSTR_TYPE_MASK;
+ type = GETTYPE(op.type);
if (OP_IS_LOAD_STORE(type)) {
err = emulate_loadstore(regs, &op);
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index a787776822d8..4b41970e9ed8 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -8,10 +8,9 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
-#include <asm/processor.h>
-#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
+#include <asm/cache.h>
.text
@@ -23,7 +22,7 @@ _GLOBAL(strncpy)
mtctr r5
addi r6,r3,-1
addi r4,r4,-1
- .balign 16
+ .balign IFETCH_ALIGN_BYTES
1: lbzu r0,1(r4)
cmpwi 0,r0,0
stbu r0,1(r6)
@@ -43,7 +42,7 @@ _GLOBAL(strncmp)
mtctr r5
addi r5,r3,-1
addi r4,r4,-1
- .balign 16
+ .balign IFETCH_ALIGN_BYTES
1: lbzu r3,1(r5)
cmpwi 1,r3,0
lbzu r0,1(r4)
@@ -55,29 +54,12 @@ _GLOBAL(strncmp)
blr
EXPORT_SYMBOL(strncmp)
-#ifdef CONFIG_PPC32
-_GLOBAL(memcmp)
- PPC_LCMPI 0,r5,0
- beq- 2f
- mtctr r5
- addi r6,r3,-1
- addi r4,r4,-1
-1: lbzu r3,1(r6)
- lbzu r0,1(r4)
- subf. r3,r0,r3
- bdnzt 2,1b
- blr
-2: li r3,0
- blr
-EXPORT_SYMBOL(memcmp)
-#endif
-
_GLOBAL(memchr)
PPC_LCMPI 0,r5,0
beq- 2f
mtctr r5
addi r3,r3,-1
- .balign 16
+ .balign IFETCH_ALIGN_BYTES
1: lbzu r0,1(r3)
cmpw 0,r0,r4
bdnzf 2,1b
@@ -85,47 +67,3 @@ _GLOBAL(memchr)
2: li r3,0
blr
EXPORT_SYMBOL(memchr)
-
-#ifdef CONFIG_PPC32
-_GLOBAL(__clear_user)
- addi r6,r3,-4
- li r3,0
- li r5,0
- cmplwi 0,r4,4
- blt 7f
- /* clear a single word */
-11: stwu r5,4(r6)
- beqlr
- /* clear word sized chunks */
- andi. r0,r6,3
- add r4,r0,r4
- subf r6,r0,r6
- srwi r0,r4,2
- andi. r4,r4,3
- mtctr r0
- bdz 7f
-1: stwu r5,4(r6)
- bdnz 1b
- /* clear byte sized chunks */
-7: cmpwi 0,r4,0
- beqlr
- mtctr r4
- addi r6,r6,3
-8: stbu r5,1(r6)
- bdnz 8b
- blr
-90: mr r3,r4
- blr
-91: mfctr r3
- slwi r3,r3,2
- add r3,r3,r4
- blr
-92: mfctr r3
- blr
-
- EX_TABLE(11b, 90b)
- EX_TABLE(1b, 91b)
- EX_TABLE(8b, 92b)
-
-EXPORT_SYMBOL(__clear_user)
-#endif
diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
new file mode 100644
index 000000000000..f69a6aab7bfb
--- /dev/null
+++ b/arch/powerpc/lib/string_32.S
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * String handling functions for PowerPC32
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/export.h>
+#include <asm/cache.h>
+
+ .text
+
+CACHELINE_BYTES = L1_CACHE_BYTES
+LG_CACHELINE_BYTES = L1_CACHE_SHIFT
+CACHELINE_MASK = (L1_CACHE_BYTES-1)
+
+_GLOBAL(__clear_user)
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero. This requires that the destination
+ * area is cacheable.
+ */
+ cmplwi cr0, r4, 4
+ mr r10, r3
+ li r3, 0
+ blt 7f
+
+11: stw r3, 0(r10)
+ beqlr
+ andi. r0, r10, 3
+ add r11, r0, r4
+ subf r6, r0, r10
+
+ clrlwi r7, r6, 32 - LG_CACHELINE_BYTES
+ add r8, r7, r11
+ srwi r9, r8, LG_CACHELINE_BYTES
+ addic. r9, r9, -1 /* total number of complete cachelines */
+ ble 2f
+ xori r0, r7, CACHELINE_MASK & ~3
+ srwi. r0, r0, 2
+ beq 3f
+ mtctr r0
+4: stwu r3, 4(r6)
+ bdnz 4b
+3: mtctr r9
+ li r7, 4
+10: dcbz r7, r6
+ addi r6, r6, CACHELINE_BYTES
+ bdnz 10b
+ clrlwi r11, r8, 32 - LG_CACHELINE_BYTES
+ addi r11, r11, 4
+
+2: srwi r0 ,r11 ,2
+ mtctr r0
+ bdz 6f
+1: stwu r3, 4(r6)
+ bdnz 1b
+6: andi. r11, r11, 3
+ beqlr
+ mtctr r11
+ addi r6, r6, 3
+8: stbu r3, 1(r6)
+ bdnz 8b
+ blr
+
+7: cmpwi cr0, r4, 0
+ beqlr
+ mtctr r4
+ addi r6, r10, -1
+9: stbu r3, 1(r6)
+ bdnz 9b
+ blr
+
+90: mr r3, r4
+ blr
+91: add r3, r10, r4
+ subf r3, r6, r3
+ blr
+
+ EX_TABLE(11b, 90b)
+ EX_TABLE(4b, 91b)
+ EX_TABLE(10b, 91b)
+ EX_TABLE(1b, 91b)
+ EX_TABLE(8b, 91b)
+ EX_TABLE(9b, 91b)
+
+EXPORT_SYMBOL(__clear_user)
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
index 2534c1447554..6c47daa61614 100644
--- a/arch/powerpc/lib/test_emulate_step.c
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -387,10 +387,14 @@ static void __init test_lxvd2x_stxvd2x(void)
/* lxvd2x vsr39, r3, r4 */
stepped = emulate_step(&regs, TEST_LXVD2X(39, 3, 4));
- if (stepped == 1)
+ if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) {
show_result("lxvd2x", "PASS");
- else
- show_result("lxvd2x", "FAIL");
+ } else {
+ if (!cpu_has_feature(CPU_FTR_VSX))
+ show_result("lxvd2x", "PASS (!CPU_FTR_VSX)");
+ else
+ show_result("lxvd2x", "FAIL");
+ }
/*** stxvd2x ***/
@@ -404,10 +408,15 @@ static void __init test_lxvd2x_stxvd2x(void)
stepped = emulate_step(&regs, TEST_STXVD2X(39, 3, 4));
if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
- cached_b[2] == c.b[2] && cached_b[3] == c.b[3])
+ cached_b[2] == c.b[2] && cached_b[3] == c.b[3] &&
+ cpu_has_feature(CPU_FTR_VSX)) {
show_result("stxvd2x", "PASS");
- else
- show_result("stxvd2x", "FAIL");
+ } else {
+ if (!cpu_has_feature(CPU_FTR_VSX))
+ show_result("stxvd2x", "PASS (!CPU_FTR_VSX)");
+ else
+ show_result("stxvd2x", "FAIL");
+ }
}
#else
static void __init test_lxvd2x_stxvd2x(void)
diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c
index 6521fe5e8cef..dab2b6bfcf36 100644
--- a/arch/powerpc/lib/xor_vmx_glue.c
+++ b/arch/powerpc/lib/xor_vmx_glue.c
@@ -13,6 +13,7 @@
#include <linux/export.h>
#include <linux/sched.h>
#include <asm/switch_to.h>
+#include <asm/xor_altivec.h>
#include "xor_vmx.h"
void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,