diff options
Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r-- | arch/powerpc/lib/Makefile | 28 | ||||
-rw-r--r-- | arch/powerpc/lib/alloc.c | 6 | ||||
-rw-r--r-- | arch/powerpc/lib/copy_32.S | 127 | ||||
-rw-r--r-- | arch/powerpc/lib/copypage_power7.S | 32 | ||||
-rw-r--r-- | arch/powerpc/lib/copyuser_64.S | 3 | ||||
-rw-r--r-- | arch/powerpc/lib/copyuser_power7.S | 228 | ||||
-rw-r--r-- | arch/powerpc/lib/crtsavres.S | 96 | ||||
-rw-r--r-- | arch/powerpc/lib/devres.c | 43 | ||||
-rw-r--r-- | arch/powerpc/lib/feature-fixups.c | 2 | ||||
-rw-r--r-- | arch/powerpc/lib/ldstfp.S | 32 | ||||
-rw-r--r-- | arch/powerpc/lib/locks.c | 5 | ||||
-rw-r--r-- | arch/powerpc/lib/memcmp_64.S | 233 | ||||
-rw-r--r-- | arch/powerpc/lib/memcpy_power7.S | 228 | ||||
-rw-r--r-- | arch/powerpc/lib/ppc_ksyms.c | 35 | ||||
-rw-r--r-- | arch/powerpc/lib/rheap.c | 2 | ||||
-rw-r--r-- | arch/powerpc/lib/sstep.c | 998 | ||||
-rw-r--r-- | arch/powerpc/lib/string.S | 2 |
17 files changed, 1238 insertions, 862 deletions
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 59fa2de9546d..7902802a19a5 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -9,34 +9,30 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) CFLAGS_REMOVE_code-patching.o = -pg CFLAGS_REMOVE_feature-fixups.o = -pg -obj-y := string.o alloc.o \ - crtsavres.o +obj-y += string.o alloc.o crtsavres.o ppc_ksyms.o code-patching.o \ + feature-fixups.o + obj-$(CONFIG_PPC32) += div64.o copy_32.o -obj-$(CONFIG_HAS_IOMEM) += devres.o -obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ - usercopy_64.o mem_64.o string.o \ - hweight_64.o \ - copyuser_power7.o string_64.o copypage_power7.o +obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \ + copyuser_power7.o string_64.o copypage_power7.o memcpy_power7.o \ + memcpy_64.o memcmp_64.o + +obj64-$(CONFIG_SMP) += locks.o +obj64-$(CONFIG_ALTIVEC) += vmx-helper.o + ifeq ($(CONFIG_GENERIC_CSUM),) obj-y += checksum_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC64) += checksum_wrappers_64.o endif -obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o - obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o -ifeq ($(CONFIG_PPC64),y) -obj-$(CONFIG_SMP) += locks.o -obj-$(CONFIG_ALTIVEC) += vmx-helper.o -endif - obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o -obj-y += code-patching.o -obj-y += feature-fixups.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o obj-$(CONFIG_ALTIVEC) += xor_vmx.o CFLAGS_xor_vmx.o += -maltivec -mabi=altivec + +obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c index da22c84a8fed..60b0b3fc8fc1 100644 --- a/arch/powerpc/lib/alloc.c +++ b/arch/powerpc/lib/alloc.c @@ -10,12 +10,10 @@ void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask) { void *p; - if (mem_init_done) + if (slab_is_available()) p = kzalloc(size, mask); else { - p = alloc_bootmem(size); - if (p) - memset(p, 0, size); + p = memblock_virt_alloc(size, 0); } return p; } diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 55f19f9fd708..6813f80d1eec 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -69,54 +69,6 @@ CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) -/* - * Use dcbz on the complete cache lines in the destination - * to set them to zero. This requires that the destination - * area is cacheable. -- paulus - */ -_GLOBAL(cacheable_memzero) - mr r5,r4 - li r4,0 - addi r6,r3,-4 - cmplwi 0,r5,4 - blt 7f - stwu r4,4(r6) - beqlr - andi. r0,r6,3 - add r5,r0,r5 - subf r6,r0,r6 - clrlwi r7,r6,32-LG_CACHELINE_BYTES - add r8,r7,r5 - srwi r9,r8,LG_CACHELINE_BYTES - addic. r9,r9,-1 /* total number of complete cachelines */ - ble 2f - xori r0,r7,CACHELINE_MASK & ~3 - srwi. r0,r0,2 - beq 3f - mtctr r0 -4: stwu r4,4(r6) - bdnz 4b -3: mtctr r9 - li r7,4 -10: dcbz r7,r6 - addi r6,r6,CACHELINE_BYTES - bdnz 10b - clrlwi r5,r8,32-LG_CACHELINE_BYTES - addi r5,r5,4 -2: srwi r0,r5,2 - mtctr r0 - bdz 6f -1: stwu r4,4(r6) - bdnz 1b -6: andi. r5,r5,3 -7: cmpwi 0,r5,0 - beqlr - mtctr r5 - addi r6,r6,3 -8: stbu r4,1(r6) - bdnz 8b - blr - _GLOBAL(memset) rlwimi r4,r4,8,16,23 rlwimi r4,r4,16,0,15 @@ -142,85 +94,6 @@ _GLOBAL(memset) bdnz 8b blr -/* - * This version uses dcbz on the complete cache lines in the - * destination area to reduce memory traffic. This requires that - * the destination area is cacheable. - * We only use this version if the source and dest don't overlap. - * -- paulus. - */ -_GLOBAL(cacheable_memcpy) - add r7,r3,r5 /* test if the src & dst overlap */ - add r8,r4,r5 - cmplw 0,r4,r7 - cmplw 1,r3,r8 - crand 0,0,4 /* cr0.lt &= cr1.lt */ - blt memcpy /* if regions overlap */ - - addi r4,r4,-4 - addi r6,r3,-4 - neg r0,r3 - andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ - beq 58f - - cmplw 0,r5,r0 /* is this more than total to do? */ - blt 63f /* if not much to do */ - andi. r8,r0,3 /* get it word-aligned first */ - subf r5,r0,r5 - mtctr r8 - beq+ 61f -70: lbz r9,4(r4) /* do some bytes */ - stb r9,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 70b -61: srwi. r0,r0,2 - mtctr r0 - beq 58f -72: lwzu r9,4(r4) /* do some words */ - stwu r9,4(r6) - bdnz 72b - -58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ - clrlwi r5,r5,32-LG_CACHELINE_BYTES - li r11,4 - mtctr r0 - beq 63f -53: - dcbz r11,r6 - COPY_16_BYTES -#if L1_CACHE_BYTES >= 32 - COPY_16_BYTES -#if L1_CACHE_BYTES >= 64 - COPY_16_BYTES - COPY_16_BYTES -#if L1_CACHE_BYTES >= 128 - COPY_16_BYTES - COPY_16_BYTES - COPY_16_BYTES - COPY_16_BYTES -#endif -#endif -#endif - bdnz 53b - -63: srwi. r0,r5,2 - mtctr r0 - beq 64f -30: lwzu r0,4(r4) - stwu r0,4(r6) - bdnz 30b - -64: andi. r0,r5,3 - mtctr r0 - beq+ 65f -40: lbz r0,4(r4) - stb r0,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 40b -65: blr - _GLOBAL(memmove) cmplw 0,r3,r4 bgt backwards_memcpy diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S index d7dafb3777ac..a84d333ecb09 100644 --- a/arch/powerpc/lib/copypage_power7.S +++ b/arch/powerpc/lib/copypage_power7.S @@ -83,23 +83,23 @@ _GLOBAL(copypage_power7) li r12,112 .align 5 -1: lvx vr7,r0,r4 - lvx vr6,r4,r6 - lvx vr5,r4,r7 - lvx vr4,r4,r8 - lvx vr3,r4,r9 - lvx vr2,r4,r10 - lvx vr1,r4,r11 - lvx vr0,r4,r12 +1: lvx v7,r0,r4 + lvx v6,r4,r6 + lvx v5,r4,r7 + lvx v4,r4,r8 + lvx v3,r4,r9 + lvx v2,r4,r10 + lvx v1,r4,r11 + lvx v0,r4,r12 addi r4,r4,128 - stvx vr7,r0,r3 - stvx vr6,r3,r6 - stvx vr5,r3,r7 - stvx vr4,r3,r8 - stvx vr3,r3,r9 - stvx vr2,r3,r10 - stvx vr1,r3,r11 - stvx vr0,r3,r12 + stvx v7,r0,r3 + stvx v6,r3,r6 + stvx v5,r3,r7 + stvx v4,r3,r8 + stvx v3,r3,r9 + stvx v2,r3,r10 + stvx v1,r3,r11 + stvx v0,r3,r12 addi r3,r3,128 bdnz 1b diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index 0860ee46013c..f09899e35991 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -461,8 +461,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) /* * Routine to copy a whole page of data, optimized for POWER4. * On POWER4 it is more than 50% faster than the simple loop - * above (following the .Ldst_aligned label) but it runs slightly - * slower on POWER3. + * above (following the .Ldst_aligned label). */ .Lcopy_page_4K: std r31,-32(1) diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index c46c876ac96a..da0c568d18c4 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -388,29 +388,29 @@ err3; std r0,0(r3) li r11,48 bf cr7*4+3,5f -err3; lvx vr1,r0,r4 +err3; lvx v1,r0,r4 addi r4,r4,16 -err3; stvx vr1,r0,r3 +err3; stvx v1,r0,r3 addi r3,r3,16 5: bf cr7*4+2,6f -err3; lvx vr1,r0,r4 -err3; lvx vr0,r4,r9 +err3; lvx v1,r0,r4 +err3; lvx v0,r4,r9 addi r4,r4,32 -err3; stvx vr1,r0,r3 -err3; stvx vr0,r3,r9 +err3; stvx v1,r0,r3 +err3; stvx v0,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f -err3; lvx vr3,r0,r4 -err3; lvx vr2,r4,r9 -err3; lvx vr1,r4,r10 -err3; lvx vr0,r4,r11 +err3; lvx v3,r0,r4 +err3; lvx v2,r4,r9 +err3; lvx v1,r4,r10 +err3; lvx v0,r4,r11 addi r4,r4,64 -err3; stvx vr3,r0,r3 -err3; stvx vr2,r3,r9 -err3; stvx vr1,r3,r10 -err3; stvx vr0,r3,r11 +err3; stvx v3,r0,r3 +err3; stvx v2,r3,r9 +err3; stvx v1,r3,r10 +err3; stvx v0,r3,r11 addi r3,r3,64 7: sub r5,r5,r6 @@ -433,23 +433,23 @@ err3; stvx vr0,r3,r11 */ .align 5 8: -err4; lvx vr7,r0,r4 -err4; lvx vr6,r4,r9 -err4; lvx vr5,r4,r10 -err4; lvx vr4,r4,r11 -err4; lvx vr3,r4,r12 -err4; lvx vr2,r4,r14 -err4; lvx vr1,r4,r15 -err4; lvx vr0,r4,r16 +err4; lvx v7,r0,r4 +err4; lvx v6,r4,r9 +err4; lvx v5,r4,r10 +err4; lvx v4,r4,r11 +err4; lvx v3,r4,r12 +err4; lvx v2,r4,r14 +err4; lvx v1,r4,r15 +err4; lvx v0,r4,r16 addi r4,r4,128 -err4; stvx vr7,r0,r3 -err4; stvx vr6,r3,r9 -err4; stvx vr5,r3,r10 -err4; stvx vr4,r3,r11 -err4; stvx vr3,r3,r12 -err4; stvx vr2,r3,r14 -err4; stvx vr1,r3,r15 -err4; stvx vr0,r3,r16 +err4; stvx v7,r0,r3 +err4; stvx v6,r3,r9 +err4; stvx v5,r3,r10 +err4; stvx v4,r3,r11 +err4; stvx v3,r3,r12 +err4; stvx v2,r3,r14 +err4; stvx v1,r3,r15 +err4; stvx v0,r3,r16 addi r3,r3,128 bdnz 8b @@ -463,29 +463,29 @@ err4; stvx vr0,r3,r16 mtocrf 0x01,r6 bf cr7*4+1,9f -err3; lvx vr3,r0,r4 -err3; lvx vr2,r4,r9 -err3; lvx vr1,r4,r10 -err3; lvx vr0,r4,r11 +err3; lvx v3,r0,r4 +err3; lvx v2,r4,r9 +err3; lvx v1,r4,r10 +err3; lvx v0,r4,r11 addi r4,r4,64 -err3; stvx vr3,r0,r3 -err3; stvx vr2,r3,r9 -err3; stvx vr1,r3,r10 -err3; stvx vr0,r3,r11 +err3; stvx v3,r0,r3 +err3; stvx v2,r3,r9 +err3; stvx v1,r3,r10 +err3; stvx v0,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f -err3; lvx vr1,r0,r4 -err3; lvx vr0,r4,r9 +err3; lvx v1,r0,r4 +err3; lvx v0,r4,r9 addi r4,r4,32 -err3; stvx vr1,r0,r3 -err3; stvx vr0,r3,r9 +err3; stvx v1,r0,r3 +err3; stvx v0,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f -err3; lvx vr1,r0,r4 +err3; lvx v1,r0,r4 addi r4,r4,16 -err3; stvx vr1,r0,r3 +err3; stvx v1,r0,r3 addi r3,r3,16 /* Up to 15B to go */ @@ -560,42 +560,42 @@ err3; stw r7,4(r3) li r10,32 li r11,48 - LVS(vr16,0,r4) /* Setup permute control vector */ -err3; lvx vr0,0,r4 + LVS(v16,0,r4) /* Setup permute control vector */ +err3; lvx v0,0,r4 addi r4,r4,16 bf cr7*4+3,5f -err3; lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) +err3; lvx v1,r0,r4 + VPERM(v8,v0,v1,v16) addi r4,r4,16 -err3; stvx vr8,r0,r3 +err3; stvx v8,r0,r3 addi r3,r3,16 - vor vr0,vr1,vr1 + vor v0,v1,v1 5: bf cr7*4+2,6f -err3; lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) -err3; lvx vr0,r4,r9 - VPERM(vr9,vr1,vr0,vr16) +err3; lvx v1,r0,r4 + VPERM(v8,v0,v1,v16) +err3; lvx v0,r4,r9 + VPERM(v9,v1,v0,v16) addi r4,r4,32 -err3; stvx vr8,r0,r3 -err3; stvx vr9,r3,r9 +err3; stvx v8,r0,r3 +err3; stvx v9,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f -err3; lvx vr3,r0,r4 - VPERM(vr8,vr0,vr3,vr16) -err3; lvx vr2,r4,r9 - VPERM(vr9,vr3,vr2,vr16) -err3; lvx vr1,r4,r10 - VPERM(vr10,vr2,vr1,vr16) -err3; lvx vr0,r4,r11 - VPERM(vr11,vr1,vr0,vr16) +err3; lvx v3,r0,r4 + VPERM(v8,v0,v3,v16) +err3; lvx v2,r4,r9 + VPERM(v9,v3,v2,v16) +err3; lvx v1,r4,r10 + VPERM(v10,v2,v1,v16) +err3; lvx v0,r4,r11 + VPERM(v11,v1,v0,v16) addi r4,r4,64 -err3; stvx vr8,r0,r3 -err3; stvx vr9,r3,r9 -err3; stvx vr10,r3,r10 -err3; stvx vr11,r3,r11 +err3; stvx v8,r0,r3 +err3; stvx v9,r3,r9 +err3; stvx v10,r3,r10 +err3; stvx v11,r3,r11 addi r3,r3,64 7: sub r5,r5,r6 @@ -618,31 +618,31 @@ err3; stvx vr11,r3,r11 */ .align 5 8: -err4; lvx vr7,r0,r4 - VPERM(vr8,vr0,vr7,vr16) -err4; lvx vr6,r4,r9 - VPERM(vr9,vr7,vr6,vr16) -err4; lvx vr5,r4,r10 - VPERM(vr10,vr6,vr5,vr16) -err4; lvx vr4,r4,r11 - VPERM(vr11,vr5,vr4,vr16) -err4; lvx vr3,r4,r12 - VPERM(vr12,vr4,vr3,vr16) -err4; lvx vr2,r4,r14 - VPERM(vr13,vr3,vr2,vr16) -err4; lvx vr1,r4,r15 - VPERM(vr14,vr2,vr1,vr16) -err4; lvx vr0,r4,r16 - VPERM(vr15,vr1,vr0,vr16) +err4; lvx v7,r0,r4 + VPERM(v8,v0,v7,v16) +err4; lvx v6,r4,r9 + VPERM(v9,v7,v6,v16) +err4; lvx v5,r4,r10 + VPERM(v10,v6,v5,v16) +err4; lvx v4,r4,r11 + VPERM(v11,v5,v4,v16) +err4; lvx v3,r4,r12 + VPERM(v12,v4,v3,v16) +err4; lvx v2,r4,r14 + VPERM(v13,v3,v2,v16) +err4; lvx v1,r4,r15 + VPERM(v14,v2,v1,v16) +err4; lvx v0,r4,r16 + VPERM(v15,v1,v0,v16) addi r4,r4,128 -err4; stvx vr8,r0,r3 -err4; stvx vr9,r3,r9 -err4; stvx vr10,r3,r10 -err4; stvx vr11,r3,r11 -err4; stvx vr12,r3,r12 -err4; stvx vr13,r3,r14 -err4; stvx vr14,r3,r15 -err4; stvx vr15,r3,r16 +err4; stvx v8,r0,r3 +err4; stvx v9,r3,r9 +err4; stvx v10,r3,r10 +err4; stvx v11,r3,r11 +err4; stvx v12,r3,r12 +err4; stvx v13,r3,r14 +err4; stvx v14,r3,r15 +err4; stvx v15,r3,r16 addi r3,r3,128 bdnz 8b @@ -656,36 +656,36 @@ err4; stvx vr15,r3,r16 mtocrf 0x01,r6 bf cr7*4+1,9f -err3; lvx vr3,r0,r4 - VPERM(vr8,vr0,vr3,vr16) -err3; lvx vr2,r4,r9 - VPERM(vr9,vr3,vr2,vr16) -err3; lvx vr1,r4,r10 - VPERM(vr10,vr2,vr1,vr16) -err3; lvx vr0,r4,r11 - VPERM(vr11,vr1,vr0,vr16) +err3; lvx v3,r0,r4 + VPERM(v8,v0,v3,v16) +err3; lvx v2,r4,r9 + VPERM(v9,v3,v2,v16) +err3; lvx v1,r4,r10 + VPERM(v10,v2,v1,v16) +err3; lvx v0,r4,r11 + VPERM(v11,v1,v0,v16) addi r4,r4,64 -err3; stvx vr8,r0,r3 -err3; stvx vr9,r3,r9 -err3; stvx vr10,r3,r10 -err3; stvx vr11,r3,r11 +err3; stvx v8,r0,r3 +err3; stvx v9,r3,r9 +err3; stvx v10,r3,r10 +err3; stvx v11,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f -err3; lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) -err3; lvx vr0,r4,r9 - VPERM(vr9,vr1,vr0,vr16) +err3; lvx v1,r0,r4 + VPERM(v8,v0,v1,v16) +err3; lvx v0,r4,r9 + VPERM(v9,v1,v0,v16) addi r4,r4,32 -err3; stvx vr8,r0,r3 -err3; stvx vr9,r3,r9 +err3; stvx v8,r0,r3 +err3; stvx v9,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f -err3; lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) +err3; lvx v1,r0,r4 + VPERM(v8,v0,v1,v16) addi r4,r4,16 -err3; stvx vr8,r0,r3 +err3; stvx v8,r0,r3 addi r3,r3,16 /* Up to 15B to go */ @@ -718,4 +718,4 @@ err3; stb r0,0(r3) 15: addi r1,r1,STACKFRAMESIZE b exit_vmx_usercopy /* tail call optimise */ -#endif /* CONFiG_ALTIVEC */ +#endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S index a5b30c71a8d3..18af0b3d3eb2 100644 --- a/arch/powerpc/lib/crtsavres.S +++ b/arch/powerpc/lib/crtsavres.S @@ -236,78 +236,78 @@ _GLOBAL(_rest32gpr_31_x) _GLOBAL(_savevr_20) li r11,-192 - stvx vr20,r11,r0 + stvx v20,r11,r0 _GLOBAL(_savevr_21) li r11,-176 - stvx vr21,r11,r0 + stvx v21,r11,r0 _GLOBAL(_savevr_22) li r11,-160 - stvx vr22,r11,r0 + stvx v22,r11,r0 _GLOBAL(_savevr_23) li r11,-144 - stvx vr23,r11,r0 + stvx v23,r11,r0 _GLOBAL(_savevr_24) li r11,-128 - stvx vr24,r11,r0 + stvx v24,r11,r0 _GLOBAL(_savevr_25) li r11,-112 - stvx vr25,r11,r0 + stvx v25,r11,r0 _GLOBAL(_savevr_26) li r11,-96 - stvx vr26,r11,r0 + stvx v26,r11,r0 _GLOBAL(_savevr_27) li r11,-80 - stvx vr27,r11,r0 + stvx v27,r11,r0 _GLOBAL(_savevr_28) li r11,-64 - stvx vr28,r11,r0 + stvx v28,r11,r0 _GLOBAL(_savevr_29) li r11,-48 - stvx vr29,r11,r0 + stvx v29,r11,r0 _GLOBAL(_savevr_30) li r11,-32 - stvx vr30,r11,r0 + stvx v30,r11,r0 _GLOBAL(_savevr_31) li r11,-16 - stvx vr31,r11,r0 + stvx v31,r11,r0 blr _GLOBAL(_restvr_20) li r11,-192 - lvx vr20,r11,r0 + lvx v20,r11,r0 _GLOBAL(_restvr_21) li r11,-176 - lvx vr21,r11,r0 + lvx v21,r11,r0 _GLOBAL(_restvr_22) li r11,-160 - lvx vr22,r11,r0 + lvx v22,r11,r0 _GLOBAL(_restvr_23) li r11,-144 - lvx vr23,r11,r0 + lvx v23,r11,r0 _GLOBAL(_restvr_24) li r11,-128 - lvx vr24,r11,r0 + lvx v24,r11,r0 _GLOBAL(_restvr_25) li r11,-112 - lvx vr25,r11,r0 + lvx v25,r11,r0 _GLOBAL(_restvr_26) li r11,-96 - lvx vr26,r11,r0 + lvx v26,r11,r0 _GLOBAL(_restvr_27) li r11,-80 - lvx vr27,r11,r0 + lvx v27,r11,r0 _GLOBAL(_restvr_28) li r11,-64 - lvx vr28,r11,r0 + lvx v28,r11,r0 _GLOBAL(_restvr_29) li r11,-48 - lvx vr29,r11,r0 + lvx v29,r11,r0 _GLOBAL(_restvr_30) li r11,-32 - lvx vr30,r11,r0 + lvx v30,r11,r0 _GLOBAL(_restvr_31) li r11,-16 - lvx vr31,r11,r0 + lvx v31,r11,r0 blr #endif /* CONFIG_ALTIVEC */ @@ -443,101 +443,101 @@ _restgpr0_31: .globl _savevr_20 _savevr_20: li r12,-192 - stvx vr20,r12,r0 + stvx v20,r12,r0 .globl _savevr_21 _savevr_21: li r12,-176 - stvx vr21,r12,r0 + stvx v21,r12,r0 .globl _savevr_22 _savevr_22: li r12,-160 - stvx vr22,r12,r0 + stvx v22,r12,r0 .globl _savevr_23 _savevr_23: li r12,-144 - stvx vr23,r12,r0 + stvx v23,r12,r0 .globl _savevr_24 _savevr_24: li r12,-128 - stvx vr24,r12,r0 + stvx v24,r12,r0 .globl _savevr_25 _savevr_25: li r12,-112 - stvx vr25,r12,r0 + stvx v25,r12,r0 .globl _savevr_26 _savevr_26: li r12,-96 - stvx vr26,r12,r0 + stvx v26,r12,r0 .globl _savevr_27 _savevr_27: li r12,-80 - stvx vr27,r12,r0 + stvx v27,r12,r0 .globl _savevr_28 _savevr_28: li r12,-64 - stvx vr28,r12,r0 + stvx v28,r12,r0 .globl _savevr_29 _savevr_29: li r12,-48 - stvx vr29,r12,r0 + stvx v29,r12,r0 .globl _savevr_30 _savevr_30: li r12,-32 - stvx vr30,r12,r0 + stvx v30,r12,r0 .globl _savevr_31 _savevr_31: li r12,-16 - stvx vr31,r12,r0 + stvx v31,r12,r0 blr .globl _restvr_20 _restvr_20: li r12,-192 - lvx vr20,r12,r0 + lvx v20,r12,r0 .globl _restvr_21 _restvr_21: li r12,-176 - lvx vr21,r12,r0 + lvx v21,r12,r0 .globl _restvr_22 _restvr_22: li r12,-160 - lvx vr22,r12,r0 + lvx v22,r12,r0 .globl _restvr_23 _restvr_23: li r12,-144 - lvx vr23,r12,r0 + lvx v23,r12,r0 .globl _restvr_24 _restvr_24: li r12,-128 - lvx vr24,r12,r0 + lvx v24,r12,r0 .globl _restvr_25 _restvr_25: li r12,-112 - lvx vr25,r12,r0 + lvx v25,r12,r0 .globl _restvr_26 _restvr_26: li r12,-96 - lvx vr26,r12,r0 + lvx v26,r12,r0 .globl _restvr_27 _restvr_27: li r12,-80 - lvx vr27,r12,r0 + lvx v27,r12,r0 .globl _restvr_28 _restvr_28: li r12,-64 - lvx vr28,r12,r0 + lvx v28,r12,r0 .globl _restvr_29 _restvr_29: li r12,-48 - lvx vr29,r12,r0 + lvx v29,r12,r0 .globl _restvr_30 _restvr_30: li r12,-32 - lvx vr30,r12,r0 + lvx v30,r12,r0 .globl _restvr_31 _restvr_31: li r12,-16 - lvx vr31,r12,r0 + lvx v31,r12,r0 blr #endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c deleted file mode 100644 index 8df55fc3aad6..000000000000 --- a/arch/powerpc/lib/devres.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2008 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/device.h> /* devres_*(), devm_ioremap_release() */ -#include <linux/gfp.h> -#include <linux/io.h> /* ioremap_prot() */ -#include <linux/export.h> /* EXPORT_SYMBOL() */ - -/** - * devm_ioremap_prot - Managed ioremap_prot() - * @dev: Generic device to remap IO address for - * @offset: BUS offset to map - * @size: Size of map - * @flags: Page flags - * - * Managed ioremap_prot(). Map is automatically unmapped on driver - * detach. - */ -void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset, - size_t size, unsigned long flags) -{ - void __iomem **ptr, *addr; - - ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); - if (!ptr) - return NULL; - - addr = ioremap_prot(offset, size, flags); - if (addr) { - *ptr = addr; - devres_add(dev, ptr); - } else - devres_free(ptr); - - return addr; -} -EXPORT_SYMBOL(devm_ioremap_prot); diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 7a8a7487cee8..7ce3870d7ddd 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -164,7 +164,7 @@ static long calc_offset(struct fixup_entry *entry, unsigned int *p) return (unsigned long)p - (unsigned long)entry; } -void test_basic_patching(void) +static void test_basic_patching(void) { extern unsigned int ftr_fixup_test1; extern unsigned int end_ftr_fixup_test1; diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S index 85aec08ab234..5d0cdbfbe3f2 100644 --- a/arch/powerpc/lib/ldstfp.S +++ b/arch/powerpc/lib/ldstfp.S @@ -184,16 +184,16 @@ _GLOBAL(do_stfd) extab 2b,3b #ifdef CONFIG_ALTIVEC -/* Get the contents of vrN into vr0; N is in r3. */ +/* Get the contents of vrN into v0; N is in r3. */ _GLOBAL(get_vr) mflr r0 rlwinm r3,r3,3,0xf8 bcl 20,31,1f - blr /* vr0 is already in vr0 */ + blr /* v0 is already in v0 */ nop reg = 1 .rept 31 - vor vr0,reg,reg /* assembler doesn't know vmr? */ + vor v0,reg,reg /* assembler doesn't know vmr? */ blr reg = reg + 1 .endr @@ -203,16 +203,16 @@ reg = reg + 1 mtlr r0 bctr -/* Put the contents of vr0 into vrN; N is in r3. */ +/* Put the contents of v0 into vrN; N is in r3. */ _GLOBAL(put_vr) mflr r0 rlwinm r3,r3,3,0xf8 bcl 20,31,1f - blr /* vr0 is already in vr0 */ + blr /* v0 is already in v0 */ nop reg = 1 .rept 31 - vor reg,vr0,vr0 + vor reg,v0,v0 blr reg = reg + 1 .endr @@ -234,13 +234,13 @@ _GLOBAL(do_lvx) MTMSRD(r7) isync beq cr7,1f - stvx vr0,r1,r8 + stvx v0,r1,r8 1: li r9,-EFAULT -2: lvx vr0,0,r4 +2: lvx v0,0,r4 li r9,0 3: beq cr7,4f bl put_vr - lvx vr0,r1,r8 + lvx v0,r1,r8 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 MTMSRD(r6) @@ -262,13 +262,13 @@ _GLOBAL(do_stvx) MTMSRD(r7) isync beq cr7,1f - stvx vr0,r1,r8 + stvx v0,r1,r8 bl get_vr 1: li r9,-EFAULT -2: stvx vr0,0,r4 +2: stvx v0,0,r4 li r9,0 3: beq cr7,4f - lvx vr0,r1,r8 + lvx v0,r1,r8 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 MTMSRD(r6) @@ -280,12 +280,12 @@ _GLOBAL(do_stvx) #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_VSX -/* Get the contents of vsrN into vsr0; N is in r3. */ +/* Get the contents of vsN into vs0; N is in r3. */ _GLOBAL(get_vsr) mflr r0 rlwinm r3,r3,3,0x1f8 bcl 20,31,1f - blr /* vsr0 is already in vsr0 */ + blr /* vs0 is already in vs0 */ nop reg = 1 .rept 63 @@ -299,12 +299,12 @@ reg = reg + 1 mtlr r0 bctr -/* Put the contents of vsr0 into vsrN; N is in r3. */ +/* Put the contents of vs0 into vsN; N is in r3. */ _GLOBAL(put_vsr) mflr r0 rlwinm r3,r3,3,0x1f8 bcl 20,31,1f - blr /* vr0 is already in vr0 */ + blr /* v0 is already in v0 */ nop reg = 1 .rept 63 diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 0c9c8d7d0734..f7deebdf3365 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -41,6 +41,7 @@ void __spin_yield(arch_spinlock_t *lock) plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu), yield_count); } +EXPORT_SYMBOL_GPL(__spin_yield); /* * Waiting for a read lock or a write lock on a rwlock... @@ -70,12 +71,16 @@ void __rw_yield(arch_rwlock_t *rw) void arch_spin_unlock_wait(arch_spinlock_t *lock) { + smp_mb(); + while (lock->slock) { HMT_low(); if (SHARED_PROCESSOR) __spin_yield(lock); } HMT_medium(); + + smp_mb(); } EXPORT_SYMBOL(arch_spin_unlock_wait); diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S new file mode 100644 index 000000000000..8953d2382a65 --- /dev/null +++ b/arch/powerpc/lib/memcmp_64.S @@ -0,0 +1,233 @@ +/* + * Author: Anton Blanchard <anton@au.ibm.com> + * Copyright 2015 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/ppc_asm.h> + +#define off8 r6 +#define off16 r7 +#define off24 r8 + +#define rA r9 +#define rB r10 +#define rC r11 +#define rD r27 +#define rE r28 +#define rF r29 +#define rG r30 +#define rH r31 + +#ifdef __LITTLE_ENDIAN__ +#define LD ldbrx +#else +#define LD ldx +#endif + +_GLOBAL(memcmp) + cmpdi cr1,r5,0 + + /* Use the short loop if both strings are not 8B aligned */ + or r6,r3,r4 + andi. r6,r6,7 + + /* Use the short loop if length is less than 32B */ + cmpdi cr6,r5,31 + + beq cr1,.Lzero + bne .Lshort + bgt cr6,.Llong + +.Lshort: + mtctr r5 + +1: lbz rA,0(r3) + lbz rB,0(r4) + subf. rC,rB,rA + bne .Lnon_zero + bdz .Lzero + + lbz rA,1(r3) + lbz rB,1(r4) + subf. rC,rB,rA + bne .Lnon_zero + bdz .Lzero + + lbz rA,2(r3) + lbz rB,2(r4) + subf. rC,rB,rA + bne .Lnon_zero + bdz .Lzero + + lbz rA,3(r3) + lbz rB,3(r4) + subf. rC,rB,rA + bne .Lnon_zero + + addi r3,r3,4 + addi r4,r4,4 + + bdnz 1b + +.Lzero: + li r3,0 + blr + +.Lnon_zero: + mr r3,rC + blr + +.Llong: + li off8,8 + li off16,16 + li off24,24 + + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + + srdi r0,r5,5 + mtctr r0 + andi. r5,r5,31 + + LD rA,0,r3 + LD rB,0,r4 + + LD rC,off8,r3 + LD rD,off8,r4 + + LD rE,off16,r3 + LD rF,off16,r4 + + LD rG,off24,r3 + LD rH,off24,r4 + cmpld cr0,rA,rB + + addi r3,r3,32 + addi r4,r4,32 + + bdz .Lfirst32 + + LD rA,0,r3 + LD rB,0,r4 + cmpld cr1,rC,rD + + LD rC,off8,r3 + LD rD,off8,r4 + cmpld cr6,rE,rF + + LD rE,off16,r3 + LD rF,off16,r4 + cmpld cr7,rG,rH + bne cr0,.LcmpAB + + LD rG,off24,r3 + LD rH,off24,r4 + cmpld cr0,rA,rB + bne cr1,.LcmpCD + + addi r3,r3,32 + addi r4,r4,32 + + bdz .Lsecond32 + + .balign 16 + +1: LD rA,0,r3 + LD rB,0,r4 + cmpld cr1,rC,rD + bne cr6,.LcmpEF + + LD rC,off8,r3 + LD rD,off8,r4 + cmpld cr6,rE,rF + bne cr7,.LcmpGH + + LD rE,off16,r3 + LD rF,off16,r4 + cmpld cr7,rG,rH + bne cr0,.LcmpAB + + LD rG,off24,r3 + LD rH,off24,r4 + cmpld cr0,rA,rB + bne cr1,.LcmpCD + + addi r3,r3,32 + addi r4,r4,32 + + bdnz 1b + +.Lsecond32: + cmpld cr1,rC,rD + bne cr6,.LcmpEF + + cmpld cr6,rE,rF + bne cr7,.LcmpGH + + cmpld cr7,rG,rH + bne cr0,.LcmpAB + + bne cr1,.LcmpCD + bne cr6,.LcmpEF + bne cr7,.LcmpGH + +.Ltail: + ld r31,-8(r1) + ld r30,-16(r1) + ld r29,-24(r1) + ld r28,-32(r1) + ld r27,-40(r1) + + cmpdi r5,0 + beq .Lzero + b .Lshort + +.Lfirst32: + cmpld cr1,rC,rD + cmpld cr6,rE,rF + cmpld cr7,rG,rH + + bne cr0,.LcmpAB + bne cr1,.LcmpCD + bne cr6,.LcmpEF + bne cr7,.LcmpGH + + b .Ltail + +.LcmpAB: + li r3,1 + bgt cr0,.Lout + li r3,-1 + b .Lout + +.LcmpCD: + li r3,1 + bgt cr1,.Lout + li r3,-1 + b .Lout + +.LcmpEF: + li r3,1 + bgt cr6,.Lout + li r3,-1 + b .Lout + +.LcmpGH: + li r3,1 + bgt cr7,.Lout + li r3,-1 + +.Lout: + ld r31,-8(r1) + ld r30,-16(r1) + ld r29,-24(r1) + ld r28,-32(r1) + ld r27,-40(r1) + blr diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index 2ff5c142f87b..786234fd4e91 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -321,29 +321,29 @@ _GLOBAL(memcpy_power7) li r11,48 bf cr7*4+3,5f - lvx vr1,r0,r4 + lvx v1,r0,r4 addi r4,r4,16 - stvx vr1,r0,r3 + stvx v1,r0,r3 addi r3,r3,16 5: bf cr7*4+2,6f - lvx vr1,r0,r4 - lvx vr0,r4,r9 + lvx v1,r0,r4 + lvx v0,r4,r9 addi r4,r4,32 - stvx vr1,r0,r3 - stvx vr0,r3,r9 + stvx v1,r0,r3 + stvx v0,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f - lvx vr3,r0,r4 - lvx vr2,r4,r9 - lvx vr1,r4,r10 - lvx vr0,r4,r11 + lvx v3,r0,r4 + lvx v2,r4,r9 + lvx v1,r4,r10 + lvx v0,r4,r11 addi r4,r4,64 - stvx vr3,r0,r3 - stvx vr2,r3,r9 - stvx vr1,r3,r10 - stvx vr0,r3,r11 + stvx v3,r0,r3 + stvx v2,r3,r9 + stvx v1,r3,r10 + stvx v0,r3,r11 addi r3,r3,64 7: sub r5,r5,r6 @@ -366,23 +366,23 @@ _GLOBAL(memcpy_power7) */ .align 5 8: - lvx vr7,r0,r4 - lvx vr6,r4,r9 - lvx vr5,r4,r10 - lvx vr4,r4,r11 - lvx vr3,r4,r12 - lvx vr2,r4,r14 - lvx vr1,r4,r15 - lvx vr0,r4,r16 + lvx v7,r0,r4 + lvx v6,r4,r9 + lvx v5,r4,r10 + lvx v4,r4,r11 + lvx v3,r4,r12 + lvx v2,r4,r14 + lvx v1,r4,r15 + lvx v0,r4,r16 addi r4,r4,128 - stvx vr7,r0,r3 - stvx vr6,r3,r9 - stvx vr5,r3,r10 - stvx vr4,r3,r11 - stvx vr3,r3,r12 - stvx vr2,r3,r14 - stvx vr1,r3,r15 - stvx vr0,r3,r16 + stvx v7,r0,r3 + stvx v6,r3,r9 + stvx v5,r3,r10 + stvx v4,r3,r11 + stvx v3,r3,r12 + stvx v2,r3,r14 + stvx v1,r3,r15 + stvx v0,r3,r16 addi r3,r3,128 bdnz 8b @@ -396,29 +396,29 @@ _GLOBAL(memcpy_power7) mtocrf 0x01,r6 bf cr7*4+1,9f - lvx vr3,r0,r4 - lvx vr2,r4,r9 - lvx vr1,r4,r10 - lvx vr0,r4,r11 + lvx v3,r0,r4 + lvx v2,r4,r9 + lvx v1,r4,r10 + lvx v0,r4,r11 addi r4,r4,64 - stvx vr3,r0,r3 - stvx vr2,r3,r9 - stvx vr1,r3,r10 - stvx vr0,r3,r11 + stvx v3,r0,r3 + stvx v2,r3,r9 + stvx v1,r3,r10 + stvx v0,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f - lvx vr1,r0,r4 - lvx vr0,r4,r9 + lvx v1,r0,r4 + lvx v0,r4,r9 addi r4,r4,32 - stvx vr1,r0,r3 - stvx vr0,r3,r9 + stvx v1,r0,r3 + stvx v0,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f - lvx vr1,r0,r4 + lvx v1,r0,r4 addi r4,r4,16 - stvx vr1,r0,r3 + stvx v1,r0,r3 addi r3,r3,16 /* Up to 15B to go */ @@ -494,42 +494,42 @@ _GLOBAL(memcpy_power7) li r10,32 li r11,48 - LVS(vr16,0,r4) /* Setup permute control vector */ - lvx vr0,0,r4 + LVS(v16,0,r4) /* Setup permute control vector */ + lvx v0,0,r4 addi r4,r4,16 bf cr7*4+3,5f - lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) + lvx v1,r0,r4 + VPERM(v8,v0,v1,v16) addi r4,r4,16 - stvx vr8,r0,r3 + stvx v8,r0,r3 addi r3,r3,16 - vor vr0,vr1,vr1 + vor v0,v1,v1 5: bf cr7*4+2,6f - lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) - lvx vr0,r4,r9 - VPERM(vr9,vr1,vr0,vr16) + lvx v1,r0,r4 + VPERM(v8,v0,v1,v16) + lvx v0,r4,r9 + VPERM(v9,v1,v0,v16) addi r4,r4,32 - stvx vr8,r0,r3 - stvx vr9,r3,r9 + stvx v8,r0,r3 + stvx v9,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f - lvx vr3,r0,r4 - VPERM(vr8,vr0,vr3,vr16) - lvx vr2,r4,r9 - VPERM(vr9,vr3,vr2,vr16) - lvx vr1,r4,r10 - VPERM(vr10,vr2,vr1,vr16) - lvx vr0,r4,r11 - VPERM(vr11,vr1,vr0,vr16) + lvx v3,r0,r4 + VPERM(v8,v0,v3,v16) + lvx v2,r4,r9 + VPERM(v9,v3,v2,v16) + lvx v1,r4,r10 + VPERM(v10,v2,v1,v16) + lvx v0,r4,r11 + VPERM(v11,v1,v0,v16) addi r4,r4,64 - stvx vr8,r0,r3 - stvx vr9,r3,r9 - stvx vr10,r3,r10 - stvx vr11,r3,r11 + stvx v8,r0,r3 + stvx v9,r3,r9 + stvx v10,r3,r10 + stvx v11,r3,r11 addi r3,r3,64 7: sub r5,r5,r6 @@ -552,31 +552,31 @@ _GLOBAL(memcpy_power7) */ .align 5 8: - lvx vr7,r0,r4 - VPERM(vr8,vr0,vr7,vr16) - lvx vr6,r4,r9 - VPERM(vr9,vr7,vr6,vr16) - lvx vr5,r4,r10 - VPERM(vr10,vr6,vr5,vr16) - lvx vr4,r4,r11 - VPERM(vr11,vr5,vr4,vr16) - lvx vr3,r4,r12 - VPERM(vr12,vr4,vr3,vr16) - lvx vr2,r4,r14 - VPERM(vr13,vr3,vr2,vr16) - lvx vr1,r4,r15 - VPERM(vr14,vr2,vr1,vr16) - lvx vr0,r4,r16 - VPERM(vr15,vr1,vr0,vr16) + lvx v7,r0,r4 + VPERM(v8,v0,v7,v16) + lvx v6,r4,r9 + VPERM(v9,v7,v6,v16) + lvx v5,r4,r10 + VPERM(v10,v6,v5,v16) + lvx v4,r4,r11 + VPERM(v11,v5,v4,v16) + lvx v3,r4,r12 + VPERM(v12,v4,v3,v16) + lvx v2,r4,r14 + VPERM(v13,v3,v2,v16) + lvx v1,r4,r15 + VPERM(v14,v2,v1,v16) + lvx v0,r4,r16 + VPERM(v15,v1,v0,v16) addi r4,r4,128 - stvx vr8,r0,r3 - stvx vr9,r3,r9 - stvx vr10,r3,r10 - stvx vr11,r3,r11 - stvx vr12,r3,r12 - stvx vr13,r3,r14 - stvx vr14,r3,r15 - stvx vr15,r3,r16 + stvx v8,r0,r3 + stvx v9,r3,r9 + stvx v10,r3,r10 + stvx v11,r3,r11 + stvx v12,r3,r12 + stvx v13,r3,r14 + stvx v14,r3,r15 + stvx v15,r3,r16 addi r3,r3,128 bdnz 8b @@ -590,36 +590,36 @@ _GLOBAL(memcpy_power7) mtocrf 0x01,r6 bf cr7*4+1,9f - lvx vr3,r0,r4 - VPERM(vr8,vr0,vr3,vr16) - lvx vr2,r4,r9 - VPERM(vr9,vr3,vr2,vr16) - lvx vr1,r4,r10 - VPERM(vr10,vr2,vr1,vr16) - lvx vr0,r4,r11 - VPERM(vr11,vr1,vr0,vr16) + lvx v3,r0,r4 + VPERM(v8,v0,v3,v16) + lvx v2,r4,r9 + VPERM(v9,v3,v2,v16) + lvx v1,r4,r10 + VPERM(v10,v2,v1,v16) + lvx v0,r4,r11 + VPERM(v11,v1,v0,v16) addi r4,r4,64 - stvx vr8,r0,r3 - stvx vr9,r3,r9 - stvx vr10,r3,r10 - stvx vr11,r3,r11 + stvx v8,r0,r3 + stvx v9,r3,r9 + stvx v10,r3,r10 + stvx v11,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f - lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) - lvx vr0,r4,r9 - VPERM(vr9,vr1,vr0,vr16) + lvx v1,r0,r4 + VPERM(v8,v0,v1,v16) + lvx v0,r4,r9 + VPERM(v9,v1,v0,v16) addi r4,r4,32 - stvx vr8,r0,r3 - stvx vr9,r3,r9 + stvx v8,r0,r3 + stvx v9,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f - lvx vr1,r0,r4 - VPERM(vr8,vr0,vr1,vr16) + lvx v1,r0,r4 + VPERM(v8,v0,v1,v16) addi r4,r4,16 - stvx vr8,r0,r3 + stvx v8,r0,r3 addi r3,r3,16 /* Up to 15B to go */ @@ -653,4 +653,4 @@ _GLOBAL(memcpy_power7) 15: addi r1,r1,STACKFRAMESIZE ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) b exit_vmx_copy /* tail call optimise */ -#endif /* CONFiG_ALTIVEC */ +#endif /* CONFIG_ALTIVEC */ diff --git a/arch/powerpc/lib/ppc_ksyms.c b/arch/powerpc/lib/ppc_ksyms.c new file mode 100644 index 000000000000..c7f8e9586316 --- /dev/null +++ b/arch/powerpc/lib/ppc_ksyms.c @@ -0,0 +1,35 @@ +#include <linux/string.h> +#include <linux/uaccess.h> +#include <linux/bitops.h> +#include <net/checksum.h> + +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(memchr); + +EXPORT_SYMBOL(strcpy); +EXPORT_SYMBOL(strncpy); +EXPORT_SYMBOL(strcat); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); + +#ifndef CONFIG_GENERIC_CSUM +EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(ip_fast_csum); +EXPORT_SYMBOL(csum_tcpudp_magic); +#endif + +EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(copy_page); + +#ifdef CONFIG_PPC64 +EXPORT_SYMBOL(__arch_hweight8); +EXPORT_SYMBOL(__arch_hweight16); +EXPORT_SYMBOL(__arch_hweight32); +EXPORT_SYMBOL(__arch_hweight64); +#endif diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c index a1060a868e69..69abf844c2c3 100644 --- a/arch/powerpc/lib/rheap.c +++ b/arch/powerpc/lib/rheap.c @@ -284,7 +284,7 @@ EXPORT_SYMBOL_GPL(rh_create); */ void rh_destroy(rh_info_t * info) { - if ((info->flags & RHIF_STATIC_BLOCK) == 0 && info->block != NULL) + if ((info->flags & RHIF_STATIC_BLOCK) == 0) kfree(info->block); if ((info->flags & RHIF_STATIC_INFO) == 0) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 5c09f365c842..dc885b30f7a6 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -98,13 +98,8 @@ static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs ra = (instr >> 16) & 0x1f; ea = (signed short) instr; /* sign-extend */ - if (ra) { + if (ra) ea += regs->gpr[ra]; - if (instr & 0x04000000) { /* update forms */ - if ((instr>>26) != 47) /* stmw is not an update form */ - regs->gpr[ra] = ea; - } - } return truncate_if_32bit(regs->msr, ea); } @@ -120,11 +115,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg ra = (instr >> 16) & 0x1f; ea = (signed short) (instr & ~3); /* sign-extend */ - if (ra) { + if (ra) ea += regs->gpr[ra]; - if ((instr & 3) == 1) /* update forms */ - regs->gpr[ra] = ea; - } return truncate_if_32bit(regs->msr, ea); } @@ -133,8 +125,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg /* * Calculate effective address for an X-form instruction */ -static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs, - int do_update) +static unsigned long __kprobes xform_ea(unsigned int instr, + struct pt_regs *regs) { int ra, rb; unsigned long ea; @@ -142,11 +134,8 @@ static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs ra = (instr >> 16) & 0x1f; rb = (instr >> 11) & 0x1f; ea = regs->gpr[rb]; - if (ra) { + if (ra) ea += regs->gpr[ra]; - if (do_update) /* update forms */ - regs->gpr[ra] = ea; - } return truncate_if_32bit(regs->msr, ea); } @@ -611,6 +600,23 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1, regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift); } +static int __kprobes trap_compare(long v1, long v2) +{ + int ret = 0; + + if (v1 < v2) + ret |= 0x10; + else if (v1 > v2) + ret |= 0x08; + else + ret |= 0x04; + if ((unsigned long)v1 < (unsigned long)v2) + ret |= 0x02; + else if ((unsigned long)v1 > (unsigned long)v2) + ret |= 0x01; + return ret; +} + /* * Elements of 32-bit rotate and mask instructions. */ @@ -627,26 +633,27 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1, #define ROTATE(x, n) ((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x)) /* - * Emulate instructions that cause a transfer of control, - * loads and stores, and a few other instructions. - * Returns 1 if the step was emulated, 0 if not, - * or -1 if the instruction is one that should not be stepped, - * such as an rfid, or a mtmsrd that would clear MSR_RI. + * Decode an instruction, and execute it if that can be done just by + * modifying *regs (i.e. integer arithmetic and logical instructions, + * branches, and barrier instructions). + * Returns 1 if the instruction has been executed, or 0 if not. + * Sets *op to indicate what the instruction does. */ -int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) +int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs, + unsigned int instr) { unsigned int opcode, ra, rb, rd, spr, u; unsigned long int imm; unsigned long int val, val2; - unsigned long int ea; - unsigned int cr, mb, me, sh; - int err; - unsigned long old_ra, val3; + unsigned int mb, me, sh; long ival; + op->type = COMPUTE; + opcode = instr >> 26; switch (opcode) { case 16: /* bc */ + op->type = BRANCH; imm = (signed short)(instr & 0xfffc); if ((instr & 2) == 0) imm += regs->nip; @@ -659,26 +666,14 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 1; #ifdef CONFIG_PPC64 case 17: /* sc */ - /* - * N.B. this uses knowledge about how the syscall - * entry code works. If that is changed, this will - * need to be changed also. - */ - if (regs->gpr[0] == 0x1ebe && - cpu_has_feature(CPU_FTR_REAL_LE)) { - regs->msr ^= MSR_LE; - goto instr_done; - } - regs->gpr[9] = regs->gpr[13]; - regs->gpr[10] = MSR_KERNEL; - regs->gpr[11] = regs->nip + 4; - regs->gpr[12] = regs->msr & MSR_MASK; - regs->gpr[13] = (unsigned long) get_paca(); - regs->nip = (unsigned long) &system_call_common; - regs->msr = MSR_KERNEL; - return 1; + if ((instr & 0xfe2) == 2) + op->type = SYSCALL; + else + op->type = UNKNOWN; + return 0; #endif case 18: /* b */ + op->type = BRANCH; imm = instr & 0x03fffffc; if (imm & 0x02000000) imm -= 0x04000000; @@ -691,8 +686,16 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 1; case 19: switch ((instr >> 1) & 0x3ff) { + case 0: /* mcrf */ + rd = (instr >> 21) & 0x1c; + ra = (instr >> 16) & 0x1c; + val = (regs->ccr >> ra) & 0xf; + regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd); + goto instr_done; + case 16: /* bclr */ case 528: /* bcctr */ + op->type = BRANCH; imm = (instr & 0x400)? regs->ctr: regs->link; regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); imm = truncate_if_32bit(regs->msr, imm); @@ -703,9 +706,13 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 1; case 18: /* rfid, scary */ - return -1; + if (regs->msr & MSR_PR) + goto priv; + op->type = RFI; + return 0; case 150: /* isync */ + op->type = BARRIER; isync(); goto instr_done; @@ -731,6 +738,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 31: switch ((instr >> 1) & 0x3ff) { case 598: /* sync */ + op->type = BARRIER; #ifdef __powerpc64__ switch ((instr >> 21) & 3) { case 1: /* lwsync */ @@ -745,6 +753,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case 854: /* eieio */ + op->type = BARRIER; eieio(); goto instr_done; } @@ -760,6 +769,17 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) rb = (instr >> 11) & 0x1f; switch (opcode) { +#ifdef __powerpc64__ + case 2: /* tdi */ + if (rd & trap_compare(regs->gpr[ra], (short) instr)) + goto trap; + goto instr_done; +#endif + case 3: /* twi */ + if (rd & trap_compare((int)regs->gpr[ra], (short) instr)) + goto trap; + goto instr_done; + case 7: /* mulli */ regs->gpr[rd] = regs->gpr[ra] * (short) instr; goto instr_done; @@ -908,35 +928,44 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 31: switch ((instr >> 1) & 0x3ff) { + case 4: /* tw */ + if (rd == 0x1f || + (rd & trap_compare((int)regs->gpr[ra], + (int)regs->gpr[rb]))) + goto trap; + goto instr_done; +#ifdef __powerpc64__ + case 68: /* td */ + if (rd & trap_compare(regs->gpr[ra], regs->gpr[rb])) + goto trap; + goto instr_done; +#endif case 83: /* mfmsr */ if (regs->msr & MSR_PR) - break; - regs->gpr[rd] = regs->msr & MSR_MASK; - goto instr_done; + goto priv; + op->type = MFMSR; + op->reg = rd; + return 0; case 146: /* mtmsr */ if (regs->msr & MSR_PR) - break; - imm = regs->gpr[rd]; - if ((imm & MSR_RI) == 0) - /* can't step mtmsr that would clear MSR_RI */ - return -1; - regs->msr = imm; - goto instr_done; + goto priv; + op->type = MTMSR; + op->reg = rd; + op->val = 0xffffffff & ~(MSR_ME | MSR_LE); + return 0; #ifdef CONFIG_PPC64 case 178: /* mtmsrd */ - /* only MSR_EE and MSR_RI get changed if bit 15 set */ - /* mtmsrd doesn't change MSR_HV and MSR_ME */ if (regs->msr & MSR_PR) - break; - imm = (instr & 0x10000)? 0x8002: 0xefffffffffffefffUL; - imm = (regs->msr & MSR_MASK & ~imm) - | (regs->gpr[rd] & imm); - if ((imm & MSR_RI) == 0) - /* can't step mtmsrd that would clear MSR_RI */ - return -1; - regs->msr = imm; - goto instr_done; + goto priv; + op->type = MTMSR; + op->reg = rd; + /* only MSR_EE and MSR_RI get changed if bit 15 set */ + /* mtmsrd doesn't change MSR_HV, MSR_ME or MSR_LE */ + imm = (instr & 0x10000)? 0x8002: 0xefffffffffffeffeUL; + op->val = imm; + return 0; #endif + case 19: /* mfcr */ regs->gpr[rd] = regs->ccr; regs->gpr[rd] &= 0xffffffffUL; @@ -954,33 +983,43 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case 339: /* mfspr */ - spr = (instr >> 11) & 0x3ff; + spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); switch (spr) { - case 0x20: /* mfxer */ + case SPRN_XER: /* mfxer */ regs->gpr[rd] = regs->xer; regs->gpr[rd] &= 0xffffffffUL; goto instr_done; - case 0x100: /* mflr */ + case SPRN_LR: /* mflr */ regs->gpr[rd] = regs->link; goto instr_done; - case 0x120: /* mfctr */ + case SPRN_CTR: /* mfctr */ regs->gpr[rd] = regs->ctr; goto instr_done; + default: + op->type = MFSPR; + op->reg = rd; + op->spr = spr; + return 0; } break; case 467: /* mtspr */ - spr = (instr >> 11) & 0x3ff; + spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0); switch (spr) { - case 0x20: /* mtxer */ + case SPRN_XER: /* mtxer */ regs->xer = (regs->gpr[rd] & 0xffffffffUL); goto instr_done; - case 0x100: /* mtlr */ + case SPRN_LR: /* mtlr */ regs->link = regs->gpr[rd]; goto instr_done; - case 0x120: /* mtctr */ + case SPRN_CTR: /* mtctr */ regs->ctr = regs->gpr[rd]; goto instr_done; + default: + op->type = MTSPR; + op->val = regs->gpr[rd]; + op->spr = spr; + return 0; } break; @@ -1257,294 +1296,242 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) * Cache instructions */ case 54: /* dcbst */ - ea = xform_ea(instr, regs, 0); - if (!address_ok(regs, ea, 8)) - return 0; - err = 0; - __cacheop_user_asmx(ea, err, "dcbst"); - if (err) - return 0; - goto instr_done; + op->type = MKOP(CACHEOP, DCBST, 0); + op->ea = xform_ea(instr, regs); + return 0; case 86: /* dcbf */ - ea = xform_ea(instr, regs, 0); - if (!address_ok(regs, ea, 8)) - return 0; - err = 0; - __cacheop_user_asmx(ea, err, "dcbf"); - if (err) - return 0; - goto instr_done; + op->type = MKOP(CACHEOP, DCBF, 0); + op->ea = xform_ea(instr, regs); + return 0; case 246: /* dcbtst */ - if (rd == 0) { - ea = xform_ea(instr, regs, 0); - prefetchw((void *) ea); - } - goto instr_done; + op->type = MKOP(CACHEOP, DCBTST, 0); + op->ea = xform_ea(instr, regs); + op->reg = rd; + return 0; case 278: /* dcbt */ - if (rd == 0) { - ea = xform_ea(instr, regs, 0); - prefetch((void *) ea); - } - goto instr_done; + op->type = MKOP(CACHEOP, DCBTST, 0); + op->ea = xform_ea(instr, regs); + op->reg = rd; + return 0; + case 982: /* icbi */ + op->type = MKOP(CACHEOP, ICBI, 0); + op->ea = xform_ea(instr, regs); + return 0; } break; } /* - * Following cases are for loads and stores, so bail out - * if we're in little-endian mode. + * Loads and stores. */ - if (regs->msr & MSR_LE) - return 0; - - /* - * Save register RA in case it's an update form load or store - * and the access faults. - */ - old_ra = regs->gpr[ra]; + op->type = UNKNOWN; + op->update_reg = ra; + op->reg = rd; + op->val = regs->gpr[rd]; + u = (instr >> 20) & UPDATE; switch (opcode) { case 31: - u = instr & 0x40; + u = instr & UPDATE; + op->ea = xform_ea(instr, regs); switch ((instr >> 1) & 0x3ff) { case 20: /* lwarx */ - ea = xform_ea(instr, regs, 0); - if (ea & 3) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 4)) - goto ldst_done; - err = 0; - __get_user_asmx(val, ea, err, "lwarx"); - if (!err) - regs->gpr[rd] = val; - goto ldst_done; + op->type = MKOP(LARX, 0, 4); + break; case 150: /* stwcx. */ - ea = xform_ea(instr, regs, 0); - if (ea & 3) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 4)) - goto ldst_done; - err = 0; - __put_user_asmx(regs->gpr[rd], ea, err, "stwcx.", cr); - if (!err) - regs->ccr = (regs->ccr & 0x0fffffff) | - (cr & 0xe0000000) | - ((regs->xer >> 3) & 0x10000000); - goto ldst_done; + op->type = MKOP(STCX, 0, 4); + break; #ifdef __powerpc64__ case 84: /* ldarx */ - ea = xform_ea(instr, regs, 0); - if (ea & 7) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 8)) - goto ldst_done; - err = 0; - __get_user_asmx(val, ea, err, "ldarx"); - if (!err) - regs->gpr[rd] = val; - goto ldst_done; + op->type = MKOP(LARX, 0, 8); + break; case 214: /* stdcx. */ - ea = xform_ea(instr, regs, 0); - if (ea & 7) - break; /* can't handle misaligned */ - err = -EFAULT; - if (!address_ok(regs, ea, 8)) - goto ldst_done; - err = 0; - __put_user_asmx(regs->gpr[rd], ea, err, "stdcx.", cr); - if (!err) - regs->ccr = (regs->ccr & 0x0fffffff) | - (cr & 0xe0000000) | - ((regs->xer >> 3) & 0x10000000); - goto ldst_done; + op->type = MKOP(STCX, 0, 8); + break; case 21: /* ldx */ case 53: /* ldux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 8); + break; #endif case 23: /* lwzx */ case 55: /* lwzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 4, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 4); + break; case 87: /* lbzx */ case 119: /* lbzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 1, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 1); + break; #ifdef CONFIG_ALTIVEC case 103: /* lvx */ case 359: /* lvxl */ if (!(regs->msr & MSR_VEC)) - break; - ea = xform_ea(instr, regs, 0); - err = do_vec_load(rd, do_lvx, ea, regs); - goto ldst_done; + goto vecunavail; + op->type = MKOP(LOAD_VMX, 0, 16); + break; case 231: /* stvx */ case 487: /* stvxl */ if (!(regs->msr & MSR_VEC)) - break; - ea = xform_ea(instr, regs, 0); - err = do_vec_store(rd, do_stvx, ea, regs); - goto ldst_done; + goto vecunavail; + op->type = MKOP(STORE_VMX, 0, 16); + break; #endif /* CONFIG_ALTIVEC */ #ifdef __powerpc64__ case 149: /* stdx */ case 181: /* stdux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 8); + break; #endif case 151: /* stwx */ case 183: /* stwux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 4, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 4); + break; case 215: /* stbx */ case 247: /* stbux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 1, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 1); + break; case 279: /* lhzx */ case 311: /* lhzux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 2, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 2); + break; #ifdef __powerpc64__ case 341: /* lwax */ case 373: /* lwaux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 4, regs); - if (!err) - regs->gpr[rd] = (signed int) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 4); + break; #endif case 343: /* lhax */ case 375: /* lhaux */ - err = read_mem(®s->gpr[rd], xform_ea(instr, regs, u), - 2, regs); - if (!err) - regs->gpr[rd] = (signed short) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 2); + break; case 407: /* sthx */ case 439: /* sthux */ - val = regs->gpr[rd]; - err = write_mem(val, xform_ea(instr, regs, u), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 2); + break; #ifdef __powerpc64__ case 532: /* ldbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 8, regs); - if (!err) - regs->gpr[rd] = byterev_8(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 8); + break; #endif + case 533: /* lswx */ + op->type = MKOP(LOAD_MULTI, 0, regs->xer & 0x7f); + break; case 534: /* lwbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 4, regs); - if (!err) - regs->gpr[rd] = byterev_4(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 4); + break; + + case 597: /* lswi */ + if (rb == 0) + rb = 32; /* # bytes to load */ + op->type = MKOP(LOAD_MULTI, 0, rb); + op->ea = 0; + if (ra) + op->ea = truncate_if_32bit(regs->msr, + regs->gpr[ra]); + break; #ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_load(rd, do_lfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 4); + break; case 599: /* lfdx */ case 631: /* lfdux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_load(rd, do_lfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 8); + break; case 663: /* stfsx */ case 695: /* stfsux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_store(rd, do_stfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 4); + break; case 727: /* stfdx */ case 759: /* stfdux */ if (!(regs->msr & MSR_FP)) - break; - ea = xform_ea(instr, regs, u); - err = do_fp_store(rd, do_stfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 8); + break; #endif #ifdef __powerpc64__ case 660: /* stdbrx */ - val = byterev_8(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 8); + op->val = byterev_8(regs->gpr[rd]); + break; #endif + case 661: /* stswx */ + op->type = MKOP(STORE_MULTI, 0, regs->xer & 0x7f); + break; + case 662: /* stwbrx */ - val = byterev_4(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 4, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 4); + op->val = byterev_4(regs->gpr[rd]); + break; + + case 725: + if (rb == 0) + rb = 32; /* # bytes to store */ + op->type = MKOP(STORE_MULTI, 0, rb); + op->ea = 0; + if (ra) + op->ea = truncate_if_32bit(regs->msr, + regs->gpr[ra]); + break; case 790: /* lhbrx */ - err = read_mem(&val, xform_ea(instr, regs, 0), 2, regs); - if (!err) - regs->gpr[rd] = byterev_2(val); - goto ldst_done; + op->type = MKOP(LOAD, BYTEREV, 2); + break; case 918: /* sthbrx */ - val = byterev_2(regs->gpr[rd]); - err = write_mem(val, xform_ea(instr, regs, 0), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, BYTEREV, 2); + op->val = byterev_2(regs->gpr[rd]); + break; #ifdef CONFIG_VSX case 844: /* lxvd2x */ case 876: /* lxvd2ux */ if (!(regs->msr & MSR_VSX)) - break; - rd |= (instr & 1) << 5; - ea = xform_ea(instr, regs, u); - err = do_vsx_load(rd, do_lxvd2x, ea, regs); - goto ldst_done; + goto vsxunavail; + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(LOAD_VSX, u, 16); + break; case 972: /* stxvd2x */ case 1004: /* stxvd2ux */ if (!(regs->msr & MSR_VSX)) - break; - rd |= (instr & 1) << 5; - ea = xform_ea(instr, regs, u); - err = do_vsx_store(rd, do_stxvd2x, ea, regs); - goto ldst_done; + goto vsxunavail; + op->reg = rd | ((instr & 1) << 5); + op->type = MKOP(STORE_VSX, u, 16); + break; #endif /* CONFIG_VSX */ } @@ -1552,178 +1539,123 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) case 32: /* lwz */ case 33: /* lwzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 4, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 4); + op->ea = dform_ea(instr, regs); + break; case 34: /* lbz */ case 35: /* lbzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 1, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 1); + op->ea = dform_ea(instr, regs); + break; case 36: /* stw */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 4, regs); - goto ldst_done; - case 37: /* stwu */ - val = regs->gpr[rd]; - val3 = dform_ea(instr, regs); - /* - * For PPC32 we always use stwu to change stack point with r1. So - * this emulated store may corrupt the exception frame, now we - * have to provide the exception frame trampoline, which is pushed - * below the kprobed function stack. So we only update gpr[1] but - * don't emulate the real store operation. We will do real store - * operation safely in exception return code by checking this flag. - */ - if ((ra == 1) && !(regs->msr & MSR_PR) \ - && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) { -#ifdef CONFIG_PPC32 - /* - * Check if we will touch kernel sack overflow - */ - if (val3 - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { - printk(KERN_CRIT "Can't kprobe this since Kernel stack overflow.\n"); - err = -EINVAL; - break; - } -#endif /* CONFIG_PPC32 */ - /* - * Check if we already set since that means we'll - * lose the previous value. - */ - WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); - set_thread_flag(TIF_EMULATE_STACK_STORE); - err = 0; - } else - err = write_mem(val, val3, 4, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 4); + op->ea = dform_ea(instr, regs); + break; case 38: /* stb */ case 39: /* stbu */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 1, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 1); + op->ea = dform_ea(instr, regs); + break; case 40: /* lhz */ case 41: /* lhzu */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 2, regs); - goto ldst_done; + op->type = MKOP(LOAD, u, 2); + op->ea = dform_ea(instr, regs); + break; case 42: /* lha */ case 43: /* lhau */ - err = read_mem(®s->gpr[rd], dform_ea(instr, regs), 2, regs); - if (!err) - regs->gpr[rd] = (signed short) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT | u, 2); + op->ea = dform_ea(instr, regs); + break; case 44: /* sth */ case 45: /* sthu */ - val = regs->gpr[rd]; - err = write_mem(val, dform_ea(instr, regs), 2, regs); - goto ldst_done; + op->type = MKOP(STORE, u, 2); + op->ea = dform_ea(instr, regs); + break; case 46: /* lmw */ - ra = (instr >> 16) & 0x1f; if (ra >= rd) break; /* invalid form, ra in range to load */ - ea = dform_ea(instr, regs); - do { - err = read_mem(®s->gpr[rd], ea, 4, regs); - if (err) - return 0; - ea += 4; - } while (++rd < 32); - goto instr_done; + op->type = MKOP(LOAD_MULTI, 0, 4 * (32 - rd)); + op->ea = dform_ea(instr, regs); + break; case 47: /* stmw */ - ea = dform_ea(instr, regs); - do { - err = write_mem(regs->gpr[rd], ea, 4, regs); - if (err) - return 0; - ea += 4; - } while (++rd < 32); - goto instr_done; + op->type = MKOP(STORE_MULTI, 0, 4 * (32 - rd)); + op->ea = dform_ea(instr, regs); + break; #ifdef CONFIG_PPC_FPU case 48: /* lfs */ case 49: /* lfsu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_load(rd, do_lfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 4); + op->ea = dform_ea(instr, regs); + break; case 50: /* lfd */ case 51: /* lfdu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_load(rd, do_lfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(LOAD_FP, u, 8); + op->ea = dform_ea(instr, regs); + break; case 52: /* stfs */ case 53: /* stfsu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_store(rd, do_stfs, ea, 4, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 4); + op->ea = dform_ea(instr, regs); + break; case 54: /* stfd */ case 55: /* stfdu */ if (!(regs->msr & MSR_FP)) - break; - ea = dform_ea(instr, regs); - err = do_fp_store(rd, do_stfd, ea, 8, regs); - goto ldst_done; + goto fpunavail; + op->type = MKOP(STORE_FP, u, 8); + op->ea = dform_ea(instr, regs); + break; #endif #ifdef __powerpc64__ case 58: /* ld[u], lwa */ + op->ea = dsform_ea(instr, regs); switch (instr & 3) { case 0: /* ld */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, 0, 8); + break; case 1: /* ldu */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 8, regs); - goto ldst_done; + op->type = MKOP(LOAD, UPDATE, 8); + break; case 2: /* lwa */ - err = read_mem(®s->gpr[rd], dsform_ea(instr, regs), - 4, regs); - if (!err) - regs->gpr[rd] = (signed int) regs->gpr[rd]; - goto ldst_done; + op->type = MKOP(LOAD, SIGNEXT, 4); + break; } break; case 62: /* std[u] */ - val = regs->gpr[rd]; + op->ea = dsform_ea(instr, regs); switch (instr & 3) { case 0: /* std */ - err = write_mem(val, dsform_ea(instr, regs), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, 0, 8); + break; case 1: /* stdu */ - err = write_mem(val, dsform_ea(instr, regs), 8, regs); - goto ldst_done; + op->type = MKOP(STORE, UPDATE, 8); + break; } break; #endif /* __powerpc64__ */ } - err = -EINVAL; - - ldst_done: - if (err) { - regs->gpr[ra] = old_ra; - return 0; /* invoke DSI if -EFAULT? */ - } - instr_done: - regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); - return 1; + return 0; logical_done: if (instr & 1) @@ -1733,5 +1665,351 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) arith_done: if (instr & 1) set_cr0(regs, rd); - goto instr_done; + + instr_done: + regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + return 1; + + priv: + op->type = INTERRUPT | 0x700; + op->val = SRR1_PROGPRIV; + return 0; + + trap: + op->type = INTERRUPT | 0x700; + op->val = SRR1_PROGTRAP; + return 0; + +#ifdef CONFIG_PPC_FPU + fpunavail: + op->type = INTERRUPT | 0x800; + return 0; +#endif + +#ifdef CONFIG_ALTIVEC + vecunavail: + op->type = INTERRUPT | 0xf20; + return 0; +#endif + +#ifdef CONFIG_VSX + vsxunavail: + op->type = INTERRUPT | 0xf40; + return 0; +#endif +} +EXPORT_SYMBOL_GPL(analyse_instr); + +/* + * For PPC32 we always use stwu with r1 to change the stack pointer. + * So this emulated store may corrupt the exception frame, now we + * have to provide the exception frame trampoline, which is pushed + * below the kprobed function stack. So we only update gpr[1] but + * don't emulate the real store operation. We will do real store + * operation safely in exception return code by checking this flag. + */ +static __kprobes int handle_stack_update(unsigned long ea, struct pt_regs *regs) +{ +#ifdef CONFIG_PPC32 + /* + * Check if we will touch kernel stack overflow + */ + if (ea - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) { + printk(KERN_CRIT "Can't kprobe this since kernel stack would overflow.\n"); + return -EINVAL; + } +#endif /* CONFIG_PPC32 */ + /* + * Check if we already set since that means we'll + * lose the previous value. + */ + WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE)); + set_thread_flag(TIF_EMULATE_STACK_STORE); + return 0; +} + +static __kprobes void do_signext(unsigned long *valp, int size) +{ + switch (size) { + case 2: + *valp = (signed short) *valp; + break; + case 4: + *valp = (signed int) *valp; + break; + } +} + +static __kprobes void do_byterev(unsigned long *valp, int size) +{ + switch (size) { + case 2: + *valp = byterev_2(*valp); + break; + case 4: + *valp = byterev_4(*valp); + break; +#ifdef __powerpc64__ + case 8: + *valp = byterev_8(*valp); + break; +#endif + } +} + +/* + * Emulate instructions that cause a transfer of control, + * loads and stores, and a few other instructions. + * Returns 1 if the step was emulated, 0 if not, + * or -1 if the instruction is one that should not be stepped, + * such as an rfid, or a mtmsrd that would clear MSR_RI. + */ +int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) +{ + struct instruction_op op; + int r, err, size; + unsigned long val; + unsigned int cr; + int i, rd, nb; + + r = analyse_instr(&op, regs, instr); + if (r != 0) + return r; + + err = 0; + size = GETSIZE(op.type); + switch (op.type & INSTR_TYPE_MASK) { + case CACHEOP: + if (!address_ok(regs, op.ea, 8)) + return 0; + switch (op.type & CACHEOP_MASK) { + case DCBST: + __cacheop_user_asmx(op.ea, err, "dcbst"); + break; + case DCBF: + __cacheop_user_asmx(op.ea, err, "dcbf"); + break; + case DCBTST: + if (op.reg == 0) + prefetchw((void *) op.ea); + break; + case DCBT: + if (op.reg == 0) + prefetch((void *) op.ea); + break; + case ICBI: + __cacheop_user_asmx(op.ea, err, "icbi"); + break; + } + if (err) + return 0; + goto instr_done; + + case LARX: + if (regs->msr & MSR_LE) + return 0; + if (op.ea & (size - 1)) + break; /* can't handle misaligned */ + err = -EFAULT; + if (!address_ok(regs, op.ea, size)) + goto ldst_done; + err = 0; + switch (size) { + case 4: + __get_user_asmx(val, op.ea, err, "lwarx"); + break; + case 8: + __get_user_asmx(val, op.ea, err, "ldarx"); + break; + default: + return 0; + } + if (!err) + regs->gpr[op.reg] = val; + goto ldst_done; + + case STCX: + if (regs->msr & MSR_LE) + return 0; + if (op.ea & (size - 1)) + break; /* can't handle misaligned */ + err = -EFAULT; + if (!address_ok(regs, op.ea, size)) + goto ldst_done; + err = 0; + switch (size) { + case 4: + __put_user_asmx(op.val, op.ea, err, "stwcx.", cr); + break; + case 8: + __put_user_asmx(op.val, op.ea, err, "stdcx.", cr); + break; + default: + return 0; + } + if (!err) + regs->ccr = (regs->ccr & 0x0fffffff) | + (cr & 0xe0000000) | + ((regs->xer >> 3) & 0x10000000); + goto ldst_done; + + case LOAD: + if (regs->msr & MSR_LE) + return 0; + err = read_mem(®s->gpr[op.reg], op.ea, size, regs); + if (!err) { + if (op.type & SIGNEXT) + do_signext(®s->gpr[op.reg], size); + if (op.type & BYTEREV) + do_byterev(®s->gpr[op.reg], size); + } + goto ldst_done; + +#ifdef CONFIG_PPC_FPU + case LOAD_FP: + if (regs->msr & MSR_LE) + return 0; + if (size == 4) + err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); + else + err = do_fp_load(op.reg, do_lfd, op.ea, size, regs); + goto ldst_done; +#endif +#ifdef CONFIG_ALTIVEC + case LOAD_VMX: + if (regs->msr & MSR_LE) + return 0; + err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); + goto ldst_done; +#endif +#ifdef CONFIG_VSX + case LOAD_VSX: + if (regs->msr & MSR_LE) + return 0; + err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); + goto ldst_done; +#endif + case LOAD_MULTI: + if (regs->msr & MSR_LE) + return 0; + rd = op.reg; + for (i = 0; i < size; i += 4) { + nb = size - i; + if (nb > 4) + nb = 4; + err = read_mem(®s->gpr[rd], op.ea, nb, regs); + if (err) + return 0; + if (nb < 4) /* left-justify last bytes */ + regs->gpr[rd] <<= 32 - 8 * nb; + op.ea += 4; + ++rd; + } + goto instr_done; + + case STORE: + if (regs->msr & MSR_LE) + return 0; + if ((op.type & UPDATE) && size == sizeof(long) && + op.reg == 1 && op.update_reg == 1 && + !(regs->msr & MSR_PR) && + op.ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) { + err = handle_stack_update(op.ea, regs); + goto ldst_done; + } + err = write_mem(op.val, op.ea, size, regs); + goto ldst_done; + +#ifdef CONFIG_PPC_FPU + case STORE_FP: + if (regs->msr & MSR_LE) + return 0; + if (size == 4) + err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); + else + err = do_fp_store(op.reg, do_stfd, op.ea, size, regs); + goto ldst_done; +#endif +#ifdef CONFIG_ALTIVEC + case STORE_VMX: + if (regs->msr & MSR_LE) + return 0; + err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); + goto ldst_done; +#endif +#ifdef CONFIG_VSX + case STORE_VSX: + if (regs->msr & MSR_LE) + return 0; + err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); + goto ldst_done; +#endif + case STORE_MULTI: + if (regs->msr & MSR_LE) + return 0; + rd = op.reg; + for (i = 0; i < size; i += 4) { + val = regs->gpr[rd]; + nb = size - i; + if (nb > 4) + nb = 4; + else + val >>= 32 - 8 * nb; + err = write_mem(val, op.ea, nb, regs); + if (err) + return 0; + op.ea += 4; + ++rd; + } + goto instr_done; + + case MFMSR: + regs->gpr[op.reg] = regs->msr & MSR_MASK; + goto instr_done; + + case MTMSR: + val = regs->gpr[op.reg]; + if ((val & MSR_RI) == 0) + /* can't step mtmsr[d] that would clear MSR_RI */ + return -1; + /* here op.val is the mask of bits to change */ + regs->msr = (regs->msr & ~op.val) | (val & op.val); + goto instr_done; + +#ifdef CONFIG_PPC64 + case SYSCALL: /* sc */ + /* + * N.B. this uses knowledge about how the syscall + * entry code works. If that is changed, this will + * need to be changed also. + */ + if (regs->gpr[0] == 0x1ebe && + cpu_has_feature(CPU_FTR_REAL_LE)) { + regs->msr ^= MSR_LE; + goto instr_done; + } + regs->gpr[9] = regs->gpr[13]; + regs->gpr[10] = MSR_KERNEL; + regs->gpr[11] = regs->nip + 4; + regs->gpr[12] = regs->msr & MSR_MASK; + regs->gpr[13] = (unsigned long) get_paca(); + regs->nip = (unsigned long) &system_call_common; + regs->msr = MSR_KERNEL; + return 1; + + case RFI: + return -1; +#endif + } + return 0; + + ldst_done: + if (err) + return 0; + if (op.type & UPDATE) + regs->gpr[op.update_reg] = op.ea; + + instr_done: + regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4); + return 1; } diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S index 1b5a0a09d609..c80fb49ce607 100644 --- a/arch/powerpc/lib/string.S +++ b/arch/powerpc/lib/string.S @@ -93,6 +93,7 @@ _GLOBAL(strlen) subf r3,r3,r4 blr +#ifdef CONFIG_PPC32 _GLOBAL(memcmp) PPC_LCMPI 0,r5,0 beq- 2f @@ -106,6 +107,7 @@ _GLOBAL(memcmp) blr 2: li r3,0 blr +#endif _GLOBAL(memchr) PPC_LCMPI 0,r5,0 |