summaryrefslogtreecommitdiff
path: root/arch/powerpc/lib/copypage_64.S
diff options
context:
space:
mode:
authorAnton Blanchard <anton@samba.org>2010-02-10 21:07:54 +0300
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2010-02-17 06:03:16 +0300
commit63e6c5b8102af7df7a5e1cebbd865d711645886a (patch)
tree8dd79a95cf5265b6e1d88bed1493c0b952d23992 /arch/powerpc/lib/copypage_64.S
parent5a0e9b5718d921f5d8e17176d6b483f6b8f1844a (diff)
downloadlinux-63e6c5b8102af7df7a5e1cebbd865d711645886a.tar.xz
powerpc: Pair loads and stores in copy_4k_page
A number of our chips like loads and stores to be paired. A small kernel module testcase shows the improvement of pairing loads and stores in copy_4k_page: POWER6: +9% POWER7: +1.5% #include <linux/module.h> #include <linux/mm.h> #define ITERATIONS 10000000 static int __init copypage_init(void) { struct timespec before, after; unsigned long i; struct page *destpage, *srcpage; char *dest, *src; destpage = alloc_page(GFP_KERNEL); srcpage = alloc_page(GFP_KERNEL); dest = page_address(destpage); src = page_address(srcpage); getnstimeofday(&before); for (i = 0; i < ITERATIONS; i++) copy_4K_page(dest, src); getnstimeofday(&after); free_page((unsigned long)dest); free_page((unsigned long)src); printk(KERN_DEBUG "copy_4K_page loop took %lu ns\n", (after.tv_sec - before.tv_sec) * NSEC_PER_SEC + (after.tv_nsec - before.tv_nsec)); return 0; } static void __exit copypage_exit(void) { } module_init(copypage_init) module_exit(copypage_exit) MODULE_LICENSE("GPL"); MODULE_AUTHOR("Anton Blanchard"); Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/lib/copypage_64.S')
-rw-r--r--arch/powerpc/lib/copypage_64.S28
1 files changed, 14 insertions, 14 deletions
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index e68beac0a171..4d4eeb900486 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -43,62 +43,62 @@ END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
ld r7,16(r4)
ldu r8,24(r4)
1: std r5,8(r3)
- ld r9,8(r4)
std r6,16(r3)
+ ld r9,8(r4)
ld r10,16(r4)
std r7,24(r3)
- ld r11,24(r4)
std r8,32(r3)
+ ld r11,24(r4)
ld r12,32(r4)
std r9,40(r3)
- ld r5,40(r4)
std r10,48(r3)
+ ld r5,40(r4)
ld r6,48(r4)
std r11,56(r3)
- ld r7,56(r4)
std r12,64(r3)
+ ld r7,56(r4)
ld r8,64(r4)
std r5,72(r3)
- ld r9,72(r4)
std r6,80(r3)
+ ld r9,72(r4)
ld r10,80(r4)
std r7,88(r3)
- ld r11,88(r4)
std r8,96(r3)
+ ld r11,88(r4)
ld r12,96(r4)
std r9,104(r3)
- ld r5,104(r4)
std r10,112(r3)
+ ld r5,104(r4)
ld r6,112(r4)
std r11,120(r3)
- ld r7,120(r4)
stdu r12,128(r3)
+ ld r7,120(r4)
ldu r8,128(r4)
bdnz 1b
std r5,8(r3)
- ld r9,8(r4)
std r6,16(r3)
+ ld r9,8(r4)
ld r10,16(r4)
std r7,24(r3)
- ld r11,24(r4)
std r8,32(r3)
+ ld r11,24(r4)
ld r12,32(r4)
std r9,40(r3)
- ld r5,40(r4)
std r10,48(r3)
+ ld r5,40(r4)
ld r6,48(r4)
std r11,56(r3)
- ld r7,56(r4)
std r12,64(r3)
+ ld r7,56(r4)
ld r8,64(r4)
std r5,72(r3)
- ld r9,72(r4)
std r6,80(r3)
+ ld r9,72(r4)
ld r10,80(r4)
std r7,88(r3)
- ld r11,88(r4)
std r8,96(r3)
+ ld r11,88(r4)
ld r12,96(r4)
std r9,104(r3)
std r10,112(r3)