summaryrefslogtreecommitdiff
path: root/arch/sh/lib/udivsi3_i4i-Os.S
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-12-28 22:39:19 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2008-12-28 22:39:19 +0300
commit81d6e59dabb1ae0c782e9eb7e3d88f699d25b314 (patch)
tree532afd14c119f1c95206ef0d23db9c4c26a0aa34 /arch/sh/lib/udivsi3_i4i-Os.S
parent4a6908a3a050aacc9c3a2f36b276b46c0629ad91 (diff)
parent59de580af1c2fd671b0cb27c41ff958859ae5288 (diff)
downloadlinux-81d6e59dabb1ae0c782e9eb7e3d88f699d25b314.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6: (132 commits) sh: oprofile: Fix up the module build. sh: add UIO support for JPU on SH7722. serial: sh-sci: Fix up port pinmux for SH7366. sh: mach-rsk: Use uImage generation by default for rsk7201/7203. sh: mach-sh03: Fix up pata_platform build breakage. sh: enable deferred io LCDC on Migo-R video: sh_mobile_lcdcfb deferred io support video: deferred io with physically contiguous memory video: deferred io cleanup video: fix deferred io fsync() sh: add LCDC interrupt configuration to AP325 and Migo-R sh_mobile_lcdc: use FB_SYS helpers instead of FB_CFB sh: split coherent pages sh: dma: Kill off ISA DMA wrapper. sh: Conditionalize the code dumper on CONFIG_DUMP_CODE. sh: Kill off the unused SH_ALPHANUMERIC debug option. sh: Enable skipping of bss on debug platforms for sh32 also. doc: Update sh cpufreq documentation. sh: mrshpc_setup_windows() needs to be inline. serial: sh-sci: sci_poll_get_char() is only used by CONFIG_CONSOLE_POLL. ...
Diffstat (limited to 'arch/sh/lib/udivsi3_i4i-Os.S')
-rw-r--r--arch/sh/lib/udivsi3_i4i-Os.S149
1 files changed, 149 insertions, 0 deletions
diff --git a/arch/sh/lib/udivsi3_i4i-Os.S b/arch/sh/lib/udivsi3_i4i-Os.S
new file mode 100644
index 000000000000..4835553e1ea9
--- /dev/null
+++ b/arch/sh/lib/udivsi3_i4i-Os.S
@@ -0,0 +1,149 @@
+/* Copyright (C) 2006 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+/* Moderately Space-optimized libgcc routines for the Renesas SH /
+ STMicroelectronics ST40 CPUs.
+ Contributed by J"orn Rennecke joern.rennecke@st.com. */
+
+/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
+ sh4-200 run times:
+ udiv small divisor: 55 cycles
+ udiv large divisor: 52 cycles
+ sdiv small divisor, positive result: 59 cycles
+ sdiv large divisor, positive result: 56 cycles
+ sdiv small divisor, negative result: 65 cycles (*)
+ sdiv large divisor, negative result: 62 cycles (*)
+ (*): r2 is restored in the rts delay slot and has a lingering latency
+ of two more cycles. */
+ .balign 4
+ .global __udivsi3_i4i
+ .global __udivsi3_i4
+ .set __udivsi3_i4, __udivsi3_i4i
+ .type __udivsi3_i4i, @function
+ .type __sdivsi3_i4i, @function
+__udivsi3_i4i:
+ sts pr,r1
+ mov.l r4,@-r15
+ extu.w r5,r0
+ cmp/eq r5,r0
+ swap.w r4,r0
+ shlr16 r4
+ bf/s large_divisor
+ div0u
+ mov.l r5,@-r15
+ shll16 r5
+sdiv_small_divisor:
+ div1 r5,r4
+ bsr div6
+ div1 r5,r4
+ div1 r5,r4
+ bsr div6
+ div1 r5,r4
+ xtrct r4,r0
+ xtrct r0,r4
+ bsr div7
+ swap.w r4,r4
+ div1 r5,r4
+ bsr div7
+ div1 r5,r4
+ xtrct r4,r0
+ mov.l @r15+,r5
+ swap.w r0,r0
+ mov.l @r15+,r4
+ jmp @r1
+ rotcl r0
+div7:
+ div1 r5,r4
+div6:
+ div1 r5,r4; div1 r5,r4; div1 r5,r4
+ div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
+
+divx3:
+ rotcl r0
+ div1 r5,r4
+ rotcl r0
+ div1 r5,r4
+ rotcl r0
+ rts
+ div1 r5,r4
+
+large_divisor:
+ mov.l r5,@-r15
+sdiv_large_divisor:
+ xor r4,r0
+ .rept 4
+ rotcl r0
+ bsr divx3
+ div1 r5,r4
+ .endr
+ mov.l @r15+,r5
+ mov.l @r15+,r4
+ jmp @r1
+ rotcl r0
+
+ .global __sdivsi3_i4i
+ .global __sdivsi3_i4
+ .global __sdivsi3
+ .set __sdivsi3_i4, __sdivsi3_i4i
+ .set __sdivsi3, __sdivsi3_i4i
+__sdivsi3_i4i:
+ mov.l r4,@-r15
+ cmp/pz r5
+ mov.l r5,@-r15
+ bt/s pos_divisor
+ cmp/pz r4
+ neg r5,r5
+ extu.w r5,r0
+ bt/s neg_result
+ cmp/eq r5,r0
+ neg r4,r4
+pos_result:
+ swap.w r4,r0
+ bra sdiv_check_divisor
+ sts pr,r1
+pos_divisor:
+ extu.w r5,r0
+ bt/s pos_result
+ cmp/eq r5,r0
+ neg r4,r4
+neg_result:
+ mova negate_result,r0
+ ;
+ mov r0,r1
+ swap.w r4,r0
+ lds r2,macl
+ sts pr,r2
+sdiv_check_divisor:
+ shlr16 r4
+ bf/s sdiv_large_divisor
+ div0u
+ bra sdiv_small_divisor
+ shll16 r5
+ .balign 4
+negate_result:
+ neg r0,r0
+ jmp @r2
+ sts macl,r2