summaryrefslogtreecommitdiff
path: root/arch/x86/crypto
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r--arch/x86/crypto/Makefile4
-rw-r--r--arch/x86/crypto/salsa20-i586-asm_32.S1114
-rw-r--r--arch/x86/crypto/salsa20-x86_64-asm_64.S919
-rw-r--r--arch/x86/crypto/salsa20_glue.c116
4 files changed, 0 insertions, 2153 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 5f07333bb224..9c903a420cda 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -15,7 +15,6 @@ obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
-obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
@@ -24,7 +23,6 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
-obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
@@ -59,7 +57,6 @@ endif
aes-i586-y := aes-i586-asm_32.o aes_glue.o
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
-salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
@@ -68,7 +65,6 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
-salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S
deleted file mode 100644
index 329452b8f794..000000000000
--- a/arch/x86/crypto/salsa20-i586-asm_32.S
+++ /dev/null
@@ -1,1114 +0,0 @@
-# salsa20_pm.s version 20051229
-# D. J. Bernstein
-# Public domain.
-
-#include <linux/linkage.h>
-
-.text
-
-# enter salsa20_encrypt_bytes
-ENTRY(salsa20_encrypt_bytes)
- mov %esp,%eax
- and $31,%eax
- add $256,%eax
- sub %eax,%esp
- # eax_stack = eax
- movl %eax,80(%esp)
- # ebx_stack = ebx
- movl %ebx,84(%esp)
- # esi_stack = esi
- movl %esi,88(%esp)
- # edi_stack = edi
- movl %edi,92(%esp)
- # ebp_stack = ebp
- movl %ebp,96(%esp)
- # x = arg1
- movl 4(%esp,%eax),%edx
- # m = arg2
- movl 8(%esp,%eax),%esi
- # out = arg3
- movl 12(%esp,%eax),%edi
- # bytes = arg4
- movl 16(%esp,%eax),%ebx
- # bytes -= 0
- sub $0,%ebx
- # goto done if unsigned<=
- jbe ._done
-._start:
- # in0 = *(uint32 *) (x + 0)
- movl 0(%edx),%eax
- # in1 = *(uint32 *) (x + 4)
- movl 4(%edx),%ecx
- # in2 = *(uint32 *) (x + 8)
- movl 8(%edx),%ebp
- # j0 = in0
- movl %eax,164(%esp)
- # in3 = *(uint32 *) (x + 12)
- movl 12(%edx),%eax
- # j1 = in1
- movl %ecx,168(%esp)
- # in4 = *(uint32 *) (x + 16)
- movl 16(%edx),%ecx
- # j2 = in2
- movl %ebp,172(%esp)
- # in5 = *(uint32 *) (x + 20)
- movl 20(%edx),%ebp
- # j3 = in3
- movl %eax,176(%esp)
- # in6 = *(uint32 *) (x + 24)
- movl 24(%edx),%eax
- # j4 = in4
- movl %ecx,180(%esp)
- # in7 = *(uint32 *) (x + 28)
- movl 28(%edx),%ecx
- # j5 = in5
- movl %ebp,184(%esp)
- # in8 = *(uint32 *) (x + 32)
- movl 32(%edx),%ebp
- # j6 = in6
- movl %eax,188(%esp)
- # in9 = *(uint32 *) (x + 36)
- movl 36(%edx),%eax
- # j7 = in7
- movl %ecx,192(%esp)
- # in10 = *(uint32 *) (x + 40)
- movl 40(%edx),%ecx
- # j8 = in8
- movl %ebp,196(%esp)
- # in11 = *(uint32 *) (x + 44)
- movl 44(%edx),%ebp
- # j9 = in9
- movl %eax,200(%esp)
- # in12 = *(uint32 *) (x + 48)
- movl 48(%edx),%eax
- # j10 = in10
- movl %ecx,204(%esp)
- # in13 = *(uint32 *) (x + 52)
- movl 52(%edx),%ecx
- # j11 = in11
- movl %ebp,208(%esp)
- # in14 = *(uint32 *) (x + 56)
- movl 56(%edx),%ebp
- # j12 = in12
- movl %eax,212(%esp)
- # in15 = *(uint32 *) (x + 60)
- movl 60(%edx),%eax
- # j13 = in13
- movl %ecx,216(%esp)
- # j14 = in14
- movl %ebp,220(%esp)
- # j15 = in15
- movl %eax,224(%esp)
- # x_backup = x
- movl %edx,64(%esp)
-._bytesatleast1:
- # bytes - 64
- cmp $64,%ebx
- # goto nocopy if unsigned>=
- jae ._nocopy
- # ctarget = out
- movl %edi,228(%esp)
- # out = &tmp
- leal 0(%esp),%edi
- # i = bytes
- mov %ebx,%ecx
- # while (i) { *out++ = *m++; --i }
- rep movsb
- # out = &tmp
- leal 0(%esp),%edi
- # m = &tmp
- leal 0(%esp),%esi
-._nocopy:
- # out_backup = out
- movl %edi,72(%esp)
- # m_backup = m
- movl %esi,68(%esp)
- # bytes_backup = bytes
- movl %ebx,76(%esp)
- # in0 = j0
- movl 164(%esp),%eax
- # in1 = j1
- movl 168(%esp),%ecx
- # in2 = j2
- movl 172(%esp),%edx
- # in3 = j3
- movl 176(%esp),%ebx
- # x0 = in0
- movl %eax,100(%esp)
- # x1 = in1
- movl %ecx,104(%esp)
- # x2 = in2
- movl %edx,108(%esp)
- # x3 = in3
- movl %ebx,112(%esp)
- # in4 = j4
- movl 180(%esp),%eax
- # in5 = j5
- movl 184(%esp),%ecx
- # in6 = j6
- movl 188(%esp),%edx
- # in7 = j7
- movl 192(%esp),%ebx
- # x4 = in4
- movl %eax,116(%esp)
- # x5 = in5
- movl %ecx,120(%esp)
- # x6 = in6
- movl %edx,124(%esp)
- # x7 = in7
- movl %ebx,128(%esp)
- # in8 = j8
- movl 196(%esp),%eax
- # in9 = j9
- movl 200(%esp),%ecx
- # in10 = j10
- movl 204(%esp),%edx
- # in11 = j11
- movl 208(%esp),%ebx
- # x8 = in8
- movl %eax,132(%esp)
- # x9 = in9
- movl %ecx,136(%esp)
- # x10 = in10
- movl %edx,140(%esp)
- # x11 = in11
- movl %ebx,144(%esp)
- # in12 = j12
- movl 212(%esp),%eax
- # in13 = j13
- movl 216(%esp),%ecx
- # in14 = j14
- movl 220(%esp),%edx
- # in15 = j15
- movl 224(%esp),%ebx
- # x12 = in12
- movl %eax,148(%esp)
- # x13 = in13
- movl %ecx,152(%esp)
- # x14 = in14
- movl %edx,156(%esp)
- # x15 = in15
- movl %ebx,160(%esp)
- # i = 20
- mov $20,%ebp
- # p = x0
- movl 100(%esp),%eax
- # s = x5
- movl 120(%esp),%ecx
- # t = x10
- movl 140(%esp),%edx
- # w = x15
- movl 160(%esp),%ebx
-._mainloop:
- # x0 = p
- movl %eax,100(%esp)
- # x10 = t
- movl %edx,140(%esp)
- # p += x12
- addl 148(%esp),%eax
- # x5 = s
- movl %ecx,120(%esp)
- # t += x6
- addl 124(%esp),%edx
- # x15 = w
- movl %ebx,160(%esp)
- # r = x1
- movl 104(%esp),%esi
- # r += s
- add %ecx,%esi
- # v = x11
- movl 144(%esp),%edi
- # v += w
- add %ebx,%edi
- # p <<<= 7
- rol $7,%eax
- # p ^= x4
- xorl 116(%esp),%eax
- # t <<<= 7
- rol $7,%edx
- # t ^= x14
- xorl 156(%esp),%edx
- # r <<<= 7
- rol $7,%esi
- # r ^= x9
- xorl 136(%esp),%esi
- # v <<<= 7
- rol $7,%edi
- # v ^= x3
- xorl 112(%esp),%edi
- # x4 = p
- movl %eax,116(%esp)
- # x14 = t
- movl %edx,156(%esp)
- # p += x0
- addl 100(%esp),%eax
- # x9 = r
- movl %esi,136(%esp)
- # t += x10
- addl 140(%esp),%edx
- # x3 = v
- movl %edi,112(%esp)
- # p <<<= 9
- rol $9,%eax
- # p ^= x8
- xorl 132(%esp),%eax
- # t <<<= 9
- rol $9,%edx
- # t ^= x2
- xorl 108(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 9
- rol $9,%ecx
- # s ^= x13
- xorl 152(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 9
- rol $9,%ebx
- # w ^= x7
- xorl 128(%esp),%ebx
- # x8 = p
- movl %eax,132(%esp)
- # x2 = t
- movl %edx,108(%esp)
- # p += x4
- addl 116(%esp),%eax
- # x13 = s
- movl %ecx,152(%esp)
- # t += x14
- addl 156(%esp),%edx
- # x7 = w
- movl %ebx,128(%esp)
- # p <<<= 13
- rol $13,%eax
- # p ^= x12
- xorl 148(%esp),%eax
- # t <<<= 13
- rol $13,%edx
- # t ^= x6
- xorl 124(%esp),%edx
- # r += s
- add %ecx,%esi
- # r <<<= 13
- rol $13,%esi
- # r ^= x1
- xorl 104(%esp),%esi
- # v += w
- add %ebx,%edi
- # v <<<= 13
- rol $13,%edi
- # v ^= x11
- xorl 144(%esp),%edi
- # x12 = p
- movl %eax,148(%esp)
- # x6 = t
- movl %edx,124(%esp)
- # p += x8
- addl 132(%esp),%eax
- # x1 = r
- movl %esi,104(%esp)
- # t += x2
- addl 108(%esp),%edx
- # x11 = v
- movl %edi,144(%esp)
- # p <<<= 18
- rol $18,%eax
- # p ^= x0
- xorl 100(%esp),%eax
- # t <<<= 18
- rol $18,%edx
- # t ^= x10
- xorl 140(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 18
- rol $18,%ecx
- # s ^= x5
- xorl 120(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 18
- rol $18,%ebx
- # w ^= x15
- xorl 160(%esp),%ebx
- # x0 = p
- movl %eax,100(%esp)
- # x10 = t
- movl %edx,140(%esp)
- # p += x3
- addl 112(%esp),%eax
- # p <<<= 7
- rol $7,%eax
- # x5 = s
- movl %ecx,120(%esp)
- # t += x9
- addl 136(%esp),%edx
- # x15 = w
- movl %ebx,160(%esp)
- # r = x4
- movl 116(%esp),%esi
- # r += s
- add %ecx,%esi
- # v = x14
- movl 156(%esp),%edi
- # v += w
- add %ebx,%edi
- # p ^= x1
- xorl 104(%esp),%eax
- # t <<<= 7
- rol $7,%edx
- # t ^= x11
- xorl 144(%esp),%edx
- # r <<<= 7
- rol $7,%esi
- # r ^= x6
- xorl 124(%esp),%esi
- # v <<<= 7
- rol $7,%edi
- # v ^= x12
- xorl 148(%esp),%edi
- # x1 = p
- movl %eax,104(%esp)
- # x11 = t
- movl %edx,144(%esp)
- # p += x0
- addl 100(%esp),%eax
- # x6 = r
- movl %esi,124(%esp)
- # t += x10
- addl 140(%esp),%edx
- # x12 = v
- movl %edi,148(%esp)
- # p <<<= 9
- rol $9,%eax
- # p ^= x2
- xorl 108(%esp),%eax
- # t <<<= 9
- rol $9,%edx
- # t ^= x8
- xorl 132(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 9
- rol $9,%ecx
- # s ^= x7
- xorl 128(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 9
- rol $9,%ebx
- # w ^= x13
- xorl 152(%esp),%ebx
- # x2 = p
- movl %eax,108(%esp)
- # x8 = t
- movl %edx,132(%esp)
- # p += x1
- addl 104(%esp),%eax
- # x7 = s
- movl %ecx,128(%esp)
- # t += x11
- addl 144(%esp),%edx
- # x13 = w
- movl %ebx,152(%esp)
- # p <<<= 13
- rol $13,%eax
- # p ^= x3
- xorl 112(%esp),%eax
- # t <<<= 13
- rol $13,%edx
- # t ^= x9
- xorl 136(%esp),%edx
- # r += s
- add %ecx,%esi
- # r <<<= 13
- rol $13,%esi
- # r ^= x4
- xorl 116(%esp),%esi
- # v += w
- add %ebx,%edi
- # v <<<= 13
- rol $13,%edi
- # v ^= x14
- xorl 156(%esp),%edi
- # x3 = p
- movl %eax,112(%esp)
- # x9 = t
- movl %edx,136(%esp)
- # p += x2
- addl 108(%esp),%eax
- # x4 = r
- movl %esi,116(%esp)
- # t += x8
- addl 132(%esp),%edx
- # x14 = v
- movl %edi,156(%esp)
- # p <<<= 18
- rol $18,%eax
- # p ^= x0
- xorl 100(%esp),%eax
- # t <<<= 18
- rol $18,%edx
- # t ^= x10
- xorl 140(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 18
- rol $18,%ecx
- # s ^= x5
- xorl 120(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 18
- rol $18,%ebx
- # w ^= x15
- xorl 160(%esp),%ebx
- # x0 = p
- movl %eax,100(%esp)
- # x10 = t
- movl %edx,140(%esp)
- # p += x12
- addl 148(%esp),%eax
- # x5 = s
- movl %ecx,120(%esp)
- # t += x6
- addl 124(%esp),%edx
- # x15 = w
- movl %ebx,160(%esp)
- # r = x1
- movl 104(%esp),%esi
- # r += s
- add %ecx,%esi
- # v = x11
- movl 144(%esp),%edi
- # v += w
- add %ebx,%edi
- # p <<<= 7
- rol $7,%eax
- # p ^= x4
- xorl 116(%esp),%eax
- # t <<<= 7
- rol $7,%edx
- # t ^= x14
- xorl 156(%esp),%edx
- # r <<<= 7
- rol $7,%esi
- # r ^= x9
- xorl 136(%esp),%esi
- # v <<<= 7
- rol $7,%edi
- # v ^= x3
- xorl 112(%esp),%edi
- # x4 = p
- movl %eax,116(%esp)
- # x14 = t
- movl %edx,156(%esp)
- # p += x0
- addl 100(%esp),%eax
- # x9 = r
- movl %esi,136(%esp)
- # t += x10
- addl 140(%esp),%edx
- # x3 = v
- movl %edi,112(%esp)
- # p <<<= 9
- rol $9,%eax
- # p ^= x8
- xorl 132(%esp),%eax
- # t <<<= 9
- rol $9,%edx
- # t ^= x2
- xorl 108(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 9
- rol $9,%ecx
- # s ^= x13
- xorl 152(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 9
- rol $9,%ebx
- # w ^= x7
- xorl 128(%esp),%ebx
- # x8 = p
- movl %eax,132(%esp)
- # x2 = t
- movl %edx,108(%esp)
- # p += x4
- addl 116(%esp),%eax
- # x13 = s
- movl %ecx,152(%esp)
- # t += x14
- addl 156(%esp),%edx
- # x7 = w
- movl %ebx,128(%esp)
- # p <<<= 13
- rol $13,%eax
- # p ^= x12
- xorl 148(%esp),%eax
- # t <<<= 13
- rol $13,%edx
- # t ^= x6
- xorl 124(%esp),%edx
- # r += s
- add %ecx,%esi
- # r <<<= 13
- rol $13,%esi
- # r ^= x1
- xorl 104(%esp),%esi
- # v += w
- add %ebx,%edi
- # v <<<= 13
- rol $13,%edi
- # v ^= x11
- xorl 144(%esp),%edi
- # x12 = p
- movl %eax,148(%esp)
- # x6 = t
- movl %edx,124(%esp)
- # p += x8
- addl 132(%esp),%eax
- # x1 = r
- movl %esi,104(%esp)
- # t += x2
- addl 108(%esp),%edx
- # x11 = v
- movl %edi,144(%esp)
- # p <<<= 18
- rol $18,%eax
- # p ^= x0
- xorl 100(%esp),%eax
- # t <<<= 18
- rol $18,%edx
- # t ^= x10
- xorl 140(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 18
- rol $18,%ecx
- # s ^= x5
- xorl 120(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 18
- rol $18,%ebx
- # w ^= x15
- xorl 160(%esp),%ebx
- # x0 = p
- movl %eax,100(%esp)
- # x10 = t
- movl %edx,140(%esp)
- # p += x3
- addl 112(%esp),%eax
- # p <<<= 7
- rol $7,%eax
- # x5 = s
- movl %ecx,120(%esp)
- # t += x9
- addl 136(%esp),%edx
- # x15 = w
- movl %ebx,160(%esp)
- # r = x4
- movl 116(%esp),%esi
- # r += s
- add %ecx,%esi
- # v = x14
- movl 156(%esp),%edi
- # v += w
- add %ebx,%edi
- # p ^= x1
- xorl 104(%esp),%eax
- # t <<<= 7
- rol $7,%edx
- # t ^= x11
- xorl 144(%esp),%edx
- # r <<<= 7
- rol $7,%esi
- # r ^= x6
- xorl 124(%esp),%esi
- # v <<<= 7
- rol $7,%edi
- # v ^= x12
- xorl 148(%esp),%edi
- # x1 = p
- movl %eax,104(%esp)
- # x11 = t
- movl %edx,144(%esp)
- # p += x0
- addl 100(%esp),%eax
- # x6 = r
- movl %esi,124(%esp)
- # t += x10
- addl 140(%esp),%edx
- # x12 = v
- movl %edi,148(%esp)
- # p <<<= 9
- rol $9,%eax
- # p ^= x2
- xorl 108(%esp),%eax
- # t <<<= 9
- rol $9,%edx
- # t ^= x8
- xorl 132(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 9
- rol $9,%ecx
- # s ^= x7
- xorl 128(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 9
- rol $9,%ebx
- # w ^= x13
- xorl 152(%esp),%ebx
- # x2 = p
- movl %eax,108(%esp)
- # x8 = t
- movl %edx,132(%esp)
- # p += x1
- addl 104(%esp),%eax
- # x7 = s
- movl %ecx,128(%esp)
- # t += x11
- addl 144(%esp),%edx
- # x13 = w
- movl %ebx,152(%esp)
- # p <<<= 13
- rol $13,%eax
- # p ^= x3
- xorl 112(%esp),%eax
- # t <<<= 13
- rol $13,%edx
- # t ^= x9
- xorl 136(%esp),%edx
- # r += s
- add %ecx,%esi
- # r <<<= 13
- rol $13,%esi
- # r ^= x4
- xorl 116(%esp),%esi
- # v += w
- add %ebx,%edi
- # v <<<= 13
- rol $13,%edi
- # v ^= x14
- xorl 156(%esp),%edi
- # x3 = p
- movl %eax,112(%esp)
- # x9 = t
- movl %edx,136(%esp)
- # p += x2
- addl 108(%esp),%eax
- # x4 = r
- movl %esi,116(%esp)
- # t += x8
- addl 132(%esp),%edx
- # x14 = v
- movl %edi,156(%esp)
- # p <<<= 18
- rol $18,%eax
- # p ^= x0
- xorl 100(%esp),%eax
- # t <<<= 18
- rol $18,%edx
- # t ^= x10
- xorl 140(%esp),%edx
- # s += r
- add %esi,%ecx
- # s <<<= 18
- rol $18,%ecx
- # s ^= x5
- xorl 120(%esp),%ecx
- # w += v
- add %edi,%ebx
- # w <<<= 18
- rol $18,%ebx
- # w ^= x15
- xorl 160(%esp),%ebx
- # i -= 4
- sub $4,%ebp
- # goto mainloop if unsigned >
- ja ._mainloop
- # x0 = p
- movl %eax,100(%esp)
- # x5 = s
- movl %ecx,120(%esp)
- # x10 = t
- movl %edx,140(%esp)
- # x15 = w
- movl %ebx,160(%esp)
- # out = out_backup
- movl 72(%esp),%edi
- # m = m_backup
- movl 68(%esp),%esi
- # in0 = x0
- movl 100(%esp),%eax
- # in1 = x1
- movl 104(%esp),%ecx
- # in0 += j0
- addl 164(%esp),%eax
- # in1 += j1
- addl 168(%esp),%ecx
- # in0 ^= *(uint32 *) (m + 0)
- xorl 0(%esi),%eax
- # in1 ^= *(uint32 *) (m + 4)
- xorl 4(%esi),%ecx
- # *(uint32 *) (out + 0) = in0
- movl %eax,0(%edi)
- # *(uint32 *) (out + 4) = in1
- movl %ecx,4(%edi)
- # in2 = x2
- movl 108(%esp),%eax
- # in3 = x3
- movl 112(%esp),%ecx
- # in2 += j2
- addl 172(%esp),%eax
- # in3 += j3
- addl 176(%esp),%ecx
- # in2 ^= *(uint32 *) (m + 8)
- xorl 8(%esi),%eax
- # in3 ^= *(uint32 *) (m + 12)
- xorl 12(%esi),%ecx
- # *(uint32 *) (out + 8) = in2
- movl %eax,8(%edi)
- # *(uint32 *) (out + 12) = in3
- movl %ecx,12(%edi)
- # in4 = x4
- movl 116(%esp),%eax
- # in5 = x5
- movl 120(%esp),%ecx
- # in4 += j4
- addl 180(%esp),%eax
- # in5 += j5
- addl 184(%esp),%ecx
- # in4 ^= *(uint32 *) (m + 16)
- xorl 16(%esi),%eax
- # in5 ^= *(uint32 *) (m + 20)
- xorl 20(%esi),%ecx
- # *(uint32 *) (out + 16) = in4
- movl %eax,16(%edi)
- # *(uint32 *) (out + 20) = in5
- movl %ecx,20(%edi)
- # in6 = x6
- movl 124(%esp),%eax
- # in7 = x7
- movl 128(%esp),%ecx
- # in6 += j6
- addl 188(%esp),%eax
- # in7 += j7
- addl 192(%esp),%ecx
- # in6 ^= *(uint32 *) (m + 24)
- xorl 24(%esi),%eax
- # in7 ^= *(uint32 *) (m + 28)
- xorl 28(%esi),%ecx
- # *(uint32 *) (out + 24) = in6
- movl %eax,24(%edi)
- # *(uint32 *) (out + 28) = in7
- movl %ecx,28(%edi)
- # in8 = x8
- movl 132(%esp),%eax
- # in9 = x9
- movl 136(%esp),%ecx
- # in8 += j8
- addl 196(%esp),%eax
- # in9 += j9
- addl 200(%esp),%ecx
- # in8 ^= *(uint32 *) (m + 32)
- xorl 32(%esi),%eax
- # in9 ^= *(uint32 *) (m + 36)
- xorl 36(%esi),%ecx
- # *(uint32 *) (out + 32) = in8
- movl %eax,32(%edi)
- # *(uint32 *) (out + 36) = in9
- movl %ecx,36(%edi)
- # in10 = x10
- movl 140(%esp),%eax
- # in11 = x11
- movl 144(%esp),%ecx
- # in10 += j10
- addl 204(%esp),%eax
- # in11 += j11
- addl 208(%esp),%ecx
- # in10 ^= *(uint32 *) (m + 40)
- xorl 40(%esi),%eax
- # in11 ^= *(uint32 *) (m + 44)
- xorl 44(%esi),%ecx
- # *(uint32 *) (out + 40) = in10
- movl %eax,40(%edi)
- # *(uint32 *) (out + 44) = in11
- movl %ecx,44(%edi)
- # in12 = x12
- movl 148(%esp),%eax
- # in13 = x13
- movl 152(%esp),%ecx
- # in12 += j12
- addl 212(%esp),%eax
- # in13 += j13
- addl 216(%esp),%ecx
- # in12 ^= *(uint32 *) (m + 48)
- xorl 48(%esi),%eax
- # in13 ^= *(uint32 *) (m + 52)
- xorl 52(%esi),%ecx
- # *(uint32 *) (out + 48) = in12
- movl %eax,48(%edi)
- # *(uint32 *) (out + 52) = in13
- movl %ecx,52(%edi)
- # in14 = x14
- movl 156(%esp),%eax
- # in15 = x15
- movl 160(%esp),%ecx
- # in14 += j14
- addl 220(%esp),%eax
- # in15 += j15
- addl 224(%esp),%ecx
- # in14 ^= *(uint32 *) (m + 56)
- xorl 56(%esi),%eax
- # in15 ^= *(uint32 *) (m + 60)
- xorl 60(%esi),%ecx
- # *(uint32 *) (out + 56) = in14
- movl %eax,56(%edi)
- # *(uint32 *) (out + 60) = in15
- movl %ecx,60(%edi)
- # bytes = bytes_backup
- movl 76(%esp),%ebx
- # in8 = j8
- movl 196(%esp),%eax
- # in9 = j9
- movl 200(%esp),%ecx
- # in8 += 1
- add $1,%eax
- # in9 += 0 + carry
- adc $0,%ecx
- # j8 = in8
- movl %eax,196(%esp)
- # j9 = in9
- movl %ecx,200(%esp)
- # bytes - 64
- cmp $64,%ebx
- # goto bytesatleast65 if unsigned>
- ja ._bytesatleast65
- # goto bytesatleast64 if unsigned>=
- jae ._bytesatleast64
- # m = out
- mov %edi,%esi
- # out = ctarget
- movl 228(%esp),%edi
- # i = bytes
- mov %ebx,%ecx
- # while (i) { *out++ = *m++; --i }
- rep movsb
-._bytesatleast64:
- # x = x_backup
- movl 64(%esp),%eax
- # in8 = j8
- movl 196(%esp),%ecx
- # in9 = j9
- movl 200(%esp),%edx
- # *(uint32 *) (x + 32) = in8
- movl %ecx,32(%eax)
- # *(uint32 *) (x + 36) = in9
- movl %edx,36(%eax)
-._done:
- # eax = eax_stack
- movl 80(%esp),%eax
- # ebx = ebx_stack
- movl 84(%esp),%ebx
- # esi = esi_stack
- movl 88(%esp),%esi
- # edi = edi_stack
- movl 92(%esp),%edi
- # ebp = ebp_stack
- movl 96(%esp),%ebp
- # leave
- add %eax,%esp
- ret
-._bytesatleast65:
- # bytes -= 64
- sub $64,%ebx
- # out += 64
- add $64,%edi
- # m += 64
- add $64,%esi
- # goto bytesatleast1
- jmp ._bytesatleast1
-ENDPROC(salsa20_encrypt_bytes)
-
-# enter salsa20_keysetup
-ENTRY(salsa20_keysetup)
- mov %esp,%eax
- and $31,%eax
- add $256,%eax
- sub %eax,%esp
- # eax_stack = eax
- movl %eax,64(%esp)
- # ebx_stack = ebx
- movl %ebx,68(%esp)
- # esi_stack = esi
- movl %esi,72(%esp)
- # edi_stack = edi
- movl %edi,76(%esp)
- # ebp_stack = ebp
- movl %ebp,80(%esp)
- # k = arg2
- movl 8(%esp,%eax),%ecx
- # kbits = arg3
- movl 12(%esp,%eax),%edx
- # x = arg1
- movl 4(%esp,%eax),%eax
- # in1 = *(uint32 *) (k + 0)
- movl 0(%ecx),%ebx
- # in2 = *(uint32 *) (k + 4)
- movl 4(%ecx),%esi
- # in3 = *(uint32 *) (k + 8)
- movl 8(%ecx),%edi
- # in4 = *(uint32 *) (k + 12)
- movl 12(%ecx),%ebp
- # *(uint32 *) (x + 4) = in1
- movl %ebx,4(%eax)
- # *(uint32 *) (x + 8) = in2
- movl %esi,8(%eax)
- # *(uint32 *) (x + 12) = in3
- movl %edi,12(%eax)
- # *(uint32 *) (x + 16) = in4
- movl %ebp,16(%eax)
- # kbits - 256
- cmp $256,%edx
- # goto kbits128 if unsigned<
- jb ._kbits128
-._kbits256:
- # in11 = *(uint32 *) (k + 16)
- movl 16(%ecx),%edx
- # in12 = *(uint32 *) (k + 20)
- movl 20(%ecx),%ebx
- # in13 = *(uint32 *) (k + 24)
- movl 24(%ecx),%esi
- # in14 = *(uint32 *) (k + 28)
- movl 28(%ecx),%ecx
- # *(uint32 *) (x + 44) = in11
- movl %edx,44(%eax)
- # *(uint32 *) (x + 48) = in12
- movl %ebx,48(%eax)
- # *(uint32 *) (x + 52) = in13
- movl %esi,52(%eax)
- # *(uint32 *) (x + 56) = in14
- movl %ecx,56(%eax)
- # in0 = 1634760805
- mov $1634760805,%ecx
- # in5 = 857760878
- mov $857760878,%edx
- # in10 = 2036477234
- mov $2036477234,%ebx
- # in15 = 1797285236
- mov $1797285236,%esi
- # *(uint32 *) (x + 0) = in0
- movl %ecx,0(%eax)
- # *(uint32 *) (x + 20) = in5
- movl %edx,20(%eax)
- # *(uint32 *) (x + 40) = in10
- movl %ebx,40(%eax)
- # *(uint32 *) (x + 60) = in15
- movl %esi,60(%eax)
- # goto keysetupdone
- jmp ._keysetupdone
-._kbits128:
- # in11 = *(uint32 *) (k + 0)
- movl 0(%ecx),%edx
- # in12 = *(uint32 *) (k + 4)
- movl 4(%ecx),%ebx
- # in13 = *(uint32 *) (k + 8)
- movl 8(%ecx),%esi
- # in14 = *(uint32 *) (k + 12)
- movl 12(%ecx),%ecx
- # *(uint32 *) (x + 44) = in11
- movl %edx,44(%eax)
- # *(uint32 *) (x + 48) = in12
- movl %ebx,48(%eax)
- # *(uint32 *) (x + 52) = in13
- movl %esi,52(%eax)
- # *(uint32 *) (x + 56) = in14
- movl %ecx,56(%eax)
- # in0 = 1634760805
- mov $1634760805,%ecx
- # in5 = 824206446
- mov $824206446,%edx
- # in10 = 2036477238
- mov $2036477238,%ebx
- # in15 = 1797285236
- mov $1797285236,%esi
- # *(uint32 *) (x + 0) = in0
- movl %ecx,0(%eax)
- # *(uint32 *) (x + 20) = in5
- movl %edx,20(%eax)
- # *(uint32 *) (x + 40) = in10
- movl %ebx,40(%eax)
- # *(uint32 *) (x + 60) = in15
- movl %esi,60(%eax)
-._keysetupdone:
- # eax = eax_stack
- movl 64(%esp),%eax
- # ebx = ebx_stack
- movl 68(%esp),%ebx
- # esi = esi_stack
- movl 72(%esp),%esi
- # edi = edi_stack
- movl 76(%esp),%edi
- # ebp = ebp_stack
- movl 80(%esp),%ebp
- # leave
- add %eax,%esp
- ret
-ENDPROC(salsa20_keysetup)
-
-# enter salsa20_ivsetup
-ENTRY(salsa20_ivsetup)
- mov %esp,%eax
- and $31,%eax
- add $256,%eax
- sub %eax,%esp
- # eax_stack = eax
- movl %eax,64(%esp)
- # ebx_stack = ebx
- movl %ebx,68(%esp)
- # esi_stack = esi
- movl %esi,72(%esp)
- # edi_stack = edi
- movl %edi,76(%esp)
- # ebp_stack = ebp
- movl %ebp,80(%esp)
- # iv = arg2
- movl 8(%esp,%eax),%ecx
- # x = arg1
- movl 4(%esp,%eax),%eax
- # in6 = *(uint32 *) (iv + 0)
- movl 0(%ecx),%edx
- # in7 = *(uint32 *) (iv + 4)
- movl 4(%ecx),%ecx
- # in8 = 0
- mov $0,%ebx
- # in9 = 0
- mov $0,%esi
- # *(uint32 *) (x + 24) = in6
- movl %edx,24(%eax)
- # *(uint32 *) (x + 28) = in7
- movl %ecx,28(%eax)
- # *(uint32 *) (x + 32) = in8
- movl %ebx,32(%eax)
- # *(uint32 *) (x + 36) = in9
- movl %esi,36(%eax)
- # eax = eax_stack
- movl 64(%esp),%eax
- # ebx = ebx_stack
- movl 68(%esp),%ebx
- # esi = esi_stack
- movl 72(%esp),%esi
- # edi = edi_stack
- movl 76(%esp),%edi
- # ebp = ebp_stack
- movl 80(%esp),%ebp
- # leave
- add %eax,%esp
- ret
-ENDPROC(salsa20_ivsetup)
diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S
deleted file mode 100644
index 10db30d58006..000000000000
--- a/arch/x86/crypto/salsa20-x86_64-asm_64.S
+++ /dev/null
@@ -1,919 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/linkage.h>
-
-# enter salsa20_encrypt_bytes
-ENTRY(salsa20_encrypt_bytes)
- mov %rsp,%r11
- and $31,%r11
- add $256,%r11
- sub %r11,%rsp
- # x = arg1
- mov %rdi,%r8
- # m = arg2
- mov %rsi,%rsi
- # out = arg3
- mov %rdx,%rdi
- # bytes = arg4
- mov %rcx,%rdx
- # unsigned>? bytes - 0
- cmp $0,%rdx
- # comment:fp stack unchanged by jump
- # goto done if !unsigned>
- jbe ._done
- # comment:fp stack unchanged by fallthrough
-# start:
-._start:
- # r11_stack = r11
- movq %r11,0(%rsp)
- # r12_stack = r12
- movq %r12,8(%rsp)
- # r13_stack = r13
- movq %r13,16(%rsp)
- # r14_stack = r14
- movq %r14,24(%rsp)
- # r15_stack = r15
- movq %r15,32(%rsp)
- # rbx_stack = rbx
- movq %rbx,40(%rsp)
- # rbp_stack = rbp
- movq %rbp,48(%rsp)
- # in0 = *(uint64 *) (x + 0)
- movq 0(%r8),%rcx
- # in2 = *(uint64 *) (x + 8)
- movq 8(%r8),%r9
- # in4 = *(uint64 *) (x + 16)
- movq 16(%r8),%rax
- # in6 = *(uint64 *) (x + 24)
- movq 24(%r8),%r10
- # in8 = *(uint64 *) (x + 32)
- movq 32(%r8),%r11
- # in10 = *(uint64 *) (x + 40)
- movq 40(%r8),%r12
- # in12 = *(uint64 *) (x + 48)
- movq 48(%r8),%r13
- # in14 = *(uint64 *) (x + 56)
- movq 56(%r8),%r14
- # j0 = in0
- movq %rcx,56(%rsp)
- # j2 = in2
- movq %r9,64(%rsp)
- # j4 = in4
- movq %rax,72(%rsp)
- # j6 = in6
- movq %r10,80(%rsp)
- # j8 = in8
- movq %r11,88(%rsp)
- # j10 = in10
- movq %r12,96(%rsp)
- # j12 = in12
- movq %r13,104(%rsp)
- # j14 = in14
- movq %r14,112(%rsp)
- # x_backup = x
- movq %r8,120(%rsp)
-# bytesatleast1:
-._bytesatleast1:
- # unsigned<? bytes - 64
- cmp $64,%rdx
- # comment:fp stack unchanged by jump
- # goto nocopy if !unsigned<
- jae ._nocopy
- # ctarget = out
- movq %rdi,128(%rsp)
- # out = &tmp
- leaq 192(%rsp),%rdi
- # i = bytes
- mov %rdx,%rcx
- # while (i) { *out++ = *m++; --i }
- rep movsb
- # out = &tmp
- leaq 192(%rsp),%rdi
- # m = &tmp
- leaq 192(%rsp),%rsi
- # comment:fp stack unchanged by fallthrough
-# nocopy:
-._nocopy:
- # out_backup = out
- movq %rdi,136(%rsp)
- # m_backup = m
- movq %rsi,144(%rsp)
- # bytes_backup = bytes
- movq %rdx,152(%rsp)
- # x1 = j0
- movq 56(%rsp),%rdi
- # x0 = x1
- mov %rdi,%rdx
- # (uint64) x1 >>= 32
- shr $32,%rdi
- # x3 = j2
- movq 64(%rsp),%rsi
- # x2 = x3
- mov %rsi,%rcx
- # (uint64) x3 >>= 32
- shr $32,%rsi
- # x5 = j4
- movq 72(%rsp),%r8
- # x4 = x5
- mov %r8,%r9
- # (uint64) x5 >>= 32
- shr $32,%r8
- # x5_stack = x5
- movq %r8,160(%rsp)
- # x7 = j6
- movq 80(%rsp),%r8
- # x6 = x7
- mov %r8,%rax
- # (uint64) x7 >>= 32
- shr $32,%r8
- # x9 = j8
- movq 88(%rsp),%r10
- # x8 = x9
- mov %r10,%r11
- # (uint64) x9 >>= 32
- shr $32,%r10
- # x11 = j10
- movq 96(%rsp),%r12
- # x10 = x11
- mov %r12,%r13
- # x10_stack = x10
- movq %r13,168(%rsp)
- # (uint64) x11 >>= 32
- shr $32,%r12
- # x13 = j12
- movq 104(%rsp),%r13
- # x12 = x13
- mov %r13,%r14
- # (uint64) x13 >>= 32
- shr $32,%r13
- # x15 = j14
- movq 112(%rsp),%r15
- # x14 = x15
- mov %r15,%rbx
- # (uint64) x15 >>= 32
- shr $32,%r15
- # x15_stack = x15
- movq %r15,176(%rsp)
- # i = 20
- mov $20,%r15
-# mainloop:
-._mainloop:
- # i_backup = i
- movq %r15,184(%rsp)
- # x5 = x5_stack
- movq 160(%rsp),%r15
- # a = x12 + x0
- lea (%r14,%rdx),%rbp
- # (uint32) a <<<= 7
- rol $7,%ebp
- # x4 ^= a
- xor %rbp,%r9
- # b = x1 + x5
- lea (%rdi,%r15),%rbp
- # (uint32) b <<<= 7
- rol $7,%ebp
- # x9 ^= b
- xor %rbp,%r10
- # a = x0 + x4
- lea (%rdx,%r9),%rbp
- # (uint32) a <<<= 9
- rol $9,%ebp
- # x8 ^= a
- xor %rbp,%r11
- # b = x5 + x9
- lea (%r15,%r10),%rbp
- # (uint32) b <<<= 9
- rol $9,%ebp
- # x13 ^= b
- xor %rbp,%r13
- # a = x4 + x8
- lea (%r9,%r11),%rbp
- # (uint32) a <<<= 13
- rol $13,%ebp
- # x12 ^= a
- xor %rbp,%r14
- # b = x9 + x13
- lea (%r10,%r13),%rbp
- # (uint32) b <<<= 13
- rol $13,%ebp
- # x1 ^= b
- xor %rbp,%rdi
- # a = x8 + x12
- lea (%r11,%r14),%rbp
- # (uint32) a <<<= 18
- rol $18,%ebp
- # x0 ^= a
- xor %rbp,%rdx
- # b = x13 + x1
- lea (%r13,%rdi),%rbp
- # (uint32) b <<<= 18
- rol $18,%ebp
- # x5 ^= b
- xor %rbp,%r15
- # x10 = x10_stack
- movq 168(%rsp),%rbp
- # x5_stack = x5
- movq %r15,160(%rsp)
- # c = x6 + x10
- lea (%rax,%rbp),%r15
- # (uint32) c <<<= 7
- rol $7,%r15d
- # x14 ^= c
- xor %r15,%rbx
- # c = x10 + x14
- lea (%rbp,%rbx),%r15
- # (uint32) c <<<= 9
- rol $9,%r15d
- # x2 ^= c
- xor %r15,%rcx
- # c = x14 + x2
- lea (%rbx,%rcx),%r15
- # (uint32) c <<<= 13
- rol $13,%r15d
- # x6 ^= c
- xor %r15,%rax
- # c = x2 + x6
- lea (%rcx,%rax),%r15
- # (uint32) c <<<= 18
- rol $18,%r15d
- # x10 ^= c
- xor %r15,%rbp
- # x15 = x15_stack
- movq 176(%rsp),%r15
- # x10_stack = x10
- movq %rbp,168(%rsp)
- # d = x11 + x15
- lea (%r12,%r15),%rbp
- # (uint32) d <<<= 7
- rol $7,%ebp
- # x3 ^= d
- xor %rbp,%rsi
- # d = x15 + x3
- lea (%r15,%rsi),%rbp
- # (uint32) d <<<= 9
- rol $9,%ebp
- # x7 ^= d
- xor %rbp,%r8
- # d = x3 + x7
- lea (%rsi,%r8),%rbp
- # (uint32) d <<<= 13
- rol $13,%ebp
- # x11 ^= d
- xor %rbp,%r12
- # d = x7 + x11
- lea (%r8,%r12),%rbp
- # (uint32) d <<<= 18
- rol $18,%ebp
- # x15 ^= d
- xor %rbp,%r15
- # x15_stack = x15
- movq %r15,176(%rsp)
- # x5 = x5_stack
- movq 160(%rsp),%r15
- # a = x3 + x0
- lea (%rsi,%rdx),%rbp
- # (uint32) a <<<= 7
- rol $7,%ebp
- # x1 ^= a
- xor %rbp,%rdi
- # b = x4 + x5
- lea (%r9,%r15),%rbp
- # (uint32) b <<<= 7
- rol $7,%ebp
- # x6 ^= b
- xor %rbp,%rax
- # a = x0 + x1
- lea (%rdx,%rdi),%rbp
- # (uint32) a <<<= 9
- rol $9,%ebp
- # x2 ^= a
- xor %rbp,%rcx
- # b = x5 + x6
- lea (%r15,%rax),%rbp
- # (uint32) b <<<= 9
- rol $9,%ebp
- # x7 ^= b
- xor %rbp,%r8
- # a = x1 + x2
- lea (%rdi,%rcx),%rbp
- # (uint32) a <<<= 13
- rol $13,%ebp
- # x3 ^= a
- xor %rbp,%rsi
- # b = x6 + x7
- lea (%rax,%r8),%rbp
- # (uint32) b <<<= 13
- rol $13,%ebp
- # x4 ^= b
- xor %rbp,%r9
- # a = x2 + x3
- lea (%rcx,%rsi),%rbp
- # (uint32) a <<<= 18
- rol $18,%ebp
- # x0 ^= a
- xor %rbp,%rdx
- # b = x7 + x4
- lea (%r8,%r9),%rbp
- # (uint32) b <<<= 18
- rol $18,%ebp
- # x5 ^= b
- xor %rbp,%r15
- # x10 = x10_stack
- movq 168(%rsp),%rbp
- # x5_stack = x5
- movq %r15,160(%rsp)
- # c = x9 + x10
- lea (%r10,%rbp),%r15
- # (uint32) c <<<= 7
- rol $7,%r15d
- # x11 ^= c
- xor %r15,%r12
- # c = x10 + x11
- lea (%rbp,%r12),%r15
- # (uint32) c <<<= 9
- rol $9,%r15d
- # x8 ^= c
- xor %r15,%r11
- # c = x11 + x8
- lea (%r12,%r11),%r15
- # (uint32) c <<<= 13
- rol $13,%r15d
- # x9 ^= c
- xor %r15,%r10
- # c = x8 + x9
- lea (%r11,%r10),%r15
- # (uint32) c <<<= 18
- rol $18,%r15d
- # x10 ^= c
- xor %r15,%rbp
- # x15 = x15_stack
- movq 176(%rsp),%r15
- # x10_stack = x10
- movq %rbp,168(%rsp)
- # d = x14 + x15
- lea (%rbx,%r15),%rbp
- # (uint32) d <<<= 7
- rol $7,%ebp
- # x12 ^= d
- xor %rbp,%r14
- # d = x15 + x12
- lea (%r15,%r14),%rbp
- # (uint32) d <<<= 9
- rol $9,%ebp
- # x13 ^= d
- xor %rbp,%r13
- # d = x12 + x13
- lea (%r14,%r13),%rbp
- # (uint32) d <<<= 13
- rol $13,%ebp
- # x14 ^= d
- xor %rbp,%rbx
- # d = x13 + x14
- lea (%r13,%rbx),%rbp
- # (uint32) d <<<= 18
- rol $18,%ebp
- # x15 ^= d
- xor %rbp,%r15
- # x15_stack = x15
- movq %r15,176(%rsp)
- # x5 = x5_stack
- movq 160(%rsp),%r15
- # a = x12 + x0
- lea (%r14,%rdx),%rbp
- # (uint32) a <<<= 7
- rol $7,%ebp
- # x4 ^= a
- xor %rbp,%r9
- # b = x1 + x5
- lea (%rdi,%r15),%rbp
- # (uint32) b <<<= 7
- rol $7,%ebp
- # x9 ^= b
- xor %rbp,%r10
- # a = x0 + x4
- lea (%rdx,%r9),%rbp
- # (uint32) a <<<= 9
- rol $9,%ebp
- # x8 ^= a
- xor %rbp,%r11
- # b = x5 + x9
- lea (%r15,%r10),%rbp
- # (uint32) b <<<= 9
- rol $9,%ebp
- # x13 ^= b
- xor %rbp,%r13
- # a = x4 + x8
- lea (%r9,%r11),%rbp
- # (uint32) a <<<= 13
- rol $13,%ebp
- # x12 ^= a
- xor %rbp,%r14
- # b = x9 + x13
- lea (%r10,%r13),%rbp
- # (uint32) b <<<= 13
- rol $13,%ebp
- # x1 ^= b
- xor %rbp,%rdi
- # a = x8 + x12
- lea (%r11,%r14),%rbp
- # (uint32) a <<<= 18
- rol $18,%ebp
- # x0 ^= a
- xor %rbp,%rdx
- # b = x13 + x1
- lea (%r13,%rdi),%rbp
- # (uint32) b <<<= 18
- rol $18,%ebp
- # x5 ^= b
- xor %rbp,%r15
- # x10 = x10_stack
- movq 168(%rsp),%rbp
- # x5_stack = x5
- movq %r15,160(%rsp)
- # c = x6 + x10
- lea (%rax,%rbp),%r15
- # (uint32) c <<<= 7
- rol $7,%r15d
- # x14 ^= c
- xor %r15,%rbx
- # c = x10 + x14
- lea (%rbp,%rbx),%r15
- # (uint32) c <<<= 9
- rol $9,%r15d
- # x2 ^= c
- xor %r15,%rcx
- # c = x14 + x2
- lea (%rbx,%rcx),%r15
- # (uint32) c <<<= 13
- rol $13,%r15d
- # x6 ^= c
- xor %r15,%rax
- # c = x2 + x6
- lea (%rcx,%rax),%r15
- # (uint32) c <<<= 18
- rol $18,%r15d
- # x10 ^= c
- xor %r15,%rbp
- # x15 = x15_stack
- movq 176(%rsp),%r15
- # x10_stack = x10
- movq %rbp,168(%rsp)
- # d = x11 + x15
- lea (%r12,%r15),%rbp
- # (uint32) d <<<= 7
- rol $7,%ebp
- # x3 ^= d
- xor %rbp,%rsi
- # d = x15 + x3
- lea (%r15,%rsi),%rbp
- # (uint32) d <<<= 9
- rol $9,%ebp
- # x7 ^= d
- xor %rbp,%r8
- # d = x3 + x7
- lea (%rsi,%r8),%rbp
- # (uint32) d <<<= 13
- rol $13,%ebp
- # x11 ^= d
- xor %rbp,%r12
- # d = x7 + x11
- lea (%r8,%r12),%rbp
- # (uint32) d <<<= 18
- rol $18,%ebp
- # x15 ^= d
- xor %rbp,%r15
- # x15_stack = x15
- movq %r15,176(%rsp)
- # x5 = x5_stack
- movq 160(%rsp),%r15
- # a = x3 + x0
- lea (%rsi,%rdx),%rbp
- # (uint32) a <<<= 7
- rol $7,%ebp
- # x1 ^= a
- xor %rbp,%rdi
- # b = x4 + x5
- lea (%r9,%r15),%rbp
- # (uint32) b <<<= 7
- rol $7,%ebp
- # x6 ^= b
- xor %rbp,%rax
- # a = x0 + x1
- lea (%rdx,%rdi),%rbp
- # (uint32) a <<<= 9
- rol $9,%ebp
- # x2 ^= a
- xor %rbp,%rcx
- # b = x5 + x6
- lea (%r15,%rax),%rbp
- # (uint32) b <<<= 9
- rol $9,%ebp
- # x7 ^= b
- xor %rbp,%r8
- # a = x1 + x2
- lea (%rdi,%rcx),%rbp
- # (uint32) a <<<= 13
- rol $13,%ebp
- # x3 ^= a
- xor %rbp,%rsi
- # b = x6 + x7
- lea (%rax,%r8),%rbp
- # (uint32) b <<<= 13
- rol $13,%ebp
- # x4 ^= b
- xor %rbp,%r9
- # a = x2 + x3
- lea (%rcx,%rsi),%rbp
- # (uint32) a <<<= 18
- rol $18,%ebp
- # x0 ^= a
- xor %rbp,%rdx
- # b = x7 + x4
- lea (%r8,%r9),%rbp
- # (uint32) b <<<= 18
- rol $18,%ebp
- # x5 ^= b
- xor %rbp,%r15
- # x10 = x10_stack
- movq 168(%rsp),%rbp
- # x5_stack = x5
- movq %r15,160(%rsp)
- # c = x9 + x10
- lea (%r10,%rbp),%r15
- # (uint32) c <<<= 7
- rol $7,%r15d
- # x11 ^= c
- xor %r15,%r12
- # c = x10 + x11
- lea (%rbp,%r12),%r15
- # (uint32) c <<<= 9
- rol $9,%r15d
- # x8 ^= c
- xor %r15,%r11
- # c = x11 + x8
- lea (%r12,%r11),%r15
- # (uint32) c <<<= 13
- rol $13,%r15d
- # x9 ^= c
- xor %r15,%r10
- # c = x8 + x9
- lea (%r11,%r10),%r15
- # (uint32) c <<<= 18
- rol $18,%r15d
- # x10 ^= c
- xor %r15,%rbp
- # x15 = x15_stack
- movq 176(%rsp),%r15
- # x10_stack = x10
- movq %rbp,168(%rsp)
- # d = x14 + x15
- lea (%rbx,%r15),%rbp
- # (uint32) d <<<= 7
- rol $7,%ebp
- # x12 ^= d
- xor %rbp,%r14
- # d = x15 + x12
- lea (%r15,%r14),%rbp
- # (uint32) d <<<= 9
- rol $9,%ebp
- # x13 ^= d
- xor %rbp,%r13
- # d = x12 + x13
- lea (%r14,%r13),%rbp
- # (uint32) d <<<= 13
- rol $13,%ebp
- # x14 ^= d
- xor %rbp,%rbx
- # d = x13 + x14
- lea (%r13,%rbx),%rbp
- # (uint32) d <<<= 18
- rol $18,%ebp
- # x15 ^= d
- xor %rbp,%r15
- # x15_stack = x15
- movq %r15,176(%rsp)
- # i = i_backup
- movq 184(%rsp),%r15
- # unsigned>? i -= 4
- sub $4,%r15
- # comment:fp stack unchanged by jump
- # goto mainloop if unsigned>
- ja ._mainloop
- # (uint32) x2 += j2
- addl 64(%rsp),%ecx
- # x3 <<= 32
- shl $32,%rsi
- # x3 += j2
- addq 64(%rsp),%rsi
- # (uint64) x3 >>= 32
- shr $32,%rsi
- # x3 <<= 32
- shl $32,%rsi
- # x2 += x3
- add %rsi,%rcx
- # (uint32) x6 += j6
- addl 80(%rsp),%eax
- # x7 <<= 32
- shl $32,%r8
- # x7 += j6
- addq 80(%rsp),%r8
- # (uint64) x7 >>= 32
- shr $32,%r8
- # x7 <<= 32
- shl $32,%r8
- # x6 += x7
- add %r8,%rax
- # (uint32) x8 += j8
- addl 88(%rsp),%r11d
- # x9 <<= 32
- shl $32,%r10
- # x9 += j8
- addq 88(%rsp),%r10
- # (uint64) x9 >>= 32
- shr $32,%r10
- # x9 <<= 32
- shl $32,%r10
- # x8 += x9
- add %r10,%r11
- # (uint32) x12 += j12
- addl 104(%rsp),%r14d
- # x13 <<= 32
- shl $32,%r13
- # x13 += j12
- addq 104(%rsp),%r13
- # (uint64) x13 >>= 32
- shr $32,%r13
- # x13 <<= 32
- shl $32,%r13
- # x12 += x13
- add %r13,%r14
- # (uint32) x0 += j0
- addl 56(%rsp),%edx
- # x1 <<= 32
- shl $32,%rdi
- # x1 += j0
- addq 56(%rsp),%rdi
- # (uint64) x1 >>= 32
- shr $32,%rdi
- # x1 <<= 32
- shl $32,%rdi
- # x0 += x1
- add %rdi,%rdx
- # x5 = x5_stack
- movq 160(%rsp),%rdi
- # (uint32) x4 += j4
- addl 72(%rsp),%r9d
- # x5 <<= 32
- shl $32,%rdi
- # x5 += j4
- addq 72(%rsp),%rdi
- # (uint64) x5 >>= 32
- shr $32,%rdi
- # x5 <<= 32
- shl $32,%rdi
- # x4 += x5
- add %rdi,%r9
- # x10 = x10_stack
- movq 168(%rsp),%r8
- # (uint32) x10 += j10
- addl 96(%rsp),%r8d
- # x11 <<= 32
- shl $32,%r12
- # x11 += j10
- addq 96(%rsp),%r12
- # (uint64) x11 >>= 32
- shr $32,%r12
- # x11 <<= 32
- shl $32,%r12
- # x10 += x11
- add %r12,%r8
- # x15 = x15_stack
- movq 176(%rsp),%rdi
- # (uint32) x14 += j14
- addl 112(%rsp),%ebx
- # x15 <<= 32
- shl $32,%rdi
- # x15 += j14
- addq 112(%rsp),%rdi
- # (uint64) x15 >>= 32
- shr $32,%rdi
- # x15 <<= 32
- shl $32,%rdi
- # x14 += x15
- add %rdi,%rbx
- # out = out_backup
- movq 136(%rsp),%rdi
- # m = m_backup
- movq 144(%rsp),%rsi
- # x0 ^= *(uint64 *) (m + 0)
- xorq 0(%rsi),%rdx
- # *(uint64 *) (out + 0) = x0
- movq %rdx,0(%rdi)
- # x2 ^= *(uint64 *) (m + 8)
- xorq 8(%rsi),%rcx
- # *(uint64 *) (out + 8) = x2
- movq %rcx,8(%rdi)
- # x4 ^= *(uint64 *) (m + 16)
- xorq 16(%rsi),%r9
- # *(uint64 *) (out + 16) = x4
- movq %r9,16(%rdi)
- # x6 ^= *(uint64 *) (m + 24)
- xorq 24(%rsi),%rax
- # *(uint64 *) (out + 24) = x6
- movq %rax,24(%rdi)
- # x8 ^= *(uint64 *) (m + 32)
- xorq 32(%rsi),%r11
- # *(uint64 *) (out + 32) = x8
- movq %r11,32(%rdi)
- # x10 ^= *(uint64 *) (m + 40)
- xorq 40(%rsi),%r8
- # *(uint64 *) (out + 40) = x10
- movq %r8,40(%rdi)
- # x12 ^= *(uint64 *) (m + 48)
- xorq 48(%rsi),%r14
- # *(uint64 *) (out + 48) = x12
- movq %r14,48(%rdi)
- # x14 ^= *(uint64 *) (m + 56)
- xorq 56(%rsi),%rbx
- # *(uint64 *) (out + 56) = x14
- movq %rbx,56(%rdi)
- # bytes = bytes_backup
- movq 152(%rsp),%rdx
- # in8 = j8
- movq 88(%rsp),%rcx
- # in8 += 1
- add $1,%rcx
- # j8 = in8
- movq %rcx,88(%rsp)
- # unsigned>? unsigned<? bytes - 64
- cmp $64,%rdx
- # comment:fp stack unchanged by jump
- # goto bytesatleast65 if unsigned>
- ja ._bytesatleast65
- # comment:fp stack unchanged by jump
- # goto bytesatleast64 if !unsigned<
- jae ._bytesatleast64
- # m = out
- mov %rdi,%rsi
- # out = ctarget
- movq 128(%rsp),%rdi
- # i = bytes
- mov %rdx,%rcx
- # while (i) { *out++ = *m++; --i }
- rep movsb
- # comment:fp stack unchanged by fallthrough
-# bytesatleast64:
-._bytesatleast64:
- # x = x_backup
- movq 120(%rsp),%rdi
- # in8 = j8
- movq 88(%rsp),%rsi
- # *(uint64 *) (x + 32) = in8
- movq %rsi,32(%rdi)
- # r11 = r11_stack
- movq 0(%rsp),%r11
- # r12 = r12_stack
- movq 8(%rsp),%r12
- # r13 = r13_stack
- movq 16(%rsp),%r13
- # r14 = r14_stack
- movq 24(%rsp),%r14
- # r15 = r15_stack
- movq 32(%rsp),%r15
- # rbx = rbx_stack
- movq 40(%rsp),%rbx
- # rbp = rbp_stack
- movq 48(%rsp),%rbp
- # comment:fp stack unchanged by fallthrough
-# done:
-._done:
- # leave
- add %r11,%rsp
- mov %rdi,%rax
- mov %rsi,%rdx
- ret
-# bytesatleast65:
-._bytesatleast65:
- # bytes -= 64
- sub $64,%rdx
- # out += 64
- add $64,%rdi
- # m += 64
- add $64,%rsi
- # comment:fp stack unchanged by jump
- # goto bytesatleast1
- jmp ._bytesatleast1
-ENDPROC(salsa20_encrypt_bytes)
-
-# enter salsa20_keysetup
-ENTRY(salsa20_keysetup)
- mov %rsp,%r11
- and $31,%r11
- add $256,%r11
- sub %r11,%rsp
- # k = arg2
- mov %rsi,%rsi
- # kbits = arg3
- mov %rdx,%rdx
- # x = arg1
- mov %rdi,%rdi
- # in0 = *(uint64 *) (k + 0)
- movq 0(%rsi),%r8
- # in2 = *(uint64 *) (k + 8)
- movq 8(%rsi),%r9
- # *(uint64 *) (x + 4) = in0
- movq %r8,4(%rdi)
- # *(uint64 *) (x + 12) = in2
- movq %r9,12(%rdi)
- # unsigned<? kbits - 256
- cmp $256,%rdx
- # comment:fp stack unchanged by jump
- # goto kbits128 if unsigned<
- jb ._kbits128
-# kbits256:
-._kbits256:
- # in10 = *(uint64 *) (k + 16)
- movq 16(%rsi),%rdx
- # in12 = *(uint64 *) (k + 24)
- movq 24(%rsi),%rsi
- # *(uint64 *) (x + 44) = in10
- movq %rdx,44(%rdi)
- # *(uint64 *) (x + 52) = in12
- movq %rsi,52(%rdi)
- # in0 = 1634760805
- mov $1634760805,%rsi
- # in4 = 857760878
- mov $857760878,%rdx
- # in10 = 2036477234
- mov $2036477234,%rcx
- # in14 = 1797285236
- mov $1797285236,%r8
- # *(uint32 *) (x + 0) = in0
- movl %esi,0(%rdi)
- # *(uint32 *) (x + 20) = in4
- movl %edx,20(%rdi)
- # *(uint32 *) (x + 40) = in10
- movl %ecx,40(%rdi)
- # *(uint32 *) (x + 60) = in14
- movl %r8d,60(%rdi)
- # comment:fp stack unchanged by jump
- # goto keysetupdone
- jmp ._keysetupdone
-# kbits128:
-._kbits128:
- # in10 = *(uint64 *) (k + 0)
- movq 0(%rsi),%rdx
- # in12 = *(uint64 *) (k + 8)
- movq 8(%rsi),%rsi
- # *(uint64 *) (x + 44) = in10
- movq %rdx,44(%rdi)
- # *(uint64 *) (x + 52) = in12
- movq %rsi,52(%rdi)
- # in0 = 1634760805
- mov $1634760805,%rsi
- # in4 = 824206446
- mov $824206446,%rdx
- # in10 = 2036477238
- mov $2036477238,%rcx
- # in14 = 1797285236
- mov $1797285236,%r8
- # *(uint32 *) (x + 0) = in0
- movl %esi,0(%rdi)
- # *(uint32 *) (x + 20) = in4
- movl %edx,20(%rdi)
- # *(uint32 *) (x + 40) = in10
- movl %ecx,40(%rdi)
- # *(uint32 *) (x + 60) = in14
- movl %r8d,60(%rdi)
-# keysetupdone:
-._keysetupdone:
- # leave
- add %r11,%rsp
- mov %rdi,%rax
- mov %rsi,%rdx
- ret
-ENDPROC(salsa20_keysetup)
-
-# enter salsa20_ivsetup
-ENTRY(salsa20_ivsetup)
- mov %rsp,%r11
- and $31,%r11
- add $256,%r11
- sub %r11,%rsp
- # iv = arg2
- mov %rsi,%rsi
- # x = arg1
- mov %rdi,%rdi
- # in6 = *(uint64 *) (iv + 0)
- movq 0(%rsi),%rsi
- # in8 = 0
- mov $0,%r8
- # *(uint64 *) (x + 24) = in6
- movq %rsi,24(%rdi)
- # *(uint64 *) (x + 32) = in8
- movq %r8,32(%rdi)
- # leave
- add %r11,%rsp
- mov %rdi,%rax
- mov %rsi,%rdx
- ret
-ENDPROC(salsa20_ivsetup)
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
deleted file mode 100644
index cb91a64a99e7..000000000000
--- a/arch/x86/crypto/salsa20_glue.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Glue code for optimized assembly version of Salsa20.
- *
- * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com>
- *
- * The assembly codes are public domain assembly codes written by Daniel. J.
- * Bernstein <djb@cr.yp.to>. The codes are modified to include indentation
- * and to remove extraneous comments and functions that are not needed.
- * - i586 version, renamed as salsa20-i586-asm_32.S
- * available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s>
- * - x86-64 version, renamed as salsa20-x86_64-asm_64.S
- * available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-
-#include <crypto/algapi.h>
-#include <linux/module.h>
-#include <linux/crypto.h>
-
-#define SALSA20_IV_SIZE 8U
-#define SALSA20_MIN_KEY_SIZE 16U
-#define SALSA20_MAX_KEY_SIZE 32U
-
-struct salsa20_ctx
-{
- u32 input[16];
-};
-
-asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k,
- u32 keysize, u32 ivsize);
-asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv);
-asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx,
- const u8 *src, u8 *dst, u32 bytes);
-
-static int setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keysize)
-{
- struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm);
- salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8);
- return 0;
-}
-
-static int encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct blkcipher_walk walk;
- struct crypto_blkcipher *tfm = desc->tfm;
- struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm);
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, 64);
-
- salsa20_ivsetup(ctx, walk.iv);
-
- while (walk.nbytes >= 64) {
- salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
- walk.dst.virt.addr,
- walk.nbytes - (walk.nbytes % 64));
- err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64);
- }
-
- if (walk.nbytes) {
- salsa20_encrypt_bytes(ctx, walk.src.virt.addr,
- walk.dst.virt.addr, walk.nbytes);
- err = blkcipher_walk_done(desc, &walk, 0);
- }
-
- return err;
-}
-
-static struct crypto_alg alg = {
- .cra_name = "salsa20",
- .cra_driver_name = "salsa20-asm",
- .cra_priority = 200,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_type = &crypto_blkcipher_type,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct salsa20_ctx),
- .cra_alignmask = 3,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .setkey = setkey,
- .encrypt = encrypt,
- .decrypt = encrypt,
- .min_keysize = SALSA20_MIN_KEY_SIZE,
- .max_keysize = SALSA20_MAX_KEY_SIZE,
- .ivsize = SALSA20_IV_SIZE,
- }
- }
-};
-
-static int __init init(void)
-{
- return crypto_register_alg(&alg);
-}
-
-static void __exit fini(void)
-{
- crypto_unregister_alg(&alg);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)");
-MODULE_ALIAS_CRYPTO("salsa20");
-MODULE_ALIAS_CRYPTO("salsa20-asm");