diff options
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r-- | arch/x86/crypto/Makefile | 4 | ||||
-rw-r--r-- | arch/x86/crypto/salsa20-i586-asm_32.S | 1114 | ||||
-rw-r--r-- | arch/x86/crypto/salsa20-x86_64-asm_64.S | 919 | ||||
-rw-r--r-- | arch/x86/crypto/salsa20_glue.c | 116 |
4 files changed, 0 insertions, 2153 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 5f07333bb224..9c903a420cda 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o -obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o @@ -24,7 +23,6 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o -obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o @@ -59,7 +57,6 @@ endif aes-i586-y := aes-i586-asm_32.o aes_glue.o twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o -salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o @@ -68,7 +65,6 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o -salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S deleted file mode 100644 index 329452b8f794..000000000000 --- a/arch/x86/crypto/salsa20-i586-asm_32.S +++ /dev/null @@ -1,1114 +0,0 @@ -# salsa20_pm.s version 20051229 -# D. J. Bernstein -# Public domain. - -#include <linux/linkage.h> - -.text - -# enter salsa20_encrypt_bytes -ENTRY(salsa20_encrypt_bytes) - mov %esp,%eax - and $31,%eax - add $256,%eax - sub %eax,%esp - # eax_stack = eax - movl %eax,80(%esp) - # ebx_stack = ebx - movl %ebx,84(%esp) - # esi_stack = esi - movl %esi,88(%esp) - # edi_stack = edi - movl %edi,92(%esp) - # ebp_stack = ebp - movl %ebp,96(%esp) - # x = arg1 - movl 4(%esp,%eax),%edx - # m = arg2 - movl 8(%esp,%eax),%esi - # out = arg3 - movl 12(%esp,%eax),%edi - # bytes = arg4 - movl 16(%esp,%eax),%ebx - # bytes -= 0 - sub $0,%ebx - # goto done if unsigned<= - jbe ._done -._start: - # in0 = *(uint32 *) (x + 0) - movl 0(%edx),%eax - # in1 = *(uint32 *) (x + 4) - movl 4(%edx),%ecx - # in2 = *(uint32 *) (x + 8) - movl 8(%edx),%ebp - # j0 = in0 - movl %eax,164(%esp) - # in3 = *(uint32 *) (x + 12) - movl 12(%edx),%eax - # j1 = in1 - movl %ecx,168(%esp) - # in4 = *(uint32 *) (x + 16) - movl 16(%edx),%ecx - # j2 = in2 - movl %ebp,172(%esp) - # in5 = *(uint32 *) (x + 20) - movl 20(%edx),%ebp - # j3 = in3 - movl %eax,176(%esp) - # in6 = *(uint32 *) (x + 24) - movl 24(%edx),%eax - # j4 = in4 - movl %ecx,180(%esp) - # in7 = *(uint32 *) (x + 28) - movl 28(%edx),%ecx - # j5 = in5 - movl %ebp,184(%esp) - # in8 = *(uint32 *) (x + 32) - movl 32(%edx),%ebp - # j6 = in6 - movl %eax,188(%esp) - # in9 = *(uint32 *) (x + 36) - movl 36(%edx),%eax - # j7 = in7 - movl %ecx,192(%esp) - # in10 = *(uint32 *) (x + 40) - movl 40(%edx),%ecx - # j8 = in8 - movl %ebp,196(%esp) - # in11 = *(uint32 *) (x + 44) - movl 44(%edx),%ebp - # j9 = in9 - movl %eax,200(%esp) - # in12 = *(uint32 *) (x + 48) - movl 48(%edx),%eax - # j10 = in10 - movl %ecx,204(%esp) - # in13 = *(uint32 *) (x + 52) - movl 52(%edx),%ecx - # j11 = in11 - movl %ebp,208(%esp) - # in14 = *(uint32 *) (x + 56) - movl 56(%edx),%ebp - # j12 = in12 - movl %eax,212(%esp) - # in15 = *(uint32 *) (x + 60) - movl 60(%edx),%eax - # j13 = in13 - movl %ecx,216(%esp) - # j14 = in14 - movl %ebp,220(%esp) - # j15 = in15 - movl %eax,224(%esp) - # x_backup = x - movl %edx,64(%esp) -._bytesatleast1: - # bytes - 64 - cmp $64,%ebx - # goto nocopy if unsigned>= - jae ._nocopy - # ctarget = out - movl %edi,228(%esp) - # out = &tmp - leal 0(%esp),%edi - # i = bytes - mov %ebx,%ecx - # while (i) { *out++ = *m++; --i } - rep movsb - # out = &tmp - leal 0(%esp),%edi - # m = &tmp - leal 0(%esp),%esi -._nocopy: - # out_backup = out - movl %edi,72(%esp) - # m_backup = m - movl %esi,68(%esp) - # bytes_backup = bytes - movl %ebx,76(%esp) - # in0 = j0 - movl 164(%esp),%eax - # in1 = j1 - movl 168(%esp),%ecx - # in2 = j2 - movl 172(%esp),%edx - # in3 = j3 - movl 176(%esp),%ebx - # x0 = in0 - movl %eax,100(%esp) - # x1 = in1 - movl %ecx,104(%esp) - # x2 = in2 - movl %edx,108(%esp) - # x3 = in3 - movl %ebx,112(%esp) - # in4 = j4 - movl 180(%esp),%eax - # in5 = j5 - movl 184(%esp),%ecx - # in6 = j6 - movl 188(%esp),%edx - # in7 = j7 - movl 192(%esp),%ebx - # x4 = in4 - movl %eax,116(%esp) - # x5 = in5 - movl %ecx,120(%esp) - # x6 = in6 - movl %edx,124(%esp) - # x7 = in7 - movl %ebx,128(%esp) - # in8 = j8 - movl 196(%esp),%eax - # in9 = j9 - movl 200(%esp),%ecx - # in10 = j10 - movl 204(%esp),%edx - # in11 = j11 - movl 208(%esp),%ebx - # x8 = in8 - movl %eax,132(%esp) - # x9 = in9 - movl %ecx,136(%esp) - # x10 = in10 - movl %edx,140(%esp) - # x11 = in11 - movl %ebx,144(%esp) - # in12 = j12 - movl 212(%esp),%eax - # in13 = j13 - movl 216(%esp),%ecx - # in14 = j14 - movl 220(%esp),%edx - # in15 = j15 - movl 224(%esp),%ebx - # x12 = in12 - movl %eax,148(%esp) - # x13 = in13 - movl %ecx,152(%esp) - # x14 = in14 - movl %edx,156(%esp) - # x15 = in15 - movl %ebx,160(%esp) - # i = 20 - mov $20,%ebp - # p = x0 - movl 100(%esp),%eax - # s = x5 - movl 120(%esp),%ecx - # t = x10 - movl 140(%esp),%edx - # w = x15 - movl 160(%esp),%ebx -._mainloop: - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x12 - addl 148(%esp),%eax - # x5 = s - movl %ecx,120(%esp) - # t += x6 - addl 124(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x1 - movl 104(%esp),%esi - # r += s - add %ecx,%esi - # v = x11 - movl 144(%esp),%edi - # v += w - add %ebx,%edi - # p <<<= 7 - rol $7,%eax - # p ^= x4 - xorl 116(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x14 - xorl 156(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x9 - xorl 136(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x3 - xorl 112(%esp),%edi - # x4 = p - movl %eax,116(%esp) - # x14 = t - movl %edx,156(%esp) - # p += x0 - addl 100(%esp),%eax - # x9 = r - movl %esi,136(%esp) - # t += x10 - addl 140(%esp),%edx - # x3 = v - movl %edi,112(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x8 - xorl 132(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x2 - xorl 108(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x13 - xorl 152(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w ^= x7 - xorl 128(%esp),%ebx - # x8 = p - movl %eax,132(%esp) - # x2 = t - movl %edx,108(%esp) - # p += x4 - addl 116(%esp),%eax - # x13 = s - movl %ecx,152(%esp) - # t += x14 - addl 156(%esp),%edx - # x7 = w - movl %ebx,128(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x12 - xorl 148(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x6 - xorl 124(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x1 - xorl 104(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x11 - xorl 144(%esp),%edi - # x12 = p - movl %eax,148(%esp) - # x6 = t - movl %edx,124(%esp) - # p += x8 - addl 132(%esp),%eax - # x1 = r - movl %esi,104(%esp) - # t += x2 - addl 108(%esp),%edx - # x11 = v - movl %edi,144(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x3 - addl 112(%esp),%eax - # p <<<= 7 - rol $7,%eax - # x5 = s - movl %ecx,120(%esp) - # t += x9 - addl 136(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x4 - movl 116(%esp),%esi - # r += s - add %ecx,%esi - # v = x14 - movl 156(%esp),%edi - # v += w - add %ebx,%edi - # p ^= x1 - xorl 104(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x11 - xorl 144(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x6 - xorl 124(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x12 - xorl 148(%esp),%edi - # x1 = p - movl %eax,104(%esp) - # x11 = t - movl %edx,144(%esp) - # p += x0 - addl 100(%esp),%eax - # x6 = r - movl %esi,124(%esp) - # t += x10 - addl 140(%esp),%edx - # x12 = v - movl %edi,148(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x2 - xorl 108(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x8 - xorl 132(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x7 - xorl 128(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w ^= x13 - xorl 152(%esp),%ebx - # x2 = p - movl %eax,108(%esp) - # x8 = t - movl %edx,132(%esp) - # p += x1 - addl 104(%esp),%eax - # x7 = s - movl %ecx,128(%esp) - # t += x11 - addl 144(%esp),%edx - # x13 = w - movl %ebx,152(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x3 - xorl 112(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x9 - xorl 136(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x4 - xorl 116(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x14 - xorl 156(%esp),%edi - # x3 = p - movl %eax,112(%esp) - # x9 = t - movl %edx,136(%esp) - # p += x2 - addl 108(%esp),%eax - # x4 = r - movl %esi,116(%esp) - # t += x8 - addl 132(%esp),%edx - # x14 = v - movl %edi,156(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x12 - addl 148(%esp),%eax - # x5 = s - movl %ecx,120(%esp) - # t += x6 - addl 124(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x1 - movl 104(%esp),%esi - # r += s - add %ecx,%esi - # v = x11 - movl 144(%esp),%edi - # v += w - add %ebx,%edi - # p <<<= 7 - rol $7,%eax - # p ^= x4 - xorl 116(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x14 - xorl 156(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x9 - xorl 136(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x3 - xorl 112(%esp),%edi - # x4 = p - movl %eax,116(%esp) - # x14 = t - movl %edx,156(%esp) - # p += x0 - addl 100(%esp),%eax - # x9 = r - movl %esi,136(%esp) - # t += x10 - addl 140(%esp),%edx - # x3 = v - movl %edi,112(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x8 - xorl 132(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x2 - xorl 108(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x13 - xorl 152(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w ^= x7 - xorl 128(%esp),%ebx - # x8 = p - movl %eax,132(%esp) - # x2 = t - movl %edx,108(%esp) - # p += x4 - addl 116(%esp),%eax - # x13 = s - movl %ecx,152(%esp) - # t += x14 - addl 156(%esp),%edx - # x7 = w - movl %ebx,128(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x12 - xorl 148(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x6 - xorl 124(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x1 - xorl 104(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x11 - xorl 144(%esp),%edi - # x12 = p - movl %eax,148(%esp) - # x6 = t - movl %edx,124(%esp) - # p += x8 - addl 132(%esp),%eax - # x1 = r - movl %esi,104(%esp) - # t += x2 - addl 108(%esp),%edx - # x11 = v - movl %edi,144(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x3 - addl 112(%esp),%eax - # p <<<= 7 - rol $7,%eax - # x5 = s - movl %ecx,120(%esp) - # t += x9 - addl 136(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x4 - movl 116(%esp),%esi - # r += s - add %ecx,%esi - # v = x14 - movl 156(%esp),%edi - # v += w - add %ebx,%edi - # p ^= x1 - xorl 104(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x11 - xorl 144(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x6 - xorl 124(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x12 - xorl 148(%esp),%edi - # x1 = p - movl %eax,104(%esp) - # x11 = t - movl %edx,144(%esp) - # p += x0 - addl 100(%esp),%eax - # x6 = r - movl %esi,124(%esp) - # t += x10 - addl 140(%esp),%edx - # x12 = v - movl %edi,148(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x2 - xorl 108(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x8 - xorl 132(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x7 - xorl 128(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w ^= x13 - xorl 152(%esp),%ebx - # x2 = p - movl %eax,108(%esp) - # x8 = t - movl %edx,132(%esp) - # p += x1 - addl 104(%esp),%eax - # x7 = s - movl %ecx,128(%esp) - # t += x11 - addl 144(%esp),%edx - # x13 = w - movl %ebx,152(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x3 - xorl 112(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x9 - xorl 136(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x4 - xorl 116(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x14 - xorl 156(%esp),%edi - # x3 = p - movl %eax,112(%esp) - # x9 = t - movl %edx,136(%esp) - # p += x2 - addl 108(%esp),%eax - # x4 = r - movl %esi,116(%esp) - # t += x8 - addl 132(%esp),%edx - # x14 = v - movl %edi,156(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # i -= 4 - sub $4,%ebp - # goto mainloop if unsigned > - ja ._mainloop - # x0 = p - movl %eax,100(%esp) - # x5 = s - movl %ecx,120(%esp) - # x10 = t - movl %edx,140(%esp) - # x15 = w - movl %ebx,160(%esp) - # out = out_backup - movl 72(%esp),%edi - # m = m_backup - movl 68(%esp),%esi - # in0 = x0 - movl 100(%esp),%eax - # in1 = x1 - movl 104(%esp),%ecx - # in0 += j0 - addl 164(%esp),%eax - # in1 += j1 - addl 168(%esp),%ecx - # in0 ^= *(uint32 *) (m + 0) - xorl 0(%esi),%eax - # in1 ^= *(uint32 *) (m + 4) - xorl 4(%esi),%ecx - # *(uint32 *) (out + 0) = in0 - movl %eax,0(%edi) - # *(uint32 *) (out + 4) = in1 - movl %ecx,4(%edi) - # in2 = x2 - movl 108(%esp),%eax - # in3 = x3 - movl 112(%esp),%ecx - # in2 += j2 - addl 172(%esp),%eax - # in3 += j3 - addl 176(%esp),%ecx - # in2 ^= *(uint32 *) (m + 8) - xorl 8(%esi),%eax - # in3 ^= *(uint32 *) (m + 12) - xorl 12(%esi),%ecx - # *(uint32 *) (out + 8) = in2 - movl %eax,8(%edi) - # *(uint32 *) (out + 12) = in3 - movl %ecx,12(%edi) - # in4 = x4 - movl 116(%esp),%eax - # in5 = x5 - movl 120(%esp),%ecx - # in4 += j4 - addl 180(%esp),%eax - # in5 += j5 - addl 184(%esp),%ecx - # in4 ^= *(uint32 *) (m + 16) - xorl 16(%esi),%eax - # in5 ^= *(uint32 *) (m + 20) - xorl 20(%esi),%ecx - # *(uint32 *) (out + 16) = in4 - movl %eax,16(%edi) - # *(uint32 *) (out + 20) = in5 - movl %ecx,20(%edi) - # in6 = x6 - movl 124(%esp),%eax - # in7 = x7 - movl 128(%esp),%ecx - # in6 += j6 - addl 188(%esp),%eax - # in7 += j7 - addl 192(%esp),%ecx - # in6 ^= *(uint32 *) (m + 24) - xorl 24(%esi),%eax - # in7 ^= *(uint32 *) (m + 28) - xorl 28(%esi),%ecx - # *(uint32 *) (out + 24) = in6 - movl %eax,24(%edi) - # *(uint32 *) (out + 28) = in7 - movl %ecx,28(%edi) - # in8 = x8 - movl 132(%esp),%eax - # in9 = x9 - movl 136(%esp),%ecx - # in8 += j8 - addl 196(%esp),%eax - # in9 += j9 - addl 200(%esp),%ecx - # in8 ^= *(uint32 *) (m + 32) - xorl 32(%esi),%eax - # in9 ^= *(uint32 *) (m + 36) - xorl 36(%esi),%ecx - # *(uint32 *) (out + 32) = in8 - movl %eax,32(%edi) - # *(uint32 *) (out + 36) = in9 - movl %ecx,36(%edi) - # in10 = x10 - movl 140(%esp),%eax - # in11 = x11 - movl 144(%esp),%ecx - # in10 += j10 - addl 204(%esp),%eax - # in11 += j11 - addl 208(%esp),%ecx - # in10 ^= *(uint32 *) (m + 40) - xorl 40(%esi),%eax - # in11 ^= *(uint32 *) (m + 44) - xorl 44(%esi),%ecx - # *(uint32 *) (out + 40) = in10 - movl %eax,40(%edi) - # *(uint32 *) (out + 44) = in11 - movl %ecx,44(%edi) - # in12 = x12 - movl 148(%esp),%eax - # in13 = x13 - movl 152(%esp),%ecx - # in12 += j12 - addl 212(%esp),%eax - # in13 += j13 - addl 216(%esp),%ecx - # in12 ^= *(uint32 *) (m + 48) - xorl 48(%esi),%eax - # in13 ^= *(uint32 *) (m + 52) - xorl 52(%esi),%ecx - # *(uint32 *) (out + 48) = in12 - movl %eax,48(%edi) - # *(uint32 *) (out + 52) = in13 - movl %ecx,52(%edi) - # in14 = x14 - movl 156(%esp),%eax - # in15 = x15 - movl 160(%esp),%ecx - # in14 += j14 - addl 220(%esp),%eax - # in15 += j15 - addl 224(%esp),%ecx - # in14 ^= *(uint32 *) (m + 56) - xorl 56(%esi),%eax - # in15 ^= *(uint32 *) (m + 60) - xorl 60(%esi),%ecx - # *(uint32 *) (out + 56) = in14 - movl %eax,56(%edi) - # *(uint32 *) (out + 60) = in15 - movl %ecx,60(%edi) - # bytes = bytes_backup - movl 76(%esp),%ebx - # in8 = j8 - movl 196(%esp),%eax - # in9 = j9 - movl 200(%esp),%ecx - # in8 += 1 - add $1,%eax - # in9 += 0 + carry - adc $0,%ecx - # j8 = in8 - movl %eax,196(%esp) - # j9 = in9 - movl %ecx,200(%esp) - # bytes - 64 - cmp $64,%ebx - # goto bytesatleast65 if unsigned> - ja ._bytesatleast65 - # goto bytesatleast64 if unsigned>= - jae ._bytesatleast64 - # m = out - mov %edi,%esi - # out = ctarget - movl 228(%esp),%edi - # i = bytes - mov %ebx,%ecx - # while (i) { *out++ = *m++; --i } - rep movsb -._bytesatleast64: - # x = x_backup - movl 64(%esp),%eax - # in8 = j8 - movl 196(%esp),%ecx - # in9 = j9 - movl 200(%esp),%edx - # *(uint32 *) (x + 32) = in8 - movl %ecx,32(%eax) - # *(uint32 *) (x + 36) = in9 - movl %edx,36(%eax) -._done: - # eax = eax_stack - movl 80(%esp),%eax - # ebx = ebx_stack - movl 84(%esp),%ebx - # esi = esi_stack - movl 88(%esp),%esi - # edi = edi_stack - movl 92(%esp),%edi - # ebp = ebp_stack - movl 96(%esp),%ebp - # leave - add %eax,%esp - ret -._bytesatleast65: - # bytes -= 64 - sub $64,%ebx - # out += 64 - add $64,%edi - # m += 64 - add $64,%esi - # goto bytesatleast1 - jmp ._bytesatleast1 -ENDPROC(salsa20_encrypt_bytes) - -# enter salsa20_keysetup -ENTRY(salsa20_keysetup) - mov %esp,%eax - and $31,%eax - add $256,%eax - sub %eax,%esp - # eax_stack = eax - movl %eax,64(%esp) - # ebx_stack = ebx - movl %ebx,68(%esp) - # esi_stack = esi - movl %esi,72(%esp) - # edi_stack = edi - movl %edi,76(%esp) - # ebp_stack = ebp - movl %ebp,80(%esp) - # k = arg2 - movl 8(%esp,%eax),%ecx - # kbits = arg3 - movl 12(%esp,%eax),%edx - # x = arg1 - movl 4(%esp,%eax),%eax - # in1 = *(uint32 *) (k + 0) - movl 0(%ecx),%ebx - # in2 = *(uint32 *) (k + 4) - movl 4(%ecx),%esi - # in3 = *(uint32 *) (k + 8) - movl 8(%ecx),%edi - # in4 = *(uint32 *) (k + 12) - movl 12(%ecx),%ebp - # *(uint32 *) (x + 4) = in1 - movl %ebx,4(%eax) - # *(uint32 *) (x + 8) = in2 - movl %esi,8(%eax) - # *(uint32 *) (x + 12) = in3 - movl %edi,12(%eax) - # *(uint32 *) (x + 16) = in4 - movl %ebp,16(%eax) - # kbits - 256 - cmp $256,%edx - # goto kbits128 if unsigned< - jb ._kbits128 -._kbits256: - # in11 = *(uint32 *) (k + 16) - movl 16(%ecx),%edx - # in12 = *(uint32 *) (k + 20) - movl 20(%ecx),%ebx - # in13 = *(uint32 *) (k + 24) - movl 24(%ecx),%esi - # in14 = *(uint32 *) (k + 28) - movl 28(%ecx),%ecx - # *(uint32 *) (x + 44) = in11 - movl %edx,44(%eax) - # *(uint32 *) (x + 48) = in12 - movl %ebx,48(%eax) - # *(uint32 *) (x + 52) = in13 - movl %esi,52(%eax) - # *(uint32 *) (x + 56) = in14 - movl %ecx,56(%eax) - # in0 = 1634760805 - mov $1634760805,%ecx - # in5 = 857760878 - mov $857760878,%edx - # in10 = 2036477234 - mov $2036477234,%ebx - # in15 = 1797285236 - mov $1797285236,%esi - # *(uint32 *) (x + 0) = in0 - movl %ecx,0(%eax) - # *(uint32 *) (x + 20) = in5 - movl %edx,20(%eax) - # *(uint32 *) (x + 40) = in10 - movl %ebx,40(%eax) - # *(uint32 *) (x + 60) = in15 - movl %esi,60(%eax) - # goto keysetupdone - jmp ._keysetupdone -._kbits128: - # in11 = *(uint32 *) (k + 0) - movl 0(%ecx),%edx - # in12 = *(uint32 *) (k + 4) - movl 4(%ecx),%ebx - # in13 = *(uint32 *) (k + 8) - movl 8(%ecx),%esi - # in14 = *(uint32 *) (k + 12) - movl 12(%ecx),%ecx - # *(uint32 *) (x + 44) = in11 - movl %edx,44(%eax) - # *(uint32 *) (x + 48) = in12 - movl %ebx,48(%eax) - # *(uint32 *) (x + 52) = in13 - movl %esi,52(%eax) - # *(uint32 *) (x + 56) = in14 - movl %ecx,56(%eax) - # in0 = 1634760805 - mov $1634760805,%ecx - # in5 = 824206446 - mov $824206446,%edx - # in10 = 2036477238 - mov $2036477238,%ebx - # in15 = 1797285236 - mov $1797285236,%esi - # *(uint32 *) (x + 0) = in0 - movl %ecx,0(%eax) - # *(uint32 *) (x + 20) = in5 - movl %edx,20(%eax) - # *(uint32 *) (x + 40) = in10 - movl %ebx,40(%eax) - # *(uint32 *) (x + 60) = in15 - movl %esi,60(%eax) -._keysetupdone: - # eax = eax_stack - movl 64(%esp),%eax - # ebx = ebx_stack - movl 68(%esp),%ebx - # esi = esi_stack - movl 72(%esp),%esi - # edi = edi_stack - movl 76(%esp),%edi - # ebp = ebp_stack - movl 80(%esp),%ebp - # leave - add %eax,%esp - ret -ENDPROC(salsa20_keysetup) - -# enter salsa20_ivsetup -ENTRY(salsa20_ivsetup) - mov %esp,%eax - and $31,%eax - add $256,%eax - sub %eax,%esp - # eax_stack = eax - movl %eax,64(%esp) - # ebx_stack = ebx - movl %ebx,68(%esp) - # esi_stack = esi - movl %esi,72(%esp) - # edi_stack = edi - movl %edi,76(%esp) - # ebp_stack = ebp - movl %ebp,80(%esp) - # iv = arg2 - movl 8(%esp,%eax),%ecx - # x = arg1 - movl 4(%esp,%eax),%eax - # in6 = *(uint32 *) (iv + 0) - movl 0(%ecx),%edx - # in7 = *(uint32 *) (iv + 4) - movl 4(%ecx),%ecx - # in8 = 0 - mov $0,%ebx - # in9 = 0 - mov $0,%esi - # *(uint32 *) (x + 24) = in6 - movl %edx,24(%eax) - # *(uint32 *) (x + 28) = in7 - movl %ecx,28(%eax) - # *(uint32 *) (x + 32) = in8 - movl %ebx,32(%eax) - # *(uint32 *) (x + 36) = in9 - movl %esi,36(%eax) - # eax = eax_stack - movl 64(%esp),%eax - # ebx = ebx_stack - movl 68(%esp),%ebx - # esi = esi_stack - movl 72(%esp),%esi - # edi = edi_stack - movl 76(%esp),%edi - # ebp = ebp_stack - movl 80(%esp),%ebp - # leave - add %eax,%esp - ret -ENDPROC(salsa20_ivsetup) diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S deleted file mode 100644 index 10db30d58006..000000000000 --- a/arch/x86/crypto/salsa20-x86_64-asm_64.S +++ /dev/null @@ -1,919 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <linux/linkage.h> - -# enter salsa20_encrypt_bytes -ENTRY(salsa20_encrypt_bytes) - mov %rsp,%r11 - and $31,%r11 - add $256,%r11 - sub %r11,%rsp - # x = arg1 - mov %rdi,%r8 - # m = arg2 - mov %rsi,%rsi - # out = arg3 - mov %rdx,%rdi - # bytes = arg4 - mov %rcx,%rdx - # unsigned>? bytes - 0 - cmp $0,%rdx - # comment:fp stack unchanged by jump - # goto done if !unsigned> - jbe ._done - # comment:fp stack unchanged by fallthrough -# start: -._start: - # r11_stack = r11 - movq %r11,0(%rsp) - # r12_stack = r12 - movq %r12,8(%rsp) - # r13_stack = r13 - movq %r13,16(%rsp) - # r14_stack = r14 - movq %r14,24(%rsp) - # r15_stack = r15 - movq %r15,32(%rsp) - # rbx_stack = rbx - movq %rbx,40(%rsp) - # rbp_stack = rbp - movq %rbp,48(%rsp) - # in0 = *(uint64 *) (x + 0) - movq 0(%r8),%rcx - # in2 = *(uint64 *) (x + 8) - movq 8(%r8),%r9 - # in4 = *(uint64 *) (x + 16) - movq 16(%r8),%rax - # in6 = *(uint64 *) (x + 24) - movq 24(%r8),%r10 - # in8 = *(uint64 *) (x + 32) - movq 32(%r8),%r11 - # in10 = *(uint64 *) (x + 40) - movq 40(%r8),%r12 - # in12 = *(uint64 *) (x + 48) - movq 48(%r8),%r13 - # in14 = *(uint64 *) (x + 56) - movq 56(%r8),%r14 - # j0 = in0 - movq %rcx,56(%rsp) - # j2 = in2 - movq %r9,64(%rsp) - # j4 = in4 - movq %rax,72(%rsp) - # j6 = in6 - movq %r10,80(%rsp) - # j8 = in8 - movq %r11,88(%rsp) - # j10 = in10 - movq %r12,96(%rsp) - # j12 = in12 - movq %r13,104(%rsp) - # j14 = in14 - movq %r14,112(%rsp) - # x_backup = x - movq %r8,120(%rsp) -# bytesatleast1: -._bytesatleast1: - # unsigned<? bytes - 64 - cmp $64,%rdx - # comment:fp stack unchanged by jump - # goto nocopy if !unsigned< - jae ._nocopy - # ctarget = out - movq %rdi,128(%rsp) - # out = &tmp - leaq 192(%rsp),%rdi - # i = bytes - mov %rdx,%rcx - # while (i) { *out++ = *m++; --i } - rep movsb - # out = &tmp - leaq 192(%rsp),%rdi - # m = &tmp - leaq 192(%rsp),%rsi - # comment:fp stack unchanged by fallthrough -# nocopy: -._nocopy: - # out_backup = out - movq %rdi,136(%rsp) - # m_backup = m - movq %rsi,144(%rsp) - # bytes_backup = bytes - movq %rdx,152(%rsp) - # x1 = j0 - movq 56(%rsp),%rdi - # x0 = x1 - mov %rdi,%rdx - # (uint64) x1 >>= 32 - shr $32,%rdi - # x3 = j2 - movq 64(%rsp),%rsi - # x2 = x3 - mov %rsi,%rcx - # (uint64) x3 >>= 32 - shr $32,%rsi - # x5 = j4 - movq 72(%rsp),%r8 - # x4 = x5 - mov %r8,%r9 - # (uint64) x5 >>= 32 - shr $32,%r8 - # x5_stack = x5 - movq %r8,160(%rsp) - # x7 = j6 - movq 80(%rsp),%r8 - # x6 = x7 - mov %r8,%rax - # (uint64) x7 >>= 32 - shr $32,%r8 - # x9 = j8 - movq 88(%rsp),%r10 - # x8 = x9 - mov %r10,%r11 - # (uint64) x9 >>= 32 - shr $32,%r10 - # x11 = j10 - movq 96(%rsp),%r12 - # x10 = x11 - mov %r12,%r13 - # x10_stack = x10 - movq %r13,168(%rsp) - # (uint64) x11 >>= 32 - shr $32,%r12 - # x13 = j12 - movq 104(%rsp),%r13 - # x12 = x13 - mov %r13,%r14 - # (uint64) x13 >>= 32 - shr $32,%r13 - # x15 = j14 - movq 112(%rsp),%r15 - # x14 = x15 - mov %r15,%rbx - # (uint64) x15 >>= 32 - shr $32,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # i = 20 - mov $20,%r15 -# mainloop: -._mainloop: - # i_backup = i - movq %r15,184(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x12 + x0 - lea (%r14,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x4 ^= a - xor %rbp,%r9 - # b = x1 + x5 - lea (%rdi,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x9 ^= b - xor %rbp,%r10 - # a = x0 + x4 - lea (%rdx,%r9),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x8 ^= a - xor %rbp,%r11 - # b = x5 + x9 - lea (%r15,%r10),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x13 ^= b - xor %rbp,%r13 - # a = x4 + x8 - lea (%r9,%r11),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x12 ^= a - xor %rbp,%r14 - # b = x9 + x13 - lea (%r10,%r13),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x1 ^= b - xor %rbp,%rdi - # a = x8 + x12 - lea (%r11,%r14),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x13 + x1 - lea (%r13,%rdi),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x6 + x10 - lea (%rax,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x14 ^= c - xor %r15,%rbx - # c = x10 + x14 - lea (%rbp,%rbx),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x2 ^= c - xor %r15,%rcx - # c = x14 + x2 - lea (%rbx,%rcx),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x6 ^= c - xor %r15,%rax - # c = x2 + x6 - lea (%rcx,%rax),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x11 + x15 - lea (%r12,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x3 ^= d - xor %rbp,%rsi - # d = x15 + x3 - lea (%r15,%rsi),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x7 ^= d - xor %rbp,%r8 - # d = x3 + x7 - lea (%rsi,%r8),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x11 ^= d - xor %rbp,%r12 - # d = x7 + x11 - lea (%r8,%r12),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x3 + x0 - lea (%rsi,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x1 ^= a - xor %rbp,%rdi - # b = x4 + x5 - lea (%r9,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x6 ^= b - xor %rbp,%rax - # a = x0 + x1 - lea (%rdx,%rdi),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x2 ^= a - xor %rbp,%rcx - # b = x5 + x6 - lea (%r15,%rax),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x7 ^= b - xor %rbp,%r8 - # a = x1 + x2 - lea (%rdi,%rcx),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x3 ^= a - xor %rbp,%rsi - # b = x6 + x7 - lea (%rax,%r8),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x4 ^= b - xor %rbp,%r9 - # a = x2 + x3 - lea (%rcx,%rsi),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x7 + x4 - lea (%r8,%r9),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x9 + x10 - lea (%r10,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x11 ^= c - xor %r15,%r12 - # c = x10 + x11 - lea (%rbp,%r12),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x8 ^= c - xor %r15,%r11 - # c = x11 + x8 - lea (%r12,%r11),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x9 ^= c - xor %r15,%r10 - # c = x8 + x9 - lea (%r11,%r10),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x14 + x15 - lea (%rbx,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x12 ^= d - xor %rbp,%r14 - # d = x15 + x12 - lea (%r15,%r14),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x13 ^= d - xor %rbp,%r13 - # d = x12 + x13 - lea (%r14,%r13),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x14 ^= d - xor %rbp,%rbx - # d = x13 + x14 - lea (%r13,%rbx),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x12 + x0 - lea (%r14,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x4 ^= a - xor %rbp,%r9 - # b = x1 + x5 - lea (%rdi,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x9 ^= b - xor %rbp,%r10 - # a = x0 + x4 - lea (%rdx,%r9),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x8 ^= a - xor %rbp,%r11 - # b = x5 + x9 - lea (%r15,%r10),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x13 ^= b - xor %rbp,%r13 - # a = x4 + x8 - lea (%r9,%r11),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x12 ^= a - xor %rbp,%r14 - # b = x9 + x13 - lea (%r10,%r13),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x1 ^= b - xor %rbp,%rdi - # a = x8 + x12 - lea (%r11,%r14),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x13 + x1 - lea (%r13,%rdi),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x6 + x10 - lea (%rax,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x14 ^= c - xor %r15,%rbx - # c = x10 + x14 - lea (%rbp,%rbx),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x2 ^= c - xor %r15,%rcx - # c = x14 + x2 - lea (%rbx,%rcx),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x6 ^= c - xor %r15,%rax - # c = x2 + x6 - lea (%rcx,%rax),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x11 + x15 - lea (%r12,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x3 ^= d - xor %rbp,%rsi - # d = x15 + x3 - lea (%r15,%rsi),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x7 ^= d - xor %rbp,%r8 - # d = x3 + x7 - lea (%rsi,%r8),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x11 ^= d - xor %rbp,%r12 - # d = x7 + x11 - lea (%r8,%r12),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x3 + x0 - lea (%rsi,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x1 ^= a - xor %rbp,%rdi - # b = x4 + x5 - lea (%r9,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x6 ^= b - xor %rbp,%rax - # a = x0 + x1 - lea (%rdx,%rdi),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x2 ^= a - xor %rbp,%rcx - # b = x5 + x6 - lea (%r15,%rax),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x7 ^= b - xor %rbp,%r8 - # a = x1 + x2 - lea (%rdi,%rcx),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x3 ^= a - xor %rbp,%rsi - # b = x6 + x7 - lea (%rax,%r8),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x4 ^= b - xor %rbp,%r9 - # a = x2 + x3 - lea (%rcx,%rsi),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x7 + x4 - lea (%r8,%r9),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x9 + x10 - lea (%r10,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x11 ^= c - xor %r15,%r12 - # c = x10 + x11 - lea (%rbp,%r12),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x8 ^= c - xor %r15,%r11 - # c = x11 + x8 - lea (%r12,%r11),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x9 ^= c - xor %r15,%r10 - # c = x8 + x9 - lea (%r11,%r10),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x14 + x15 - lea (%rbx,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x12 ^= d - xor %rbp,%r14 - # d = x15 + x12 - lea (%r15,%r14),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x13 ^= d - xor %rbp,%r13 - # d = x12 + x13 - lea (%r14,%r13),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x14 ^= d - xor %rbp,%rbx - # d = x13 + x14 - lea (%r13,%rbx),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # i = i_backup - movq 184(%rsp),%r15 - # unsigned>? i -= 4 - sub $4,%r15 - # comment:fp stack unchanged by jump - # goto mainloop if unsigned> - ja ._mainloop - # (uint32) x2 += j2 - addl 64(%rsp),%ecx - # x3 <<= 32 - shl $32,%rsi - # x3 += j2 - addq 64(%rsp),%rsi - # (uint64) x3 >>= 32 - shr $32,%rsi - # x3 <<= 32 - shl $32,%rsi - # x2 += x3 - add %rsi,%rcx - # (uint32) x6 += j6 - addl 80(%rsp),%eax - # x7 <<= 32 - shl $32,%r8 - # x7 += j6 - addq 80(%rsp),%r8 - # (uint64) x7 >>= 32 - shr $32,%r8 - # x7 <<= 32 - shl $32,%r8 - # x6 += x7 - add %r8,%rax - # (uint32) x8 += j8 - addl 88(%rsp),%r11d - # x9 <<= 32 - shl $32,%r10 - # x9 += j8 - addq 88(%rsp),%r10 - # (uint64) x9 >>= 32 - shr $32,%r10 - # x9 <<= 32 - shl $32,%r10 - # x8 += x9 - add %r10,%r11 - # (uint32) x12 += j12 - addl 104(%rsp),%r14d - # x13 <<= 32 - shl $32,%r13 - # x13 += j12 - addq 104(%rsp),%r13 - # (uint64) x13 >>= 32 - shr $32,%r13 - # x13 <<= 32 - shl $32,%r13 - # x12 += x13 - add %r13,%r14 - # (uint32) x0 += j0 - addl 56(%rsp),%edx - # x1 <<= 32 - shl $32,%rdi - # x1 += j0 - addq 56(%rsp),%rdi - # (uint64) x1 >>= 32 - shr $32,%rdi - # x1 <<= 32 - shl $32,%rdi - # x0 += x1 - add %rdi,%rdx - # x5 = x5_stack - movq 160(%rsp),%rdi - # (uint32) x4 += j4 - addl 72(%rsp),%r9d - # x5 <<= 32 - shl $32,%rdi - # x5 += j4 - addq 72(%rsp),%rdi - # (uint64) x5 >>= 32 - shr $32,%rdi - # x5 <<= 32 - shl $32,%rdi - # x4 += x5 - add %rdi,%r9 - # x10 = x10_stack - movq 168(%rsp),%r8 - # (uint32) x10 += j10 - addl 96(%rsp),%r8d - # x11 <<= 32 - shl $32,%r12 - # x11 += j10 - addq 96(%rsp),%r12 - # (uint64) x11 >>= 32 - shr $32,%r12 - # x11 <<= 32 - shl $32,%r12 - # x10 += x11 - add %r12,%r8 - # x15 = x15_stack - movq 176(%rsp),%rdi - # (uint32) x14 += j14 - addl 112(%rsp),%ebx - # x15 <<= 32 - shl $32,%rdi - # x15 += j14 - addq 112(%rsp),%rdi - # (uint64) x15 >>= 32 - shr $32,%rdi - # x15 <<= 32 - shl $32,%rdi - # x14 += x15 - add %rdi,%rbx - # out = out_backup - movq 136(%rsp),%rdi - # m = m_backup - movq 144(%rsp),%rsi - # x0 ^= *(uint64 *) (m + 0) - xorq 0(%rsi),%rdx - # *(uint64 *) (out + 0) = x0 - movq %rdx,0(%rdi) - # x2 ^= *(uint64 *) (m + 8) - xorq 8(%rsi),%rcx - # *(uint64 *) (out + 8) = x2 - movq %rcx,8(%rdi) - # x4 ^= *(uint64 *) (m + 16) - xorq 16(%rsi),%r9 - # *(uint64 *) (out + 16) = x4 - movq %r9,16(%rdi) - # x6 ^= *(uint64 *) (m + 24) - xorq 24(%rsi),%rax - # *(uint64 *) (out + 24) = x6 - movq %rax,24(%rdi) - # x8 ^= *(uint64 *) (m + 32) - xorq 32(%rsi),%r11 - # *(uint64 *) (out + 32) = x8 - movq %r11,32(%rdi) - # x10 ^= *(uint64 *) (m + 40) - xorq 40(%rsi),%r8 - # *(uint64 *) (out + 40) = x10 - movq %r8,40(%rdi) - # x12 ^= *(uint64 *) (m + 48) - xorq 48(%rsi),%r14 - # *(uint64 *) (out + 48) = x12 - movq %r14,48(%rdi) - # x14 ^= *(uint64 *) (m + 56) - xorq 56(%rsi),%rbx - # *(uint64 *) (out + 56) = x14 - movq %rbx,56(%rdi) - # bytes = bytes_backup - movq 152(%rsp),%rdx - # in8 = j8 - movq 88(%rsp),%rcx - # in8 += 1 - add $1,%rcx - # j8 = in8 - movq %rcx,88(%rsp) - # unsigned>? unsigned<? bytes - 64 - cmp $64,%rdx - # comment:fp stack unchanged by jump - # goto bytesatleast65 if unsigned> - ja ._bytesatleast65 - # comment:fp stack unchanged by jump - # goto bytesatleast64 if !unsigned< - jae ._bytesatleast64 - # m = out - mov %rdi,%rsi - # out = ctarget - movq 128(%rsp),%rdi - # i = bytes - mov %rdx,%rcx - # while (i) { *out++ = *m++; --i } - rep movsb - # comment:fp stack unchanged by fallthrough -# bytesatleast64: -._bytesatleast64: - # x = x_backup - movq 120(%rsp),%rdi - # in8 = j8 - movq 88(%rsp),%rsi - # *(uint64 *) (x + 32) = in8 - movq %rsi,32(%rdi) - # r11 = r11_stack - movq 0(%rsp),%r11 - # r12 = r12_stack - movq 8(%rsp),%r12 - # r13 = r13_stack - movq 16(%rsp),%r13 - # r14 = r14_stack - movq 24(%rsp),%r14 - # r15 = r15_stack - movq 32(%rsp),%r15 - # rbx = rbx_stack - movq 40(%rsp),%rbx - # rbp = rbp_stack - movq 48(%rsp),%rbp - # comment:fp stack unchanged by fallthrough -# done: -._done: - # leave - add %r11,%rsp - mov %rdi,%rax - mov %rsi,%rdx - ret -# bytesatleast65: -._bytesatleast65: - # bytes -= 64 - sub $64,%rdx - # out += 64 - add $64,%rdi - # m += 64 - add $64,%rsi - # comment:fp stack unchanged by jump - # goto bytesatleast1 - jmp ._bytesatleast1 -ENDPROC(salsa20_encrypt_bytes) - -# enter salsa20_keysetup -ENTRY(salsa20_keysetup) - mov %rsp,%r11 - and $31,%r11 - add $256,%r11 - sub %r11,%rsp - # k = arg2 - mov %rsi,%rsi - # kbits = arg3 - mov %rdx,%rdx - # x = arg1 - mov %rdi,%rdi - # in0 = *(uint64 *) (k + 0) - movq 0(%rsi),%r8 - # in2 = *(uint64 *) (k + 8) - movq 8(%rsi),%r9 - # *(uint64 *) (x + 4) = in0 - movq %r8,4(%rdi) - # *(uint64 *) (x + 12) = in2 - movq %r9,12(%rdi) - # unsigned<? kbits - 256 - cmp $256,%rdx - # comment:fp stack unchanged by jump - # goto kbits128 if unsigned< - jb ._kbits128 -# kbits256: -._kbits256: - # in10 = *(uint64 *) (k + 16) - movq 16(%rsi),%rdx - # in12 = *(uint64 *) (k + 24) - movq 24(%rsi),%rsi - # *(uint64 *) (x + 44) = in10 - movq %rdx,44(%rdi) - # *(uint64 *) (x + 52) = in12 - movq %rsi,52(%rdi) - # in0 = 1634760805 - mov $1634760805,%rsi - # in4 = 857760878 - mov $857760878,%rdx - # in10 = 2036477234 - mov $2036477234,%rcx - # in14 = 1797285236 - mov $1797285236,%r8 - # *(uint32 *) (x + 0) = in0 - movl %esi,0(%rdi) - # *(uint32 *) (x + 20) = in4 - movl %edx,20(%rdi) - # *(uint32 *) (x + 40) = in10 - movl %ecx,40(%rdi) - # *(uint32 *) (x + 60) = in14 - movl %r8d,60(%rdi) - # comment:fp stack unchanged by jump - # goto keysetupdone - jmp ._keysetupdone -# kbits128: -._kbits128: - # in10 = *(uint64 *) (k + 0) - movq 0(%rsi),%rdx - # in12 = *(uint64 *) (k + 8) - movq 8(%rsi),%rsi - # *(uint64 *) (x + 44) = in10 - movq %rdx,44(%rdi) - # *(uint64 *) (x + 52) = in12 - movq %rsi,52(%rdi) - # in0 = 1634760805 - mov $1634760805,%rsi - # in4 = 824206446 - mov $824206446,%rdx - # in10 = 2036477238 - mov $2036477238,%rcx - # in14 = 1797285236 - mov $1797285236,%r8 - # *(uint32 *) (x + 0) = in0 - movl %esi,0(%rdi) - # *(uint32 *) (x + 20) = in4 - movl %edx,20(%rdi) - # *(uint32 *) (x + 40) = in10 - movl %ecx,40(%rdi) - # *(uint32 *) (x + 60) = in14 - movl %r8d,60(%rdi) -# keysetupdone: -._keysetupdone: - # leave - add %r11,%rsp - mov %rdi,%rax - mov %rsi,%rdx - ret -ENDPROC(salsa20_keysetup) - -# enter salsa20_ivsetup -ENTRY(salsa20_ivsetup) - mov %rsp,%r11 - and $31,%r11 - add $256,%r11 - sub %r11,%rsp - # iv = arg2 - mov %rsi,%rsi - # x = arg1 - mov %rdi,%rdi - # in6 = *(uint64 *) (iv + 0) - movq 0(%rsi),%rsi - # in8 = 0 - mov $0,%r8 - # *(uint64 *) (x + 24) = in6 - movq %rsi,24(%rdi) - # *(uint64 *) (x + 32) = in8 - movq %r8,32(%rdi) - # leave - add %r11,%rsp - mov %rdi,%rax - mov %rsi,%rdx - ret -ENDPROC(salsa20_ivsetup) diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c deleted file mode 100644 index cb91a64a99e7..000000000000 --- a/arch/x86/crypto/salsa20_glue.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Glue code for optimized assembly version of Salsa20. - * - * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com> - * - * The assembly codes are public domain assembly codes written by Daniel. J. - * Bernstein <djb@cr.yp.to>. The codes are modified to include indentation - * and to remove extraneous comments and functions that are not needed. - * - i586 version, renamed as salsa20-i586-asm_32.S - * available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s> - * - x86-64 version, renamed as salsa20-x86_64-asm_64.S - * available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#include <crypto/algapi.h> -#include <linux/module.h> -#include <linux/crypto.h> - -#define SALSA20_IV_SIZE 8U -#define SALSA20_MIN_KEY_SIZE 16U -#define SALSA20_MAX_KEY_SIZE 32U - -struct salsa20_ctx -{ - u32 input[16]; -}; - -asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k, - u32 keysize, u32 ivsize); -asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv); -asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx, - const u8 *src, u8 *dst, u32 bytes); - -static int setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keysize) -{ - struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm); - salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8); - return 0; -} - -static int encrypt(struct blkcipher_desc *desc, - struct scatterlist *dst, struct scatterlist *src, - unsigned int nbytes) -{ - struct blkcipher_walk walk; - struct crypto_blkcipher *tfm = desc->tfm; - struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm); - int err; - - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, 64); - - salsa20_ivsetup(ctx, walk.iv); - - while (walk.nbytes >= 64) { - salsa20_encrypt_bytes(ctx, walk.src.virt.addr, - walk.dst.virt.addr, - walk.nbytes - (walk.nbytes % 64)); - err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64); - } - - if (walk.nbytes) { - salsa20_encrypt_bytes(ctx, walk.src.virt.addr, - walk.dst.virt.addr, walk.nbytes); - err = blkcipher_walk_done(desc, &walk, 0); - } - - return err; -} - -static struct crypto_alg alg = { - .cra_name = "salsa20", - .cra_driver_name = "salsa20-asm", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_type = &crypto_blkcipher_type, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct salsa20_ctx), - .cra_alignmask = 3, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .setkey = setkey, - .encrypt = encrypt, - .decrypt = encrypt, - .min_keysize = SALSA20_MIN_KEY_SIZE, - .max_keysize = SALSA20_MAX_KEY_SIZE, - .ivsize = SALSA20_IV_SIZE, - } - } -}; - -static int __init init(void) -{ - return crypto_register_alg(&alg); -} - -static void __exit fini(void) -{ - crypto_unregister_alg(&alg); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)"); -MODULE_ALIAS_CRYPTO("salsa20"); -MODULE_ALIAS_CRYPTO("salsa20-asm"); |