diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-13 21:25:58 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-13 21:25:58 +0300 |
commit | 27d189c02ba25851973c8582e419c0bded9f7e5b (patch) | |
tree | be142d664bc4e3cec7ab2878a243343f46e897ee | |
parent | a1703154200c390ab03c10224c586e815d3e31e8 (diff) | |
parent | 55db8387a5e8d07407f0b7c6b2526417a2bc6243 (diff) | |
download | linux-27d189c02ba25851973c8582e419c0bded9f7e5b.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (46 commits)
hwrng: via_rng - Fix memory scribbling on some CPUs
crypto: padlock - Move padlock.h into include/crypto
hwrng: via_rng - Fix asm constraints
crypto: n2 - use __devexit not __exit in n2_unregister_algs
crypto: mark crypto workqueues CPU_INTENSIVE
crypto: mv_cesa - dont return PTR_ERR() of wrong pointer
crypto: ripemd - Set module author and update email address
crypto: omap-sham - backlog handling fix
crypto: gf128mul - Remove experimental tag
crypto: af_alg - fix af_alg memory_allocated data type
crypto: aesni-intel - Fixed build with binutils 2.16
crypto: af_alg - Make sure sk_security is initialized on accept()ed sockets
net: Add missing lockdep class names for af_alg
include: Install linux/if_alg.h for user-space crypto API
crypto: omap-aes - checkpatch --file warning fixes
crypto: omap-aes - initialize aes module once per request
crypto: omap-aes - unnecessary code removed
crypto: omap-aes - error handling implementation improved
crypto: omap-aes - redundant locking is removed
crypto: omap-aes - DMA initialization fixes for OMAP off mode
...
37 files changed, 4779 insertions, 464 deletions
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index ff16756a51c1..8fe2a4966b7a 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -9,6 +9,20 @@ * Vinodh Gopal <vinodh.gopal@intel.com> * Kahraman Akdemir * + * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD + * interface for 64-bit kernels. + * Authors: Erdinc Ozturk (erdinc.ozturk@intel.com) + * Aidan O'Mahony (aidan.o.mahony@intel.com) + * Adrian Hoban <adrian.hoban@intel.com> + * James Guilford (james.guilford@intel.com) + * Gabriele Paoloni <gabriele.paoloni@intel.com> + * Tadeusz Struk (tadeusz.struk@intel.com) + * Wajdi Feghali (wajdi.k.feghali@intel.com) + * Copyright (c) 2010, Intel Corporation. + * + * Ported x86_64 version to x86: + * Author: Mathias Krause <minipli@googlemail.com> + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -18,8 +32,62 @@ #include <linux/linkage.h> #include <asm/inst.h> +#ifdef __x86_64__ +.data +POLY: .octa 0xC2000000000000000000000000000001 +TWOONE: .octa 0x00000001000000000000000000000001 + +# order of these constants should not change. +# more specifically, ALL_F should follow SHIFT_MASK, +# and ZERO should follow ALL_F + +SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F +MASK1: .octa 0x0000000000000000ffffffffffffffff +MASK2: .octa 0xffffffffffffffff0000000000000000 +SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 +ALL_F: .octa 0xffffffffffffffffffffffffffffffff +ZERO: .octa 0x00000000000000000000000000000000 +ONE: .octa 0x00000000000000000000000000000001 +F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0 +dec: .octa 0x1 +enc: .octa 0x2 + + .text + +#define STACK_OFFSET 8*3 +#define HashKey 16*0 // store HashKey <<1 mod poly here +#define HashKey_2 16*1 // store HashKey^2 <<1 mod poly here +#define HashKey_3 16*2 // store HashKey^3 <<1 mod poly here +#define HashKey_4 16*3 // store HashKey^4 <<1 mod poly here +#define HashKey_k 16*4 // store XOR of High 64 bits and Low 64 + // bits of HashKey <<1 mod poly here + //(for Karatsuba purposes) +#define HashKey_2_k 16*5 // store XOR of High 64 bits and Low 64 + // bits of HashKey^2 <<1 mod poly here + // (for Karatsuba purposes) +#define HashKey_3_k 16*6 // store XOR of High 64 bits and Low 64 + // bits of HashKey^3 <<1 mod poly here + // (for Karatsuba purposes) +#define HashKey_4_k 16*7 // store XOR of High 64 bits and Low 64 + // bits of HashKey^4 <<1 mod poly here + // (for Karatsuba purposes) +#define VARIABLE_OFFSET 16*8 + +#define arg1 rdi +#define arg2 rsi +#define arg3 rdx +#define arg4 rcx +#define arg5 r8 +#define arg6 r9 +#define arg7 STACK_OFFSET+8(%r14) +#define arg8 STACK_OFFSET+16(%r14) +#define arg9 STACK_OFFSET+24(%r14) +#define arg10 STACK_OFFSET+32(%r14) +#endif + + #define STATE1 %xmm0 #define STATE2 %xmm4 #define STATE3 %xmm5 @@ -32,12 +100,16 @@ #define IN IN1 #define KEY %xmm2 #define IV %xmm3 + #define BSWAP_MASK %xmm10 #define CTR %xmm11 #define INC %xmm12 +#ifdef __x86_64__ +#define AREG %rax #define KEYP %rdi #define OUTP %rsi +#define UKEYP OUTP #define INP %rdx #define LEN %rcx #define IVP %r8 @@ -46,6 +118,1588 @@ #define TKEYP T1 #define T2 %r11 #define TCTR_LOW T2 +#else +#define AREG %eax +#define KEYP %edi +#define OUTP AREG +#define UKEYP OUTP +#define INP %edx +#define LEN %esi +#define IVP %ebp +#define KLEN %ebx +#define T1 %ecx +#define TKEYP T1 +#endif + + +#ifdef __x86_64__ +/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) +* +* +* Input: A and B (128-bits each, bit-reflected) +* Output: C = A*B*x mod poly, (i.e. >>1 ) +* To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input +* GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. +* +*/ +.macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5 + movdqa \GH, \TMP1 + pshufd $78, \GH, \TMP2 + pshufd $78, \HK, \TMP3 + pxor \GH, \TMP2 # TMP2 = a1+a0 + pxor \HK, \TMP3 # TMP3 = b1+b0 + PCLMULQDQ 0x11, \HK, \TMP1 # TMP1 = a1*b1 + PCLMULQDQ 0x00, \HK, \GH # GH = a0*b0 + PCLMULQDQ 0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0) + pxor \GH, \TMP2 + pxor \TMP1, \TMP2 # TMP2 = (a0*b0)+(a1*b0) + movdqa \TMP2, \TMP3 + pslldq $8, \TMP3 # left shift TMP3 2 DWs + psrldq $8, \TMP2 # right shift TMP2 2 DWs + pxor \TMP3, \GH + pxor \TMP2, \TMP1 # TMP2:GH holds the result of GH*HK + + # first phase of the reduction + + movdqa \GH, \TMP2 + movdqa \GH, \TMP3 + movdqa \GH, \TMP4 # copy GH into TMP2,TMP3 and TMP4 + # in in order to perform + # independent shifts + pslld $31, \TMP2 # packed right shift <<31 + pslld $30, \TMP3 # packed right shift <<30 + pslld $25, \TMP4 # packed right shift <<25 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + movdqa \TMP2, \TMP5 + psrldq $4, \TMP5 # right shift TMP5 1 DW + pslldq $12, \TMP2 # left shift TMP2 3 DWs + pxor \TMP2, \GH + + # second phase of the reduction + + movdqa \GH,\TMP2 # copy GH into TMP2,TMP3 and TMP4 + # in in order to perform + # independent shifts + movdqa \GH,\TMP3 + movdqa \GH,\TMP4 + psrld $1,\TMP2 # packed left shift >>1 + psrld $2,\TMP3 # packed left shift >>2 + psrld $7,\TMP4 # packed left shift >>7 + pxor \TMP3,\TMP2 # xor the shifted versions + pxor \TMP4,\TMP2 + pxor \TMP5, \TMP2 + pxor \TMP2, \GH + pxor \TMP1, \GH # result is in TMP1 +.endm + +/* +* if a = number of total plaintext bytes +* b = floor(a/16) +* num_initial_blocks = b mod 4 +* encrypt the initial num_initial_blocks blocks and apply ghash on +* the ciphertext +* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers +* are clobbered +* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified +*/ + + +.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ +XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation + mov arg7, %r10 # %r10 = AAD + mov arg8, %r12 # %r12 = aadLen + mov %r12, %r11 + pxor %xmm\i, %xmm\i +_get_AAD_loop\num_initial_blocks\operation: + movd (%r10), \TMP1 + pslldq $12, \TMP1 + psrldq $4, %xmm\i + pxor \TMP1, %xmm\i + add $4, %r10 + sub $4, %r12 + jne _get_AAD_loop\num_initial_blocks\operation + cmp $16, %r11 + je _get_AAD_loop2_done\num_initial_blocks\operation + mov $16, %r12 +_get_AAD_loop2\num_initial_blocks\operation: + psrldq $4, %xmm\i + sub $4, %r12 + cmp %r11, %r12 + jne _get_AAD_loop2\num_initial_blocks\operation +_get_AAD_loop2_done\num_initial_blocks\operation: + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data + + xor %r11, %r11 # initialise the data pointer offset as zero + + # start AES for num_initial_blocks blocks + + mov %arg5, %rax # %rax = *Y0 + movdqu (%rax), \XMM0 # XMM0 = Y0 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM0 + +.if (\i == 5) || (\i == 6) || (\i == 7) +.irpc index, \i_seq + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, %xmm\index + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap + +.endr +.irpc index, \i_seq + pxor 16*0(%arg1), %xmm\index +.endr +.irpc index, \i_seq + movaps 0x10(%rdi), \TMP1 + AESENC \TMP1, %xmm\index # Round 1 +.endr +.irpc index, \i_seq + movaps 0x20(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x30(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x40(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x50(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x60(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x70(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x80(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x90(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0xa0(%arg1), \TMP1 + AESENCLAST \TMP1, %xmm\index # Round 10 +.endr +.irpc index, \i_seq + movdqu (%arg3 , %r11, 1), \TMP1 + pxor \TMP1, %xmm\index + movdqu %xmm\index, (%arg2 , %r11, 1) + # write back plaintext/ciphertext for num_initial_blocks + add $16, %r11 + + movdqa \TMP1, %xmm\index + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\index + + # prepare plaintext/ciphertext for GHASH computation +.endr +.endif + GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + # apply GHASH on num_initial_blocks blocks + +.if \i == 5 + pxor %xmm5, %xmm6 + GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm6, %xmm7 + GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.elseif \i == 6 + pxor %xmm6, %xmm7 + GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.elseif \i == 7 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.endif + cmp $64, %r13 + jl _initial_blocks_done\num_initial_blocks\operation + # no need for precomputed values +/* +* +* Precomputations for HashKey parallel with encryption of first 4 blocks. +* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i +*/ + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM1 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap + + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM2 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap + + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM3 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap + + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM4 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap + + pxor 16*0(%arg1), \XMM1 + pxor 16*0(%arg1), \XMM2 + pxor 16*0(%arg1), \XMM3 + pxor 16*0(%arg1), \XMM4 + movdqa \TMP3, \TMP5 + pshufd $78, \TMP3, \TMP1 + pxor \TMP3, \TMP1 + movdqa \TMP1, HashKey_k(%rsp) + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^2<<1 (mod poly) + movdqa \TMP5, HashKey_2(%rsp) +# HashKey_2 = HashKey^2<<1 (mod poly) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_2_k(%rsp) +.irpc index, 1234 # do 4 rounds + movaps 0x10*\index(%arg1), \TMP1 + AESENC \TMP1, \XMM1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 +.endr + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_3(%rsp) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_3_k(%rsp) +.irpc index, 56789 # do next 5 rounds + movaps 0x10*\index(%arg1), \TMP1 + AESENC \TMP1, \XMM1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 +.endr + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_4(%rsp) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_4_k(%rsp) + movaps 0xa0(%arg1), \TMP2 + AESENCLAST \TMP2, \XMM1 + AESENCLAST \TMP2, \XMM2 + AESENCLAST \TMP2, \XMM3 + AESENCLAST \TMP2, \XMM4 + movdqu 16*0(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM1 + movdqu \XMM1, 16*0(%arg2 , %r11 , 1) + movdqa \TMP1, \XMM1 + movdqu 16*1(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM2 + movdqu \XMM2, 16*1(%arg2 , %r11 , 1) + movdqa \TMP1, \XMM2 + movdqu 16*2(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM3 + movdqu \XMM3, 16*2(%arg2 , %r11 , 1) + movdqa \TMP1, \XMM3 + movdqu 16*3(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM4 + movdqu \XMM4, 16*3(%arg2 , %r11 , 1) + movdqa \TMP1, \XMM4 + add $64, %r11 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap + pxor \XMMDst, \XMM1 +# combine GHASHed value with the corresponding ciphertext + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap + +_initial_blocks_done\num_initial_blocks\operation: + +.endm + + +/* +* if a = number of total plaintext bytes +* b = floor(a/16) +* num_initial_blocks = b mod 4 +* encrypt the initial num_initial_blocks blocks and apply ghash on +* the ciphertext +* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers +* are clobbered +* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified +*/ + + +.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ +XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation + mov arg7, %r10 # %r10 = AAD + mov arg8, %r12 # %r12 = aadLen + mov %r12, %r11 + pxor %xmm\i, %xmm\i +_get_AAD_loop\num_initial_blocks\operation: + movd (%r10), \TMP1 + pslldq $12, \TMP1 + psrldq $4, %xmm\i + pxor \TMP1, %xmm\i + add $4, %r10 + sub $4, %r12 + jne _get_AAD_loop\num_initial_blocks\operation + cmp $16, %r11 + je _get_AAD_loop2_done\num_initial_blocks\operation + mov $16, %r12 +_get_AAD_loop2\num_initial_blocks\operation: + psrldq $4, %xmm\i + sub $4, %r12 + cmp %r11, %r12 + jne _get_AAD_loop2\num_initial_blocks\operation +_get_AAD_loop2_done\num_initial_blocks\operation: + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data + + xor %r11, %r11 # initialise the data pointer offset as zero + + # start AES for num_initial_blocks blocks + + mov %arg5, %rax # %rax = *Y0 + movdqu (%rax), \XMM0 # XMM0 = Y0 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM0 + +.if (\i == 5) || (\i == 6) || (\i == 7) +.irpc index, \i_seq + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, %xmm\index + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap + +.endr +.irpc index, \i_seq + pxor 16*0(%arg1), %xmm\index +.endr +.irpc index, \i_seq + movaps 0x10(%rdi), \TMP1 + AESENC \TMP1, %xmm\index # Round 1 +.endr +.irpc index, \i_seq + movaps 0x20(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x30(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x40(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x50(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x60(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x70(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x80(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x90(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0xa0(%arg1), \TMP1 + AESENCLAST \TMP1, %xmm\index # Round 10 +.endr +.irpc index, \i_seq + movdqu (%arg3 , %r11, 1), \TMP1 + pxor \TMP1, %xmm\index + movdqu %xmm\index, (%arg2 , %r11, 1) + # write back plaintext/ciphertext for num_initial_blocks + add $16, %r11 + + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\index + + # prepare plaintext/ciphertext for GHASH computation +.endr +.endif + GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + # apply GHASH on num_initial_blocks blocks + +.if \i == 5 + pxor %xmm5, %xmm6 + GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm6, %xmm7 + GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.elseif \i == 6 + pxor %xmm6, %xmm7 + GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.elseif \i == 7 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.endif + cmp $64, %r13 + jl _initial_blocks_done\num_initial_blocks\operation + # no need for precomputed values +/* +* +* Precomputations for HashKey parallel with encryption of first 4 blocks. +* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i +*/ + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM1 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap + + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM2 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap + + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM3 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap + + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM4 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap + + pxor 16*0(%arg1), \XMM1 + pxor 16*0(%arg1), \XMM2 + pxor 16*0(%arg1), \XMM3 + pxor 16*0(%arg1), \XMM4 + movdqa \TMP3, \TMP5 + pshufd $78, \TMP3, \TMP1 + pxor \TMP3, \TMP1 + movdqa \TMP1, HashKey_k(%rsp) + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^2<<1 (mod poly) + movdqa \TMP5, HashKey_2(%rsp) +# HashKey_2 = HashKey^2<<1 (mod poly) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_2_k(%rsp) +.irpc index, 1234 # do 4 rounds + movaps 0x10*\index(%arg1), \TMP1 + AESENC \TMP1, \XMM1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 +.endr + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_3(%rsp) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_3_k(%rsp) +.irpc index, 56789 # do next 5 rounds + movaps 0x10*\index(%arg1), \TMP1 + AESENC \TMP1, \XMM1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 +.endr + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_4(%rsp) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_4_k(%rsp) + movaps 0xa0(%arg1), \TMP2 + AESENCLAST \TMP2, \XMM1 + AESENCLAST \TMP2, \XMM2 + AESENCLAST \TMP2, \XMM3 + AESENCLAST \TMP2, \XMM4 + movdqu 16*0(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM1 + movdqu 16*1(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM2 + movdqu 16*2(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM3 + movdqu 16*3(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM4 + movdqu \XMM1, 16*0(%arg2 , %r11 , 1) + movdqu \XMM2, 16*1(%arg2 , %r11 , 1) + movdqu \XMM3, 16*2(%arg2 , %r11 , 1) + movdqu \XMM4, 16*3(%arg2 , %r11 , 1) + + add $64, %r11 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap + pxor \XMMDst, \XMM1 +# combine GHASHed value with the corresponding ciphertext + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap + +_initial_blocks_done\num_initial_blocks\operation: + +.endm + +/* +* encrypt 4 blocks at a time +* ghash the 4 previously encrypted ciphertext blocks +* arg1, %arg2, %arg3 are used as pointers only, not modified +* %r11 is the data offset value +*/ +.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \ +TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation + + movdqa \XMM1, \XMM5 + movdqa \XMM2, \XMM6 + movdqa \XMM3, \XMM7 + movdqa \XMM4, \XMM8 + + movdqa SHUF_MASK(%rip), %xmm15 + # multiply TMP5 * HashKey using karatsuba + + movdqa \XMM5, \TMP4 + pshufd $78, \XMM5, \TMP6 + pxor \XMM5, \TMP6 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa HashKey_4(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 + movdqa \XMM0, \XMM1 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM2 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM3 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM4 + PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap + PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 + PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap + + pxor (%arg1), \XMM1 + pxor (%arg1), \XMM2 + pxor (%arg1), \XMM3 + pxor (%arg1), \XMM4 + movdqa HashKey_4_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) + movaps 0x10(%arg1), \TMP1 + AESENC \TMP1, \XMM1 # Round 1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 + movaps 0x20(%arg1), \TMP1 + AESENC \TMP1, \XMM1 # Round 2 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 + movdqa \XMM6, \TMP1 + pshufd $78, \XMM6, \TMP2 + pxor \XMM6, \TMP2 + movdqa HashKey_3(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 + movaps 0x30(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 3 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0 + movaps 0x40(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 4 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + movdqa HashKey_3_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x50(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 5 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM6, \XMM5 + pxor \TMP2, \TMP6 + movdqa \XMM7, \TMP1 + pshufd $78, \XMM7, \TMP2 + pxor \XMM7, \TMP2 + movdqa HashKey_2(%rsp ), \TMP5 + + # Multiply TMP5 * HashKey using karatsuba + + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x60(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 6 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0 + movaps 0x70(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 7 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + movdqa HashKey_2_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x80(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 8 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM7, \XMM5 + pxor \TMP2, \TMP6 + + # Multiply XMM8 * HashKey + # XMM8 and TMP5 hold the values for the two operands + + movdqa \XMM8, \TMP1 + pshufd $78, \XMM8, \TMP2 + pxor \XMM8, \TMP2 + movdqa HashKey(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x90(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 9 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 + movaps 0xa0(%arg1), \TMP3 + AESENCLAST \TMP3, \XMM1 # Round 10 + AESENCLAST \TMP3, \XMM2 + AESENCLAST \TMP3, \XMM3 + AESENCLAST \TMP3, \XMM4 + movdqa HashKey_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movdqu (%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK + movdqu 16(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK + movdqu 32(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK + movdqu 48(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK + movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer + movdqu \XMM2, 16(%arg2,%r11,1) # Write to the ciphertext buffer + movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer + movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer + PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap + + pxor \TMP4, \TMP1 + pxor \XMM8, \XMM5 + pxor \TMP6, \TMP2 + pxor \TMP1, \TMP2 + pxor \XMM5, \TMP2 + movdqa \TMP2, \TMP3 + pslldq $8, \TMP3 # left shift TMP3 2 DWs + psrldq $8, \TMP2 # right shift TMP2 2 DWs + pxor \TMP3, \XMM5 + pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 + + # first phase of reduction + + movdqa \XMM5, \TMP2 + movdqa \XMM5, \TMP3 + movdqa \XMM5, \TMP4 +# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently + pslld $31, \TMP2 # packed right shift << 31 + pslld $30, \TMP3 # packed right shift << 30 + pslld $25, \TMP4 # packed right shift << 25 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + movdqa \TMP2, \TMP5 + psrldq $4, \TMP5 # right shift T5 1 DW + pslldq $12, \TMP2 # left shift T2 3 DWs + pxor \TMP2, \XMM5 + + # second phase of reduction + + movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 + movdqa \XMM5,\TMP3 + movdqa \XMM5,\TMP4 + psrld $1, \TMP2 # packed left shift >>1 + psrld $2, \TMP3 # packed left shift >>2 + psrld $7, \TMP4 # packed left shift >>7 + pxor \TMP3,\TMP2 # xor the shifted versions + pxor \TMP4,\TMP2 + pxor \TMP5, \TMP2 + pxor \TMP2, \XMM5 + pxor \TMP1, \XMM5 # result is in TMP1 + + pxor \XMM5, \XMM1 +.endm + +/* +* decrypt 4 blocks at a time +* ghash the 4 previously decrypted ciphertext blocks +* arg1, %arg2, %arg3 are used as pointers only, not modified +* %r11 is the data offset value +*/ +.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \ +TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation + + movdqa \XMM1, \XMM5 + movdqa \XMM2, \XMM6 + movdqa \XMM3, \XMM7 + movdqa \XMM4, \XMM8 + + movdqa SHUF_MASK(%rip), %xmm15 + # multiply TMP5 * HashKey using karatsuba + + movdqa \XMM5, \TMP4 + pshufd $78, \XMM5, \TMP6 + pxor \XMM5, \TMP6 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa HashKey_4(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 + movdqa \XMM0, \XMM1 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM2 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM3 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM4 + PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap + PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 + PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap + + pxor (%arg1), \XMM1 + pxor (%arg1), \XMM2 + pxor (%arg1), \XMM3 + pxor (%arg1), \XMM4 + movdqa HashKey_4_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) + movaps 0x10(%arg1), \TMP1 + AESENC \TMP1, \XMM1 # Round 1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 + movaps 0x20(%arg1), \TMP1 + AESENC \TMP1, \XMM1 # Round 2 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 + movdqa \XMM6, \TMP1 + pshufd $78, \XMM6, \TMP2 + pxor \XMM6, \TMP2 + movdqa HashKey_3(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 + movaps 0x30(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 3 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0 + movaps 0x40(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 4 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + movdqa HashKey_3_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x50(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 5 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM6, \XMM5 + pxor \TMP2, \TMP6 + movdqa \XMM7, \TMP1 + pshufd $78, \XMM7, \TMP2 + pxor \XMM7, \TMP2 + movdqa HashKey_2(%rsp ), \TMP5 + + # Multiply TMP5 * HashKey using karatsuba + + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x60(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 6 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0 + movaps 0x70(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 7 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + movdqa HashKey_2_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x80(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 8 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM7, \XMM5 + pxor \TMP2, \TMP6 + + # Multiply XMM8 * HashKey + # XMM8 and TMP5 hold the values for the two operands + + movdqa \XMM8, \TMP1 + pshufd $78, \XMM8, \TMP2 + pxor \XMM8, \TMP2 + movdqa HashKey(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x90(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 9 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 + movaps 0xa0(%arg1), \TMP3 + AESENCLAST \TMP3, \XMM1 # Round 10 + AESENCLAST \TMP3, \XMM2 + AESENCLAST \TMP3, \XMM3 + AESENCLAST \TMP3, \XMM4 + movdqa HashKey_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movdqu (%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK + movdqu \XMM1, (%arg2,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM1 + movdqu 16(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK + movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM2 + movdqu 32(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK + movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM3 + movdqu 48(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK + movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM4 + PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap + + pxor \TMP4, \TMP1 + pxor \XMM8, \XMM5 + pxor \TMP6, \TMP2 + pxor \TMP1, \TMP2 + pxor \XMM5, \TMP2 + movdqa \TMP2, \TMP3 + pslldq $8, \TMP3 # left shift TMP3 2 DWs + psrldq $8, \TMP2 # right shift TMP2 2 DWs + pxor \TMP3, \XMM5 + pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 + + # first phase of reduction + + movdqa \XMM5, \TMP2 + movdqa \XMM5, \TMP3 + movdqa \XMM5, \TMP4 +# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently + pslld $31, \TMP2 # packed right shift << 31 + pslld $30, \TMP3 # packed right shift << 30 + pslld $25, \TMP4 # packed right shift << 25 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + movdqa \TMP2, \TMP5 + psrldq $4, \TMP5 # right shift T5 1 DW + pslldq $12, \TMP2 # left shift T2 3 DWs + pxor \TMP2, \XMM5 + + # second phase of reduction + + movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 + movdqa \XMM5,\TMP3 + movdqa \XMM5,\TMP4 + psrld $1, \TMP2 # packed left shift >>1 + psrld $2, \TMP3 # packed left shift >>2 + psrld $7, \TMP4 # packed left shift >>7 + pxor \TMP3,\TMP2 # xor the shifted versions + pxor \TMP4,\TMP2 + pxor \TMP5, \TMP2 + pxor \TMP2, \XMM5 + pxor \TMP1, \XMM5 # result is in TMP1 + + pxor \XMM5, \XMM1 +.endm + +/* GHASH the last 4 ciphertext blocks. */ +.macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \ +TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst + + # Multiply TMP6 * HashKey (using Karatsuba) + + movdqa \XMM1, \TMP6 + pshufd $78, \XMM1, \TMP2 + pxor \XMM1, \TMP2 + movdqa HashKey_4(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1 + PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0 + movdqa HashKey_4_k(%rsp), \TMP4 + PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movdqa \XMM1, \XMMDst + movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1 + + # Multiply TMP1 * HashKey (using Karatsuba) + + movdqa \XMM2, \TMP1 + pshufd $78, \XMM2, \TMP2 + pxor \XMM2, \TMP2 + movdqa HashKey_3(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0 + movdqa HashKey_3_k(%rsp), \TMP4 + PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + pxor \TMP1, \TMP6 + pxor \XMM2, \XMMDst + pxor \TMP2, \XMM1 +# results accumulated in TMP6, XMMDst, XMM1 + + # Multiply TMP1 * HashKey (using Karatsuba) + + movdqa \XMM3, \TMP1 + pshufd $78, \XMM3, \TMP2 + pxor \XMM3, \TMP2 + movdqa HashKey_2(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0 + movdqa HashKey_2_k(%rsp), \TMP4 + PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + pxor \TMP1, \TMP6 + pxor \XMM3, \XMMDst + pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1 + + # Multiply TMP1 * HashKey (using Karatsuba) + movdqa \XMM4, \TMP1 + pshufd $78, \XMM4, \TMP2 + pxor \XMM4, \TMP2 + movdqa HashKey(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0 + movdqa HashKey_k(%rsp), \TMP4 + PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + pxor \TMP1, \TMP6 + pxor \XMM4, \XMMDst + pxor \XMM1, \TMP2 + pxor \TMP6, \TMP2 + pxor \XMMDst, \TMP2 + # middle section of the temp results combined as in karatsuba algorithm + movdqa \TMP2, \TMP4 + pslldq $8, \TMP4 # left shift TMP4 2 DWs + psrldq $8, \TMP2 # right shift TMP2 2 DWs + pxor \TMP4, \XMMDst + pxor \TMP2, \TMP6 +# TMP6:XMMDst holds the result of the accumulated carry-less multiplications + # first phase of the reduction + movdqa \XMMDst, \TMP2 + movdqa \XMMDst, \TMP3 + movdqa \XMMDst, \TMP4 +# move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently + pslld $31, \TMP2 # packed right shifting << 31 + pslld $30, \TMP3 # packed right shifting << 30 + pslld $25, \TMP4 # packed right shifting << 25 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + movdqa \TMP2, \TMP7 + psrldq $4, \TMP7 # right shift TMP7 1 DW + pslldq $12, \TMP2 # left shift TMP2 3 DWs + pxor \TMP2, \XMMDst + + # second phase of the reduction + movdqa \XMMDst, \TMP2 + # make 3 copies of XMMDst for doing 3 shift operations + movdqa \XMMDst, \TMP3 + movdqa \XMMDst, \TMP4 + psrld $1, \TMP2 # packed left shift >> 1 + psrld $2, \TMP3 # packed left shift >> 2 + psrld $7, \TMP4 # packed left shift >> 7 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + pxor \TMP7, \TMP2 + pxor \TMP2, \XMMDst + pxor \TMP6, \XMMDst # reduced result is in XMMDst +.endm + +/* Encryption of a single block done*/ +.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1 + + pxor (%arg1), \XMM0 + movaps 16(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 32(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 48(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 64(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 80(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 96(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 112(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 128(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 144(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 160(%arg1), \TMP1 + AESENCLAST \TMP1, \XMM0 +.endm + + +/***************************************************************************** +* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* u8 *out, // Plaintext output. Encrypt in-place is allowed. +* const u8 *in, // Ciphertext input +* u64 plaintext_len, // Length of data in bytes for decryption. +* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) +* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) +* // concatenated with 0x00000001. 16-byte aligned pointer. +* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. +* const u8 *aad, // Additional Authentication Data (AAD) +* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes +* u8 *auth_tag, // Authenticated Tag output. The driver will compare this to the +* // given authentication tag and only return the plaintext if they match. +* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 +* // (most likely), 12 or 8. +* +* Assumptions: +* +* keys: +* keys are pre-expanded and aligned to 16 bytes. we are using the first +* set of 11 keys in the data structure void *aes_ctx +* +* iv: +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Salt (From the SA) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Initialization Vector | +* | (This is the sequence number from IPSec header) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x1 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* +* +* AAD: +* AAD padded to 128 bits with 0 +* for example, assume AAD is a u32 vector +* +* if AAD is 8 bytes: +* AAD[3] = {A0, A1}; +* padded AAD in xmm register = {A1 A0 0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A1) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 32-bit Sequence Number (A0) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 32-bit Sequence Number +* +* if AAD is 12 bytes: +* AAD[3] = {A0, A1, A2}; +* padded AAD in xmm register = {A2 A1 A0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A2) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 64-bit Extended Sequence Number {A1,A0} | +* | | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 64-bit Extended Sequence Number +* +* aadLen: +* from the definition of the spec, aadLen can only be 8 or 12 bytes. +* The code supports 16 too but for other sizes, the code will fail. +* +* TLen: +* from the definition of the spec, TLen can only be 8, 12 or 16 bytes. +* For other sizes, the code will fail. +* +* poly = x^128 + x^127 + x^126 + x^121 + 1 +* +*****************************************************************************/ + +ENTRY(aesni_gcm_dec) + push %r12 + push %r13 + push %r14 + mov %rsp, %r14 +/* +* states of %xmm registers %xmm6:%xmm15 not saved +* all %xmm registers are clobbered +*/ + sub $VARIABLE_OFFSET, %rsp + and $~63, %rsp # align rsp to 64 bytes + mov %arg6, %r12 + movdqu (%r12), %xmm13 # %xmm13 = HashKey + movdqa SHUF_MASK(%rip), %xmm2 + PSHUFB_XMM %xmm2, %xmm13 + + +# Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH) + + movdqa %xmm13, %xmm2 + psllq $1, %xmm13 + psrlq $63, %xmm2 + movdqa %xmm2, %xmm1 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + por %xmm2, %xmm13 + + # Reduction + + pshufd $0x24, %xmm1, %xmm2 + pcmpeqd TWOONE(%rip), %xmm2 + pand POLY(%rip), %xmm2 + pxor %xmm2, %xmm13 # %xmm13 holds the HashKey<<1 (mod poly) + + + # Decrypt first few blocks + + movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly) + mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext + and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) + mov %r13, %r12 + and $(3<<4), %r12 + jz _initial_num_blocks_is_0_decrypt + cmp $(2<<4), %r12 + jb _initial_num_blocks_is_1_decrypt + je _initial_num_blocks_is_2_decrypt +_initial_num_blocks_is_3_decrypt: + INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec + sub $48, %r13 + jmp _initial_blocks_decrypted +_initial_num_blocks_is_2_decrypt: + INITIAL_BLOCKS_DEC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec + sub $32, %r13 + jmp _initial_blocks_decrypted +_initial_num_blocks_is_1_decrypt: + INITIAL_BLOCKS_DEC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec + sub $16, %r13 + jmp _initial_blocks_decrypted +_initial_num_blocks_is_0_decrypt: + INITIAL_BLOCKS_DEC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec +_initial_blocks_decrypted: + cmp $0, %r13 + je _zero_cipher_left_decrypt + sub $64, %r13 + je _four_cipher_left_decrypt +_decrypt_by_4: + GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ +%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec + add $64, %r11 + sub $64, %r13 + jne _decrypt_by_4 +_four_cipher_left_decrypt: + GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ +%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 +_zero_cipher_left_decrypt: + mov %arg4, %r13 + and $15, %r13 # %r13 = arg4 (mod 16) + je _multiple_of_16_bytes_decrypt + + # Handle the last <16 byte block seperately + + paddd ONE(%rip), %xmm0 # increment CNT to get Yn + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm0 + + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) + sub $16, %r11 + add %r13, %r11 + movdqu (%arg3,%r11,1), %xmm1 # recieve the last <16 byte block + lea SHIFT_MASK+16(%rip), %r12 + sub %r13, %r12 +# adjust the shuffle mask pointer to be able to shift 16-%r13 bytes +# (%r13 is the number of bytes in plaintext mod 16) + movdqu (%r12), %xmm2 # get the appropriate shuffle mask + PSHUFB_XMM %xmm2, %xmm1 # right shift 16-%r13 butes + + movdqa %xmm1, %xmm2 + pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) + movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 + pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 + pand %xmm1, %xmm2 + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10 ,%xmm2 + + pxor %xmm2, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # GHASH computation for the last <16 byte block + sub %r13, %r11 + add $16, %r11 + + # output %r13 bytes + MOVQ_R64_XMM %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_decrypt + mov %rax, (%arg2 , %r11, 1) + add $8, %r11 + psrldq $8, %xmm0 + MOVQ_R64_XMM %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_decrypt: + mov %al, (%arg2, %r11, 1) + add $1, %r11 + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_decrypt +_multiple_of_16_bytes_decrypt: + mov arg8, %r12 # %r13 = aadLen (number of bytes) + shl $3, %r12 # convert into number of bits + movd %r12d, %xmm15 # len(A) in %xmm15 + shl $3, %arg4 # len(C) in bits (*128) + MOVQ_R64_XMM %arg4, %xmm1 + pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 + pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) + pxor %xmm15, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # final GHASH computation + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm8 + + mov %arg5, %rax # %rax = *Y0 + movdqu (%rax), %xmm0 # %xmm0 = Y0 + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) + pxor %xmm8, %xmm0 +_return_T_decrypt: + mov arg9, %r10 # %r10 = authTag + mov arg10, %r11 # %r11 = auth_tag_len + cmp $16, %r11 + je _T_16_decrypt + cmp $12, %r11 + je _T_12_decrypt +_T_8_decrypt: + MOVQ_R64_XMM %xmm0, %rax + mov %rax, (%r10) + jmp _return_T_done_decrypt +_T_12_decrypt: + MOVQ_R64_XMM %xmm0, %rax + mov %rax, (%r10) + psrldq $8, %xmm0 + movd %xmm0, %eax + mov %eax, 8(%r10) + jmp _return_T_done_decrypt +_T_16_decrypt: + movdqu %xmm0, (%r10) +_return_T_done_decrypt: + mov %r14, %rsp + pop %r14 + pop %r13 + pop %r12 + ret + + +/***************************************************************************** +* void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* u8 *out, // Ciphertext output. Encrypt in-place is allowed. +* const u8 *in, // Plaintext input +* u64 plaintext_len, // Length of data in bytes for encryption. +* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) +* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) +* // concatenated with 0x00000001. 16-byte aligned pointer. +* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. +* const u8 *aad, // Additional Authentication Data (AAD) +* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes +* u8 *auth_tag, // Authenticated Tag output. +* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), +* // 12 or 8. +* +* Assumptions: +* +* keys: +* keys are pre-expanded and aligned to 16 bytes. we are using the +* first set of 11 keys in the data structure void *aes_ctx +* +* +* iv: +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Salt (From the SA) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Initialization Vector | +* | (This is the sequence number from IPSec header) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x1 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* +* +* AAD: +* AAD padded to 128 bits with 0 +* for example, assume AAD is a u32 vector +* +* if AAD is 8 bytes: +* AAD[3] = {A0, A1}; +* padded AAD in xmm register = {A1 A0 0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A1) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 32-bit Sequence Number (A0) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 32-bit Sequence Number +* +* if AAD is 12 bytes: +* AAD[3] = {A0, A1, A2}; +* padded AAD in xmm register = {A2 A1 A0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A2) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 64-bit Extended Sequence Number {A1,A0} | +* | | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 64-bit Extended Sequence Number +* +* aadLen: +* from the definition of the spec, aadLen can only be 8 or 12 bytes. +* The code supports 16 too but for other sizes, the code will fail. +* +* TLen: +* from the definition of the spec, TLen can only be 8, 12 or 16 bytes. +* For other sizes, the code will fail. +* +* poly = x^128 + x^127 + x^126 + x^121 + 1 +***************************************************************************/ +ENTRY(aesni_gcm_enc) + push %r12 + push %r13 + push %r14 + mov %rsp, %r14 +# +# states of %xmm registers %xmm6:%xmm15 not saved +# all %xmm registers are clobbered +# + sub $VARIABLE_OFFSET, %rsp + and $~63, %rsp + mov %arg6, %r12 + movdqu (%r12), %xmm13 + movdqa SHUF_MASK(%rip), %xmm2 + PSHUFB_XMM %xmm2, %xmm13 + + +# precompute HashKey<<1 mod poly from the HashKey (required for GHASH) + + movdqa %xmm13, %xmm2 + psllq $1, %xmm13 + psrlq $63, %xmm2 + movdqa %xmm2, %xmm1 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + por %xmm2, %xmm13 + + # reduce HashKey<<1 + + pshufd $0x24, %xmm1, %xmm2 + pcmpeqd TWOONE(%rip), %xmm2 + pand POLY(%rip), %xmm2 + pxor %xmm2, %xmm13 + movdqa %xmm13, HashKey(%rsp) + mov %arg4, %r13 # %xmm13 holds HashKey<<1 (mod poly) + and $-16, %r13 + mov %r13, %r12 + + # Encrypt first few blocks + + and $(3<<4), %r12 + jz _initial_num_blocks_is_0_encrypt + cmp $(2<<4), %r12 + jb _initial_num_blocks_is_1_encrypt + je _initial_num_blocks_is_2_encrypt +_initial_num_blocks_is_3_encrypt: + INITIAL_BLOCKS_ENC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc + sub $48, %r13 + jmp _initial_blocks_encrypted +_initial_num_blocks_is_2_encrypt: + INITIAL_BLOCKS_ENC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc + sub $32, %r13 + jmp _initial_blocks_encrypted +_initial_num_blocks_is_1_encrypt: + INITIAL_BLOCKS_ENC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc + sub $16, %r13 + jmp _initial_blocks_encrypted +_initial_num_blocks_is_0_encrypt: + INITIAL_BLOCKS_ENC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc +_initial_blocks_encrypted: + + # Main loop - Encrypt remaining blocks + + cmp $0, %r13 + je _zero_cipher_left_encrypt + sub $64, %r13 + je _four_cipher_left_encrypt +_encrypt_by_4_encrypt: + GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ +%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc + add $64, %r11 + sub $64, %r13 + jne _encrypt_by_4_encrypt +_four_cipher_left_encrypt: + GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ +%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 +_zero_cipher_left_encrypt: + mov %arg4, %r13 + and $15, %r13 # %r13 = arg4 (mod 16) + je _multiple_of_16_bytes_encrypt + + # Handle the last <16 Byte block seperately + paddd ONE(%rip), %xmm0 # INCR CNT to get Yn + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm0 + + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) + sub $16, %r11 + add %r13, %r11 + movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte blocks + lea SHIFT_MASK+16(%rip), %r12 + sub %r13, %r12 + # adjust the shuffle mask pointer to be able to shift 16-r13 bytes + # (%r13 is the number of bytes in plaintext mod 16) + movdqu (%r12), %xmm2 # get the appropriate shuffle mask + PSHUFB_XMM %xmm2, %xmm1 # shift right 16-r13 byte + pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) + movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out top 16-r13 bytes of xmm0 + pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10,%xmm0 + + pxor %xmm0, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # GHASH computation for the last <16 byte block + sub %r13, %r11 + add $16, %r11 + PSHUFB_XMM %xmm10, %xmm1 + + # shuffle xmm0 back to output as ciphertext + + # Output %r13 bytes + MOVQ_R64_XMM %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_encrypt + mov %rax, (%arg2 , %r11, 1) + add $8, %r11 + psrldq $8, %xmm0 + MOVQ_R64_XMM %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_encrypt: + mov %al, (%arg2, %r11, 1) + add $1, %r11 + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_encrypt +_multiple_of_16_bytes_encrypt: + mov arg8, %r12 # %r12 = addLen (number of bytes) + shl $3, %r12 + movd %r12d, %xmm15 # len(A) in %xmm15 + shl $3, %arg4 # len(C) in bits (*128) + MOVQ_R64_XMM %arg4, %xmm1 + pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 + pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) + pxor %xmm15, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # final GHASH computation + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm8 # perform a 16 byte swap + + mov %arg5, %rax # %rax = *Y0 + movdqu (%rax), %xmm0 # %xmm0 = Y0 + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0) + pxor %xmm8, %xmm0 +_return_T_encrypt: + mov arg9, %r10 # %r10 = authTag + mov arg10, %r11 # %r11 = auth_tag_len + cmp $16, %r11 + je _T_16_encrypt + cmp $12, %r11 + je _T_12_encrypt +_T_8_encrypt: + MOVQ_R64_XMM %xmm0, %rax + mov %rax, (%r10) + jmp _return_T_done_encrypt +_T_12_encrypt: + MOVQ_R64_XMM %xmm0, %rax + mov %rax, (%r10) + psrldq $8, %xmm0 + movd %xmm0, %eax + mov %eax, 8(%r10) + jmp _return_T_done_encrypt +_T_16_encrypt: + movdqu %xmm0, (%r10) +_return_T_done_encrypt: + mov %r14, %rsp + pop %r14 + pop %r13 + pop %r12 + ret + +#endif + _key_expansion_128: _key_expansion_256a: @@ -55,10 +1709,11 @@ _key_expansion_256a: shufps $0b10001100, %xmm0, %xmm4 pxor %xmm4, %xmm0 pxor %xmm1, %xmm0 - movaps %xmm0, (%rcx) - add $0x10, %rcx + movaps %xmm0, (TKEYP) + add $0x10, TKEYP ret +.align 4 _key_expansion_192a: pshufd $0b01010101, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 @@ -76,12 +1731,13 @@ _key_expansion_192a: movaps %xmm0, %xmm1 shufps $0b01000100, %xmm0, %xmm6 - movaps %xmm6, (%rcx) + movaps %xmm6, (TKEYP) shufps $0b01001110, %xmm2, %xmm1 - movaps %xmm1, 16(%rcx) - add $0x20, %rcx + movaps %xmm1, 0x10(TKEYP) + add $0x20, TKEYP ret +.align 4 _key_expansion_192b: pshufd $0b01010101, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 @@ -96,10 +1752,11 @@ _key_expansion_192b: pxor %xmm3, %xmm2 pxor %xmm5, %xmm2 - movaps %xmm0, (%rcx) - add $0x10, %rcx + movaps %xmm0, (TKEYP) + add $0x10, TKEYP ret +.align 4 _key_expansion_256b: pshufd $0b10101010, %xmm1, %xmm1 shufps $0b00010000, %xmm2, %xmm4 @@ -107,8 +1764,8 @@ _key_expansion_256b: shufps $0b10001100, %xmm2, %xmm4 pxor %xmm4, %xmm2 pxor %xmm1, %xmm2 - movaps %xmm2, (%rcx) - add $0x10, %rcx + movaps %xmm2, (TKEYP) + add $0x10, TKEYP ret /* @@ -116,17 +1773,23 @@ _key_expansion_256b: * unsigned int key_len) */ ENTRY(aesni_set_key) - movups (%rsi), %xmm0 # user key (first 16 bytes) - movaps %xmm0, (%rdi) - lea 0x10(%rdi), %rcx # key addr - movl %edx, 480(%rdi) +#ifndef __x86_64__ + pushl KEYP + movl 8(%esp), KEYP # ctx + movl 12(%esp), UKEYP # in_key + movl 16(%esp), %edx # key_len +#endif + movups (UKEYP), %xmm0 # user key (first 16 bytes) + movaps %xmm0, (KEYP) + lea 0x10(KEYP), TKEYP # key addr + movl %edx, 480(KEYP) pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x cmp $24, %dl jb .Lenc_key128 je .Lenc_key192 - movups 0x10(%rsi), %xmm2 # other user key - movaps %xmm2, (%rcx) - add $0x10, %rcx + movups 0x10(UKEYP), %xmm2 # other user key + movaps %xmm2, (TKEYP) + add $0x10, TKEYP AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 call _key_expansion_256a AESKEYGENASSIST 0x1 %xmm0 %xmm1 @@ -155,7 +1818,7 @@ ENTRY(aesni_set_key) call _key_expansion_256a jmp .Ldec_key .Lenc_key192: - movq 0x10(%rsi), %xmm2 # other user key + movq 0x10(UKEYP), %xmm2 # other user key AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 call _key_expansion_192a AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 @@ -195,33 +1858,47 @@ ENTRY(aesni_set_key) AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 call _key_expansion_128 .Ldec_key: - sub $0x10, %rcx - movaps (%rdi), %xmm0 - movaps (%rcx), %xmm1 - movaps %xmm0, 240(%rcx) - movaps %xmm1, 240(%rdi) - add $0x10, %rdi - lea 240-16(%rcx), %rsi + sub $0x10, TKEYP + movaps (KEYP), %xmm0 + movaps (TKEYP), %xmm1 + movaps %xmm0, 240(TKEYP) + movaps %xmm1, 240(KEYP) + add $0x10, KEYP + lea 240-16(TKEYP), UKEYP .align 4 .Ldec_key_loop: - movaps (%rdi), %xmm0 + movaps (KEYP), %xmm0 AESIMC %xmm0 %xmm1 - movaps %xmm1, (%rsi) - add $0x10, %rdi - sub $0x10, %rsi - cmp %rcx, %rdi + movaps %xmm1, (UKEYP) + add $0x10, KEYP + sub $0x10, UKEYP + cmp TKEYP, KEYP jb .Ldec_key_loop - xor %rax, %rax + xor AREG, AREG +#ifndef __x86_64__ + popl KEYP +#endif ret /* * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) */ ENTRY(aesni_enc) +#ifndef __x86_64__ + pushl KEYP + pushl KLEN + movl 12(%esp), KEYP + movl 16(%esp), OUTP + movl 20(%esp), INP +#endif movl 480(KEYP), KLEN # key length movups (INP), STATE # input call _aesni_enc1 movups STATE, (OUTP) # output +#ifndef __x86_64__ + popl KLEN + popl KEYP +#endif ret /* @@ -236,6 +1913,7 @@ ENTRY(aesni_enc) * KEY * TKEYP (T1) */ +.align 4 _aesni_enc1: movaps (KEYP), KEY # key mov KEYP, TKEYP @@ -298,6 +1976,7 @@ _aesni_enc1: * KEY * TKEYP (T1) */ +.align 4 _aesni_enc4: movaps (KEYP), KEY # key mov KEYP, TKEYP @@ -391,11 +2070,22 @@ _aesni_enc4: * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) */ ENTRY(aesni_dec) +#ifndef __x86_64__ + pushl KEYP + pushl KLEN + movl 12(%esp), KEYP + movl 16(%esp), OUTP + movl 20(%esp), INP +#endif mov 480(KEYP), KLEN # key length add $240, KEYP movups (INP), STATE # input call _aesni_dec1 movups STATE, (OUTP) #output +#ifndef __x86_64__ + popl KLEN + popl KEYP +#endif ret /* @@ -410,6 +2100,7 @@ ENTRY(aesni_dec) * KEY * TKEYP (T1) */ +.align 4 _aesni_dec1: movaps (KEYP), KEY # key mov KEYP, TKEYP @@ -472,6 +2163,7 @@ _aesni_dec1: * KEY * TKEYP (T1) */ +.align 4 _aesni_dec4: movaps (KEYP), KEY # key mov KEYP, TKEYP @@ -566,6 +2258,15 @@ _aesni_dec4: * size_t len) */ ENTRY(aesni_ecb_enc) +#ifndef __x86_64__ + pushl LEN + pushl KEYP + pushl KLEN + movl 16(%esp), KEYP + movl 20(%esp), OUTP + movl 24(%esp), INP + movl 28(%esp), LEN +#endif test LEN, LEN # check length jz .Lecb_enc_ret mov 480(KEYP), KLEN @@ -602,6 +2303,11 @@ ENTRY(aesni_ecb_enc) cmp $16, LEN jge .Lecb_enc_loop1 .Lecb_enc_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN +#endif ret /* @@ -609,6 +2315,15 @@ ENTRY(aesni_ecb_enc) * size_t len); */ ENTRY(aesni_ecb_dec) +#ifndef __x86_64__ + pushl LEN + pushl KEYP + pushl KLEN + movl 16(%esp), KEYP + movl 20(%esp), OUTP + movl 24(%esp), INP + movl 28(%esp), LEN +#endif test LEN, LEN jz .Lecb_dec_ret mov 480(KEYP), KLEN @@ -646,6 +2361,11 @@ ENTRY(aesni_ecb_dec) cmp $16, LEN jge .Lecb_dec_loop1 .Lecb_dec_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN +#endif ret /* @@ -653,6 +2373,17 @@ ENTRY(aesni_ecb_dec) * size_t len, u8 *iv) */ ENTRY(aesni_cbc_enc) +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl 20(%esp), KEYP + movl 24(%esp), OUTP + movl 28(%esp), INP + movl 32(%esp), LEN + movl 36(%esp), IVP +#endif cmp $16, LEN jb .Lcbc_enc_ret mov 480(KEYP), KLEN @@ -670,6 +2401,12 @@ ENTRY(aesni_cbc_enc) jge .Lcbc_enc_loop movups STATE, (IVP) .Lcbc_enc_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif ret /* @@ -677,6 +2414,17 @@ ENTRY(aesni_cbc_enc) * size_t len, u8 *iv) */ ENTRY(aesni_cbc_dec) +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl 20(%esp), KEYP + movl 24(%esp), OUTP + movl 28(%esp), INP + movl 32(%esp), LEN + movl 36(%esp), IVP +#endif cmp $16, LEN jb .Lcbc_dec_just_ret mov 480(KEYP), KLEN @@ -690,16 +2438,30 @@ ENTRY(aesni_cbc_dec) movaps IN1, STATE1 movups 0x10(INP), IN2 movaps IN2, STATE2 +#ifdef __x86_64__ movups 0x20(INP), IN3 movaps IN3, STATE3 movups 0x30(INP), IN4 movaps IN4, STATE4 +#else + movups 0x20(INP), IN1 + movaps IN1, STATE3 + movups 0x30(INP), IN2 + movaps IN2, STATE4 +#endif call _aesni_dec4 pxor IV, STATE1 +#ifdef __x86_64__ pxor IN1, STATE2 pxor IN2, STATE3 pxor IN3, STATE4 movaps IN4, IV +#else + pxor (INP), STATE2 + pxor 0x10(INP), STATE3 + pxor IN1, STATE4 + movaps IN2, IV +#endif movups STATE1, (OUTP) movups STATE2, 0x10(OUTP) movups STATE3, 0x20(OUTP) @@ -727,8 +2489,15 @@ ENTRY(aesni_cbc_dec) .Lcbc_dec_ret: movups IV, (IVP) .Lcbc_dec_just_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif ret +#ifdef __x86_64__ .align 16 .Lbswap_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 @@ -744,6 +2513,7 @@ ENTRY(aesni_cbc_dec) * INC: == 1, in little endian * BSWAP_MASK == endian swapping mask */ +.align 4 _aesni_inc_init: movaps .Lbswap_mask, BSWAP_MASK movaps IV, CTR @@ -768,6 +2538,7 @@ _aesni_inc_init: * CTR: == output IV, in little endian * TCTR_LOW: == lower qword of CTR */ +.align 4 _aesni_inc: paddq INC, CTR add $1, TCTR_LOW @@ -839,3 +2610,4 @@ ENTRY(aesni_ctr_enc) movups IV, (IVP) .Lctr_enc_just_ret: ret +#endif diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 2cb3dcc4490a..e1e60c7d5813 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -5,6 +5,14 @@ * Copyright (C) 2008, Intel Corp. * Author: Huang Ying <ying.huang@intel.com> * + * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD + * interface for 64-bit kernels. + * Authors: Adrian Hoban <adrian.hoban@intel.com> + * Gabriele Paoloni <gabriele.paoloni@intel.com> + * Tadeusz Struk (tadeusz.struk@intel.com) + * Aidan O'Mahony (aidan.o.mahony@intel.com) + * Copyright (c) 2010, Intel Corporation. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -21,6 +29,10 @@ #include <crypto/ctr.h> #include <asm/i387.h> #include <asm/aes.h> +#include <crypto/scatterwalk.h> +#include <crypto/internal/aead.h> +#include <linux/workqueue.h> +#include <linux/spinlock.h> #if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) #define HAS_CTR @@ -42,8 +54,31 @@ struct async_aes_ctx { struct cryptd_ablkcipher *cryptd_tfm; }; -#define AESNI_ALIGN 16 +/* This data is stored at the end of the crypto_tfm struct. + * It's a type of per "session" data storage location. + * This needs to be 16 byte aligned. + */ +struct aesni_rfc4106_gcm_ctx { + u8 hash_subkey[16]; + struct crypto_aes_ctx aes_key_expanded; + u8 nonce[4]; + struct cryptd_aead *cryptd_tfm; +}; + +struct aesni_gcm_set_hash_subkey_result { + int err; + struct completion completion; +}; + +struct aesni_hash_subkey_req_data { + u8 iv[16]; + struct aesni_gcm_set_hash_subkey_result result; + struct scatterlist sg; +}; + +#define AESNI_ALIGN (16) #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) +#define RFC4106_HASH_SUBKEY_SIZE 16 asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, unsigned int key_len); @@ -59,9 +94,62 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); +#ifdef CONFIG_X86_64 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); +/* asmlinkage void aesni_gcm_enc() + * void *ctx, AES Key schedule. Starts on a 16 byte boundary. + * u8 *out, Ciphertext output. Encrypt in-place is allowed. + * const u8 *in, Plaintext input + * unsigned long plaintext_len, Length of data in bytes for encryption. + * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) + * concatenated with 8 byte Initialisation Vector (from IPSec ESP + * Payload) concatenated with 0x00000001. 16-byte aligned pointer. + * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. + * const u8 *aad, Additional Authentication Data (AAD) + * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this + * is going to be 8 or 12 bytes + * u8 *auth_tag, Authenticated Tag output. + * unsigned long auth_tag_len), Authenticated Tag Length in bytes. + * Valid values are 16 (most likely), 12 or 8. + */ +asmlinkage void aesni_gcm_enc(void *ctx, u8 *out, + const u8 *in, unsigned long plaintext_len, u8 *iv, + u8 *hash_subkey, const u8 *aad, unsigned long aad_len, + u8 *auth_tag, unsigned long auth_tag_len); + +/* asmlinkage void aesni_gcm_dec() + * void *ctx, AES Key schedule. Starts on a 16 byte boundary. + * u8 *out, Plaintext output. Decrypt in-place is allowed. + * const u8 *in, Ciphertext input + * unsigned long ciphertext_len, Length of data in bytes for decryption. + * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) + * concatenated with 8 byte Initialisation Vector (from IPSec ESP + * Payload) concatenated with 0x00000001. 16-byte aligned pointer. + * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. + * const u8 *aad, Additional Authentication Data (AAD) + * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going + * to be 8 or 12 bytes + * u8 *auth_tag, Authenticated Tag output. + * unsigned long auth_tag_len) Authenticated Tag Length in bytes. + * Valid values are 16 (most likely), 12 or 8. + */ +asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, + const u8 *in, unsigned long ciphertext_len, u8 *iv, + u8 *hash_subkey, const u8 *aad, unsigned long aad_len, + u8 *auth_tag, unsigned long auth_tag_len); + +static inline struct +aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) +{ + return + (struct aesni_rfc4106_gcm_ctx *) + PTR_ALIGN((u8 *) + crypto_tfm_ctx(crypto_aead_tfm(tfm)), AESNI_ALIGN); +} +#endif + static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) { unsigned long addr = (unsigned long)raw_ctx; @@ -324,6 +412,7 @@ static struct crypto_alg blk_cbc_alg = { }, }; +#ifdef CONFIG_X86_64 static void ctr_crypt_final(struct crypto_aes_ctx *ctx, struct blkcipher_walk *walk) { @@ -389,6 +478,7 @@ static struct crypto_alg blk_ctr_alg = { }, }, }; +#endif static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, unsigned int key_len) @@ -536,6 +626,7 @@ static struct crypto_alg ablk_cbc_alg = { }, }; +#ifdef CONFIG_X86_64 static int ablk_ctr_init(struct crypto_tfm *tfm) { struct cryptd_ablkcipher *cryptd_tfm; @@ -612,6 +703,7 @@ static struct crypto_alg ablk_rfc3686_ctr_alg = { }, }; #endif +#endif #ifdef HAS_LRW static int ablk_lrw_init(struct crypto_tfm *tfm) @@ -730,6 +822,424 @@ static struct crypto_alg ablk_xts_alg = { }; #endif +#ifdef CONFIG_X86_64 +static int rfc4106_init(struct crypto_tfm *tfm) +{ + struct cryptd_aead *cryptd_tfm; + struct aesni_rfc4106_gcm_ctx *ctx = (struct aesni_rfc4106_gcm_ctx *) + PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); + cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ctx->cryptd_tfm = cryptd_tfm; + tfm->crt_aead.reqsize = sizeof(struct aead_request) + + crypto_aead_reqsize(&cryptd_tfm->base); + return 0; +} + +static void rfc4106_exit(struct crypto_tfm *tfm) +{ + struct aesni_rfc4106_gcm_ctx *ctx = + (struct aesni_rfc4106_gcm_ctx *) + PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); + if (!IS_ERR(ctx->cryptd_tfm)) + cryptd_free_aead(ctx->cryptd_tfm); + return; +} + +static void +rfc4106_set_hash_subkey_done(struct crypto_async_request *req, int err) +{ + struct aesni_gcm_set_hash_subkey_result *result = req->data; + + if (err == -EINPROGRESS) + return; + result->err = err; + complete(&result->completion); +} + +static int +rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) +{ + struct crypto_ablkcipher *ctr_tfm; + struct ablkcipher_request *req; + int ret = -EINVAL; + struct aesni_hash_subkey_req_data *req_data; + + ctr_tfm = crypto_alloc_ablkcipher("ctr(aes)", 0, 0); + if (IS_ERR(ctr_tfm)) + return PTR_ERR(ctr_tfm); + + crypto_ablkcipher_clear_flags(ctr_tfm, ~0); + + ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len); + if (ret) { + crypto_free_ablkcipher(ctr_tfm); + return ret; + } + + req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL); + if (!req) { + crypto_free_ablkcipher(ctr_tfm); + return -EINVAL; + } + + req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); + if (!req_data) { + crypto_free_ablkcipher(ctr_tfm); + return -ENOMEM; + } + memset(req_data->iv, 0, sizeof(req_data->iv)); + + /* Clear the data in the hash sub key container to zero.*/ + /* We want to cipher all zeros to create the hash sub key. */ + memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE); + + init_completion(&req_data->result.completion); + sg_init_one(&req_data->sg, hash_subkey, RFC4106_HASH_SUBKEY_SIZE); + ablkcipher_request_set_tfm(req, ctr_tfm); + ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + rfc4106_set_hash_subkey_done, + &req_data->result); + + ablkcipher_request_set_crypt(req, &req_data->sg, + &req_data->sg, RFC4106_HASH_SUBKEY_SIZE, req_data->iv); + + ret = crypto_ablkcipher_encrypt(req); + if (ret == -EINPROGRESS || ret == -EBUSY) { + ret = wait_for_completion_interruptible + (&req_data->result.completion); + if (!ret) + ret = req_data->result.err; + } + ablkcipher_request_free(req); + kfree(req_data); + crypto_free_ablkcipher(ctr_tfm); + return ret; +} + +static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, + unsigned int key_len) +{ + int ret = 0; + struct crypto_tfm *tfm = crypto_aead_tfm(parent); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); + u8 *new_key_mem = NULL; + + if (key_len < 4) { + crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + /*Account for 4 byte nonce at the end.*/ + key_len -= 4; + if (key_len != AES_KEYSIZE_128) { + crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce)); + /*This must be on a 16 byte boundary!*/ + if ((unsigned long)(&(ctx->aes_key_expanded.key_enc[0])) % AESNI_ALIGN) + return -EINVAL; + + if ((unsigned long)key % AESNI_ALIGN) { + /*key is not aligned: use an auxuliar aligned pointer*/ + new_key_mem = kmalloc(key_len+AESNI_ALIGN, GFP_KERNEL); + if (!new_key_mem) + return -ENOMEM; + + new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN); + memcpy(new_key_mem, key, key_len); + key = new_key_mem; + } + + if (!irq_fpu_usable()) + ret = crypto_aes_expand_key(&(ctx->aes_key_expanded), + key, key_len); + else { + kernel_fpu_begin(); + ret = aesni_set_key(&(ctx->aes_key_expanded), key, key_len); + kernel_fpu_end(); + } + /*This must be on a 16 byte boundary!*/ + if ((unsigned long)(&(ctx->hash_subkey[0])) % AESNI_ALIGN) { + ret = -EINVAL; + goto exit; + } + ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); +exit: + kfree(new_key_mem); + return ret; +} + +/* This is the Integrity Check Value (aka the authentication tag length and can + * be 8, 12 or 16 bytes long. */ +static int rfc4106_set_authsize(struct crypto_aead *parent, + unsigned int authsize) +{ + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); + struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); + + switch (authsize) { + case 8: + case 12: + case 16: + break; + default: + return -EINVAL; + } + crypto_aead_crt(parent)->authsize = authsize; + crypto_aead_crt(cryptd_child)->authsize = authsize; + return 0; +} + +static int rfc4106_encrypt(struct aead_request *req) +{ + int ret; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); + + if (!irq_fpu_usable()) { + struct aead_request *cryptd_req = + (struct aead_request *) aead_request_ctx(req); + memcpy(cryptd_req, req, sizeof(*req)); + aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + return crypto_aead_encrypt(cryptd_req); + } else { + kernel_fpu_begin(); + ret = cryptd_child->base.crt_aead.encrypt(req); + kernel_fpu_end(); + return ret; + } +} + +static int rfc4106_decrypt(struct aead_request *req) +{ + int ret; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); + + if (!irq_fpu_usable()) { + struct aead_request *cryptd_req = + (struct aead_request *) aead_request_ctx(req); + memcpy(cryptd_req, req, sizeof(*req)); + aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + return crypto_aead_decrypt(cryptd_req); + } else { + kernel_fpu_begin(); + ret = cryptd_child->base.crt_aead.decrypt(req); + kernel_fpu_end(); + return ret; + } +} + +static struct crypto_alg rfc4106_alg = { + .cra_name = "rfc4106(gcm(aes))", + .cra_driver_name = "rfc4106-gcm-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, + .cra_alignmask = 0, + .cra_type = &crypto_nivaead_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(rfc4106_alg.cra_list), + .cra_init = rfc4106_init, + .cra_exit = rfc4106_exit, + .cra_u = { + .aead = { + .setkey = rfc4106_set_key, + .setauthsize = rfc4106_set_authsize, + .encrypt = rfc4106_encrypt, + .decrypt = rfc4106_decrypt, + .geniv = "seqiv", + .ivsize = 8, + .maxauthsize = 16, + }, + }, +}; + +static int __driver_rfc4106_encrypt(struct aead_request *req) +{ + u8 one_entry_in_sg = 0; + u8 *src, *dst, *assoc; + __be32 counter = cpu_to_be32(1); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + void *aes_ctx = &(ctx->aes_key_expanded); + unsigned long auth_tag_len = crypto_aead_authsize(tfm); + u8 iv_tab[16+AESNI_ALIGN]; + u8* iv = (u8 *) PTR_ALIGN((u8 *)iv_tab, AESNI_ALIGN); + struct scatter_walk src_sg_walk; + struct scatter_walk assoc_sg_walk; + struct scatter_walk dst_sg_walk; + unsigned int i; + + /* Assuming we are supporting rfc4106 64-bit extended */ + /* sequence numbers We need to have the AAD length equal */ + /* to 8 or 12 bytes */ + if (unlikely(req->assoclen != 8 && req->assoclen != 12)) + return -EINVAL; + /* IV below built */ + for (i = 0; i < 4; i++) + *(iv+i) = ctx->nonce[i]; + for (i = 0; i < 8; i++) + *(iv+4+i) = req->iv[i]; + *((__be32 *)(iv+12)) = counter; + + if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) { + one_entry_in_sg = 1; + scatterwalk_start(&src_sg_walk, req->src); + scatterwalk_start(&assoc_sg_walk, req->assoc); + src = scatterwalk_map(&src_sg_walk, 0); + assoc = scatterwalk_map(&assoc_sg_walk, 0); + dst = src; + if (unlikely(req->src != req->dst)) { + scatterwalk_start(&dst_sg_walk, req->dst); + dst = scatterwalk_map(&dst_sg_walk, 0); + } + + } else { + /* Allocate memory for src, dst, assoc */ + src = kmalloc(req->cryptlen + auth_tag_len + req->assoclen, + GFP_ATOMIC); + if (unlikely(!src)) + return -ENOMEM; + assoc = (src + req->cryptlen + auth_tag_len); + scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); + scatterwalk_map_and_copy(assoc, req->assoc, 0, + req->assoclen, 0); + dst = src; + } + + aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, + ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst + + ((unsigned long)req->cryptlen), auth_tag_len); + + /* The authTag (aka the Integrity Check Value) needs to be written + * back to the packet. */ + if (one_entry_in_sg) { + if (unlikely(req->src != req->dst)) { + scatterwalk_unmap(dst, 0); + scatterwalk_done(&dst_sg_walk, 0, 0); + } + scatterwalk_unmap(src, 0); + scatterwalk_unmap(assoc, 0); + scatterwalk_done(&src_sg_walk, 0, 0); + scatterwalk_done(&assoc_sg_walk, 0, 0); + } else { + scatterwalk_map_and_copy(dst, req->dst, 0, + req->cryptlen + auth_tag_len, 1); + kfree(src); + } + return 0; +} + +static int __driver_rfc4106_decrypt(struct aead_request *req) +{ + u8 one_entry_in_sg = 0; + u8 *src, *dst, *assoc; + unsigned long tempCipherLen = 0; + __be32 counter = cpu_to_be32(1); + int retval = 0; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + void *aes_ctx = &(ctx->aes_key_expanded); + unsigned long auth_tag_len = crypto_aead_authsize(tfm); + u8 iv_and_authTag[32+AESNI_ALIGN]; + u8 *iv = (u8 *) PTR_ALIGN((u8 *)iv_and_authTag, AESNI_ALIGN); + u8 *authTag = iv + 16; + struct scatter_walk src_sg_walk; + struct scatter_walk assoc_sg_walk; + struct scatter_walk dst_sg_walk; + unsigned int i; + + if (unlikely((req->cryptlen < auth_tag_len) || + (req->assoclen != 8 && req->assoclen != 12))) + return -EINVAL; + /* Assuming we are supporting rfc4106 64-bit extended */ + /* sequence numbers We need to have the AAD length */ + /* equal to 8 or 12 bytes */ + + tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len); + /* IV below built */ + for (i = 0; i < 4; i++) + *(iv+i) = ctx->nonce[i]; + for (i = 0; i < 8; i++) + *(iv+4+i) = req->iv[i]; + *((__be32 *)(iv+12)) = counter; + + if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) { + one_entry_in_sg = 1; + scatterwalk_start(&src_sg_walk, req->src); + scatterwalk_start(&assoc_sg_walk, req->assoc); + src = scatterwalk_map(&src_sg_walk, 0); + assoc = scatterwalk_map(&assoc_sg_walk, 0); + dst = src; + if (unlikely(req->src != req->dst)) { + scatterwalk_start(&dst_sg_walk, req->dst); + dst = scatterwalk_map(&dst_sg_walk, 0); + } + + } else { + /* Allocate memory for src, dst, assoc */ + src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC); + if (!src) + return -ENOMEM; + assoc = (src + req->cryptlen + auth_tag_len); + scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); + scatterwalk_map_and_copy(assoc, req->assoc, 0, + req->assoclen, 0); + dst = src; + } + + aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv, + ctx->hash_subkey, assoc, (unsigned long)req->assoclen, + authTag, auth_tag_len); + + /* Compare generated tag with passed in tag. */ + retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ? + -EBADMSG : 0; + + if (one_entry_in_sg) { + if (unlikely(req->src != req->dst)) { + scatterwalk_unmap(dst, 0); + scatterwalk_done(&dst_sg_walk, 0, 0); + } + scatterwalk_unmap(src, 0); + scatterwalk_unmap(assoc, 0); + scatterwalk_done(&src_sg_walk, 0, 0); + scatterwalk_done(&assoc_sg_walk, 0, 0); + } else { + scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1); + kfree(src); + } + return retval; +} + +static struct crypto_alg __rfc4106_alg = { + .cra_name = "__gcm-aes-aesni", + .cra_driver_name = "__driver-gcm-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_AEAD, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, + .cra_alignmask = 0, + .cra_type = &crypto_aead_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(__rfc4106_alg.cra_list), + .cra_u = { + .aead = { + .encrypt = __driver_rfc4106_encrypt, + .decrypt = __driver_rfc4106_decrypt, + }, + }, +}; +#endif + static int __init aesni_init(void) { int err; @@ -738,6 +1248,7 @@ static int __init aesni_init(void) printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); return -ENODEV; } + if ((err = crypto_register_alg(&aesni_alg))) goto aes_err; if ((err = crypto_register_alg(&__aesni_alg))) @@ -746,18 +1257,24 @@ static int __init aesni_init(void) goto blk_ecb_err; if ((err = crypto_register_alg(&blk_cbc_alg))) goto blk_cbc_err; - if ((err = crypto_register_alg(&blk_ctr_alg))) - goto blk_ctr_err; if ((err = crypto_register_alg(&ablk_ecb_alg))) goto ablk_ecb_err; if ((err = crypto_register_alg(&ablk_cbc_alg))) goto ablk_cbc_err; +#ifdef CONFIG_X86_64 + if ((err = crypto_register_alg(&blk_ctr_alg))) + goto blk_ctr_err; if ((err = crypto_register_alg(&ablk_ctr_alg))) goto ablk_ctr_err; + if ((err = crypto_register_alg(&__rfc4106_alg))) + goto __aead_gcm_err; + if ((err = crypto_register_alg(&rfc4106_alg))) + goto aead_gcm_err; #ifdef HAS_CTR if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) goto ablk_rfc3686_ctr_err; #endif +#endif #ifdef HAS_LRW if ((err = crypto_register_alg(&ablk_lrw_alg))) goto ablk_lrw_err; @@ -770,7 +1287,6 @@ static int __init aesni_init(void) if ((err = crypto_register_alg(&ablk_xts_alg))) goto ablk_xts_err; #endif - return err; #ifdef HAS_XTS @@ -784,18 +1300,24 @@ ablk_pcbc_err: crypto_unregister_alg(&ablk_lrw_alg); ablk_lrw_err: #endif +#ifdef CONFIG_X86_64 #ifdef HAS_CTR crypto_unregister_alg(&ablk_rfc3686_ctr_alg); ablk_rfc3686_ctr_err: #endif + crypto_unregister_alg(&rfc4106_alg); +aead_gcm_err: + crypto_unregister_alg(&__rfc4106_alg); +__aead_gcm_err: crypto_unregister_alg(&ablk_ctr_alg); ablk_ctr_err: + crypto_unregister_alg(&blk_ctr_alg); +blk_ctr_err: +#endif crypto_unregister_alg(&ablk_cbc_alg); ablk_cbc_err: crypto_unregister_alg(&ablk_ecb_alg); ablk_ecb_err: - crypto_unregister_alg(&blk_ctr_alg); -blk_ctr_err: crypto_unregister_alg(&blk_cbc_alg); blk_cbc_err: crypto_unregister_alg(&blk_ecb_alg); @@ -818,13 +1340,17 @@ static void __exit aesni_exit(void) #ifdef HAS_LRW crypto_unregister_alg(&ablk_lrw_alg); #endif +#ifdef CONFIG_X86_64 #ifdef HAS_CTR crypto_unregister_alg(&ablk_rfc3686_ctr_alg); #endif + crypto_unregister_alg(&rfc4106_alg); + crypto_unregister_alg(&__rfc4106_alg); crypto_unregister_alg(&ablk_ctr_alg); + crypto_unregister_alg(&blk_ctr_alg); +#endif crypto_unregister_alg(&ablk_cbc_alg); crypto_unregister_alg(&ablk_ecb_alg); - crypto_unregister_alg(&blk_ctr_alg); crypto_unregister_alg(&blk_cbc_alg); crypto_unregister_alg(&blk_ecb_alg); crypto_unregister_alg(&__aesni_alg); diff --git a/crypto/Kconfig b/crypto/Kconfig index e4bac29a32e7..4b7cb0e691cd 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -110,7 +110,6 @@ config CRYPTO_MANAGER_DISABLE_TESTS config CRYPTO_GF128MUL tristate "GF(2^128) multiplication functions (EXPERIMENTAL)" - depends on EXPERIMENTAL help Efficient table driven implementation of multiplications in the field GF(2^128). This is needed by some cypher modes. This @@ -539,8 +538,9 @@ config CRYPTO_AES_X86_64 config CRYPTO_AES_NI_INTEL tristate "AES cipher algorithms (AES-NI)" - depends on (X86 || UML_X86) && 64BIT - select CRYPTO_AES_X86_64 + depends on (X86 || UML_X86) + select CRYPTO_AES_X86_64 if 64BIT + select CRYPTO_AES_586 if !64BIT select CRYPTO_CRYPTD select CRYPTO_ALGAPI select CRYPTO_FPU @@ -563,9 +563,10 @@ config CRYPTO_AES_NI_INTEL See <http://csrc.nist.gov/encryption/aes/> for more information. - In addition to AES cipher algorithm support, the - acceleration for some popular block cipher mode is supported - too, including ECB, CBC, CTR, LRW, PCBC, XTS. + In addition to AES cipher algorithm support, the acceleration + for some popular block cipher mode is supported too, including + ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional + acceleration for CTR. config CRYPTO_ANUBIS tristate "Anubis cipher algorithm" @@ -841,6 +842,27 @@ config CRYPTO_ANSI_CPRNG ANSI X9.31 A.2.4. Note that this option must be enabled if CRYPTO_FIPS is selected +config CRYPTO_USER_API + tristate + +config CRYPTO_USER_API_HASH + tristate "User-space interface for hash algorithms" + depends on NET + select CRYPTO_HASH + select CRYPTO_USER_API + help + This option enables the user-spaces interface for hash + algorithms. + +config CRYPTO_USER_API_SKCIPHER + tristate "User-space interface for symmetric key cipher algorithms" + depends on NET + select CRYPTO_BLKCIPHER + select CRYPTO_USER_API + help + This option enables the user-spaces interface for symmetric + key cipher algorithms. + source "drivers/crypto/Kconfig" endif # if CRYPTO diff --git a/crypto/Makefile b/crypto/Makefile index 423b7de61f93..e9a399ca69db 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -3,32 +3,32 @@ # obj-$(CONFIG_CRYPTO) += crypto.o -crypto-objs := api.o cipher.o compress.o +crypto-y := api.o cipher.o compress.o obj-$(CONFIG_CRYPTO_WORKQUEUE) += crypto_wq.o obj-$(CONFIG_CRYPTO_FIPS) += fips.o crypto_algapi-$(CONFIG_PROC_FS) += proc.o -crypto_algapi-objs := algapi.o scatterwalk.o $(crypto_algapi-y) +crypto_algapi-y := algapi.o scatterwalk.o $(crypto_algapi-y) obj-$(CONFIG_CRYPTO_ALGAPI2) += crypto_algapi.o obj-$(CONFIG_CRYPTO_AEAD2) += aead.o -crypto_blkcipher-objs := ablkcipher.o -crypto_blkcipher-objs += blkcipher.o +crypto_blkcipher-y := ablkcipher.o +crypto_blkcipher-y += blkcipher.o obj-$(CONFIG_CRYPTO_BLKCIPHER2) += crypto_blkcipher.o obj-$(CONFIG_CRYPTO_BLKCIPHER2) += chainiv.o obj-$(CONFIG_CRYPTO_BLKCIPHER2) += eseqiv.o obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o -crypto_hash-objs += ahash.o -crypto_hash-objs += shash.o +crypto_hash-y += ahash.o +crypto_hash-y += shash.o obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o obj-$(CONFIG_CRYPTO_PCOMP2) += pcompress.o -cryptomgr-objs := algboss.o testmgr.o +cryptomgr-y := algboss.o testmgr.o obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o obj-$(CONFIG_CRYPTO_HMAC) += hmac.o @@ -85,6 +85,9 @@ obj-$(CONFIG_CRYPTO_RNG2) += krng.o obj-$(CONFIG_CRYPTO_ANSI_CPRNG) += ansi_cprng.o obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o +obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o +obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o +obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o # # generic algorithms and the async_tx api diff --git a/crypto/af_alg.c b/crypto/af_alg.c new file mode 100644 index 000000000000..940d70cb5c25 --- /dev/null +++ b/crypto/af_alg.c @@ -0,0 +1,483 @@ +/* + * af_alg: User-space algorithm interface + * + * This file provides the user-space API for algorithms. + * + * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <asm/atomic.h> +#include <crypto/if_alg.h> +#include <linux/crypto.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/rwsem.h> + +struct alg_type_list { + const struct af_alg_type *type; + struct list_head list; +}; + +static atomic_long_t alg_memory_allocated; + +static struct proto alg_proto = { + .name = "ALG", + .owner = THIS_MODULE, + .memory_allocated = &alg_memory_allocated, + .obj_size = sizeof(struct alg_sock), +}; + +static LIST_HEAD(alg_types); +static DECLARE_RWSEM(alg_types_sem); + +static const struct af_alg_type *alg_get_type(const char *name) +{ + const struct af_alg_type *type = ERR_PTR(-ENOENT); + struct alg_type_list *node; + + down_read(&alg_types_sem); + list_for_each_entry(node, &alg_types, list) { + if (strcmp(node->type->name, name)) + continue; + + if (try_module_get(node->type->owner)) + type = node->type; + break; + } + up_read(&alg_types_sem); + + return type; +} + +int af_alg_register_type(const struct af_alg_type *type) +{ + struct alg_type_list *node; + int err = -EEXIST; + + down_write(&alg_types_sem); + list_for_each_entry(node, &alg_types, list) { + if (!strcmp(node->type->name, type->name)) + goto unlock; + } + + node = kmalloc(sizeof(*node), GFP_KERNEL); + err = -ENOMEM; + if (!node) + goto unlock; + + type->ops->owner = THIS_MODULE; + node->type = type; + list_add(&node->list, &alg_types); + err = 0; + +unlock: + up_write(&alg_types_sem); + + return err; +} +EXPORT_SYMBOL_GPL(af_alg_register_type); + +int af_alg_unregister_type(const struct af_alg_type *type) +{ + struct alg_type_list *node; + int err = -ENOENT; + + down_write(&alg_types_sem); + list_for_each_entry(node, &alg_types, list) { + if (strcmp(node->type->name, type->name)) + continue; + + list_del(&node->list); + kfree(node); + err = 0; + break; + } + up_write(&alg_types_sem); + + return err; +} +EXPORT_SYMBOL_GPL(af_alg_unregister_type); + +static void alg_do_release(const struct af_alg_type *type, void *private) +{ + if (!type) + return; + + type->release(private); + module_put(type->owner); +} + +int af_alg_release(struct socket *sock) +{ + if (sock->sk) + sock_put(sock->sk); + return 0; +} +EXPORT_SYMBOL_GPL(af_alg_release); + +static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct sockaddr_alg *sa = (void *)uaddr; + const struct af_alg_type *type; + void *private; + + if (sock->state == SS_CONNECTED) + return -EINVAL; + + if (addr_len != sizeof(*sa)) + return -EINVAL; + + sa->salg_type[sizeof(sa->salg_type) - 1] = 0; + sa->salg_name[sizeof(sa->salg_name) - 1] = 0; + + type = alg_get_type(sa->salg_type); + if (IS_ERR(type) && PTR_ERR(type) == -ENOENT) { + request_module("algif-%s", sa->salg_type); + type = alg_get_type(sa->salg_type); + } + + if (IS_ERR(type)) + return PTR_ERR(type); + + private = type->bind(sa->salg_name, sa->salg_feat, sa->salg_mask); + if (IS_ERR(private)) { + module_put(type->owner); + return PTR_ERR(private); + } + + lock_sock(sk); + + swap(ask->type, type); + swap(ask->private, private); + + release_sock(sk); + + alg_do_release(type, private); + + return 0; +} + +static int alg_setkey(struct sock *sk, char __user *ukey, + unsigned int keylen) +{ + struct alg_sock *ask = alg_sk(sk); + const struct af_alg_type *type = ask->type; + u8 *key; + int err; + + key = sock_kmalloc(sk, keylen, GFP_KERNEL); + if (!key) + return -ENOMEM; + + err = -EFAULT; + if (copy_from_user(key, ukey, keylen)) + goto out; + + err = type->setkey(ask->private, key, keylen); + +out: + sock_kfree_s(sk, key, keylen); + + return err; +} + +static int alg_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + const struct af_alg_type *type; + int err = -ENOPROTOOPT; + + lock_sock(sk); + type = ask->type; + + if (level != SOL_ALG || !type) + goto unlock; + + switch (optname) { + case ALG_SET_KEY: + if (sock->state == SS_CONNECTED) + goto unlock; + if (!type->setkey) + goto unlock; + + err = alg_setkey(sk, optval, optlen); + } + +unlock: + release_sock(sk); + + return err; +} + +int af_alg_accept(struct sock *sk, struct socket *newsock) +{ + struct alg_sock *ask = alg_sk(sk); + const struct af_alg_type *type; + struct sock *sk2; + int err; + + lock_sock(sk); + type = ask->type; + + err = -EINVAL; + if (!type) + goto unlock; + + sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto); + err = -ENOMEM; + if (!sk2) + goto unlock; + + sock_init_data(newsock, sk2); + sock_graft(sk2, newsock); + + err = type->accept(ask->private, sk2); + if (err) { + sk_free(sk2); + goto unlock; + } + + sk2->sk_family = PF_ALG; + + sock_hold(sk); + alg_sk(sk2)->parent = sk; + alg_sk(sk2)->type = type; + + newsock->ops = type->ops; + newsock->state = SS_CONNECTED; + + err = 0; + +unlock: + release_sock(sk); + + return err; +} +EXPORT_SYMBOL_GPL(af_alg_accept); + +static int alg_accept(struct socket *sock, struct socket *newsock, int flags) +{ + return af_alg_accept(sock->sk, newsock); +} + +static const struct proto_ops alg_proto_ops = { + .family = PF_ALG, + .owner = THIS_MODULE, + + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .getname = sock_no_getname, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .getsockopt = sock_no_getsockopt, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, + .sendmsg = sock_no_sendmsg, + .recvmsg = sock_no_recvmsg, + .poll = sock_no_poll, + + .bind = alg_bind, + .release = af_alg_release, + .setsockopt = alg_setsockopt, + .accept = alg_accept, +}; + +static void alg_sock_destruct(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + + alg_do_release(ask->type, ask->private); +} + +static int alg_create(struct net *net, struct socket *sock, int protocol, + int kern) +{ + struct sock *sk; + int err; + + if (sock->type != SOCK_SEQPACKET) + return -ESOCKTNOSUPPORT; + if (protocol != 0) + return -EPROTONOSUPPORT; + + err = -ENOMEM; + sk = sk_alloc(net, PF_ALG, GFP_KERNEL, &alg_proto); + if (!sk) + goto out; + + sock->ops = &alg_proto_ops; + sock_init_data(sock, sk); + + sk->sk_family = PF_ALG; + sk->sk_destruct = alg_sock_destruct; + + return 0; +out: + return err; +} + +static const struct net_proto_family alg_family = { + .family = PF_ALG, + .create = alg_create, + .owner = THIS_MODULE, +}; + +int af_alg_make_sg(struct af_alg_sgl *sgl, void __user *addr, int len, + int write) +{ + unsigned long from = (unsigned long)addr; + unsigned long npages; + unsigned off; + int err; + int i; + + err = -EFAULT; + if (!access_ok(write ? VERIFY_READ : VERIFY_WRITE, addr, len)) + goto out; + + off = from & ~PAGE_MASK; + npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (npages > ALG_MAX_PAGES) + npages = ALG_MAX_PAGES; + + err = get_user_pages_fast(from, npages, write, sgl->pages); + if (err < 0) + goto out; + + npages = err; + err = -EINVAL; + if (WARN_ON(npages == 0)) + goto out; + + err = 0; + + sg_init_table(sgl->sg, npages); + + for (i = 0; i < npages; i++) { + int plen = min_t(int, len, PAGE_SIZE - off); + + sg_set_page(sgl->sg + i, sgl->pages[i], plen, off); + + off = 0; + len -= plen; + err += plen; + } + +out: + return err; +} +EXPORT_SYMBOL_GPL(af_alg_make_sg); + +void af_alg_free_sg(struct af_alg_sgl *sgl) +{ + int i; + + i = 0; + do { + put_page(sgl->pages[i]); + } while (!sg_is_last(sgl->sg + (i++))); +} +EXPORT_SYMBOL_GPL(af_alg_free_sg); + +int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con) +{ + struct cmsghdr *cmsg; + + for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + if (cmsg->cmsg_level != SOL_ALG) + continue; + + switch(cmsg->cmsg_type) { + case ALG_SET_IV: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(*con->iv))) + return -EINVAL; + con->iv = (void *)CMSG_DATA(cmsg); + if (cmsg->cmsg_len < CMSG_LEN(con->iv->ivlen + + sizeof(*con->iv))) + return -EINVAL; + break; + + case ALG_SET_OP: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(u32))) + return -EINVAL; + con->op = *(u32 *)CMSG_DATA(cmsg); + break; + + default: + return -EINVAL; + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(af_alg_cmsg_send); + +int af_alg_wait_for_completion(int err, struct af_alg_completion *completion) +{ + switch (err) { + case -EINPROGRESS: + case -EBUSY: + wait_for_completion(&completion->completion); + INIT_COMPLETION(completion->completion); + err = completion->err; + break; + }; + + return err; +} +EXPORT_SYMBOL_GPL(af_alg_wait_for_completion); + +void af_alg_complete(struct crypto_async_request *req, int err) +{ + struct af_alg_completion *completion = req->data; + + completion->err = err; + complete(&completion->completion); +} +EXPORT_SYMBOL_GPL(af_alg_complete); + +static int __init af_alg_init(void) +{ + int err = proto_register(&alg_proto, 0); + + if (err) + goto out; + + err = sock_register(&alg_family); + if (err != 0) + goto out_unregister_proto; + +out: + return err; + +out_unregister_proto: + proto_unregister(&alg_proto); + goto out; +} + +static void __exit af_alg_exit(void) +{ + sock_unregister(PF_ALG); + proto_unregister(&alg_proto); +} + +module_init(af_alg_init); +module_exit(af_alg_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(AF_ALG); diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c new file mode 100644 index 000000000000..62122a1a2f7a --- /dev/null +++ b/crypto/algif_hash.c @@ -0,0 +1,319 @@ +/* + * algif_hash: User-space interface for hash algorithms + * + * This file provides the user-space API for hash algorithms. + * + * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/hash.h> +#include <crypto/if_alg.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/net.h> +#include <net/sock.h> + +struct hash_ctx { + struct af_alg_sgl sgl; + + u8 *result; + + struct af_alg_completion completion; + + unsigned int len; + bool more; + + struct ahash_request req; +}; + +static int hash_sendmsg(struct kiocb *unused, struct socket *sock, + struct msghdr *msg, size_t ignored) +{ + int limit = ALG_MAX_PAGES * PAGE_SIZE; + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct hash_ctx *ctx = ask->private; + unsigned long iovlen; + struct iovec *iov; + long copied = 0; + int err; + + if (limit > sk->sk_sndbuf) + limit = sk->sk_sndbuf; + + lock_sock(sk); + if (!ctx->more) { + err = crypto_ahash_init(&ctx->req); + if (err) + goto unlock; + } + + ctx->more = 0; + + for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0; + iovlen--, iov++) { + unsigned long seglen = iov->iov_len; + char __user *from = iov->iov_base; + + while (seglen) { + int len = min_t(unsigned long, seglen, limit); + int newlen; + + newlen = af_alg_make_sg(&ctx->sgl, from, len, 0); + if (newlen < 0) + goto unlock; + + ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, NULL, + newlen); + + err = af_alg_wait_for_completion( + crypto_ahash_update(&ctx->req), + &ctx->completion); + + af_alg_free_sg(&ctx->sgl); + + if (err) + goto unlock; + + seglen -= newlen; + from += newlen; + copied += newlen; + } + } + + err = 0; + + ctx->more = msg->msg_flags & MSG_MORE; + if (!ctx->more) { + ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0); + err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req), + &ctx->completion); + } + +unlock: + release_sock(sk); + + return err ?: copied; +} + +static ssize_t hash_sendpage(struct socket *sock, struct page *page, + int offset, size_t size, int flags) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct hash_ctx *ctx = ask->private; + int err; + + lock_sock(sk); + sg_init_table(ctx->sgl.sg, 1); + sg_set_page(ctx->sgl.sg, page, size, offset); + + ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, ctx->result, size); + + if (!(flags & MSG_MORE)) { + if (ctx->more) + err = crypto_ahash_finup(&ctx->req); + else + err = crypto_ahash_digest(&ctx->req); + } else { + if (!ctx->more) { + err = crypto_ahash_init(&ctx->req); + if (err) + goto unlock; + } + + err = crypto_ahash_update(&ctx->req); + } + + err = af_alg_wait_for_completion(err, &ctx->completion); + if (err) + goto unlock; + + ctx->more = flags & MSG_MORE; + +unlock: + release_sock(sk); + + return err ?: size; +} + +static int hash_recvmsg(struct kiocb *unused, struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct hash_ctx *ctx = ask->private; + unsigned ds = crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req)); + int err; + + if (len > ds) + len = ds; + else if (len < ds) + msg->msg_flags |= MSG_TRUNC; + + lock_sock(sk); + if (ctx->more) { + ctx->more = 0; + ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0); + err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req), + &ctx->completion); + if (err) + goto unlock; + } + + err = memcpy_toiovec(msg->msg_iov, ctx->result, len); + +unlock: + release_sock(sk); + + return err ?: len; +} + +static int hash_accept(struct socket *sock, struct socket *newsock, int flags) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct hash_ctx *ctx = ask->private; + struct ahash_request *req = &ctx->req; + char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req))]; + struct sock *sk2; + struct alg_sock *ask2; + struct hash_ctx *ctx2; + int err; + + err = crypto_ahash_export(req, state); + if (err) + return err; + + err = af_alg_accept(ask->parent, newsock); + if (err) + return err; + + sk2 = newsock->sk; + ask2 = alg_sk(sk2); + ctx2 = ask2->private; + ctx2->more = 1; + + err = crypto_ahash_import(&ctx2->req, state); + if (err) { + sock_orphan(sk2); + sock_put(sk2); + } + + return err; +} + +static struct proto_ops algif_hash_ops = { + .family = PF_ALG, + + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .getname = sock_no_getname, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .getsockopt = sock_no_getsockopt, + .mmap = sock_no_mmap, + .bind = sock_no_bind, + .setsockopt = sock_no_setsockopt, + .poll = sock_no_poll, + + .release = af_alg_release, + .sendmsg = hash_sendmsg, + .sendpage = hash_sendpage, + .recvmsg = hash_recvmsg, + .accept = hash_accept, +}; + +static void *hash_bind(const char *name, u32 type, u32 mask) +{ + return crypto_alloc_ahash(name, type, mask); +} + +static void hash_release(void *private) +{ + crypto_free_ahash(private); +} + +static int hash_setkey(void *private, const u8 *key, unsigned int keylen) +{ + return crypto_ahash_setkey(private, key, keylen); +} + +static void hash_sock_destruct(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct hash_ctx *ctx = ask->private; + + sock_kfree_s(sk, ctx->result, + crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req))); + sock_kfree_s(sk, ctx, ctx->len); + af_alg_release_parent(sk); +} + +static int hash_accept_parent(void *private, struct sock *sk) +{ + struct hash_ctx *ctx; + struct alg_sock *ask = alg_sk(sk); + unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(private); + unsigned ds = crypto_ahash_digestsize(private); + + ctx = sock_kmalloc(sk, len, GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->result = sock_kmalloc(sk, ds, GFP_KERNEL); + if (!ctx->result) { + sock_kfree_s(sk, ctx, len); + return -ENOMEM; + } + + memset(ctx->result, 0, ds); + + ctx->len = len; + ctx->more = 0; + af_alg_init_completion(&ctx->completion); + + ask->private = ctx; + + ahash_request_set_tfm(&ctx->req, private); + ahash_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG, + af_alg_complete, &ctx->completion); + + sk->sk_destruct = hash_sock_destruct; + + return 0; +} + +static const struct af_alg_type algif_type_hash = { + .bind = hash_bind, + .release = hash_release, + .setkey = hash_setkey, + .accept = hash_accept_parent, + .ops = &algif_hash_ops, + .name = "hash", + .owner = THIS_MODULE +}; + +static int __init algif_hash_init(void) +{ + return af_alg_register_type(&algif_type_hash); +} + +static void __exit algif_hash_exit(void) +{ + int err = af_alg_unregister_type(&algif_type_hash); + BUG_ON(err); +} + +module_init(algif_hash_init); +module_exit(algif_hash_exit); +MODULE_LICENSE("GPL"); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c new file mode 100644 index 000000000000..6a6dfc062d2a --- /dev/null +++ b/crypto/algif_skcipher.c @@ -0,0 +1,632 @@ +/* + * algif_skcipher: User-space interface for skcipher algorithms + * + * This file provides the user-space API for symmetric key ciphers. + * + * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/scatterwalk.h> +#include <crypto/skcipher.h> +#include <crypto/if_alg.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/net.h> +#include <net/sock.h> + +struct skcipher_sg_list { + struct list_head list; + + int cur; + + struct scatterlist sg[0]; +}; + +struct skcipher_ctx { + struct list_head tsgl; + struct af_alg_sgl rsgl; + + void *iv; + + struct af_alg_completion completion; + + unsigned used; + + unsigned int len; + bool more; + bool merge; + bool enc; + + struct ablkcipher_request req; +}; + +#define MAX_SGL_ENTS ((PAGE_SIZE - sizeof(struct skcipher_sg_list)) / \ + sizeof(struct scatterlist) - 1) + +static inline int skcipher_sndbuf(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + + return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) - + ctx->used, 0); +} + +static inline bool skcipher_writable(struct sock *sk) +{ + return PAGE_SIZE <= skcipher_sndbuf(sk); +} + +static int skcipher_alloc_sgl(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + struct skcipher_sg_list *sgl; + struct scatterlist *sg = NULL; + + sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list); + if (!list_empty(&ctx->tsgl)) + sg = sgl->sg; + + if (!sg || sgl->cur >= MAX_SGL_ENTS) { + sgl = sock_kmalloc(sk, sizeof(*sgl) + + sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1), + GFP_KERNEL); + if (!sgl) + return -ENOMEM; + + sg_init_table(sgl->sg, MAX_SGL_ENTS + 1); + sgl->cur = 0; + + if (sg) + scatterwalk_sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg); + + list_add_tail(&sgl->list, &ctx->tsgl); + } + + return 0; +} + +static void skcipher_pull_sgl(struct sock *sk, int used) +{ + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + struct skcipher_sg_list *sgl; + struct scatterlist *sg; + int i; + + while (!list_empty(&ctx->tsgl)) { + sgl = list_first_entry(&ctx->tsgl, struct skcipher_sg_list, + list); + sg = sgl->sg; + + for (i = 0; i < sgl->cur; i++) { + int plen = min_t(int, used, sg[i].length); + + if (!sg_page(sg + i)) + continue; + + sg[i].length -= plen; + sg[i].offset += plen; + + used -= plen; + ctx->used -= plen; + + if (sg[i].length) + return; + + put_page(sg_page(sg + i)); + sg_assign_page(sg + i, NULL); + } + + list_del(&sgl->list); + sock_kfree_s(sk, sgl, + sizeof(*sgl) + sizeof(sgl->sg[0]) * + (MAX_SGL_ENTS + 1)); + } + + if (!ctx->used) + ctx->merge = 0; +} + +static void skcipher_free_sgl(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + + skcipher_pull_sgl(sk, ctx->used); +} + +static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags) +{ + long timeout; + DEFINE_WAIT(wait); + int err = -ERESTARTSYS; + + if (flags & MSG_DONTWAIT) + return -EAGAIN; + + set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + + for (;;) { + if (signal_pending(current)) + break; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + timeout = MAX_SCHEDULE_TIMEOUT; + if (sk_wait_event(sk, &timeout, skcipher_writable(sk))) { + err = 0; + break; + } + } + finish_wait(sk_sleep(sk), &wait); + + return err; +} + +static void skcipher_wmem_wakeup(struct sock *sk) +{ + struct socket_wq *wq; + + if (!skcipher_writable(sk)) + return; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLIN | + POLLRDNORM | + POLLRDBAND); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + rcu_read_unlock(); +} + +static int skcipher_wait_for_data(struct sock *sk, unsigned flags) +{ + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + long timeout; + DEFINE_WAIT(wait); + int err = -ERESTARTSYS; + + if (flags & MSG_DONTWAIT) { + return -EAGAIN; + } + + set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + + for (;;) { + if (signal_pending(current)) + break; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + timeout = MAX_SCHEDULE_TIMEOUT; + if (sk_wait_event(sk, &timeout, ctx->used)) { + err = 0; + break; + } + } + finish_wait(sk_sleep(sk), &wait); + + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + + return err; +} + +static void skcipher_data_wakeup(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + struct socket_wq *wq; + + if (!ctx->used) + return; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | + POLLRDNORM | + POLLRDBAND); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + rcu_read_unlock(); +} + +static int skcipher_sendmsg(struct kiocb *unused, struct socket *sock, + struct msghdr *msg, size_t size) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx->req); + unsigned ivsize = crypto_ablkcipher_ivsize(tfm); + struct skcipher_sg_list *sgl; + struct af_alg_control con = {}; + long copied = 0; + bool enc = 0; + int err; + int i; + + if (msg->msg_controllen) { + err = af_alg_cmsg_send(msg, &con); + if (err) + return err; + + switch (con.op) { + case ALG_OP_ENCRYPT: + enc = 1; + break; + case ALG_OP_DECRYPT: + enc = 0; + break; + default: + return -EINVAL; + } + + if (con.iv && con.iv->ivlen != ivsize) + return -EINVAL; + } + + err = -EINVAL; + + lock_sock(sk); + if (!ctx->more && ctx->used) + goto unlock; + + if (!ctx->used) { + ctx->enc = enc; + if (con.iv) + memcpy(ctx->iv, con.iv->iv, ivsize); + } + + while (size) { + struct scatterlist *sg; + unsigned long len = size; + int plen; + + if (ctx->merge) { + sgl = list_entry(ctx->tsgl.prev, + struct skcipher_sg_list, list); + sg = sgl->sg + sgl->cur - 1; + len = min_t(unsigned long, len, + PAGE_SIZE - sg->offset - sg->length); + + err = memcpy_fromiovec(page_address(sg_page(sg)) + + sg->offset + sg->length, + msg->msg_iov, len); + if (err) + goto unlock; + + sg->length += len; + ctx->merge = (sg->offset + sg->length) & + (PAGE_SIZE - 1); + + ctx->used += len; + copied += len; + size -= len; + continue; + } + + if (!skcipher_writable(sk)) { + err = skcipher_wait_for_wmem(sk, msg->msg_flags); + if (err) + goto unlock; + } + + len = min_t(unsigned long, len, skcipher_sndbuf(sk)); + + err = skcipher_alloc_sgl(sk); + if (err) + goto unlock; + + sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list); + sg = sgl->sg; + do { + i = sgl->cur; + plen = min_t(int, len, PAGE_SIZE); + + sg_assign_page(sg + i, alloc_page(GFP_KERNEL)); + err = -ENOMEM; + if (!sg_page(sg + i)) + goto unlock; + + err = memcpy_fromiovec(page_address(sg_page(sg + i)), + msg->msg_iov, plen); + if (err) { + __free_page(sg_page(sg + i)); + sg_assign_page(sg + i, NULL); + goto unlock; + } + + sg[i].length = plen; + len -= plen; + ctx->used += plen; + copied += plen; + size -= plen; + sgl->cur++; + } while (len && sgl->cur < MAX_SGL_ENTS); + + ctx->merge = plen & (PAGE_SIZE - 1); + } + + err = 0; + + ctx->more = msg->msg_flags & MSG_MORE; + if (!ctx->more && !list_empty(&ctx->tsgl)) + sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list); + +unlock: + skcipher_data_wakeup(sk); + release_sock(sk); + + return copied ?: err; +} + +static ssize_t skcipher_sendpage(struct socket *sock, struct page *page, + int offset, size_t size, int flags) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + struct skcipher_sg_list *sgl; + int err = -EINVAL; + + lock_sock(sk); + if (!ctx->more && ctx->used) + goto unlock; + + if (!size) + goto done; + + if (!skcipher_writable(sk)) { + err = skcipher_wait_for_wmem(sk, flags); + if (err) + goto unlock; + } + + err = skcipher_alloc_sgl(sk); + if (err) + goto unlock; + + ctx->merge = 0; + sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list); + + get_page(page); + sg_set_page(sgl->sg + sgl->cur, page, size, offset); + sgl->cur++; + ctx->used += size; + +done: + ctx->more = flags & MSG_MORE; + if (!ctx->more && !list_empty(&ctx->tsgl)) + sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list); + +unlock: + skcipher_data_wakeup(sk); + release_sock(sk); + + return err ?: size; +} + +static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock, + struct msghdr *msg, size_t ignored, int flags) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + unsigned bs = crypto_ablkcipher_blocksize(crypto_ablkcipher_reqtfm( + &ctx->req)); + struct skcipher_sg_list *sgl; + struct scatterlist *sg; + unsigned long iovlen; + struct iovec *iov; + int err = -EAGAIN; + int used; + long copied = 0; + + lock_sock(sk); + for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0; + iovlen--, iov++) { + unsigned long seglen = iov->iov_len; + char __user *from = iov->iov_base; + + while (seglen) { + sgl = list_first_entry(&ctx->tsgl, + struct skcipher_sg_list, list); + sg = sgl->sg; + + while (!sg->length) + sg++; + + used = ctx->used; + if (!used) { + err = skcipher_wait_for_data(sk, flags); + if (err) + goto unlock; + } + + used = min_t(unsigned long, used, seglen); + + used = af_alg_make_sg(&ctx->rsgl, from, used, 1); + err = used; + if (err < 0) + goto unlock; + + if (ctx->more || used < ctx->used) + used -= used % bs; + + err = -EINVAL; + if (!used) + goto free; + + ablkcipher_request_set_crypt(&ctx->req, sg, + ctx->rsgl.sg, used, + ctx->iv); + + err = af_alg_wait_for_completion( + ctx->enc ? + crypto_ablkcipher_encrypt(&ctx->req) : + crypto_ablkcipher_decrypt(&ctx->req), + &ctx->completion); + +free: + af_alg_free_sg(&ctx->rsgl); + + if (err) + goto unlock; + + copied += used; + from += used; + seglen -= used; + skcipher_pull_sgl(sk, used); + } + } + + err = 0; + +unlock: + skcipher_wmem_wakeup(sk); + release_sock(sk); + + return copied ?: err; +} + + +static unsigned int skcipher_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + unsigned int mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; + + if (ctx->used) + mask |= POLLIN | POLLRDNORM; + + if (skcipher_writable(sk)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + return mask; +} + +static struct proto_ops algif_skcipher_ops = { + .family = PF_ALG, + + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .getname = sock_no_getname, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .getsockopt = sock_no_getsockopt, + .mmap = sock_no_mmap, + .bind = sock_no_bind, + .accept = sock_no_accept, + .setsockopt = sock_no_setsockopt, + + .release = af_alg_release, + .sendmsg = skcipher_sendmsg, + .sendpage = skcipher_sendpage, + .recvmsg = skcipher_recvmsg, + .poll = skcipher_poll, +}; + +static void *skcipher_bind(const char *name, u32 type, u32 mask) +{ + return crypto_alloc_ablkcipher(name, type, mask); +} + +static void skcipher_release(void *private) +{ + crypto_free_ablkcipher(private); +} + +static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen) +{ + return crypto_ablkcipher_setkey(private, key, keylen); +} + +static void skcipher_sock_destruct(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx->req); + + skcipher_free_sgl(sk); + sock_kfree_s(sk, ctx->iv, crypto_ablkcipher_ivsize(tfm)); + sock_kfree_s(sk, ctx, ctx->len); + af_alg_release_parent(sk); +} + +static int skcipher_accept_parent(void *private, struct sock *sk) +{ + struct skcipher_ctx *ctx; + struct alg_sock *ask = alg_sk(sk); + unsigned int len = sizeof(*ctx) + crypto_ablkcipher_reqsize(private); + + ctx = sock_kmalloc(sk, len, GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->iv = sock_kmalloc(sk, crypto_ablkcipher_ivsize(private), + GFP_KERNEL); + if (!ctx->iv) { + sock_kfree_s(sk, ctx, len); + return -ENOMEM; + } + + memset(ctx->iv, 0, crypto_ablkcipher_ivsize(private)); + + INIT_LIST_HEAD(&ctx->tsgl); + ctx->len = len; + ctx->used = 0; + ctx->more = 0; + ctx->merge = 0; + ctx->enc = 0; + af_alg_init_completion(&ctx->completion); + + ask->private = ctx; + + ablkcipher_request_set_tfm(&ctx->req, private); + ablkcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG, + af_alg_complete, &ctx->completion); + + sk->sk_destruct = skcipher_sock_destruct; + + return 0; +} + +static const struct af_alg_type algif_type_skcipher = { + .bind = skcipher_bind, + .release = skcipher_release, + .setkey = skcipher_setkey, + .accept = skcipher_accept_parent, + .ops = &algif_skcipher_ops, + .name = "skcipher", + .owner = THIS_MODULE +}; + +static int __init algif_skcipher_init(void) +{ + return af_alg_register_type(&algif_type_skcipher); +} + +static void __exit algif_skcipher_exit(void) +{ + int err = af_alg_unregister_type(&algif_type_skcipher); + BUG_ON(err); +} + +module_init(algif_skcipher_init); +module_exit(algif_skcipher_exit); +MODULE_LICENSE("GPL"); diff --git a/crypto/authenc.c b/crypto/authenc.c index a5a22cfcd07b..5ef7ba6b6a76 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -107,20 +107,6 @@ badkey: goto out; } -static void authenc_chain(struct scatterlist *head, struct scatterlist *sg, - int chain) -{ - if (chain) { - head->length += sg->length; - sg = scatterwalk_sg_next(sg); - } - - if (sg) - scatterwalk_sg_chain(head, 2, sg); - else - sg_mark_end(head); -} - static void authenc_geniv_ahash_update_done(struct crypto_async_request *areq, int err) { @@ -345,7 +331,7 @@ static int crypto_authenc_genicv(struct aead_request *req, u8 *iv, if (ivsize) { sg_init_table(cipher, 2); sg_set_buf(cipher, iv, ivsize); - authenc_chain(cipher, dst, vdst == iv + ivsize); + scatterwalk_crypto_chain(cipher, dst, vdst == iv + ivsize, 2); dst = cipher; cryptlen += ivsize; } @@ -354,7 +340,7 @@ static int crypto_authenc_genicv(struct aead_request *req, u8 *iv, authenc_ahash_fn = crypto_authenc_ahash; sg_init_table(asg, 2); sg_set_page(asg, sg_page(assoc), assoc->length, assoc->offset); - authenc_chain(asg, dst, 0); + scatterwalk_crypto_chain(asg, dst, 0, 2); dst = asg; cryptlen += req->assoclen; } @@ -499,7 +485,7 @@ static int crypto_authenc_iverify(struct aead_request *req, u8 *iv, if (ivsize) { sg_init_table(cipher, 2); sg_set_buf(cipher, iv, ivsize); - authenc_chain(cipher, src, vsrc == iv + ivsize); + scatterwalk_crypto_chain(cipher, src, vsrc == iv + ivsize, 2); src = cipher; cryptlen += ivsize; } @@ -508,7 +494,7 @@ static int crypto_authenc_iverify(struct aead_request *req, u8 *iv, authenc_ahash_fn = crypto_authenc_ahash; sg_init_table(asg, 2); sg_set_page(asg, sg_page(assoc), assoc->length, assoc->offset); - authenc_chain(asg, src, 0); + scatterwalk_crypto_chain(asg, src, 0, 2); src = asg; cryptlen += req->assoclen; } diff --git a/crypto/cast5.c b/crypto/cast5.c index a1d2294b50ad..4a230ddec877 100644 --- a/crypto/cast5.c +++ b/crypto/cast5.c @@ -604,36 +604,23 @@ static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf) * Rounds 3, 6, 9, 12, and 15 use f function Type 3. */ + t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); + t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); + t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); + t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); + t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); + t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); + t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); + t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); + t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); + t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); + t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); + t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); if (!(c->rr)) { - t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); - t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); - t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); - t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); - t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); - t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); - t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); - t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); - t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); - t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); - t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); - t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]); - } else { - t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); - t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); - t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); - t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); - t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); - t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); - t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); - t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); - t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); - t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); - t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); - t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); } /* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and @@ -663,32 +650,19 @@ static void cast5_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf) t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); - t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); - t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); - t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); - t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); - t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); - t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); - t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); - t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); - t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); - t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); - t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); - t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); - } else { - t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); - t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); - t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); - t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); - t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); - t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); - t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); - t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); - t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); - t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); - t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); - t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); } + t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); + t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); + t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); + t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); + t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); + t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); + t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); + t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); + t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); + t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); + t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); + t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); dst[0] = cpu_to_be32(r); dst[1] = cpu_to_be32(l); diff --git a/crypto/crypto_wq.c b/crypto/crypto_wq.c index fdcf6248f152..b980ee1af459 100644 --- a/crypto/crypto_wq.c +++ b/crypto/crypto_wq.c @@ -20,7 +20,8 @@ EXPORT_SYMBOL_GPL(kcrypto_wq); static int __init crypto_wq_init(void) { - kcrypto_wq = create_workqueue("crypto"); + kcrypto_wq = alloc_workqueue("crypto", + WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE, 1); if (unlikely(!kcrypto_wq)) return -ENOMEM; return 0; diff --git a/crypto/deflate.c b/crypto/deflate.c index 463dc859aa05..cbc7a33a9600 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -48,12 +48,11 @@ static int deflate_comp_init(struct deflate_ctx *ctx) int ret = 0; struct z_stream_s *stream = &ctx->comp_stream; - stream->workspace = vmalloc(zlib_deflate_workspacesize()); + stream->workspace = vzalloc(zlib_deflate_workspacesize()); if (!stream->workspace) { ret = -ENOMEM; goto out; } - memset(stream->workspace, 0, zlib_deflate_workspacesize()); ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED, -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL, Z_DEFAULT_STRATEGY); diff --git a/crypto/eseqiv.c b/crypto/eseqiv.c index 3ca3b669d5d5..42ce9f570aec 100644 --- a/crypto/eseqiv.c +++ b/crypto/eseqiv.c @@ -62,20 +62,6 @@ out: skcipher_givcrypt_complete(req, err); } -static void eseqiv_chain(struct scatterlist *head, struct scatterlist *sg, - int chain) -{ - if (chain) { - head->length += sg->length; - sg = scatterwalk_sg_next(sg); - } - - if (sg) - scatterwalk_sg_chain(head, 2, sg); - else - sg_mark_end(head); -} - static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req) { struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); @@ -124,13 +110,13 @@ static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req) sg_init_table(reqctx->src, 2); sg_set_buf(reqctx->src, giv, ivsize); - eseqiv_chain(reqctx->src, osrc, vsrc == giv + ivsize); + scatterwalk_crypto_chain(reqctx->src, osrc, vsrc == giv + ivsize, 2); dst = reqctx->src; if (osrc != odst) { sg_init_table(reqctx->dst, 2); sg_set_buf(reqctx->dst, giv, ivsize); - eseqiv_chain(reqctx->dst, odst, vdst == giv + ivsize); + scatterwalk_crypto_chain(reqctx->dst, odst, vdst == giv + ivsize, 2); dst = reqctx->dst; } diff --git a/crypto/gcm.c b/crypto/gcm.c index 2f5fbba6576c..1a252639ef91 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -1102,21 +1102,6 @@ static int crypto_rfc4543_setauthsize(struct crypto_aead *parent, return crypto_aead_setauthsize(ctx->child, authsize); } -/* this is the same as crypto_authenc_chain */ -static void crypto_rfc4543_chain(struct scatterlist *head, - struct scatterlist *sg, int chain) -{ - if (chain) { - head->length += sg->length; - sg = scatterwalk_sg_next(sg); - } - - if (sg) - scatterwalk_sg_chain(head, 2, sg); - else - sg_mark_end(head); -} - static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req, int enc) { @@ -1154,13 +1139,13 @@ static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req, sg_init_table(payload, 2); sg_set_buf(payload, req->iv, 8); - crypto_rfc4543_chain(payload, dst, vdst == req->iv + 8); + scatterwalk_crypto_chain(payload, dst, vdst == req->iv + 8, 2); assoclen += 8 + req->cryptlen - (enc ? 0 : authsize); sg_init_table(assoc, 2); sg_set_page(assoc, sg_page(req->assoc), req->assoc->length, req->assoc->offset); - crypto_rfc4543_chain(assoc, payload, 0); + scatterwalk_crypto_chain(assoc, payload, 0, 2); aead_request_set_tfm(subreq, ctx->child); aead_request_set_callback(subreq, req->base.flags, req->base.complete, diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index 75586f1f86e7..29a89dad68b6 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -455,7 +455,8 @@ static int pcrypt_init_padata(struct padata_pcrypt *pcrypt, get_online_cpus(); - pcrypt->wq = create_workqueue(name); + pcrypt->wq = alloc_workqueue(name, + WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE, 1); if (!pcrypt->wq) goto err; diff --git a/crypto/rmd128.c b/crypto/rmd128.c index 1ceb6735aa53..8a0f68b7f257 100644 --- a/crypto/rmd128.c +++ b/crypto/rmd128.c @@ -5,7 +5,7 @@ * * Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC * - * Copyright (c) 2008 Adrian-Ken Rueegsegger <rueegsegger (at) swiss-it.ch> + * Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -325,4 +325,5 @@ module_init(rmd128_mod_init); module_exit(rmd128_mod_fini); MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>"); MODULE_DESCRIPTION("RIPEMD-128 Message Digest"); diff --git a/crypto/rmd160.c b/crypto/rmd160.c index 472261fc913f..525d7bb752cf 100644 --- a/crypto/rmd160.c +++ b/crypto/rmd160.c @@ -5,7 +5,7 @@ * * Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC * - * Copyright (c) 2008 Adrian-Ken Rueegsegger <rueegsegger (at) swiss-it.ch> + * Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -369,4 +369,5 @@ module_init(rmd160_mod_init); module_exit(rmd160_mod_fini); MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>"); MODULE_DESCRIPTION("RIPEMD-160 Message Digest"); diff --git a/crypto/rmd256.c b/crypto/rmd256.c index 72eafa8d2e7b..69293d9b56e0 100644 --- a/crypto/rmd256.c +++ b/crypto/rmd256.c @@ -5,7 +5,7 @@ * * Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC * - * Copyright (c) 2008 Adrian-Ken Rueegsegger <rueegsegger (at) swiss-it.ch> + * Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -344,4 +344,5 @@ module_init(rmd256_mod_init); module_exit(rmd256_mod_fini); MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>"); MODULE_DESCRIPTION("RIPEMD-256 Message Digest"); diff --git a/crypto/rmd320.c b/crypto/rmd320.c index 86becaba2f05..09f97dfdfbba 100644 --- a/crypto/rmd320.c +++ b/crypto/rmd320.c @@ -5,7 +5,7 @@ * * Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC * - * Copyright (c) 2008 Adrian-Ken Rueegsegger <rueegsegger (at) swiss-it.ch> + * Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -393,4 +393,5 @@ module_init(rmd320_mod_init); module_exit(rmd320_mod_fini); MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>"); MODULE_DESCRIPTION("RIPEMD-320 Message Digest"); diff --git a/crypto/shash.c b/crypto/shash.c index 22fd9433141f..76f74b963151 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -310,7 +310,13 @@ static int shash_async_export(struct ahash_request *req, void *out) static int shash_async_import(struct ahash_request *req, const void *in) { - return crypto_shash_import(ahash_request_ctx(req), in); + struct crypto_shash **ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct shash_desc *desc = ahash_request_ctx(req); + + desc->tfm = *ctx; + desc->flags = req->base.flags; + + return crypto_shash_import(desc, in); } static void crypto_exit_shash_ops_async(struct crypto_tfm *tfm) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 3ca68f9fc14d..9aac5e58be94 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -8,6 +8,13 @@ * Copyright (c) 2002 Jean-Francois Dive <jef@linuxbe.org> * Copyright (c) 2007 Nokia Siemens Networks * + * Updated RFC4106 AES-GCM testing. + * Authors: Aidan O'Mahony (aidan.o.mahony@intel.com) + * Adrian Hoban <adrian.hoban@intel.com> + * Gabriele Paoloni <gabriele.paoloni@intel.com> + * Tadeusz Struk (tadeusz.struk@intel.com) + * Copyright (c) 2010, Intel Corporation. + * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) @@ -980,6 +987,10 @@ static int do_test(int m) ret += tcrypt_test("ansi_cprng"); break; + case 151: + ret += tcrypt_test("rfc4106(gcm(aes))"); + break; + case 200: test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0, speed_template_16_24_32); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index fa8c8f78c8d4..27ea9fe9476f 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -6,6 +6,13 @@ * Copyright (c) 2007 Nokia Siemens Networks * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au> * + * Updated RFC4106 AES-GCM testing. + * Authors: Aidan O'Mahony (aidan.o.mahony@intel.com) + * Adrian Hoban <adrian.hoban@intel.com> + * Gabriele Paoloni <gabriele.paoloni@intel.com> + * Tadeusz Struk (tadeusz.struk@intel.com) + * Copyright (c) 2010, Intel Corporation. + * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) @@ -2242,6 +2249,23 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "rfc4106(gcm(aes))", + .test = alg_test_aead, + .suite = { + .aead = { + .enc = { + .vecs = aes_gcm_rfc4106_enc_tv_template, + .count = AES_GCM_4106_ENC_TEST_VECTORS + }, + .dec = { + .vecs = aes_gcm_rfc4106_dec_tv_template, + .count = AES_GCM_4106_DEC_TEST_VECTORS + } + } + } + }, { + + .alg = "rfc4309(ccm(aes))", .test = alg_test_aead, .fips_allowed = 1, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 74e35377fd30..834af7f2adee 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -6,6 +6,15 @@ * Copyright (c) 2007 Nokia Siemens Networks * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au> * + * Updated RFC4106 AES-GCM testing. Some test vectors were taken from + * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/ + * gcm/gcm-test-vectors.tar.gz + * Authors: Aidan O'Mahony (aidan.o.mahony@intel.com) + * Adrian Hoban <adrian.hoban@intel.com> + * Gabriele Paoloni <gabriele.paoloni@intel.com> + * Tadeusz Struk (tadeusz.struk@intel.com) + * Copyright (c) 2010, Intel Corporation. + * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) @@ -2947,6 +2956,8 @@ static struct cipher_testvec cast6_dec_tv_template[] = { #define AES_CTR_3686_DEC_TEST_VECTORS 6 #define AES_GCM_ENC_TEST_VECTORS 9 #define AES_GCM_DEC_TEST_VECTORS 8 +#define AES_GCM_4106_ENC_TEST_VECTORS 7 +#define AES_GCM_4106_DEC_TEST_VECTORS 7 #define AES_CCM_ENC_TEST_VECTORS 7 #define AES_CCM_DEC_TEST_VECTORS 7 #define AES_CCM_4309_ENC_TEST_VECTORS 7 @@ -5829,6 +5840,356 @@ static struct aead_testvec aes_gcm_dec_tv_template[] = { } }; +static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = { + { /* Generated using Crypto++ */ + .key = zeroed_string, + .klen = 20, + .iv = zeroed_string, + .input = zeroed_string, + .ilen = 16, + .assoc = zeroed_string, + .alen = 8, + .result = "\x03\x88\xDA\xCE\x60\xB6\xA3\x92" + "\xF3\x28\xC2\xB9\x71\xB2\xFE\x78" + "\x97\xFE\x4C\x23\x37\x42\x01\xE0" + "\x81\x9F\x8D\xC5\xD7\x41\xA0\x1B", + .rlen = 32, + },{ + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x01" + "\x00\x00\x00\x00", + .input = zeroed_string, + .ilen = 16, + .assoc = zeroed_string, + .alen = 8, + .result = "\xC0\x0D\x8B\x42\x0F\x8F\x34\x18" + "\x88\xB1\xC5\xBC\xC5\xB6\xD6\x28" + "\x6A\x9D\xDF\x11\x5E\xFE\x5E\x9D" + "\x2F\x70\x44\x92\xF7\xF2\xE3\xEF", + .rlen = 32, + + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = zeroed_string, + .input = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .ilen = 16, + .assoc = zeroed_string, + .alen = 8, + .result = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE" + "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC" + "\x0B\x8F\x88\x69\x17\xE6\xB4\x3C" + "\xB1\x68\xFD\x14\x52\x64\x61\xB2", + .rlen = 32, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = zeroed_string, + .input = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .ilen = 16, + .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01", + .alen = 8, + .result = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE" + "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC" + "\x90\x92\xB7\xE3\x5F\xA3\x9A\x63" + "\x7E\xD7\x1F\xD8\xD3\x7C\x4B\xF5", + .rlen = 32, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x01" + "\x00\x00\x00\x00", + .input = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .ilen = 16, + .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01", + .alen = 8, + .result = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19" + "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29" + "\x64\x50\xF9\x32\x13\xFB\x74\x61" + "\xF4\xED\x52\xD3\xC5\x10\x55\x3C", + .rlen = 32, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x01" + "\x00\x00\x00\x00", + .input = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .ilen = 64, + .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01", + .alen = 8, + .result = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19" + "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29" + "\x98\x14\xA1\x42\x37\x80\xFD\x90" + "\x68\x12\x01\xA8\x91\x89\xB9\x83" + "\x5B\x11\x77\x12\x9B\xFF\x24\x89" + "\x94\x5F\x18\x12\xBA\x27\x09\x39" + "\x99\x96\x76\x42\x15\x1C\xCD\xCB" + "\xDC\xD3\xDA\x65\x73\xAF\x80\xCD" + "\xD2\xB6\xC2\x4A\x76\xC2\x92\x85" + "\xBD\xCF\x62\x98\x58\x14\xE5\xBD", + .rlen = 80, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x45\x67\x89\xab\xcd\xef" + "\x00\x00\x00\x00", + .input = "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", + .ilen = 192, + .assoc = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa", + .alen = 12, + .result = "\xC1\x76\x33\x85\xE2\x9B\x5F\xDE" + "\xDE\x89\x3D\x42\xE7\xC9\x69\x8A" + "\x44\x6D\xC3\x88\x46\x2E\xC2\x01" + "\x5E\xF6\x0C\x39\xF0\xC4\xA5\x82" + "\xCD\xE8\x31\xCC\x0A\x4C\xE4\x44" + "\x41\xA9\x82\x6F\x22\xA1\x23\x1A" + "\xA8\xE3\x16\xFD\x31\x5C\x27\x31" + "\xF1\x7F\x01\x63\xA3\xAF\x70\xA1" + "\xCF\x07\x57\x41\x67\xD0\xC4\x42" + "\xDB\x18\xC6\x4C\x4C\xE0\x3D\x9F" + "\x05\x07\xFB\x13\x7D\x4A\xCA\x5B" + "\xF0\xBF\x64\x7E\x05\xB1\x72\xEE" + "\x7C\x3B\xD4\xCD\x14\x03\xB2\x2C" + "\xD3\xA9\xEE\xFA\x17\xFC\x9C\xDF" + "\xC7\x75\x40\xFF\xAE\xAD\x1E\x59" + "\x2F\x30\x24\xFB\xAD\x6B\x10\xFA" + "\x6C\x9F\x5B\xE7\x25\xD5\xD0\x25" + "\xAC\x4A\x4B\xDA\xFC\x7A\x85\x1B" + "\x7E\x13\x06\x82\x08\x17\xA4\x35" + "\xEC\xC5\x8D\x63\x96\x81\x0A\x8F" + "\xA3\x05\x38\x95\x20\x1A\x47\x04" + "\x6F\x6D\xDA\x8F\xEF\xC1\x76\x35" + "\x6B\xC7\x4D\x0F\x94\x12\xCA\x3E" + "\x2E\xD5\x03\x2E\x86\x7E\xAA\x3B" + "\x37\x08\x1C\xCF\xBA\x5D\x71\x46" + "\x80\x72\xB0\x4C\x82\x0D\x60\x3C", + .rlen = 208, + } +}; + +static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = { + { /* Generated using Crypto++ */ + .key = zeroed_string, + .klen = 20, + .iv = zeroed_string, + .input = "\x03\x88\xDA\xCE\x60\xB6\xA3\x92" + "\xF3\x28\xC2\xB9\x71\xB2\xFE\x78" + "\x97\xFE\x4C\x23\x37\x42\x01\xE0" + "\x81\x9F\x8D\xC5\xD7\x41\xA0\x1B", + .ilen = 32, + .assoc = zeroed_string, + .alen = 8, + .result = zeroed_string, + .rlen = 16, + + },{ + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x01" + "\x00\x00\x00\x00", + .input = "\xC0\x0D\x8B\x42\x0F\x8F\x34\x18" + "\x88\xB1\xC5\xBC\xC5\xB6\xD6\x28" + "\x6A\x9D\xDF\x11\x5E\xFE\x5E\x9D" + "\x2F\x70\x44\x92\xF7\xF2\xE3\xEF", + .ilen = 32, + .assoc = zeroed_string, + .alen = 8, + .result = zeroed_string, + .rlen = 16, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = zeroed_string, + .input = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE" + "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC" + "\x0B\x8F\x88\x69\x17\xE6\xB4\x3C" + "\xB1\x68\xFD\x14\x52\x64\x61\xB2", + .ilen = 32, + .assoc = zeroed_string, + .alen = 8, + .result = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .rlen = 16, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = zeroed_string, + .input = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE" + "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC" + "\x90\x92\xB7\xE3\x5F\xA3\x9A\x63" + "\x7E\xD7\x1F\xD8\xD3\x7C\x4B\xF5", + .ilen = 32, + .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01", + .alen = 8, + .result = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .rlen = 16, + + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x01" + "\x00\x00\x00\x00", + .input = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19" + "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29" + "\x64\x50\xF9\x32\x13\xFB\x74\x61" + "\xF4\xED\x52\xD3\xC5\x10\x55\x3C", + .ilen = 32, + .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01", + .alen = 8, + .result = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .rlen = 16, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x01" + "\x00\x00\x00\x00", + .input = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19" + "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29" + "\x98\x14\xA1\x42\x37\x80\xFD\x90" + "\x68\x12\x01\xA8\x91\x89\xB9\x83" + "\x5B\x11\x77\x12\x9B\xFF\x24\x89" + "\x94\x5F\x18\x12\xBA\x27\x09\x39" + "\x99\x96\x76\x42\x15\x1C\xCD\xCB" + "\xDC\xD3\xDA\x65\x73\xAF\x80\xCD" + "\xD2\xB6\xC2\x4A\x76\xC2\x92\x85" + "\xBD\xCF\x62\x98\x58\x14\xE5\xBD", + .ilen = 80, + .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01", + .alen = 8, + .result = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .rlen = 64, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x45\x67\x89\xab\xcd\xef" + "\x00\x00\x00\x00", + .input = "\xC1\x76\x33\x85\xE2\x9B\x5F\xDE" + "\xDE\x89\x3D\x42\xE7\xC9\x69\x8A" + "\x44\x6D\xC3\x88\x46\x2E\xC2\x01" + "\x5E\xF6\x0C\x39\xF0\xC4\xA5\x82" + "\xCD\xE8\x31\xCC\x0A\x4C\xE4\x44" + "\x41\xA9\x82\x6F\x22\xA1\x23\x1A" + "\xA8\xE3\x16\xFD\x31\x5C\x27\x31" + "\xF1\x7F\x01\x63\xA3\xAF\x70\xA1" + "\xCF\x07\x57\x41\x67\xD0\xC4\x42" + "\xDB\x18\xC6\x4C\x4C\xE0\x3D\x9F" + "\x05\x07\xFB\x13\x7D\x4A\xCA\x5B" + "\xF0\xBF\x64\x7E\x05\xB1\x72\xEE" + "\x7C\x3B\xD4\xCD\x14\x03\xB2\x2C" + "\xD3\xA9\xEE\xFA\x17\xFC\x9C\xDF" + "\xC7\x75\x40\xFF\xAE\xAD\x1E\x59" + "\x2F\x30\x24\xFB\xAD\x6B\x10\xFA" + "\x6C\x9F\x5B\xE7\x25\xD5\xD0\x25" + "\xAC\x4A\x4B\xDA\xFC\x7A\x85\x1B" + "\x7E\x13\x06\x82\x08\x17\xA4\x35" + "\xEC\xC5\x8D\x63\x96\x81\x0A\x8F" + "\xA3\x05\x38\x95\x20\x1A\x47\x04" + "\x6F\x6D\xDA\x8F\xEF\xC1\x76\x35" + "\x6B\xC7\x4D\x0F\x94\x12\xCA\x3E" + "\x2E\xD5\x03\x2E\x86\x7E\xAA\x3B" + "\x37\x08\x1C\xCF\xBA\x5D\x71\x46" + "\x80\x72\xB0\x4C\x82\x0D\x60\x3C", + .ilen = 208, + .assoc = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa", + .alen = 12, + .result = "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", + .rlen = 192, + + } +}; + static struct aead_testvec aes_ccm_enc_tv_template[] = { { /* From RFC 3610 */ .key = "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" diff --git a/crypto/zlib.c b/crypto/zlib.c index c3015733c990..739b8fca4cea 100644 --- a/crypto/zlib.c +++ b/crypto/zlib.c @@ -95,11 +95,10 @@ static int zlib_compress_setup(struct crypto_pcomp *tfm, void *params, zlib_comp_exit(ctx); workspacesize = zlib_deflate_workspacesize(); - stream->workspace = vmalloc(workspacesize); + stream->workspace = vzalloc(workspacesize); if (!stream->workspace) return -ENOMEM; - memset(stream->workspace, 0, workspacesize); ret = zlib_deflateInit2(stream, tb[ZLIB_COMP_LEVEL] ? nla_get_u32(tb[ZLIB_COMP_LEVEL]) diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index 794aacb715c1..d0387a84eec1 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c @@ -24,6 +24,7 @@ * warranty of any kind, whether express or implied. */ +#include <crypto/padlock.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/hw_random.h> @@ -34,7 +35,6 @@ #include <asm/i387.h> -#define PFX KBUILD_MODNAME ": " enum { @@ -81,8 +81,7 @@ static inline u32 xstore(u32 *addr, u32 edx_in) ts_state = irq_ts_save(); asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */" - :"=m"(*addr), "=a"(eax_out) - :"D"(addr), "d"(edx_in)); + : "=m" (*addr), "=a" (eax_out), "+d" (edx_in), "+D" (addr)); irq_ts_restore(ts_state); return eax_out; @@ -90,8 +89,10 @@ static inline u32 xstore(u32 *addr, u32 edx_in) static int via_rng_data_present(struct hwrng *rng, int wait) { + char buf[16 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ + ((aligned(STACK_ALIGN))); + u32 *via_rng_datum = (u32 *)PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); u32 bytes_out; - u32 *via_rng_datum = (u32 *)(&rng->priv); int i; /* We choose the recommended 1-byte-per-instruction RNG rate, @@ -115,6 +116,7 @@ static int via_rng_data_present(struct hwrng *rng, int wait) break; udelay(10); } + rng->priv = *via_rng_datum; return bytes_out ? 1 : 0; } diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 7d279e578df5..c99305afa58a 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -857,7 +857,7 @@ static int mv_cra_hash_init(struct crypto_tfm *tfm, const char *base_hash_name, printk(KERN_WARNING MV_CESA "Base driver '%s' could not be loaded!\n", base_hash_name); - err = PTR_ERR(fallback_tfm); + err = PTR_ERR(base_hash); goto err_bad_base; } } diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 76141262ea1d..80dc094e78c6 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1542,7 +1542,7 @@ out: return err; } -static void __exit n2_unregister_algs(void) +static void __devexit n2_unregister_algs(void) { mutex_lock(&spu_lock); if (!--algs_registered) diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 799ca517c121..add2a1a72ba4 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -74,11 +74,9 @@ #define FLAGS_CBC BIT(1) #define FLAGS_GIV BIT(2) -#define FLAGS_NEW_KEY BIT(4) -#define FLAGS_NEW_IV BIT(5) -#define FLAGS_INIT BIT(6) -#define FLAGS_FAST BIT(7) -#define FLAGS_BUSY 8 +#define FLAGS_INIT BIT(4) +#define FLAGS_FAST BIT(5) +#define FLAGS_BUSY BIT(6) struct omap_aes_ctx { struct omap_aes_dev *dd; @@ -98,19 +96,18 @@ struct omap_aes_reqctx { struct omap_aes_dev { struct list_head list; unsigned long phys_base; - void __iomem *io_base; + void __iomem *io_base; struct clk *iclk; struct omap_aes_ctx *ctx; struct device *dev; unsigned long flags; + int err; - u32 *iv; - u32 ctrl; + spinlock_t lock; + struct crypto_queue queue; - spinlock_t lock; - struct crypto_queue queue; - - struct tasklet_struct task; + struct tasklet_struct done_task; + struct tasklet_struct queue_task; struct ablkcipher_request *req; size_t total; @@ -179,9 +176,13 @@ static int omap_aes_wait(struct omap_aes_dev *dd, u32 offset, u32 bit) static int omap_aes_hw_init(struct omap_aes_dev *dd) { - int err = 0; - + /* + * clocks are enabled when request starts and disabled when finished. + * It may be long delays between requests. + * Device might go to off mode to save power. + */ clk_enable(dd->iclk); + if (!(dd->flags & FLAGS_INIT)) { /* is it necessary to reset before every operation? */ omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_SOFTRESET, @@ -193,39 +194,26 @@ static int omap_aes_hw_init(struct omap_aes_dev *dd) __asm__ __volatile__("nop"); __asm__ __volatile__("nop"); - err = omap_aes_wait(dd, AES_REG_SYSSTATUS, - AES_REG_SYSSTATUS_RESETDONE); - if (!err) - dd->flags |= FLAGS_INIT; - } + if (omap_aes_wait(dd, AES_REG_SYSSTATUS, + AES_REG_SYSSTATUS_RESETDONE)) + return -ETIMEDOUT; - return err; -} + dd->flags |= FLAGS_INIT; + dd->err = 0; + } -static void omap_aes_hw_cleanup(struct omap_aes_dev *dd) -{ - clk_disable(dd->iclk); + return 0; } -static void omap_aes_write_ctrl(struct omap_aes_dev *dd) +static int omap_aes_write_ctrl(struct omap_aes_dev *dd) { unsigned int key32; - int i; + int i, err; u32 val, mask; - val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3); - if (dd->flags & FLAGS_CBC) - val |= AES_REG_CTRL_CBC; - if (dd->flags & FLAGS_ENCRYPT) - val |= AES_REG_CTRL_DIRECTION; - - if (dd->ctrl == val && !(dd->flags & FLAGS_NEW_IV) && - !(dd->ctx->flags & FLAGS_NEW_KEY)) - goto out; - - /* only need to write control registers for new settings */ - - dd->ctrl = val; + err = omap_aes_hw_init(dd); + if (err) + return err; val = 0; if (dd->dma_lch_out >= 0) @@ -237,30 +225,43 @@ static void omap_aes_write_ctrl(struct omap_aes_dev *dd) omap_aes_write_mask(dd, AES_REG_MASK, val, mask); - pr_debug("Set key\n"); key32 = dd->ctx->keylen / sizeof(u32); - /* set a key */ + + /* it seems a key should always be set even if it has not changed */ for (i = 0; i < key32; i++) { omap_aes_write(dd, AES_REG_KEY(i), __le32_to_cpu(dd->ctx->key[i])); } - dd->ctx->flags &= ~FLAGS_NEW_KEY; - if (dd->flags & FLAGS_NEW_IV) { - pr_debug("Set IV\n"); - omap_aes_write_n(dd, AES_REG_IV(0), dd->iv, 4); - dd->flags &= ~FLAGS_NEW_IV; - } + if ((dd->flags & FLAGS_CBC) && dd->req->info) + omap_aes_write_n(dd, AES_REG_IV(0), dd->req->info, 4); + + val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3); + if (dd->flags & FLAGS_CBC) + val |= AES_REG_CTRL_CBC; + if (dd->flags & FLAGS_ENCRYPT) + val |= AES_REG_CTRL_DIRECTION; mask = AES_REG_CTRL_CBC | AES_REG_CTRL_DIRECTION | AES_REG_CTRL_KEY_SIZE; - omap_aes_write_mask(dd, AES_REG_CTRL, dd->ctrl, mask); + omap_aes_write_mask(dd, AES_REG_CTRL, val, mask); -out: - /* start DMA or disable idle mode */ - omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_START, - AES_REG_MASK_START); + /* IN */ + omap_set_dma_dest_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_CONSTANT, + dd->phys_base + AES_REG_DATA, 0, 4); + + omap_set_dma_dest_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4); + omap_set_dma_src_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4); + + /* OUT */ + omap_set_dma_src_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_CONSTANT, + dd->phys_base + AES_REG_DATA, 0, 4); + + omap_set_dma_src_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4); + omap_set_dma_dest_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4); + + return 0; } static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx) @@ -288,8 +289,16 @@ static void omap_aes_dma_callback(int lch, u16 ch_status, void *data) { struct omap_aes_dev *dd = data; - if (lch == dd->dma_lch_out) - tasklet_schedule(&dd->task); + if (ch_status != OMAP_DMA_BLOCK_IRQ) { + pr_err("omap-aes DMA error status: 0x%hx\n", ch_status); + dd->err = -EIO; + dd->flags &= ~FLAGS_INIT; /* request to re-initialize */ + } else if (lch == dd->dma_lch_in) { + return; + } + + /* dma_lch_out - completed */ + tasklet_schedule(&dd->done_task); } static int omap_aes_dma_init(struct omap_aes_dev *dd) @@ -339,18 +348,6 @@ static int omap_aes_dma_init(struct omap_aes_dev *dd) goto err_dma_out; } - omap_set_dma_dest_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_CONSTANT, - dd->phys_base + AES_REG_DATA, 0, 4); - - omap_set_dma_dest_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4); - omap_set_dma_src_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4); - - omap_set_dma_src_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_CONSTANT, - dd->phys_base + AES_REG_DATA, 0, 4); - - omap_set_dma_src_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4); - omap_set_dma_dest_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4); - return 0; err_dma_out: @@ -406,6 +403,11 @@ static int sg_copy(struct scatterlist **sg, size_t *offset, void *buf, if (!count) return off; + /* + * buflen and total are AES_BLOCK_SIZE size aligned, + * so count should be also aligned + */ + sg_copy_buf(buf + off, *sg, *offset, count, out); off += count; @@ -461,7 +463,9 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, omap_start_dma(dd->dma_lch_in); omap_start_dma(dd->dma_lch_out); - omap_aes_write_ctrl(dd); + /* start DMA or disable idle mode */ + omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_START, + AES_REG_MASK_START); return 0; } @@ -488,8 +492,10 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd) count = min(dd->total, sg_dma_len(dd->in_sg)); count = min(count, sg_dma_len(dd->out_sg)); - if (count != dd->total) + if (count != dd->total) { + pr_err("request length != buffer length\n"); return -EINVAL; + } pr_debug("fast\n"); @@ -525,23 +531,25 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd) dd->total -= count; - err = omap_aes_hw_init(dd); - err = omap_aes_crypt_dma(tfm, addr_in, addr_out, count); + if (err) { + dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); + dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_TO_DEVICE); + } return err; } static void omap_aes_finish_req(struct omap_aes_dev *dd, int err) { - struct omap_aes_ctx *ctx; + struct ablkcipher_request *req = dd->req; pr_debug("err: %d\n", err); - ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(dd->req)); + clk_disable(dd->iclk); + dd->flags &= ~FLAGS_BUSY; - if (!dd->total) - dd->req->base.complete(&dd->req->base, err); + req->base.complete(&req->base, err); } static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) @@ -553,8 +561,6 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) omap_aes_write_mask(dd, AES_REG_MASK, 0, AES_REG_MASK_START); - omap_aes_hw_cleanup(dd); - omap_stop_dma(dd->dma_lch_in); omap_stop_dma(dd->dma_lch_out); @@ -574,40 +580,39 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) } } - if (err || !dd->total) - omap_aes_finish_req(dd, err); - return err; } -static int omap_aes_handle_req(struct omap_aes_dev *dd) +static int omap_aes_handle_queue(struct omap_aes_dev *dd, + struct ablkcipher_request *req) { struct crypto_async_request *async_req, *backlog; struct omap_aes_ctx *ctx; struct omap_aes_reqctx *rctx; - struct ablkcipher_request *req; unsigned long flags; - - if (dd->total) - goto start; + int err, ret = 0; spin_lock_irqsave(&dd->lock, flags); + if (req) + ret = ablkcipher_enqueue_request(&dd->queue, req); + if (dd->flags & FLAGS_BUSY) { + spin_unlock_irqrestore(&dd->lock, flags); + return ret; + } backlog = crypto_get_backlog(&dd->queue); async_req = crypto_dequeue_request(&dd->queue); - if (!async_req) - clear_bit(FLAGS_BUSY, &dd->flags); + if (async_req) + dd->flags |= FLAGS_BUSY; spin_unlock_irqrestore(&dd->lock, flags); if (!async_req) - return 0; + return ret; if (backlog) backlog->complete(backlog, -EINPROGRESS); req = ablkcipher_request_cast(async_req); - pr_debug("get new req\n"); - /* assign new request to device */ dd->req = req; dd->total = req->nbytes; @@ -621,27 +626,22 @@ static int omap_aes_handle_req(struct omap_aes_dev *dd) rctx->mode &= FLAGS_MODE_MASK; dd->flags = (dd->flags & ~FLAGS_MODE_MASK) | rctx->mode; - dd->iv = req->info; - if ((dd->flags & FLAGS_CBC) && dd->iv) - dd->flags |= FLAGS_NEW_IV; - else - dd->flags &= ~FLAGS_NEW_IV; - + dd->ctx = ctx; ctx->dd = dd; - if (dd->ctx != ctx) { - /* assign new context to device */ - dd->ctx = ctx; - ctx->flags |= FLAGS_NEW_KEY; - } - if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) - pr_err("request size is not exact amount of AES blocks\n"); + err = omap_aes_write_ctrl(dd); + if (!err) + err = omap_aes_crypt_dma_start(dd); + if (err) { + /* aes_task will not finish it, so do it here */ + omap_aes_finish_req(dd, err); + tasklet_schedule(&dd->queue_task); + } -start: - return omap_aes_crypt_dma_start(dd); + return ret; /* return ret, which is enqueue return value */ } -static void omap_aes_task(unsigned long data) +static void omap_aes_done_task(unsigned long data) { struct omap_aes_dev *dd = (struct omap_aes_dev *)data; int err; @@ -650,40 +650,50 @@ static void omap_aes_task(unsigned long data) err = omap_aes_crypt_dma_stop(dd); - err = omap_aes_handle_req(dd); + err = dd->err ? : err; + + if (dd->total && !err) { + err = omap_aes_crypt_dma_start(dd); + if (!err) + return; /* DMA started. Not fininishing. */ + } + + omap_aes_finish_req(dd, err); + omap_aes_handle_queue(dd, NULL); pr_debug("exit\n"); } +static void omap_aes_queue_task(unsigned long data) +{ + struct omap_aes_dev *dd = (struct omap_aes_dev *)data; + + omap_aes_handle_queue(dd, NULL); +} + static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode) { struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req); struct omap_aes_dev *dd; - unsigned long flags; - int err; pr_debug("nbytes: %d, enc: %d, cbc: %d\n", req->nbytes, !!(mode & FLAGS_ENCRYPT), !!(mode & FLAGS_CBC)); + if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) { + pr_err("request size is not exact amount of AES blocks\n"); + return -EINVAL; + } + dd = omap_aes_find_dev(ctx); if (!dd) return -ENODEV; rctx->mode = mode; - spin_lock_irqsave(&dd->lock, flags); - err = ablkcipher_enqueue_request(&dd->queue, req); - spin_unlock_irqrestore(&dd->lock, flags); - - if (!test_and_set_bit(FLAGS_BUSY, &dd->flags)) - omap_aes_handle_req(dd); - - pr_debug("exit\n"); - - return err; + return omap_aes_handle_queue(dd, req); } /* ********************** ALG API ************************************ */ @@ -701,7 +711,6 @@ static int omap_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, memcpy(ctx->key, key, keylen); ctx->keylen = keylen; - ctx->flags |= FLAGS_NEW_KEY; return 0; } @@ -750,7 +759,7 @@ static struct crypto_alg algs[] = { .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_aes_ctx), - .cra_alignmask = 0, + .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, .cra_init = omap_aes_cra_init, @@ -770,7 +779,7 @@ static struct crypto_alg algs[] = { .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_aes_ctx), - .cra_alignmask = 0, + .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, .cra_init = omap_aes_cra_init, @@ -849,7 +858,8 @@ static int omap_aes_probe(struct platform_device *pdev) (reg & AES_REG_REV_MAJOR) >> 4, reg & AES_REG_REV_MINOR); clk_disable(dd->iclk); - tasklet_init(&dd->task, omap_aes_task, (unsigned long)dd); + tasklet_init(&dd->done_task, omap_aes_done_task, (unsigned long)dd); + tasklet_init(&dd->queue_task, omap_aes_queue_task, (unsigned long)dd); err = omap_aes_dma_init(dd); if (err) @@ -876,7 +886,8 @@ err_algs: crypto_unregister_alg(&algs[j]); omap_aes_dma_cleanup(dd); err_dma: - tasklet_kill(&dd->task); + tasklet_kill(&dd->done_task); + tasklet_kill(&dd->queue_task); iounmap(dd->io_base); err_io: clk_put(dd->iclk); @@ -903,7 +914,8 @@ static int omap_aes_remove(struct platform_device *pdev) for (i = 0; i < ARRAY_SIZE(algs); i++) crypto_unregister_alg(&algs[i]); - tasklet_kill(&dd->task); + tasklet_kill(&dd->done_task); + tasklet_kill(&dd->queue_task); omap_aes_dma_cleanup(dd); iounmap(dd->io_base); clk_put(dd->iclk); diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index a081c7c7d03f..2e71123516e0 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -72,10 +72,9 @@ #define DEFAULT_TIMEOUT_INTERVAL HZ -#define FLAGS_FIRST 0x0001 #define FLAGS_FINUP 0x0002 #define FLAGS_FINAL 0x0004 -#define FLAGS_FAST 0x0008 +#define FLAGS_SG 0x0008 #define FLAGS_SHA1 0x0010 #define FLAGS_DMA_ACTIVE 0x0020 #define FLAGS_OUTPUT_READY 0x0040 @@ -83,13 +82,17 @@ #define FLAGS_INIT 0x0100 #define FLAGS_CPU 0x0200 #define FLAGS_HMAC 0x0400 - -/* 3rd byte */ -#define FLAGS_BUSY 16 +#define FLAGS_ERROR 0x0800 +#define FLAGS_BUSY 0x1000 #define OP_UPDATE 1 #define OP_FINAL 2 +#define OMAP_ALIGN_MASK (sizeof(u32)-1) +#define OMAP_ALIGNED __attribute__((aligned(sizeof(u32)))) + +#define BUFLEN PAGE_SIZE + struct omap_sham_dev; struct omap_sham_reqctx { @@ -97,8 +100,8 @@ struct omap_sham_reqctx { unsigned long flags; unsigned long op; + u8 digest[SHA1_DIGEST_SIZE] OMAP_ALIGNED; size_t digcnt; - u8 *buffer; size_t bufcnt; size_t buflen; dma_addr_t dma_addr; @@ -107,6 +110,8 @@ struct omap_sham_reqctx { struct scatterlist *sg; unsigned int offset; /* offset in current sg */ unsigned int total; /* total request */ + + u8 buffer[0] OMAP_ALIGNED; }; struct omap_sham_hmac_ctx { @@ -136,6 +141,7 @@ struct omap_sham_dev { int irq; struct clk *iclk; spinlock_t lock; + int err; int dma; int dma_lch; struct tasklet_struct done_task; @@ -194,53 +200,68 @@ static inline int omap_sham_wait(struct omap_sham_dev *dd, u32 offset, u32 bit) static void omap_sham_copy_hash(struct ahash_request *req, int out) { struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + u32 *hash = (u32 *)ctx->digest; + int i; + + /* MD5 is almost unused. So copy sha1 size to reduce code */ + for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) { + if (out) + hash[i] = omap_sham_read(ctx->dd, + SHA_REG_DIGEST(i)); + else + omap_sham_write(ctx->dd, + SHA_REG_DIGEST(i), hash[i]); + } +} + +static void omap_sham_copy_ready_hash(struct ahash_request *req) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + u32 *in = (u32 *)ctx->digest; u32 *hash = (u32 *)req->result; int i; + if (!hash) + return; + if (likely(ctx->flags & FLAGS_SHA1)) { /* SHA1 results are in big endian */ for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) - if (out) - hash[i] = be32_to_cpu(omap_sham_read(ctx->dd, - SHA_REG_DIGEST(i))); - else - omap_sham_write(ctx->dd, SHA_REG_DIGEST(i), - cpu_to_be32(hash[i])); + hash[i] = be32_to_cpu(in[i]); } else { /* MD5 results are in little endian */ for (i = 0; i < MD5_DIGEST_SIZE / sizeof(u32); i++) - if (out) - hash[i] = le32_to_cpu(omap_sham_read(ctx->dd, - SHA_REG_DIGEST(i))); - else - omap_sham_write(ctx->dd, SHA_REG_DIGEST(i), - cpu_to_le32(hash[i])); + hash[i] = le32_to_cpu(in[i]); } } -static int omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length, - int final, int dma) +static int omap_sham_hw_init(struct omap_sham_dev *dd) { - struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - u32 val = length << 5, mask; + clk_enable(dd->iclk); - if (unlikely(!ctx->digcnt)) { + if (!(dd->flags & FLAGS_INIT)) { + omap_sham_write_mask(dd, SHA_REG_MASK, + SHA_REG_MASK_SOFTRESET, SHA_REG_MASK_SOFTRESET); - clk_enable(dd->iclk); + if (omap_sham_wait(dd, SHA_REG_SYSSTATUS, + SHA_REG_SYSSTATUS_RESETDONE)) + return -ETIMEDOUT; - if (!(dd->flags & FLAGS_INIT)) { - omap_sham_write_mask(dd, SHA_REG_MASK, - SHA_REG_MASK_SOFTRESET, SHA_REG_MASK_SOFTRESET); + dd->flags |= FLAGS_INIT; + dd->err = 0; + } - if (omap_sham_wait(dd, SHA_REG_SYSSTATUS, - SHA_REG_SYSSTATUS_RESETDONE)) - return -ETIMEDOUT; + return 0; +} - dd->flags |= FLAGS_INIT; - } - } else { +static void omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length, + int final, int dma) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + u32 val = length << 5, mask; + + if (likely(ctx->digcnt)) omap_sham_write(dd, SHA_REG_DIGCNT, ctx->digcnt); - } omap_sham_write_mask(dd, SHA_REG_MASK, SHA_REG_MASK_IT_EN | (dma ? SHA_REG_MASK_DMA_EN : 0), @@ -260,29 +281,26 @@ static int omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length, SHA_REG_CTRL_ALGO | SHA_REG_CTRL_LENGTH; omap_sham_write_mask(dd, SHA_REG_CTRL, val, mask); - - return 0; } static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf, size_t length, int final) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - int err, count, len32; + int count, len32; const u32 *buffer = (const u32 *)buf; dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n", ctx->digcnt, length, final); - err = omap_sham_write_ctrl(dd, length, final, 0); - if (err) - return err; + omap_sham_write_ctrl(dd, length, final, 0); + + /* should be non-zero before next lines to disable clocks later */ + ctx->digcnt += length; if (omap_sham_wait(dd, SHA_REG_CTRL, SHA_REG_CTRL_INPUT_READY)) return -ETIMEDOUT; - ctx->digcnt += length; - if (final) ctx->flags |= FLAGS_FINAL; /* catch last interrupt */ @@ -298,16 +316,11 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr, size_t length, int final) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - int err, len32; + int len32; dev_dbg(dd->dev, "xmit_dma: digcnt: %d, length: %d, final: %d\n", ctx->digcnt, length, final); - /* flush cache entries related to our page */ - if (dma_addr == ctx->dma_addr) - dma_sync_single_for_device(dd->dev, dma_addr, length, - DMA_TO_DEVICE); - len32 = DIV_ROUND_UP(length, sizeof(u32)); omap_set_dma_transfer_params(dd->dma_lch, OMAP_DMA_DATA_TYPE_S32, len32, @@ -317,9 +330,7 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr, omap_set_dma_src_params(dd->dma_lch, 0, OMAP_DMA_AMODE_POST_INC, dma_addr, 0, 0); - err = omap_sham_write_ctrl(dd, length, final, 1); - if (err) - return err; + omap_sham_write_ctrl(dd, length, final, 1); ctx->digcnt += length; @@ -371,15 +382,29 @@ static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx) return 0; } +static int omap_sham_xmit_dma_map(struct omap_sham_dev *dd, + struct omap_sham_reqctx *ctx, + size_t length, int final) +{ + ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen, + DMA_TO_DEVICE); + if (dma_mapping_error(dd->dev, ctx->dma_addr)) { + dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen); + return -EINVAL; + } + + ctx->flags &= ~FLAGS_SG; + + /* next call does not fail... so no unmap in the case of error */ + return omap_sham_xmit_dma(dd, ctx->dma_addr, length, final); +} + static int omap_sham_update_dma_slow(struct omap_sham_dev *dd) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); unsigned int final; size_t count; - if (!ctx->total) - return 0; - omap_sham_append_sg(ctx); final = (ctx->flags & FLAGS_FINUP) && !ctx->total; @@ -390,30 +415,68 @@ static int omap_sham_update_dma_slow(struct omap_sham_dev *dd) if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) { count = ctx->bufcnt; ctx->bufcnt = 0; - return omap_sham_xmit_dma(dd, ctx->dma_addr, count, final); + return omap_sham_xmit_dma_map(dd, ctx, count, final); } return 0; } -static int omap_sham_update_dma_fast(struct omap_sham_dev *dd) +/* Start address alignment */ +#define SG_AA(sg) (IS_ALIGNED(sg->offset, sizeof(u32))) +/* SHA1 block size alignment */ +#define SG_SA(sg) (IS_ALIGNED(sg->length, SHA1_MD5_BLOCK_SIZE)) + +static int omap_sham_update_dma_start(struct omap_sham_dev *dd) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - unsigned int length; + unsigned int length, final, tail; + struct scatterlist *sg; - ctx->flags |= FLAGS_FAST; + if (!ctx->total) + return 0; + + if (ctx->bufcnt || ctx->offset) + return omap_sham_update_dma_slow(dd); + + dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n", + ctx->digcnt, ctx->bufcnt, ctx->total); + + sg = ctx->sg; - length = min(ctx->total, sg_dma_len(ctx->sg)); - ctx->total = length; + if (!SG_AA(sg)) + return omap_sham_update_dma_slow(dd); + + if (!sg_is_last(sg) && !SG_SA(sg)) + /* size is not SHA1_BLOCK_SIZE aligned */ + return omap_sham_update_dma_slow(dd); + + length = min(ctx->total, sg->length); + + if (sg_is_last(sg)) { + if (!(ctx->flags & FLAGS_FINUP)) { + /* not last sg must be SHA1_MD5_BLOCK_SIZE aligned */ + tail = length & (SHA1_MD5_BLOCK_SIZE - 1); + /* without finup() we need one block to close hash */ + if (!tail) + tail = SHA1_MD5_BLOCK_SIZE; + length -= tail; + } + } if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) { dev_err(dd->dev, "dma_map_sg error\n"); return -EINVAL; } + ctx->flags |= FLAGS_SG; + ctx->total -= length; + ctx->offset = length; /* offset where to start slow */ - return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, 1); + final = (ctx->flags & FLAGS_FINUP) && !ctx->total; + + /* next call does not fail... so no unmap in the case of error */ + return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, final); } static int omap_sham_update_cpu(struct omap_sham_dev *dd) @@ -433,8 +496,17 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd) struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); omap_stop_dma(dd->dma_lch); - if (ctx->flags & FLAGS_FAST) + if (ctx->flags & FLAGS_SG) { dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE); + if (ctx->sg->length == ctx->offset) { + ctx->sg = sg_next(ctx->sg); + if (ctx->sg) + ctx->offset = 0; + } + } else { + dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen, + DMA_TO_DEVICE); + } return 0; } @@ -454,14 +526,7 @@ static void omap_sham_cleanup(struct ahash_request *req) spin_unlock_irqrestore(&dd->lock, flags); if (ctx->digcnt) - clk_disable(dd->iclk); - - if (ctx->dma_addr) - dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen, - DMA_TO_DEVICE); - - if (ctx->buffer) - free_page((unsigned long)ctx->buffer); + omap_sham_copy_ready_hash(req); dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt, ctx->bufcnt); } @@ -489,8 +554,6 @@ static int omap_sham_init(struct ahash_request *req) ctx->flags = 0; - ctx->flags |= FLAGS_FIRST; - dev_dbg(dd->dev, "init: digest size: %d\n", crypto_ahash_digestsize(tfm)); @@ -499,21 +562,7 @@ static int omap_sham_init(struct ahash_request *req) ctx->bufcnt = 0; ctx->digcnt = 0; - - ctx->buflen = PAGE_SIZE; - ctx->buffer = (void *)__get_free_page( - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); - if (!ctx->buffer) - return -ENOMEM; - - ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen, - DMA_TO_DEVICE); - if (dma_mapping_error(dd->dev, ctx->dma_addr)) { - dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen); - free_page((unsigned long)ctx->buffer); - return -EINVAL; - } + ctx->buflen = BUFLEN; if (tctx->flags & FLAGS_HMAC) { struct omap_sham_hmac_ctx *bctx = tctx->base; @@ -538,10 +587,8 @@ static int omap_sham_update_req(struct omap_sham_dev *dd) if (ctx->flags & FLAGS_CPU) err = omap_sham_update_cpu(dd); - else if (ctx->flags & FLAGS_FAST) - err = omap_sham_update_dma_fast(dd); else - err = omap_sham_update_dma_slow(dd); + err = omap_sham_update_dma_start(dd); /* wait for dma completion before can take more data */ dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n", err, ctx->digcnt); @@ -560,15 +607,12 @@ static int omap_sham_final_req(struct omap_sham_dev *dd) use_dma = 0; if (use_dma) - err = omap_sham_xmit_dma(dd, ctx->dma_addr, ctx->bufcnt, 1); + err = omap_sham_xmit_dma_map(dd, ctx, ctx->bufcnt, 1); else err = omap_sham_xmit_cpu(dd, ctx->buffer, ctx->bufcnt, 1); ctx->bufcnt = 0; - if (err != -EINPROGRESS) - omap_sham_cleanup(req); - dev_dbg(dd->dev, "final_req: err: %d\n", err); return err; @@ -576,6 +620,7 @@ static int omap_sham_final_req(struct omap_sham_dev *dd) static int omap_sham_finish_req_hmac(struct ahash_request *req) { + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm); struct omap_sham_hmac_ctx *bctx = tctx->base; int bs = crypto_shash_blocksize(bctx->shash); @@ -590,48 +635,56 @@ static int omap_sham_finish_req_hmac(struct ahash_request *req) return crypto_shash_init(&desc.shash) ?: crypto_shash_update(&desc.shash, bctx->opad, bs) ?: - crypto_shash_finup(&desc.shash, req->result, ds, req->result); + crypto_shash_finup(&desc.shash, ctx->digest, ds, ctx->digest); } static void omap_sham_finish_req(struct ahash_request *req, int err) { struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + struct omap_sham_dev *dd = ctx->dd; if (!err) { omap_sham_copy_hash(ctx->dd->req, 1); if (ctx->flags & FLAGS_HMAC) err = omap_sham_finish_req_hmac(req); + } else { + ctx->flags |= FLAGS_ERROR; } - if (ctx->flags & FLAGS_FINAL) + if ((ctx->flags & FLAGS_FINAL) || err) omap_sham_cleanup(req); - clear_bit(FLAGS_BUSY, &ctx->dd->flags); + clk_disable(dd->iclk); + dd->flags &= ~FLAGS_BUSY; if (req->base.complete) req->base.complete(&req->base, err); } -static int omap_sham_handle_queue(struct omap_sham_dev *dd) +static int omap_sham_handle_queue(struct omap_sham_dev *dd, + struct ahash_request *req) { struct crypto_async_request *async_req, *backlog; struct omap_sham_reqctx *ctx; - struct ahash_request *req, *prev_req; + struct ahash_request *prev_req; unsigned long flags; - int err = 0; - - if (test_and_set_bit(FLAGS_BUSY, &dd->flags)) - return 0; + int err = 0, ret = 0; spin_lock_irqsave(&dd->lock, flags); + if (req) + ret = ahash_enqueue_request(&dd->queue, req); + if (dd->flags & FLAGS_BUSY) { + spin_unlock_irqrestore(&dd->lock, flags); + return ret; + } backlog = crypto_get_backlog(&dd->queue); async_req = crypto_dequeue_request(&dd->queue); - if (!async_req) - clear_bit(FLAGS_BUSY, &dd->flags); + if (async_req) + dd->flags |= FLAGS_BUSY; spin_unlock_irqrestore(&dd->lock, flags); if (!async_req) - return 0; + return ret; if (backlog) backlog->complete(backlog, -EINPROGRESS); @@ -646,7 +699,22 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd) dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n", ctx->op, req->nbytes); - if (req != prev_req && ctx->digcnt) + + err = omap_sham_hw_init(dd); + if (err) + goto err1; + + omap_set_dma_dest_params(dd->dma_lch, 0, + OMAP_DMA_AMODE_CONSTANT, + dd->phys_base + SHA_REG_DIN(0), 0, 16); + + omap_set_dma_dest_burst_mode(dd->dma_lch, + OMAP_DMA_DATA_BURST_16); + + omap_set_dma_src_burst_mode(dd->dma_lch, + OMAP_DMA_DATA_BURST_4); + + if (ctx->digcnt) /* request has changed - restore hash */ omap_sham_copy_hash(req, 0); @@ -658,7 +726,7 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd) } else if (ctx->op == OP_FINAL) { err = omap_sham_final_req(dd); } - +err1: if (err != -EINPROGRESS) { /* done_task will not finish it, so do it here */ omap_sham_finish_req(req, err); @@ -667,7 +735,7 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd) dev_dbg(dd->dev, "exit, err: %d\n", err); - return err; + return ret; } static int omap_sham_enqueue(struct ahash_request *req, unsigned int op) @@ -675,18 +743,10 @@ static int omap_sham_enqueue(struct ahash_request *req, unsigned int op) struct omap_sham_reqctx *ctx = ahash_request_ctx(req); struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm); struct omap_sham_dev *dd = tctx->dd; - unsigned long flags; - int err; ctx->op = op; - spin_lock_irqsave(&dd->lock, flags); - err = ahash_enqueue_request(&dd->queue, req); - spin_unlock_irqrestore(&dd->lock, flags); - - omap_sham_handle_queue(dd); - - return err; + return omap_sham_handle_queue(dd, req); } static int omap_sham_update(struct ahash_request *req) @@ -709,21 +769,13 @@ static int omap_sham_update(struct ahash_request *req) */ omap_sham_append_sg(ctx); return 0; - } else if (ctx->bufcnt + ctx->total <= 64) { + } else if (ctx->bufcnt + ctx->total <= SHA1_MD5_BLOCK_SIZE) { + /* + * faster to use CPU for short transfers + */ ctx->flags |= FLAGS_CPU; - } else if (!ctx->bufcnt && sg_is_last(ctx->sg)) { - /* may be can use faster functions */ - int aligned = IS_ALIGNED((u32)ctx->sg->offset, - sizeof(u32)); - - if (aligned && (ctx->flags & FLAGS_FIRST)) - /* digest: first and final */ - ctx->flags |= FLAGS_FAST; - - ctx->flags &= ~FLAGS_FIRST; } - } else if (ctx->bufcnt + ctx->total <= ctx->buflen) { - /* if not finaup -> not fast */ + } else if (ctx->bufcnt + ctx->total < ctx->buflen) { omap_sham_append_sg(ctx); return 0; } @@ -761,12 +813,14 @@ static int omap_sham_final(struct ahash_request *req) ctx->flags |= FLAGS_FINUP; - /* OMAP HW accel works only with buffers >= 9 */ - /* HMAC is always >= 9 because of ipad */ - if ((ctx->digcnt + ctx->bufcnt) < 9) - err = omap_sham_final_shash(req); - else if (ctx->bufcnt) - return omap_sham_enqueue(req, OP_FINAL); + if (!(ctx->flags & FLAGS_ERROR)) { + /* OMAP HW accel works only with buffers >= 9 */ + /* HMAC is always >= 9 because of ipad */ + if ((ctx->digcnt + ctx->bufcnt) < 9) + err = omap_sham_final_shash(req); + else if (ctx->bufcnt) + return omap_sham_enqueue(req, OP_FINAL); + } omap_sham_cleanup(req); @@ -836,6 +890,8 @@ static int omap_sham_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base) struct omap_sham_ctx *tctx = crypto_tfm_ctx(tfm); const char *alg_name = crypto_tfm_alg_name(tfm); + pr_info("enter\n"); + /* Allocate a fallback and abort if it failed. */ tctx->fallback = crypto_alloc_shash(alg_name, 0, CRYPTO_ALG_NEED_FALLBACK); @@ -846,7 +902,7 @@ static int omap_sham_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base) } crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct omap_sham_reqctx)); + sizeof(struct omap_sham_reqctx) + BUFLEN); if (alg_base) { struct omap_sham_hmac_ctx *bctx = tctx->base; @@ -932,7 +988,7 @@ static struct ahash_alg algs[] = { CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_sham_ctx), - .cra_alignmask = 0, + .cra_alignmask = OMAP_ALIGN_MASK, .cra_module = THIS_MODULE, .cra_init = omap_sham_cra_init, .cra_exit = omap_sham_cra_exit, @@ -956,7 +1012,7 @@ static struct ahash_alg algs[] = { .cra_blocksize = SHA1_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_sham_ctx) + sizeof(struct omap_sham_hmac_ctx), - .cra_alignmask = 0, + .cra_alignmask = OMAP_ALIGN_MASK, .cra_module = THIS_MODULE, .cra_init = omap_sham_cra_sha1_init, .cra_exit = omap_sham_cra_exit, @@ -980,7 +1036,7 @@ static struct ahash_alg algs[] = { .cra_blocksize = SHA1_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_sham_ctx) + sizeof(struct omap_sham_hmac_ctx), - .cra_alignmask = 0, + .cra_alignmask = OMAP_ALIGN_MASK, .cra_module = THIS_MODULE, .cra_init = omap_sham_cra_md5_init, .cra_exit = omap_sham_cra_exit, @@ -993,7 +1049,7 @@ static void omap_sham_done_task(unsigned long data) struct omap_sham_dev *dd = (struct omap_sham_dev *)data; struct ahash_request *req = dd->req; struct omap_sham_reqctx *ctx = ahash_request_ctx(req); - int ready = 1; + int ready = 0, err = 0; if (ctx->flags & FLAGS_OUTPUT_READY) { ctx->flags &= ~FLAGS_OUTPUT_READY; @@ -1003,15 +1059,18 @@ static void omap_sham_done_task(unsigned long data) if (dd->flags & FLAGS_DMA_ACTIVE) { dd->flags &= ~FLAGS_DMA_ACTIVE; omap_sham_update_dma_stop(dd); - omap_sham_update_dma_slow(dd); + if (!dd->err) + err = omap_sham_update_dma_start(dd); } - if (ready && !(dd->flags & FLAGS_DMA_ACTIVE)) { - dev_dbg(dd->dev, "update done\n"); + err = dd->err ? : err; + + if (err != -EINPROGRESS && (ready || err)) { + dev_dbg(dd->dev, "update done: err: %d\n", err); /* finish curent request */ - omap_sham_finish_req(req, 0); + omap_sham_finish_req(req, err); /* start new request */ - omap_sham_handle_queue(dd); + omap_sham_handle_queue(dd, NULL); } } @@ -1019,7 +1078,7 @@ static void omap_sham_queue_task(unsigned long data) { struct omap_sham_dev *dd = (struct omap_sham_dev *)data; - omap_sham_handle_queue(dd); + omap_sham_handle_queue(dd, NULL); } static irqreturn_t omap_sham_irq(int irq, void *dev_id) @@ -1041,6 +1100,7 @@ static irqreturn_t omap_sham_irq(int irq, void *dev_id) omap_sham_read(dd, SHA_REG_CTRL); ctx->flags |= FLAGS_OUTPUT_READY; + dd->err = 0; tasklet_schedule(&dd->done_task); return IRQ_HANDLED; @@ -1050,8 +1110,13 @@ static void omap_sham_dma_callback(int lch, u16 ch_status, void *data) { struct omap_sham_dev *dd = data; - if (likely(lch == dd->dma_lch)) - tasklet_schedule(&dd->done_task); + if (ch_status != OMAP_DMA_BLOCK_IRQ) { + pr_err("omap-sham DMA error status: 0x%hx\n", ch_status); + dd->err = -EIO; + dd->flags &= ~FLAGS_INIT; /* request to re-initialize */ + } + + tasklet_schedule(&dd->done_task); } static int omap_sham_dma_init(struct omap_sham_dev *dd) @@ -1066,15 +1131,6 @@ static int omap_sham_dma_init(struct omap_sham_dev *dd) dev_err(dd->dev, "Unable to request DMA channel\n"); return err; } - omap_set_dma_dest_params(dd->dma_lch, 0, - OMAP_DMA_AMODE_CONSTANT, - dd->phys_base + SHA_REG_DIN(0), 0, 16); - - omap_set_dma_dest_burst_mode(dd->dma_lch, - OMAP_DMA_DATA_BURST_16); - - omap_set_dma_src_burst_mode(dd->dma_lch, - OMAP_DMA_DATA_BURST_4); return 0; } diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 8a515baa38f7..db33d300aa23 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -9,6 +9,7 @@ #include <crypto/algapi.h> #include <crypto/aes.h> +#include <crypto/padlock.h> #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> @@ -21,7 +22,6 @@ #include <asm/byteorder.h> #include <asm/processor.h> #include <asm/i387.h> -#include "padlock.h" /* * Number of data blocks actually fetched for each xcrypt insn. diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index d3a27e0119bc..adf075b6b9a8 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -13,6 +13,7 @@ */ #include <crypto/internal/hash.h> +#include <crypto/padlock.h> #include <crypto/sha.h> #include <linux/err.h> #include <linux/module.h> @@ -22,13 +23,6 @@ #include <linux/kernel.h> #include <linux/scatterlist.h> #include <asm/i387.h> -#include "padlock.h" - -#ifdef CONFIG_64BIT -#define STACK_ALIGN 16 -#else -#define STACK_ALIGN 4 -#endif struct padlock_sha_desc { struct shash_desc fallback; diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h new file mode 100644 index 000000000000..c5813c87de06 --- /dev/null +++ b/include/crypto/if_alg.h @@ -0,0 +1,92 @@ +/* + * if_alg: User-space algorithm interface + * + * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef _CRYPTO_IF_ALG_H +#define _CRYPTO_IF_ALG_H + +#include <linux/compiler.h> +#include <linux/completion.h> +#include <linux/if_alg.h> +#include <linux/types.h> +#include <net/sock.h> + +#define ALG_MAX_PAGES 16 + +struct crypto_async_request; + +struct alg_sock { + /* struct sock must be the first member of struct alg_sock */ + struct sock sk; + + struct sock *parent; + + const struct af_alg_type *type; + void *private; +}; + +struct af_alg_completion { + struct completion completion; + int err; +}; + +struct af_alg_control { + struct af_alg_iv *iv; + int op; +}; + +struct af_alg_type { + void *(*bind)(const char *name, u32 type, u32 mask); + void (*release)(void *private); + int (*setkey)(void *private, const u8 *key, unsigned int keylen); + int (*accept)(void *private, struct sock *sk); + + struct proto_ops *ops; + struct module *owner; + char name[14]; +}; + +struct af_alg_sgl { + struct scatterlist sg[ALG_MAX_PAGES]; + struct page *pages[ALG_MAX_PAGES]; +}; + +int af_alg_register_type(const struct af_alg_type *type); +int af_alg_unregister_type(const struct af_alg_type *type); + +int af_alg_release(struct socket *sock); +int af_alg_accept(struct sock *sk, struct socket *newsock); + +int af_alg_make_sg(struct af_alg_sgl *sgl, void __user *addr, int len, + int write); +void af_alg_free_sg(struct af_alg_sgl *sgl); + +int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con); + +int af_alg_wait_for_completion(int err, struct af_alg_completion *completion); +void af_alg_complete(struct crypto_async_request *req, int err); + +static inline struct alg_sock *alg_sk(struct sock *sk) +{ + return (struct alg_sock *)sk; +} + +static inline void af_alg_release_parent(struct sock *sk) +{ + sock_put(alg_sk(sk)->parent); +} + +static inline void af_alg_init_completion(struct af_alg_completion *completion) +{ + init_completion(&completion->completion); +} + +#endif /* _CRYPTO_IF_ALG_H */ diff --git a/drivers/crypto/padlock.h b/include/crypto/padlock.h index b728e4518bd1..d2cfa2ef49e8 100644 --- a/drivers/crypto/padlock.h +++ b/include/crypto/padlock.h @@ -15,9 +15,15 @@ #define PADLOCK_ALIGNMENT 16 -#define PFX "padlock: " +#define PFX KBUILD_MODNAME ": " #define PADLOCK_CRA_PRIORITY 300 #define PADLOCK_COMPOSITE_PRIORITY 400 +#ifdef CONFIG_64BIT +#define STACK_ALIGN 16 +#else +#define STACK_ALIGN 4 +#endif + #endif /* _CRYPTO_PADLOCK_H */ diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h index 833d208c25d6..4fd95a323beb 100644 --- a/include/crypto/scatterwalk.h +++ b/include/crypto/scatterwalk.h @@ -68,6 +68,21 @@ static inline struct scatterlist *scatterwalk_sg_next(struct scatterlist *sg) return (++sg)->length ? sg : (void *)sg_page(sg); } +static inline void scatterwalk_crypto_chain(struct scatterlist *head, + struct scatterlist *sg, + int chain, int num) +{ + if (chain) { + head->length += sg->length; + sg = scatterwalk_sg_next(sg); + } + + if (sg) + scatterwalk_sg_chain(head, num, sg); + else + sg_mark_end(head); +} + static inline unsigned long scatterwalk_samebuf(struct scatter_walk *walk_in, struct scatter_walk *walk_out) { diff --git a/include/linux/Kbuild b/include/linux/Kbuild index d1580c17cab3..2296d8b1931f 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -158,6 +158,7 @@ header-y += icmpv6.h header-y += if.h header-y += if_addr.h header-y += if_addrlabel.h +header-y += if_alg.h header-y += if_arcnet.h header-y += if_arp.h header-y += if_bonding.h diff --git a/include/linux/if_alg.h b/include/linux/if_alg.h new file mode 100644 index 000000000000..0f9acce5b1ff --- /dev/null +++ b/include/linux/if_alg.h @@ -0,0 +1,40 @@ +/* + * if_alg: User-space algorithm interface + * + * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef _LINUX_IF_ALG_H +#define _LINUX_IF_ALG_H + +#include <linux/types.h> + +struct sockaddr_alg { + __u16 salg_family; + __u8 salg_type[14]; + __u32 salg_feat; + __u32 salg_mask; + __u8 salg_name[64]; +}; + +struct af_alg_iv { + __u32 ivlen; + __u8 iv[0]; +}; + +/* Socket options */ +#define ALG_SET_KEY 1 +#define ALG_SET_IV 2 +#define ALG_SET_OP 3 + +/* Operations */ +#define ALG_OP_DECRYPT 0 +#define ALG_OP_ENCRYPT 1 + +#endif /* _LINUX_IF_ALG_H */ diff --git a/include/linux/socket.h b/include/linux/socket.h index 5f65f14c4f44..edbb1d07ddf4 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -191,7 +191,8 @@ struct ucred { #define AF_PHONET 35 /* Phonet sockets */ #define AF_IEEE802154 36 /* IEEE802154 sockets */ #define AF_CAIF 37 /* CAIF sockets */ -#define AF_MAX 38 /* For now.. */ +#define AF_ALG 38 /* Algorithm sockets */ +#define AF_MAX 39 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -232,6 +233,7 @@ struct ucred { #define PF_PHONET AF_PHONET #define PF_IEEE802154 AF_IEEE802154 #define PF_CAIF AF_CAIF +#define PF_ALG AF_ALG #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ @@ -305,6 +307,7 @@ struct ucred { #define SOL_RDS 276 #define SOL_IUCV 277 #define SOL_CAIF 278 +#define SOL_ALG 279 /* IPX options */ #define IPX_TYPE 1 diff --git a/net/core/sock.c b/net/core/sock.c index a658aeb6d554..7dfed792434d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -157,7 +157,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , + "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , "sk_lock-AF_MAX" }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { @@ -173,7 +173,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", "slock-AF_CAIF" , + "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , "slock-AF_MAX" }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { @@ -189,7 +189,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", "clock-AF_CAIF" , + "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , "clock-AF_MAX" }; |