diff options
author | David S. Miller <davem@davemloft.net> | 2019-04-26 06:52:29 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-04-26 06:52:29 +0300 |
commit | 8b4483658364f05b2e32845c8f445cdfd9452286 (patch) | |
tree | 4734ef83378b1e90475ec4776ac72dfe16a7249b /arch/x86/crypto/poly1305-avx2-x86_64.S | |
parent | c049d56eb219661c9ae48d596c3e633973f89d1f (diff) | |
parent | cd8dead0c39457e58ec1d36db93aedca811d48f1 (diff) | |
download | linux-8b4483658364f05b2e32845c8f445cdfd9452286.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Two easy cases of overlapping changes.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/x86/crypto/poly1305-avx2-x86_64.S')
-rw-r--r-- | arch/x86/crypto/poly1305-avx2-x86_64.S | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S index 3b6e70d085da..8457cdd47f75 100644 --- a/arch/x86/crypto/poly1305-avx2-x86_64.S +++ b/arch/x86/crypto/poly1305-avx2-x86_64.S @@ -323,6 +323,12 @@ ENTRY(poly1305_4block_avx2) vpaddq t2,t1,t1 vmovq t1x,d4 + # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> + # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small + # amount. Careful: we must not assume the carry bits 'd0 >> 26', + # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit + # integers. It's true in a single-block implementation, but not here. + # d1 += d0 >> 26 mov d0,%rax shr $26,%rax @@ -361,16 +367,16 @@ ENTRY(poly1305_4block_avx2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx |