summaryrefslogtreecommitdiff
path: root/arch/x86/crypto/sha512-mb
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/crypto/sha512-mb')
-rw-r--r--arch/x86/crypto/sha512-mb/Makefile12
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb.c1047
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_ctx.h128
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr.h104
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S281
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S297
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c69
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S224
-rw-r--r--arch/x86/crypto/sha512-mb/sha512_x4_avx2.S531
9 files changed, 0 insertions, 2693 deletions
diff --git a/arch/x86/crypto/sha512-mb/Makefile b/arch/x86/crypto/sha512-mb/Makefile
deleted file mode 100644
index 90f1ef69152e..000000000000
--- a/arch/x86/crypto/sha512-mb/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Arch-specific CryptoAPI modules.
-#
-
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
- $(comma)4)$(comma)%ymm2,yes,no)
-ifeq ($(avx2_supported),yes)
- obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o
- sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \
- sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o
-endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c
deleted file mode 100644
index 26b85678012d..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb.c
+++ /dev/null
@@ -1,1047 +0,0 @@
-/*
- * Multi buffer SHA512 algorithm Glue Code
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <asm/byteorder.h>
-#include <linux/hardirq.h>
-#include <asm/fpu/api.h>
-#include "sha512_mb_ctx.h"
-
-#define FLUSH_INTERVAL 1000 /* in usec */
-
-static struct mcryptd_alg_state sha512_mb_alg_state;
-
-struct sha512_mb_ctx {
- struct mcryptd_ahash *mcryptd_tfm;
-};
-
-static inline struct mcryptd_hash_request_ctx
- *cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx)
-{
- struct ahash_request *areq;
-
- areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
- return container_of(areq, struct mcryptd_hash_request_ctx, areq);
-}
-
-static inline struct ahash_request
- *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
-{
- return container_of((void *) ctx, struct ahash_request, __ctx);
-}
-
-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
- struct ahash_request *areq)
-{
- rctx->flag = HASH_UPDATE;
-}
-
-static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_submit)
- (struct sha512_mb_mgr *state,
- struct job_sha512 *job);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_flush)
- (struct sha512_mb_mgr *state);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
- (struct sha512_mb_mgr *state);
-
-inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
- uint64_t total_len)
-{
- uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1);
-
- memset(&padblock[i], 0, SHA512_BLOCK_SIZE);
- padblock[i] = 0x80;
-
- i += ((SHA512_BLOCK_SIZE - 1) &
- (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1)))
- + 1 + SHA512_PADLENGTHFIELD_SIZE;
-
-#if SHA512_PADLENGTHFIELD_SIZE == 16
- *((uint64_t *) &padblock[i - 16]) = 0;
-#endif
-
- *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
-
- /* Number of extra blocks to hash */
- return i >> SHA512_LOG2_BLOCK_SIZE;
-}
-
-static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit
- (struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx)
-{
- while (ctx) {
- if (ctx->status & HASH_CTX_STS_COMPLETE) {
- /* Clear PROCESSING bit */
- ctx->status = HASH_CTX_STS_COMPLETE;
- return ctx;
- }
-
- /*
- * If the extra blocks are empty, begin hashing what remains
- * in the user's buffer.
- */
- if (ctx->partial_block_buffer_length == 0 &&
- ctx->incoming_buffer_length) {
-
- const void *buffer = ctx->incoming_buffer;
- uint32_t len = ctx->incoming_buffer_length;
- uint32_t copy_len;
-
- /*
- * Only entire blocks can be hashed.
- * Copy remainder to extra blocks buffer.
- */
- copy_len = len & (SHA512_BLOCK_SIZE-1);
-
- if (copy_len) {
- len -= copy_len;
- memcpy(ctx->partial_block_buffer,
- ((const char *) buffer + len),
- copy_len);
- ctx->partial_block_buffer_length = copy_len;
- }
-
- ctx->incoming_buffer_length = 0;
-
- /* len should be a multiple of the block size now */
- assert((len % SHA512_BLOCK_SIZE) == 0);
-
- /* Set len to the number of blocks to be hashed */
- len >>= SHA512_LOG2_BLOCK_SIZE;
-
- if (len) {
-
- ctx->job.buffer = (uint8_t *) buffer;
- ctx->job.len = len;
- ctx = (struct sha512_hash_ctx *)
- sha512_job_mgr_submit(&mgr->mgr,
- &ctx->job);
- continue;
- }
- }
-
- /*
- * If the extra blocks are not empty, then we are
- * either on the last block(s) or we need more
- * user input before continuing.
- */
- if (ctx->status & HASH_CTX_STS_LAST) {
-
- uint8_t *buf = ctx->partial_block_buffer;
- uint32_t n_extra_blocks =
- sha512_pad(buf, ctx->total_length);
-
- ctx->status = (HASH_CTX_STS_PROCESSING |
- HASH_CTX_STS_COMPLETE);
- ctx->job.buffer = buf;
- ctx->job.len = (uint32_t) n_extra_blocks;
- ctx = (struct sha512_hash_ctx *)
- sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
- continue;
- }
-
- if (ctx)
- ctx->status = HASH_CTX_STS_IDLE;
- return ctx;
- }
-
- return NULL;
-}
-
-static struct sha512_hash_ctx
- *sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate)
-{
- /*
- * If get_comp_job returns NULL, there are no jobs complete.
- * If get_comp_job returns a job, verify that it is safe to return to
- * the user.
- * If it is not ready, resubmit the job to finish processing.
- * If sha512_ctx_mgr_resubmit returned a job, it is ready to be
- * returned.
- * Otherwise, all jobs currently being managed by the hash_ctx_mgr
- * still need processing.
- */
- struct sha512_ctx_mgr *mgr;
- struct sha512_hash_ctx *ctx;
- unsigned long flags;
-
- mgr = cstate->mgr;
- spin_lock_irqsave(&cstate->work_lock, flags);
- ctx = (struct sha512_hash_ctx *)
- sha512_job_mgr_get_comp_job(&mgr->mgr);
- ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
- spin_unlock_irqrestore(&cstate->work_lock, flags);
- return ctx;
-}
-
-static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
-{
- sha512_job_mgr_init(&mgr->mgr);
-}
-
-static struct sha512_hash_ctx
- *sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate,
- struct sha512_hash_ctx *ctx,
- const void *buffer,
- uint32_t len,
- int flags)
-{
- struct sha512_ctx_mgr *mgr;
- unsigned long irqflags;
-
- mgr = cstate->mgr;
- spin_lock_irqsave(&cstate->work_lock, irqflags);
- if (flags & ~(HASH_UPDATE | HASH_LAST)) {
- /* User should not pass anything other than UPDATE or LAST */
- ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
- goto unlock;
- }
-
- if (ctx->status & HASH_CTX_STS_PROCESSING) {
- /* Cannot submit to a currently processing job. */
- ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
- goto unlock;
- }
-
- if (ctx->status & HASH_CTX_STS_COMPLETE) {
- /* Cannot update a finished job. */
- ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
- goto unlock;
- }
-
- /*
- * If we made it here, there were no errors during this call to
- * submit
- */
- ctx->error = HASH_CTX_ERROR_NONE;
-
- /* Store buffer ptr info from user */
- ctx->incoming_buffer = buffer;
- ctx->incoming_buffer_length = len;
-
- /*
- * Store the user's request flags and mark this ctx as currently being
- * processed.
- */
- ctx->status = (flags & HASH_LAST) ?
- (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
- HASH_CTX_STS_PROCESSING;
-
- /* Advance byte counter */
- ctx->total_length += len;
-
- /*
- * If there is anything currently buffered in the extra blocks,
- * append to it until it contains a whole block.
- * Or if the user's buffer contains less than a whole block,
- * append as much as possible to the extra block.
- */
- if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) {
- /* Compute how many bytes to copy from user buffer into extra
- * block
- */
- uint32_t copy_len = SHA512_BLOCK_SIZE -
- ctx->partial_block_buffer_length;
- if (len < copy_len)
- copy_len = len;
-
- if (copy_len) {
- /* Copy and update relevant pointers and counters */
- memcpy
- (&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
- buffer, copy_len);
-
- ctx->partial_block_buffer_length += copy_len;
- ctx->incoming_buffer = (const void *)
- ((const char *)buffer + copy_len);
- ctx->incoming_buffer_length = len - copy_len;
- }
-
- /* The extra block should never contain more than 1 block
- * here
- */
- assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE);
-
- /* If the extra block buffer contains exactly 1 block, it can
- * be hashed.
- */
- if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) {
- ctx->partial_block_buffer_length = 0;
-
- ctx->job.buffer = ctx->partial_block_buffer;
- ctx->job.len = 1;
- ctx = (struct sha512_hash_ctx *)
- sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
- }
- }
-
- ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
-unlock:
- spin_unlock_irqrestore(&cstate->work_lock, irqflags);
- return ctx;
-}
-
-static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate)
-{
- struct sha512_ctx_mgr *mgr;
- struct sha512_hash_ctx *ctx;
- unsigned long flags;
-
- mgr = cstate->mgr;
- spin_lock_irqsave(&cstate->work_lock, flags);
- while (1) {
- ctx = (struct sha512_hash_ctx *)
- sha512_job_mgr_flush(&mgr->mgr);
-
- /* If flush returned 0, there are no more jobs in flight. */
- if (!ctx)
- break;
-
- /*
- * If flush returned a job, resubmit the job to finish
- * processing.
- */
- ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
-
- /*
- * If sha512_ctx_mgr_resubmit returned a job, it is ready to
- * be returned. Otherwise, all jobs currently being managed by
- * the sha512_ctx_mgr still need processing. Loop.
- */
- if (ctx)
- break;
- }
- spin_unlock_irqrestore(&cstate->work_lock, flags);
- return ctx;
-}
-
-static int sha512_mb_init(struct ahash_request *areq)
-{
- struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
- hash_ctx_init(sctx);
- sctx->job.result_digest[0] = SHA512_H0;
- sctx->job.result_digest[1] = SHA512_H1;
- sctx->job.result_digest[2] = SHA512_H2;
- sctx->job.result_digest[3] = SHA512_H3;
- sctx->job.result_digest[4] = SHA512_H4;
- sctx->job.result_digest[5] = SHA512_H5;
- sctx->job.result_digest[6] = SHA512_H6;
- sctx->job.result_digest[7] = SHA512_H7;
- sctx->total_length = 0;
- sctx->partial_block_buffer_length = 0;
- sctx->status = HASH_CTX_STS_IDLE;
-
- return 0;
-}
-
-static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
-{
- int i;
- struct sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
- __be64 *dst = (__be64 *) rctx->out;
-
- for (i = 0; i < 8; ++i)
- dst[i] = cpu_to_be64(sctx->job.result_digest[i]);
-
- return 0;
-}
-
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
- struct mcryptd_alg_cstate *cstate, bool flush)
-{
- int flag = HASH_UPDATE;
- int nbytes, err = 0;
- struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
- struct sha512_hash_ctx *sha_ctx;
-
- /* more work ? */
- while (!(rctx->flag & HASH_DONE)) {
- nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
- if (nbytes < 0) {
- err = nbytes;
- goto out;
- }
- /* check if the walk is done */
- if (crypto_ahash_walk_last(&rctx->walk)) {
- rctx->flag |= HASH_DONE;
- if (rctx->flag & HASH_FINAL)
- flag |= HASH_LAST;
-
- }
- sha_ctx = (struct sha512_hash_ctx *)
- ahash_request_ctx(&rctx->areq);
- kernel_fpu_begin();
- sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx,
- rctx->walk.data, nbytes, flag);
- if (!sha_ctx) {
- if (flush)
- sha_ctx = sha512_ctx_mgr_flush(cstate);
- }
- kernel_fpu_end();
- if (sha_ctx)
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- else {
- rctx = NULL;
- goto out;
- }
- }
-
- /* copy the results */
- if (rctx->flag & HASH_FINAL)
- sha512_mb_set_results(rctx);
-
-out:
- *ret_rctx = rctx;
- return err;
-}
-
-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
- struct mcryptd_alg_cstate *cstate,
- int err)
-{
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha512_hash_ctx *sha_ctx;
- struct mcryptd_hash_request_ctx *req_ctx;
- int ret;
- unsigned long flags;
-
- /* remove from work list */
- spin_lock_irqsave(&cstate->work_lock, flags);
- list_del(&rctx->waiter);
- spin_unlock_irqrestore(&cstate->work_lock, flags);
-
- if (irqs_disabled())
- rctx->complete(&req->base, err);
- else {
- local_bh_disable();
- rctx->complete(&req->base, err);
- local_bh_enable();
- }
-
- /* check to see if there are other jobs that are done */
- sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
- while (sha_ctx) {
- req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&req_ctx, cstate, false);
- if (req_ctx) {
- spin_lock_irqsave(&cstate->work_lock, flags);
- list_del(&req_ctx->waiter);
- spin_unlock_irqrestore(&cstate->work_lock, flags);
-
- req = cast_mcryptd_ctx_to_req(req_ctx);
- if (irqs_disabled())
- req_ctx->complete(&req->base, ret);
- else {
- local_bh_disable();
- req_ctx->complete(&req->base, ret);
- local_bh_enable();
- }
- }
- sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
- }
-
- return 0;
-}
-
-static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
- struct mcryptd_alg_cstate *cstate)
-{
- unsigned long next_flush;
- unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
- unsigned long flags;
-
- /* initialize tag */
- rctx->tag.arrival = jiffies; /* tag the arrival time */
- rctx->tag.seq_num = cstate->next_seq_num++;
- next_flush = rctx->tag.arrival + delay;
- rctx->tag.expire = next_flush;
-
- spin_lock_irqsave(&cstate->work_lock, flags);
- list_add_tail(&rctx->waiter, &cstate->work_list);
- spin_unlock_irqrestore(&cstate->work_lock, flags);
-
- mcryptd_arm_flusher(cstate, delay);
-}
-
-static int sha512_mb_update(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx,
- areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha512_hash_ctx *sha_ctx;
- int ret = 0, nbytes;
-
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
- if (nbytes < 0) {
- ret = nbytes;
- goto done;
- }
-
- if (crypto_ahash_walk_last(&rctx->walk))
- rctx->flag |= HASH_DONE;
-
- /* submit */
- sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
- sha512_mb_add_list(rctx, cstate);
- kernel_fpu_begin();
- sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
- nbytes, HASH_UPDATE);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
-
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha512_mb_finup(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx,
- areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
- struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
- struct sha512_hash_ctx *sha_ctx;
- int ret = 0, flag = HASH_UPDATE, nbytes;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
- if (nbytes < 0) {
- ret = nbytes;
- goto done;
- }
-
- if (crypto_ahash_walk_last(&rctx->walk)) {
- rctx->flag |= HASH_DONE;
- flag = HASH_LAST;
- }
-
- /* submit */
- rctx->flag |= HASH_FINAL;
- sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
- sha512_mb_add_list(rctx, cstate);
-
- kernel_fpu_begin();
- sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
- nbytes, flag);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha512_mb_final(struct ahash_request *areq)
-{
- struct mcryptd_hash_request_ctx *rctx =
- container_of(areq, struct mcryptd_hash_request_ctx,
- areq);
- struct mcryptd_alg_cstate *cstate =
- this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
- struct sha512_hash_ctx *sha_ctx;
- int ret = 0;
- u8 data;
-
- /* sanity check */
- if (rctx->tag.cpu != smp_processor_id()) {
- pr_err("mcryptd error: cpu clash\n");
- goto done;
- }
-
- /* need to init context */
- req_ctx_init(rctx, areq);
-
- rctx->flag |= HASH_DONE | HASH_FINAL;
-
- sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
- /* flag HASH_FINAL and 0 data size */
- sha512_mb_add_list(rctx, cstate);
- kernel_fpu_begin();
- sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST);
- kernel_fpu_end();
-
- /* check if anything is returned */
- if (!sha_ctx)
- return -EINPROGRESS;
-
- if (sha_ctx->error) {
- ret = sha_ctx->error;
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- goto done;
- }
-
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- ret = sha_finish_walk(&rctx, cstate, false);
- if (!rctx)
- return -EINPROGRESS;
-done:
- sha_complete_job(rctx, cstate, ret);
- return ret;
-}
-
-static int sha512_mb_export(struct ahash_request *areq, void *out)
-{
- struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
- memcpy(out, sctx, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha512_mb_import(struct ahash_request *areq, const void *in)
-{
- struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
- memcpy(sctx, in, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
- struct mcryptd_ahash *mcryptd_tfm;
- struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
- struct mcryptd_hash_ctx *mctx;
-
- mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb",
- CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(mcryptd_tfm))
- return PTR_ERR(mcryptd_tfm);
- mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
- mctx->alg_state = &sha512_mb_alg_state;
- ctx->mcryptd_tfm = mcryptd_tfm;
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- crypto_ahash_reqsize(&mcryptd_tfm->base));
-
- return 0;
-}
-
-static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
- struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
- mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm)
-{
- crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
- sizeof(struct ahash_request) +
- sizeof(struct sha512_hash_ctx));
-
- return 0;
-}
-
-static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm)
-{
- struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
- mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static struct ahash_alg sha512_mb_areq_alg = {
- .init = sha512_mb_init,
- .update = sha512_mb_update,
- .final = sha512_mb_final,
- .finup = sha512_mb_finup,
- .export = sha512_mb_export,
- .import = sha512_mb_import,
- .halg = {
- .digestsize = SHA512_DIGEST_SIZE,
- .statesize = sizeof(struct sha512_hash_ctx),
- .base = {
- .cra_name = "__sha512-mb",
- .cra_driver_name = "__intel_sha512-mb",
- .cra_priority = 100,
- /*
- * use ASYNC flag as some buffers in multi-buffer
- * algo may not have completed before hashing thread
- * sleep
- */
- .cra_flags = CRYPTO_ALG_ASYNC |
- CRYPTO_ALG_INTERNAL,
- .cra_blocksize = SHA512_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT
- (sha512_mb_areq_alg.halg.base.cra_list),
- .cra_init = sha512_mb_areq_init_tfm,
- .cra_exit = sha512_mb_areq_exit_tfm,
- .cra_ctxsize = sizeof(struct sha512_hash_ctx),
- }
- }
-};
-
-static int sha512_mb_async_init(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_init(mcryptd_req);
-}
-
-static int sha512_mb_async_update(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_update(mcryptd_req);
-}
-
-static int sha512_mb_async_finup(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_finup(mcryptd_req);
-}
-
-static int sha512_mb_async_final(struct ahash_request *req)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_final(mcryptd_req);
-}
-
-static int sha512_mb_async_digest(struct ahash_request *req)
-{
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_digest(mcryptd_req);
-}
-
-static int sha512_mb_async_export(struct ahash_request *req, void *out)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- return crypto_ahash_export(mcryptd_req, out);
-}
-
-static int sha512_mb_async_import(struct ahash_request *req, const void *in)
-{
- struct ahash_request *mcryptd_req = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
- struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
- struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
- struct mcryptd_hash_request_ctx *rctx;
- struct ahash_request *areq;
-
- memcpy(mcryptd_req, req, sizeof(*req));
- ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
- rctx = ahash_request_ctx(mcryptd_req);
-
- areq = &rctx->areq;
-
- ahash_request_set_tfm(areq, child);
- ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
- rctx->complete, req);
-
- return crypto_ahash_import(mcryptd_req, in);
-}
-
-static struct ahash_alg sha512_mb_async_alg = {
- .init = sha512_mb_async_init,
- .update = sha512_mb_async_update,
- .final = sha512_mb_async_final,
- .finup = sha512_mb_async_finup,
- .digest = sha512_mb_async_digest,
- .export = sha512_mb_async_export,
- .import = sha512_mb_async_import,
- .halg = {
- .digestsize = SHA512_DIGEST_SIZE,
- .statesize = sizeof(struct sha512_hash_ctx),
- .base = {
- .cra_name = "sha512",
- .cra_driver_name = "sha512_mb",
- /*
- * Low priority, since with few concurrent hash requests
- * this is extremely slow due to the flush delay. Users
- * whose workloads would benefit from this can request
- * it explicitly by driver name, or can increase its
- * priority at runtime using NETLINK_CRYPTO.
- */
- .cra_priority = 50,
- .cra_flags = CRYPTO_ALG_ASYNC,
- .cra_blocksize = SHA512_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT
- (sha512_mb_async_alg.halg.base.cra_list),
- .cra_init = sha512_mb_async_init_tfm,
- .cra_exit = sha512_mb_async_exit_tfm,
- .cra_ctxsize = sizeof(struct sha512_mb_ctx),
- .cra_alignmask = 0,
- },
- },
-};
-
-static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate)
-{
- struct mcryptd_hash_request_ctx *rctx;
- unsigned long cur_time;
- unsigned long next_flush = 0;
- struct sha512_hash_ctx *sha_ctx;
-
-
- cur_time = jiffies;
-
- while (!list_empty(&cstate->work_list)) {
- rctx = list_entry(cstate->work_list.next,
- struct mcryptd_hash_request_ctx, waiter);
- if time_before(cur_time, rctx->tag.expire)
- break;
- kernel_fpu_begin();
- sha_ctx = (struct sha512_hash_ctx *)
- sha512_ctx_mgr_flush(cstate);
- kernel_fpu_end();
- if (!sha_ctx) {
- pr_err("sha512_mb error: nothing got flushed for"
- " non-empty list\n");
- break;
- }
- rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
- sha_finish_walk(&rctx, cstate, true);
- sha_complete_job(rctx, cstate, 0);
- }
-
- if (!list_empty(&cstate->work_list)) {
- rctx = list_entry(cstate->work_list.next,
- struct mcryptd_hash_request_ctx, waiter);
- /* get the hash context and then flush time */
- next_flush = rctx->tag.expire;
- mcryptd_arm_flusher(cstate, get_delay(next_flush));
- }
- return next_flush;
-}
-
-static int __init sha512_mb_mod_init(void)
-{
-
- int cpu;
- int err;
- struct mcryptd_alg_cstate *cpu_state;
-
- /* check for dependent cpu features */
- if (!boot_cpu_has(X86_FEATURE_AVX2) ||
- !boot_cpu_has(X86_FEATURE_BMI2))
- return -ENODEV;
-
- /* initialize multibuffer structures */
- sha512_mb_alg_state.alg_cstate =
- alloc_percpu(struct mcryptd_alg_cstate);
-
- sha512_job_mgr_init = sha512_mb_mgr_init_avx2;
- sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2;
- sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2;
- sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2;
-
- if (!sha512_mb_alg_state.alg_cstate)
- return -ENOMEM;
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
- cpu_state->next_flush = 0;
- cpu_state->next_seq_num = 0;
- cpu_state->flusher_engaged = false;
- INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
- cpu_state->cpu = cpu;
- cpu_state->alg_state = &sha512_mb_alg_state;
- cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr),
- GFP_KERNEL);
- if (!cpu_state->mgr)
- goto err2;
- sha512_ctx_mgr_init(cpu_state->mgr);
- INIT_LIST_HEAD(&cpu_state->work_list);
- spin_lock_init(&cpu_state->work_lock);
- }
- sha512_mb_alg_state.flusher = &sha512_mb_flusher;
-
- err = crypto_register_ahash(&sha512_mb_areq_alg);
- if (err)
- goto err2;
- err = crypto_register_ahash(&sha512_mb_async_alg);
- if (err)
- goto err1;
-
-
- return 0;
-err1:
- crypto_unregister_ahash(&sha512_mb_areq_alg);
-err2:
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
- kfree(cpu_state->mgr);
- }
- free_percpu(sha512_mb_alg_state.alg_cstate);
- return -ENODEV;
-}
-
-static void __exit sha512_mb_mod_fini(void)
-{
- int cpu;
- struct mcryptd_alg_cstate *cpu_state;
-
- crypto_unregister_ahash(&sha512_mb_async_alg);
- crypto_unregister_ahash(&sha512_mb_areq_alg);
- for_each_possible_cpu(cpu) {
- cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
- kfree(cpu_state->mgr);
- }
- free_percpu(sha512_mb_alg_state.alg_cstate);
-}
-
-module_init(sha512_mb_mod_init);
-module_exit(sha512_mb_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated");
-
-MODULE_ALIAS("sha512");
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
deleted file mode 100644
index e5c465bd821e..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Header file for multi buffer SHA512 context
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SHA_MB_CTX_INTERNAL_H
-#define _SHA_MB_CTX_INTERNAL_H
-
-#include "sha512_mb_mgr.h"
-
-#define HASH_UPDATE 0x00
-#define HASH_LAST 0x01
-#define HASH_DONE 0x02
-#define HASH_FINAL 0x04
-
-#define HASH_CTX_STS_IDLE 0x00
-#define HASH_CTX_STS_PROCESSING 0x01
-#define HASH_CTX_STS_LAST 0x02
-#define HASH_CTX_STS_COMPLETE 0x04
-
-enum hash_ctx_error {
- HASH_CTX_ERROR_NONE = 0,
- HASH_CTX_ERROR_INVALID_FLAGS = -1,
- HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
- HASH_CTX_ERROR_ALREADY_COMPLETED = -3,
-};
-
-#define hash_ctx_user_data(ctx) ((ctx)->user_data)
-#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
-#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
-#define hash_ctx_status(ctx) ((ctx)->status)
-#define hash_ctx_error(ctx) ((ctx)->error)
-#define hash_ctx_init(ctx) \
- do { \
- (ctx)->error = HASH_CTX_ERROR_NONE; \
- (ctx)->status = HASH_CTX_STS_COMPLETE; \
- } while (0)
-
-/* Hash Constants and Typedefs */
-#define SHA512_DIGEST_LENGTH 8
-#define SHA512_LOG2_BLOCK_SIZE 7
-
-#define SHA512_PADLENGTHFIELD_SIZE 16
-
-#ifdef SHA_MB_DEBUG
-#define assert(expr) \
-do { \
- if (unlikely(!(expr))) { \
- printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
- #expr, __FILE__, __func__, __LINE__); \
- } \
-} while (0)
-#else
-#define assert(expr) do {} while (0)
-#endif
-
-struct sha512_ctx_mgr {
- struct sha512_mb_mgr mgr;
-};
-
-/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */
-
-struct sha512_hash_ctx {
- /* Must be at struct offset 0 */
- struct job_sha512 job;
- /* status flag */
- int status;
- /* error flag */
- int error;
-
- uint64_t total_length;
- const void *incoming_buffer;
- uint32_t incoming_buffer_length;
- uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2];
- uint32_t partial_block_buffer_length;
- void *user_data;
-};
-
-#endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
deleted file mode 100644
index 178f17eef382..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Header file for multi buffer SHA512 algorithm manager
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __SHA_MB_MGR_H
-#define __SHA_MB_MGR_H
-
-#include <linux/types.h>
-
-#define NUM_SHA512_DIGEST_WORDS 8
-
-enum job_sts {STS_UNKNOWN = 0,
- STS_BEING_PROCESSED = 1,
- STS_COMPLETED = 2,
- STS_INTERNAL_ERROR = 3,
- STS_ERROR = 4
-};
-
-struct job_sha512 {
- u8 *buffer;
- u64 len;
- u64 result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32);
- enum job_sts status;
- void *user_data;
-};
-
-struct sha512_args_x4 {
- uint64_t digest[8][4];
- uint8_t *data_ptr[4];
-};
-
-struct sha512_lane_data {
- struct job_sha512 *job_in_lane;
-};
-
-struct sha512_mb_mgr {
- struct sha512_args_x4 args;
-
- uint64_t lens[4];
-
- /* each byte is index (0...7) of unused lanes */
- uint64_t unused_lanes;
- /* byte 4 is set to FF as a flag */
- struct sha512_lane_data ldata[4];
-};
-
-#define SHA512_MB_MGR_NUM_LANES_AVX2 4
-
-void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state);
-struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state,
- struct job_sha512 *job);
-struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state);
-struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state);
-
-#endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
deleted file mode 100644
index cf2636d4c9ba..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Header file for multi buffer SHA256 algorithm data structure
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# Macros for defining data structures
-
-# Usage example
-
-#START_FIELDS # JOB_AES
-### name size align
-#FIELD _plaintext, 8, 8 # pointer to plaintext
-#FIELD _ciphertext, 8, 8 # pointer to ciphertext
-#FIELD _IV, 16, 8 # IV
-#FIELD _keys, 8, 8 # pointer to keys
-#FIELD _len, 4, 4 # length in bytes
-#FIELD _status, 4, 4 # status enumeration
-#FIELD _user_data, 8, 8 # pointer to user data
-#UNION _union, size1, align1, \
-# size2, align2, \
-# size3, align3, \
-# ...
-#END_FIELDS
-#%assign _JOB_AES_size _FIELD_OFFSET
-#%assign _JOB_AES_align _STRUCT_ALIGN
-
-#########################################################################
-
-# Alternate "struc-like" syntax:
-# STRUCT job_aes2
-# RES_Q .plaintext, 1
-# RES_Q .ciphertext, 1
-# RES_DQ .IV, 1
-# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN
-# RES_U .union, size1, align1, \
-# size2, align2, \
-# ...
-# ENDSTRUCT
-# # Following only needed if nesting
-# %assign job_aes2_size _FIELD_OFFSET
-# %assign job_aes2_align _STRUCT_ALIGN
-#
-# RES_* macros take a name, a count and an optional alignment.
-# The count in in terms of the base size of the macro, and the
-# default alignment is the base size.
-# The macros are:
-# Macro Base size
-# RES_B 1
-# RES_W 2
-# RES_D 4
-# RES_Q 8
-# RES_DQ 16
-# RES_Y 32
-# RES_Z 64
-#
-# RES_U defines a union. It's arguments are a name and two or more
-# pairs of "size, alignment"
-#
-# The two assigns are only needed if this structure is being nested
-# within another. Even if the assigns are not done, one can still use
-# STRUCT_NAME_size as the size of the structure.
-#
-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
-#
-# The differences between this and using "struc" directly are that each
-# type is implicitly aligned to its natural length (although this can be
-# over-ridden with an explicit third parameter), and that the structure
-# is padded at the end to its overall alignment.
-#
-
-#########################################################################
-
-#ifndef _DATASTRUCT_ASM_
-#define _DATASTRUCT_ASM_
-
-#define PTR_SZ 8
-#define SHA512_DIGEST_WORD_SIZE 8
-#define SHA512_MB_MGR_NUM_LANES_AVX2 4
-#define NUM_SHA512_DIGEST_WORDS 8
-#define SZ4 4*SHA512_DIGEST_WORD_SIZE
-#define ROUNDS 80*SZ4
-#define SHA512_DIGEST_ROW_SIZE (SHA512_MB_MGR_NUM_LANES_AVX2 * 8)
-
-# START_FIELDS
-.macro START_FIELDS
- _FIELD_OFFSET = 0
- _STRUCT_ALIGN = 0
-.endm
-
-# FIELD name size align
-.macro FIELD name size align
- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
- \name = _FIELD_OFFSET
- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
-.if (\align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = \align
-.endif
-.endm
-
-# END_FIELDS
-.macro END_FIELDS
- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
-.endm
-
-.macro STRUCT p1
-START_FIELDS
-.struc \p1
-.endm
-
-.macro ENDSTRUCT
- tmp = _FIELD_OFFSET
- END_FIELDS
- tmp = (_FIELD_OFFSET - ##tmp)
-.if (tmp > 0)
- .lcomm tmp
-.endm
-
-## RES_int name size align
-.macro RES_int p1 p2 p3
- name = \p1
- size = \p2
- align = .\p3
-
- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
-.align align
-.lcomm name size
- _FIELD_OFFSET = _FIELD_OFFSET + (size)
-.if (align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = align
-.endif
-.endm
-
-# macro RES_B name, size [, align]
-.macro RES_B _name, _size, _align=1
-RES_int _name _size _align
-.endm
-
-# macro RES_W name, size [, align]
-.macro RES_W _name, _size, _align=2
-RES_int _name 2*(_size) _align
-.endm
-
-# macro RES_D name, size [, align]
-.macro RES_D _name, _size, _align=4
-RES_int _name 4*(_size) _align
-.endm
-
-# macro RES_Q name, size [, align]
-.macro RES_Q _name, _size, _align=8
-RES_int _name 8*(_size) _align
-.endm
-
-# macro RES_DQ name, size [, align]
-.macro RES_DQ _name, _size, _align=16
-RES_int _name 16*(_size) _align
-.endm
-
-# macro RES_Y name, size [, align]
-.macro RES_Y _name, _size, _align=32
-RES_int _name 32*(_size) _align
-.endm
-
-# macro RES_Z name, size [, align]
-.macro RES_Z _name, _size, _align=64
-RES_int _name 64*(_size) _align
-.endm
-
-#endif
-
-###################################################################
-### Define SHA512 Out Of Order Data Structures
-###################################################################
-
-START_FIELDS # LANE_DATA
-### name size align
-FIELD _job_in_lane, 8, 8 # pointer to job object
-END_FIELDS
-
- _LANE_DATA_size = _FIELD_OFFSET
- _LANE_DATA_align = _STRUCT_ALIGN
-
-####################################################################
-
-START_FIELDS # SHA512_ARGS_X4
-### name size align
-FIELD _digest, 8*8*4, 4 # transposed digest
-FIELD _data_ptr, 8*4, 8 # array of pointers to data
-END_FIELDS
-
- _SHA512_ARGS_X4_size = _FIELD_OFFSET
- _SHA512_ARGS_X4_align = _STRUCT_ALIGN
-
-#####################################################################
-
-START_FIELDS # MB_MGR
-### name size align
-FIELD _args, _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align
-FIELD _lens, 8*4, 8
-FIELD _unused_lanes, 8, 8
-FIELD _ldata, _LANE_DATA_size*4, _LANE_DATA_align
-END_FIELDS
-
- _MB_MGR_size = _FIELD_OFFSET
- _MB_MGR_align = _STRUCT_ALIGN
-
-_args_digest = _args + _digest
-_args_data_ptr = _args + _data_ptr
-
-#######################################################################
-
-#######################################################################
-#### Define constants
-#######################################################################
-
-#define STS_UNKNOWN 0
-#define STS_BEING_PROCESSED 1
-#define STS_COMPLETED 2
-
-#######################################################################
-#### Define JOB_SHA512 structure
-#######################################################################
-
-START_FIELDS # JOB_SHA512
-### name size align
-FIELD _buffer, 8, 8 # pointer to buffer
-FIELD _len, 8, 8 # length in bytes
-FIELD _result_digest, 8*8, 32 # Digest (output)
-FIELD _status, 4, 4
-FIELD _user_data, 8, 8
-END_FIELDS
-
- _JOB_SHA512_size = _FIELD_OFFSET
- _JOB_SHA512_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
deleted file mode 100644
index 7c629caebc05..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * Flush routine for SHA512 multibuffer
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-.extern sha512_x4_avx2
-
-# LINUX register definitions
-#define arg1 %rdi
-#define arg2 %rsi
-
-# idx needs to be other than arg1, arg2, rbx, r12
-#define idx %rdx
-
-# Common definitions
-#define state arg1
-#define job arg2
-#define len2 arg2
-
-#define unused_lanes %rbx
-#define lane_data %rbx
-#define tmp2 %rbx
-
-#define job_rax %rax
-#define tmp1 %rax
-#define size_offset %rax
-#define tmp %rax
-#define start_offset %rax
-
-#define tmp3 arg1
-
-#define extra_blocks arg2
-#define p arg2
-
-#define tmp4 %r8
-#define lens0 %r8
-
-#define lens1 %r9
-#define lens2 %r10
-#define lens3 %r11
-
-.macro LABEL prefix n
-\prefix\n\():
-.endm
-
-.macro JNE_SKIP i
-jne skip_\i
-.endm
-
-.altmacro
-.macro SET_OFFSET _offset
-offset = \_offset
-.endm
-.noaltmacro
-
-# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state)
-# arg 1 : rcx : state
-ENTRY(sha512_mb_mgr_flush_avx2)
- FRAME_BEGIN
- push %rbx
-
- # If bit (32+3) is set, then all lanes are empty
- mov _unused_lanes(state), unused_lanes
- bt $32+7, unused_lanes
- jc return_null
-
- # find a lane with a non-null job
- xor idx, idx
- offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne one(%rip), idx
- offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne two(%rip), idx
- offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
- cmovne three(%rip), idx
-
- # copy idx to empty lanes
-copy_lane_data:
- offset = (_args + _data_ptr)
- mov offset(state,idx,8), tmp
-
- I = 0
-.rep 4
- offset = (_ldata + I * _LANE_DATA_size + _job_in_lane)
- cmpq $0, offset(state)
-.altmacro
- JNE_SKIP %I
- offset = (_args + _data_ptr + 8*I)
- mov tmp, offset(state)
- offset = (_lens + 8*I +4)
- movl $0xFFFFFFFF, offset(state)
-LABEL skip_ %I
- I = (I+1)
-.noaltmacro
-.endr
-
- # Find min length
- mov _lens + 0*8(state),lens0
- mov lens0,idx
- mov _lens + 1*8(state),lens1
- cmp idx,lens1
- cmovb lens1,idx
- mov _lens + 2*8(state),lens2
- cmp idx,lens2
- cmovb lens2,idx
- mov _lens + 3*8(state),lens3
- cmp idx,lens3
- cmovb lens3,idx
- mov idx,len2
- and $0xF,idx
- and $~0xFF,len2
- jz len_is_0
-
- sub len2, lens0
- sub len2, lens1
- sub len2, lens2
- sub len2, lens3
- shr $32,len2
- mov lens0, _lens + 0*8(state)
- mov lens1, _lens + 1*8(state)
- mov lens2, _lens + 2*8(state)
- mov lens3, _lens + 3*8(state)
-
- # "state" and "args" are the same address, arg1
- # len is arg2
- call sha512_x4_avx2
- # state and idx are intact
-
-len_is_0:
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- mov _unused_lanes(state), unused_lanes
- shl $8, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens+4(state, idx, 8)
-
- vmovq _args_digest+0*32(state, idx, 8), %xmm0
- vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
- vmovq _args_digest+2*32(state, idx, 8), %xmm1
- vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
- vmovq _args_digest+4*32(state, idx, 8), %xmm2
- vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
- vmovq _args_digest+6*32(state, idx, 8), %xmm3
- vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
-
- vmovdqu %xmm0, _result_digest(job_rax)
- vmovdqu %xmm1, _result_digest+1*16(job_rax)
- vmovdqu %xmm2, _result_digest+2*16(job_rax)
- vmovdqu %xmm3, _result_digest+3*16(job_rax)
-
-return:
- pop %rbx
- FRAME_END
- ret
-
-return_null:
- xor job_rax, job_rax
- jmp return
-ENDPROC(sha512_mb_mgr_flush_avx2)
-.align 16
-
-ENTRY(sha512_mb_mgr_get_comp_job_avx2)
- push %rbx
-
- mov _unused_lanes(state), unused_lanes
- bt $(32+7), unused_lanes
- jc .return_null
-
- # Find min length
- mov _lens(state),lens0
- mov lens0,idx
- mov _lens+1*8(state),lens1
- cmp idx,lens1
- cmovb lens1,idx
- mov _lens+2*8(state),lens2
- cmp idx,lens2
- cmovb lens2,idx
- mov _lens+3*8(state),lens3
- cmp idx,lens3
- cmovb lens3,idx
- test $~0xF,idx
- jnz .return_null
- and $0xF,idx
-
- #process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- mov _unused_lanes(state), unused_lanes
- shl $8, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF, _lens+4(state, idx, 8)
-
- vmovq _args_digest(state, idx, 8), %xmm0
- vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
- vmovq _args_digest+2*32(state, idx, 8), %xmm1
- vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
- vmovq _args_digest+4*32(state, idx, 8), %xmm2
- vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
- vmovq _args_digest+6*32(state, idx, 8), %xmm3
- vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
-
- vmovdqu %xmm0, _result_digest+0*16(job_rax)
- vmovdqu %xmm1, _result_digest+1*16(job_rax)
- vmovdqu %xmm2, _result_digest+2*16(job_rax)
- vmovdqu %xmm3, _result_digest+3*16(job_rax)
-
- pop %rbx
-
- ret
-
-.return_null:
- xor job_rax, job_rax
- pop %rbx
- ret
-ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
-
-.section .rodata.cst8.one, "aM", @progbits, 8
-.align 8
-one:
-.quad 1
-
-.section .rodata.cst8.two, "aM", @progbits, 8
-.align 8
-two:
-.quad 2
-
-.section .rodata.cst8.three, "aM", @progbits, 8
-.align 8
-three:
-.quad 3
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
deleted file mode 100644
index d08805032f01..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Initialization code for multi buffer SHA256 algorithm for AVX2
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sha512_mb_mgr.h"
-
-void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
-{
- unsigned int j;
-
- /* initially all lanes are unused */
- state->lens[0] = 0xFFFFFFFF00000000;
- state->lens[1] = 0xFFFFFFFF00000001;
- state->lens[2] = 0xFFFFFFFF00000002;
- state->lens[3] = 0xFFFFFFFF00000003;
-
- state->unused_lanes = 0xFF03020100;
- for (j = 0; j < 4; j++)
- state->ldata[j].job_in_lane = NULL;
-}
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
deleted file mode 100644
index 4ba709ba78e5..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Buffer submit code for multi buffer SHA512 algorithm
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-.extern sha512_x4_avx2
-
-#define arg1 %rdi
-#define arg2 %rsi
-
-#define idx %rdx
-#define last_len %rdx
-
-#define size_offset %rcx
-#define tmp2 %rcx
-
-# Common definitions
-#define state arg1
-#define job arg2
-#define len2 arg2
-#define p2 arg2
-
-#define p %r11
-#define start_offset %r11
-
-#define unused_lanes %rbx
-
-#define job_rax %rax
-#define len %rax
-
-#define lane %r12
-#define tmp3 %r12
-#define lens3 %r12
-
-#define extra_blocks %r8
-#define lens0 %r8
-
-#define tmp %r9
-#define lens1 %r9
-
-#define lane_data %r10
-#define lens2 %r10
-
-#define DWORD_len %eax
-
-# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job)
-# arg 1 : rcx : state
-# arg 2 : rdx : job
-ENTRY(sha512_mb_mgr_submit_avx2)
- FRAME_BEGIN
- push %rbx
- push %r12
-
- mov _unused_lanes(state), unused_lanes
- movzb %bl,lane
- shr $8, unused_lanes
- imul $_LANE_DATA_size, lane,lane_data
- movl $STS_BEING_PROCESSED, _status(job)
- lea _ldata(state, lane_data), lane_data
- mov unused_lanes, _unused_lanes(state)
- movl _len(job), DWORD_len
-
- mov job, _job_in_lane(lane_data)
- movl DWORD_len,_lens+4(state , lane, 8)
-
- # Load digest words from result_digest
- vmovdqu _result_digest+0*16(job), %xmm0
- vmovdqu _result_digest+1*16(job), %xmm1
- vmovdqu _result_digest+2*16(job), %xmm2
- vmovdqu _result_digest+3*16(job), %xmm3
-
- vmovq %xmm0, _args_digest(state, lane, 8)
- vpextrq $1, %xmm0, _args_digest+1*32(state , lane, 8)
- vmovq %xmm1, _args_digest+2*32(state , lane, 8)
- vpextrq $1, %xmm1, _args_digest+3*32(state , lane, 8)
- vmovq %xmm2, _args_digest+4*32(state , lane, 8)
- vpextrq $1, %xmm2, _args_digest+5*32(state , lane, 8)
- vmovq %xmm3, _args_digest+6*32(state , lane, 8)
- vpextrq $1, %xmm3, _args_digest+7*32(state , lane, 8)
-
- mov _buffer(job), p
- mov p, _args_data_ptr(state, lane, 8)
-
- cmp $0xFF, unused_lanes
- jne return_null
-
-start_loop:
-
- # Find min length
- mov _lens+0*8(state),lens0
- mov lens0,idx
- mov _lens+1*8(state),lens1
- cmp idx,lens1
- cmovb lens1, idx
- mov _lens+2*8(state),lens2
- cmp idx,lens2
- cmovb lens2,idx
- mov _lens+3*8(state),lens3
- cmp idx,lens3
- cmovb lens3,idx
- mov idx,len2
- and $0xF,idx
- and $~0xFF,len2
- jz len_is_0
-
- sub len2,lens0
- sub len2,lens1
- sub len2,lens2
- sub len2,lens3
- shr $32,len2
- mov lens0, _lens + 0*8(state)
- mov lens1, _lens + 1*8(state)
- mov lens2, _lens + 2*8(state)
- mov lens3, _lens + 3*8(state)
-
- # "state" and "args" are the same address, arg1
- # len is arg2
- call sha512_x4_avx2
- # state and idx are intact
-
-len_is_0:
-
- # process completed job "idx"
- imul $_LANE_DATA_size, idx, lane_data
- lea _ldata(state, lane_data), lane_data
-
- mov _job_in_lane(lane_data), job_rax
- mov _unused_lanes(state), unused_lanes
- movq $0, _job_in_lane(lane_data)
- movl $STS_COMPLETED, _status(job_rax)
- shl $8, unused_lanes
- or idx, unused_lanes
- mov unused_lanes, _unused_lanes(state)
-
- movl $0xFFFFFFFF,_lens+4(state,idx,8)
- vmovq _args_digest+0*32(state , idx, 8), %xmm0
- vpinsrq $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0
- vmovq _args_digest+2*32(state , idx, 8), %xmm1
- vpinsrq $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1
- vmovq _args_digest+4*32(state , idx, 8), %xmm2
- vpinsrq $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2
- vmovq _args_digest+6*32(state , idx, 8), %xmm3
- vpinsrq $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3
-
- vmovdqu %xmm0, _result_digest + 0*16(job_rax)
- vmovdqu %xmm1, _result_digest + 1*16(job_rax)
- vmovdqu %xmm2, _result_digest + 2*16(job_rax)
- vmovdqu %xmm3, _result_digest + 3*16(job_rax)
-
-return:
- pop %r12
- pop %rbx
- FRAME_END
- ret
-
-return_null:
- xor job_rax, job_rax
- jmp return
-ENDPROC(sha512_mb_mgr_submit_avx2)
-
-/* UNUSED?
-.section .rodata.cst16, "aM", @progbits, 16
-.align 16
-H0: .int 0x6a09e667
-H1: .int 0xbb67ae85
-H2: .int 0x3c6ef372
-H3: .int 0xa54ff53a
-H4: .int 0x510e527f
-H5: .int 0x9b05688c
-H6: .int 0x1f83d9ab
-H7: .int 0x5be0cd19
-*/
diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
deleted file mode 100644
index e22e907643a6..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
+++ /dev/null
@@ -1,531 +0,0 @@
-/*
- * Multi-buffer SHA512 algorithm hash compute routine
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- * Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# code to compute quad SHA512 using AVX2
-# use YMMs to tackle the larger digest size
-# outer calling routine takes care of save and restore of XMM registers
-# Logic designed/laid out by JDG
-
-# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15
-# Stack must be aligned to 32 bytes before call
-# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12
-# Linux preserves: rcx rdx rdi rbp r13 r14 r15
-# clobbers ymm0-15
-
-#include <linux/linkage.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-arg1 = %rdi
-arg2 = %rsi
-
-# Common definitions
-STATE = arg1
-INP_SIZE = arg2
-
-IDX = %rax
-ROUND = %rbx
-TBL = %r8
-
-inp0 = %r9
-inp1 = %r10
-inp2 = %r11
-inp3 = %r12
-
-a = %ymm0
-b = %ymm1
-c = %ymm2
-d = %ymm3
-e = %ymm4
-f = %ymm5
-g = %ymm6
-h = %ymm7
-
-a0 = %ymm8
-a1 = %ymm9
-a2 = %ymm10
-
-TT0 = %ymm14
-TT1 = %ymm13
-TT2 = %ymm12
-TT3 = %ymm11
-TT4 = %ymm10
-TT5 = %ymm9
-
-T1 = %ymm14
-TMP = %ymm15
-
-# Define stack usage
-STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24
-
-#define VMOVPD vmovupd
-_digest = SZ4*16
-
-# transpose r0, r1, r2, r3, t0, t1
-# "transpose" data in {r0..r3} using temps {t0..t3}
-# Input looks like: {r0 r1 r2 r3}
-# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
-# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
-# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
-# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
-#
-# output looks like: {t0 r1 r0 r3}
-# t0 = {d1 d0 c1 c0 b1 b0 a1 a0}
-# r1 = {d3 d2 c3 c2 b3 b2 a3 a2}
-# r0 = {d5 d4 c5 c4 b5 b4 a5 a4}
-# r3 = {d7 d6 c7 c6 b7 b6 a7 a6}
-
-.macro TRANSPOSE r0 r1 r2 r3 t0 t1
- vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0}
- vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2}
- vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0}
- vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2}
-
- vperm2f128 $0x20, \r2, \r0, \r1 # h6...a6
- vperm2f128 $0x31, \r2, \r0, \r3 # h2...a2
- vperm2f128 $0x31, \t1, \t0, \r0 # h5...a5
- vperm2f128 $0x20, \t1, \t0, \t0 # h1...a1
-.endm
-
-.macro ROTATE_ARGS
-TMP_ = h
-h = g
-g = f
-f = e
-e = d
-d = c
-c = b
-b = a
-a = TMP_
-.endm
-
-# PRORQ reg, imm, tmp
-# packed-rotate-right-double
-# does a rotate by doing two shifts and an or
-.macro _PRORQ reg imm tmp
- vpsllq $(64-\imm),\reg,\tmp
- vpsrlq $\imm,\reg, \reg
- vpor \tmp,\reg, \reg
-.endm
-
-# non-destructive
-# PRORQ_nd reg, imm, tmp, src
-.macro _PRORQ_nd reg imm tmp src
- vpsllq $(64-\imm), \src, \tmp
- vpsrlq $\imm, \src, \reg
- vpor \tmp, \reg, \reg
-.endm
-
-# PRORQ dst/src, amt
-.macro PRORQ reg imm
- _PRORQ \reg, \imm, TMP
-.endm
-
-# PRORQ_nd dst, src, amt
-.macro PRORQ_nd reg tmp imm
- _PRORQ_nd \reg, \imm, TMP, \tmp
-.endm
-
-#; arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_00_15 _T1 i
- PRORQ_nd a0, e, (18-14) # sig1: a0 = (e >> 4)
-
- vpxor g, f, a2 # ch: a2 = f^g
- vpand e,a2, a2 # ch: a2 = (f^g)&e
- vpxor g, a2, a2 # a2 = ch
-
- PRORQ_nd a1,e,41 # sig1: a1 = (e >> 25)
-
- offset = SZ4*(\i & 0xf)
- vmovdqu \_T1,offset(%rsp)
- vpaddq (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K
- vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5)
- PRORQ a0, 14 # sig1: a0 = (e >> 6) ^ (e >> 11)
- vpaddq a2, h, h # h = h + ch
- PRORQ_nd a2,a,6 # sig0: a2 = (a >> 11)
- vpaddq \_T1,h, h # h = h + ch + W + K
- vpxor a1, a0, a0 # a0 = sigma1
- vmovdqu a,\_T1
- PRORQ_nd a1,a,39 # sig0: a1 = (a >> 22)
- vpxor c, \_T1, \_T1 # maj: T1 = a^c
- add $SZ4, ROUND # ROUND++
- vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b
- vpaddq a0, h, h
- vpaddq h, d, d
- vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11)
- PRORQ a2,28 # sig0: a2 = (a >> 2) ^ (a >> 13)
- vpxor a1, a2, a2 # a2 = sig0
- vpand c, a, a1 # maj: a1 = a&c
- vpor \_T1, a1, a1 # a1 = maj
- vpaddq a1, h, h # h = h + ch + W + K + maj
- vpaddq a2, h, h # h = h + ch + W + K + maj + sigma0
- ROTATE_ARGS
-.endm
-
-
-#; arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_16_XX _T1 i
- vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1
- vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1
- vmovdqu \_T1, a0
- PRORQ \_T1,7
- vmovdqu a1, a2
- PRORQ a1,42
- vpxor a0, \_T1, \_T1
- PRORQ \_T1, 1
- vpxor a2, a1, a1
- PRORQ a1, 19
- vpsrlq $7, a0, a0
- vpxor a0, \_T1, \_T1
- vpsrlq $6, a2, a2
- vpxor a2, a1, a1
- vpaddq SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1
- vpaddq SZ4*((\i-7)&0xf)(%rsp), a1, a1
- vpaddq a1, \_T1, \_T1
-
- ROUND_00_15 \_T1,\i
-.endm
-
-
-# void sha512_x4_avx2(void *STATE, const int INP_SIZE)
-# arg 1 : STATE : pointer to input data
-# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1)
-ENTRY(sha512_x4_avx2)
- # general registers preserved in outer calling routine
- # outer calling routine saves all the XMM registers
- # save callee-saved clobbered registers to comply with C function ABI
- push %r12
- push %r13
- push %r14
- push %r15
-
- sub $STACK_SPACE1, %rsp
-
- # Load the pre-transposed incoming digest.
- vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a
- vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b
- vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c
- vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d
- vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e
- vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f
- vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g
- vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h
-
- lea K512_4(%rip),TBL
-
- # load the address of each of the 4 message lanes
- # getting ready to transpose input onto stack
- mov _data_ptr+0*PTR_SZ(STATE),inp0
- mov _data_ptr+1*PTR_SZ(STATE),inp1
- mov _data_ptr+2*PTR_SZ(STATE),inp2
- mov _data_ptr+3*PTR_SZ(STATE),inp3
-
- xor IDX, IDX
-lloop:
- xor ROUND, ROUND
-
- # save old digest
- vmovdqu a, _digest(%rsp)
- vmovdqu b, _digest+1*SZ4(%rsp)
- vmovdqu c, _digest+2*SZ4(%rsp)
- vmovdqu d, _digest+3*SZ4(%rsp)
- vmovdqu e, _digest+4*SZ4(%rsp)
- vmovdqu f, _digest+5*SZ4(%rsp)
- vmovdqu g, _digest+6*SZ4(%rsp)
- vmovdqu h, _digest+7*SZ4(%rsp)
- i = 0
-.rep 4
- vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP
- VMOVPD i*32(inp0, IDX), TT2
- VMOVPD i*32(inp1, IDX), TT1
- VMOVPD i*32(inp2, IDX), TT4
- VMOVPD i*32(inp3, IDX), TT3
- TRANSPOSE TT2, TT1, TT4, TT3, TT0, TT5
- vpshufb TMP, TT0, TT0
- vpshufb TMP, TT1, TT1
- vpshufb TMP, TT2, TT2
- vpshufb TMP, TT3, TT3
- ROUND_00_15 TT0,(i*4+0)
- ROUND_00_15 TT1,(i*4+1)
- ROUND_00_15 TT2,(i*4+2)
- ROUND_00_15 TT3,(i*4+3)
- i = (i+1)
-.endr
- add $128, IDX
-
- i = (i*4)
-
- jmp Lrounds_16_xx
-.align 16
-Lrounds_16_xx:
-.rep 16
- ROUND_16_XX T1, i
- i = (i+1)
-.endr
- cmp $0xa00,ROUND
- jb Lrounds_16_xx
-
- # add old digest
- vpaddq _digest(%rsp), a, a
- vpaddq _digest+1*SZ4(%rsp), b, b
- vpaddq _digest+2*SZ4(%rsp), c, c
- vpaddq _digest+3*SZ4(%rsp), d, d
- vpaddq _digest+4*SZ4(%rsp), e, e
- vpaddq _digest+5*SZ4(%rsp), f, f
- vpaddq _digest+6*SZ4(%rsp), g, g
- vpaddq _digest+7*SZ4(%rsp), h, h
-
- sub $1, INP_SIZE # unit is blocks
- jne lloop
-
- # write back to memory (state object) the transposed digest
- vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE)
- vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE)
-
- # update input data pointers
- add IDX, inp0
- mov inp0, _data_ptr+0*PTR_SZ(STATE)
- add IDX, inp1
- mov inp1, _data_ptr+1*PTR_SZ(STATE)
- add IDX, inp2
- mov inp2, _data_ptr+2*PTR_SZ(STATE)
- add IDX, inp3
- mov inp3, _data_ptr+3*PTR_SZ(STATE)
-
- #;;;;;;;;;;;;;;;
- #; Postamble
- add $STACK_SPACE1, %rsp
- # restore callee-saved clobbered registers
-
- pop %r15
- pop %r14
- pop %r13
- pop %r12
-
- # outer calling routine restores XMM and other GP registers
- ret
-ENDPROC(sha512_x4_avx2)
-
-.section .rodata.K512_4, "a", @progbits
-.align 64
-K512_4:
- .octa 0x428a2f98d728ae22428a2f98d728ae22,\
- 0x428a2f98d728ae22428a2f98d728ae22
- .octa 0x7137449123ef65cd7137449123ef65cd,\
- 0x7137449123ef65cd7137449123ef65cd
- .octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\
- 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f
- .octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\
- 0xe9b5dba58189dbbce9b5dba58189dbbc
- .octa 0x3956c25bf348b5383956c25bf348b538,\
- 0x3956c25bf348b5383956c25bf348b538
- .octa 0x59f111f1b605d01959f111f1b605d019,\
- 0x59f111f1b605d01959f111f1b605d019
- .octa 0x923f82a4af194f9b923f82a4af194f9b,\
- 0x923f82a4af194f9b923f82a4af194f9b
- .octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\
- 0xab1c5ed5da6d8118ab1c5ed5da6d8118
- .octa 0xd807aa98a3030242d807aa98a3030242,\
- 0xd807aa98a3030242d807aa98a3030242
- .octa 0x12835b0145706fbe12835b0145706fbe,\
- 0x12835b0145706fbe12835b0145706fbe
- .octa 0x243185be4ee4b28c243185be4ee4b28c,\
- 0x243185be4ee4b28c243185be4ee4b28c
- .octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\
- 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2
- .octa 0x72be5d74f27b896f72be5d74f27b896f,\
- 0x72be5d74f27b896f72be5d74f27b896f
- .octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\
- 0x80deb1fe3b1696b180deb1fe3b1696b1
- .octa 0x9bdc06a725c712359bdc06a725c71235,\
- 0x9bdc06a725c712359bdc06a725c71235
- .octa 0xc19bf174cf692694c19bf174cf692694,\
- 0xc19bf174cf692694c19bf174cf692694
- .octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\
- 0xe49b69c19ef14ad2e49b69c19ef14ad2
- .octa 0xefbe4786384f25e3efbe4786384f25e3,\
- 0xefbe4786384f25e3efbe4786384f25e3
- .octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\
- 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5
- .octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\
- 0x240ca1cc77ac9c65240ca1cc77ac9c65
- .octa 0x2de92c6f592b02752de92c6f592b0275,\
- 0x2de92c6f592b02752de92c6f592b0275
- .octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\
- 0x4a7484aa6ea6e4834a7484aa6ea6e483
- .octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\
- 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4
- .octa 0x76f988da831153b576f988da831153b5,\
- 0x76f988da831153b576f988da831153b5
- .octa 0x983e5152ee66dfab983e5152ee66dfab,\
- 0x983e5152ee66dfab983e5152ee66dfab
- .octa 0xa831c66d2db43210a831c66d2db43210,\
- 0xa831c66d2db43210a831c66d2db43210
- .octa 0xb00327c898fb213fb00327c898fb213f,\
- 0xb00327c898fb213fb00327c898fb213f
- .octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\
- 0xbf597fc7beef0ee4bf597fc7beef0ee4
- .octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\
- 0xc6e00bf33da88fc2c6e00bf33da88fc2
- .octa 0xd5a79147930aa725d5a79147930aa725,\
- 0xd5a79147930aa725d5a79147930aa725
- .octa 0x06ca6351e003826f06ca6351e003826f,\
- 0x06ca6351e003826f06ca6351e003826f
- .octa 0x142929670a0e6e70142929670a0e6e70,\
- 0x142929670a0e6e70142929670a0e6e70
- .octa 0x27b70a8546d22ffc27b70a8546d22ffc,\
- 0x27b70a8546d22ffc27b70a8546d22ffc
- .octa 0x2e1b21385c26c9262e1b21385c26c926,\
- 0x2e1b21385c26c9262e1b21385c26c926
- .octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\
- 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed
- .octa 0x53380d139d95b3df53380d139d95b3df,\
- 0x53380d139d95b3df53380d139d95b3df
- .octa 0x650a73548baf63de650a73548baf63de,\
- 0x650a73548baf63de650a73548baf63de
- .octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\
- 0x766a0abb3c77b2a8766a0abb3c77b2a8
- .octa 0x81c2c92e47edaee681c2c92e47edaee6,\
- 0x81c2c92e47edaee681c2c92e47edaee6
- .octa 0x92722c851482353b92722c851482353b,\
- 0x92722c851482353b92722c851482353b
- .octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\
- 0xa2bfe8a14cf10364a2bfe8a14cf10364
- .octa 0xa81a664bbc423001a81a664bbc423001,\
- 0xa81a664bbc423001a81a664bbc423001
- .octa 0xc24b8b70d0f89791c24b8b70d0f89791,\
- 0xc24b8b70d0f89791c24b8b70d0f89791
- .octa 0xc76c51a30654be30c76c51a30654be30,\
- 0xc76c51a30654be30c76c51a30654be30
- .octa 0xd192e819d6ef5218d192e819d6ef5218,\
- 0xd192e819d6ef5218d192e819d6ef5218
- .octa 0xd69906245565a910d69906245565a910,\
- 0xd69906245565a910d69906245565a910
- .octa 0xf40e35855771202af40e35855771202a,\
- 0xf40e35855771202af40e35855771202a
- .octa 0x106aa07032bbd1b8106aa07032bbd1b8,\
- 0x106aa07032bbd1b8106aa07032bbd1b8
- .octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\
- 0x19a4c116b8d2d0c819a4c116b8d2d0c8
- .octa 0x1e376c085141ab531e376c085141ab53,\
- 0x1e376c085141ab531e376c085141ab53
- .octa 0x2748774cdf8eeb992748774cdf8eeb99,\
- 0x2748774cdf8eeb992748774cdf8eeb99
- .octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\
- 0x34b0bcb5e19b48a834b0bcb5e19b48a8
- .octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\
- 0x391c0cb3c5c95a63391c0cb3c5c95a63
- .octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\
- 0x4ed8aa4ae3418acb4ed8aa4ae3418acb
- .octa 0x5b9cca4f7763e3735b9cca4f7763e373,\
- 0x5b9cca4f7763e3735b9cca4f7763e373
- .octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\
- 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3
- .octa 0x748f82ee5defb2fc748f82ee5defb2fc,\
- 0x748f82ee5defb2fc748f82ee5defb2fc
- .octa 0x78a5636f43172f6078a5636f43172f60,\
- 0x78a5636f43172f6078a5636f43172f60
- .octa 0x84c87814a1f0ab7284c87814a1f0ab72,\
- 0x84c87814a1f0ab7284c87814a1f0ab72
- .octa 0x8cc702081a6439ec8cc702081a6439ec,\
- 0x8cc702081a6439ec8cc702081a6439ec
- .octa 0x90befffa23631e2890befffa23631e28,\
- 0x90befffa23631e2890befffa23631e28
- .octa 0xa4506cebde82bde9a4506cebde82bde9,\
- 0xa4506cebde82bde9a4506cebde82bde9
- .octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\
- 0xbef9a3f7b2c67915bef9a3f7b2c67915
- .octa 0xc67178f2e372532bc67178f2e372532b,\
- 0xc67178f2e372532bc67178f2e372532b
- .octa 0xca273eceea26619cca273eceea26619c,\
- 0xca273eceea26619cca273eceea26619c
- .octa 0xd186b8c721c0c207d186b8c721c0c207,\
- 0xd186b8c721c0c207d186b8c721c0c207
- .octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\
- 0xeada7dd6cde0eb1eeada7dd6cde0eb1e
- .octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\
- 0xf57d4f7fee6ed178f57d4f7fee6ed178
- .octa 0x06f067aa72176fba06f067aa72176fba,\
- 0x06f067aa72176fba06f067aa72176fba
- .octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\
- 0x0a637dc5a2c898a60a637dc5a2c898a6
- .octa 0x113f9804bef90dae113f9804bef90dae,\
- 0x113f9804bef90dae113f9804bef90dae
- .octa 0x1b710b35131c471b1b710b35131c471b,\
- 0x1b710b35131c471b1b710b35131c471b
- .octa 0x28db77f523047d8428db77f523047d84,\
- 0x28db77f523047d8428db77f523047d84
- .octa 0x32caab7b40c7249332caab7b40c72493,\
- 0x32caab7b40c7249332caab7b40c72493
- .octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\
- 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc
- .octa 0x431d67c49c100d4c431d67c49c100d4c,\
- 0x431d67c49c100d4c431d67c49c100d4c
- .octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\
- 0x4cc5d4becb3e42b64cc5d4becb3e42b6
- .octa 0x597f299cfc657e2a597f299cfc657e2a,\
- 0x597f299cfc657e2a597f299cfc657e2a
- .octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\
- 0x5fcb6fab3ad6faec5fcb6fab3ad6faec
- .octa 0x6c44198c4a4758176c44198c4a475817,\
- 0x6c44198c4a4758176c44198c4a475817
-
-.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
-.align 32
-PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
- .octa 0x18191a1b1c1d1e1f1011121314151617