9 files changed, 0 insertions, 2693 deletions
diff --git a/arch/x86/crypto/sha512-mb/Makefile b/arch/x86/crypto/sha512-mb/Makefile
deleted file mode 100644
index 90f1ef69152e..000000000000
--- a/arch/x86/crypto/sha512-mb/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Arch-specific CryptoAPI modules.
-#
-
-avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
-                                $(comma)4)$(comma)%ymm2,yes,no)
-ifeq ($(avx2_supported),yes)
-	obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o
-	sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \
-	     sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o
-endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c
deleted file mode 100644
index 26b85678012d..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb.c
+++ /dev/null
@@ -1,1047 +0,0 @@
-/*
- * Multi buffer SHA512 algorithm Glue Code
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *	Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
-
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/sha.h>
-#include <crypto/mcryptd.h>
-#include <crypto/crypto_wq.h>
-#include <asm/byteorder.h>
-#include <linux/hardirq.h>
-#include <asm/fpu/api.h>
-#include "sha512_mb_ctx.h"
-
-#define FLUSH_INTERVAL 1000 /* in usec */
-
-static struct mcryptd_alg_state sha512_mb_alg_state;
-
-struct sha512_mb_ctx {
-	struct mcryptd_ahash *mcryptd_tfm;
-};
-
-static inline struct mcryptd_hash_request_ctx
-		*cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx)
-{
-	struct ahash_request *areq;
-
-	areq = container_of((void *) hash_ctx, struct ahash_request, __ctx);
-	return container_of(areq, struct mcryptd_hash_request_ctx, areq);
-}
-
-static inline struct ahash_request
-		*cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx)
-{
-	return container_of((void *) ctx, struct ahash_request, __ctx);
-}
-
-static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx,
-				struct ahash_request *areq)
-{
-	rctx->flag = HASH_UPDATE;
-}
-
-static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_submit)
-						(struct sha512_mb_mgr *state,
-						struct job_sha512 *job);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_flush)
-						(struct sha512_mb_mgr *state);
-static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job)
-						(struct sha512_mb_mgr *state);
-
-inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2],
-			 uint64_t total_len)
-{
-	uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1);
-
-	memset(&padblock[i], 0, SHA512_BLOCK_SIZE);
-	padblock[i] = 0x80;
-
-	i += ((SHA512_BLOCK_SIZE - 1) &
-	      (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1)))
-	     + 1 + SHA512_PADLENGTHFIELD_SIZE;
-
-#if SHA512_PADLENGTHFIELD_SIZE == 16
-	*((uint64_t *) &padblock[i - 16]) = 0;
-#endif
-
-	*((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3);
-
-	/* Number of extra blocks to hash */
-	return i >> SHA512_LOG2_BLOCK_SIZE;
-}
-
-static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit
-		(struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx)
-{
-	while (ctx) {
-		if (ctx->status & HASH_CTX_STS_COMPLETE) {
-			/* Clear PROCESSING bit */
-			ctx->status = HASH_CTX_STS_COMPLETE;
-			return ctx;
-		}
-
-		/*
-		 * If the extra blocks are empty, begin hashing what remains
-		 * in the user's buffer.
-		 */
-		if (ctx->partial_block_buffer_length == 0 &&
-		    ctx->incoming_buffer_length) {
-
-			const void *buffer = ctx->incoming_buffer;
-			uint32_t len = ctx->incoming_buffer_length;
-			uint32_t copy_len;
-
-			/*
-			 * Only entire blocks can be hashed.
-			 * Copy remainder to extra blocks buffer.
-			 */
-			copy_len = len & (SHA512_BLOCK_SIZE-1);
-
-			if (copy_len) {
-				len -= copy_len;
-				memcpy(ctx->partial_block_buffer,
-				       ((const char *) buffer + len),
-				       copy_len);
-				ctx->partial_block_buffer_length = copy_len;
-			}
-
-			ctx->incoming_buffer_length = 0;
-
-			/* len should be a multiple of the block size now */
-			assert((len % SHA512_BLOCK_SIZE) == 0);
-
-			/* Set len to the number of blocks to be hashed */
-			len >>= SHA512_LOG2_BLOCK_SIZE;
-
-			if (len) {
-
-				ctx->job.buffer = (uint8_t *) buffer;
-				ctx->job.len = len;
-				ctx = (struct sha512_hash_ctx *)
-					sha512_job_mgr_submit(&mgr->mgr,
-					&ctx->job);
-				continue;
-			}
-		}
-
-		/*
-		 * If the extra blocks are not empty, then we are
-		 * either on the last block(s) or we need more
-		 * user input before continuing.
-		 */
-		if (ctx->status & HASH_CTX_STS_LAST) {
-
-			uint8_t *buf = ctx->partial_block_buffer;
-			uint32_t n_extra_blocks =
-					sha512_pad(buf, ctx->total_length);
-
-			ctx->status = (HASH_CTX_STS_PROCESSING |
-				       HASH_CTX_STS_COMPLETE);
-			ctx->job.buffer = buf;
-			ctx->job.len = (uint32_t) n_extra_blocks;
-			ctx = (struct sha512_hash_ctx *)
-				sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
-			continue;
-		}
-
-		if (ctx)
-			ctx->status = HASH_CTX_STS_IDLE;
-		return ctx;
-	}
-
-	return NULL;
-}
-
-static struct sha512_hash_ctx
-		*sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate)
-{
-	/*
-	 * If get_comp_job returns NULL, there are no jobs complete.
-	 * If get_comp_job returns a job, verify that it is safe to return to
-	 * the user.
-	 * If it is not ready, resubmit the job to finish processing.
-	 * If sha512_ctx_mgr_resubmit returned a job, it is ready to be
-	 * returned.
-	 * Otherwise, all jobs currently being managed by the hash_ctx_mgr
-	 * still need processing.
-	 */
-	struct sha512_ctx_mgr *mgr;
-	struct sha512_hash_ctx *ctx;
-	unsigned long flags;
-
-	mgr = cstate->mgr;
-	spin_lock_irqsave(&cstate->work_lock, flags);
-	ctx = (struct sha512_hash_ctx *)
-				sha512_job_mgr_get_comp_job(&mgr->mgr);
-	ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
-	spin_unlock_irqrestore(&cstate->work_lock, flags);
-	return ctx;
-}
-
-static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr)
-{
-	sha512_job_mgr_init(&mgr->mgr);
-}
-
-static struct sha512_hash_ctx
-			*sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate,
-					  struct sha512_hash_ctx *ctx,
-					  const void *buffer,
-					  uint32_t len,
-					  int flags)
-{
-	struct sha512_ctx_mgr *mgr;
-	unsigned long irqflags;
-
-	mgr = cstate->mgr;
-	spin_lock_irqsave(&cstate->work_lock, irqflags);
-	if (flags & ~(HASH_UPDATE | HASH_LAST)) {
-		/* User should not pass anything other than UPDATE or LAST */
-		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
-		goto unlock;
-	}
-
-	if (ctx->status & HASH_CTX_STS_PROCESSING) {
-		/* Cannot submit to a currently processing job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
-		goto unlock;
-	}
-
-	if (ctx->status & HASH_CTX_STS_COMPLETE) {
-		/* Cannot update a finished job. */
-		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
-		goto unlock;
-	}
-
-	/*
-	 * If we made it here, there were no errors during this call to
-	 * submit
-	 */
-	ctx->error = HASH_CTX_ERROR_NONE;
-
-	/* Store buffer ptr info from user */
-	ctx->incoming_buffer = buffer;
-	ctx->incoming_buffer_length = len;
-
-	/*
-	 * Store the user's request flags and mark this ctx as currently being
-	 * processed.
-	 */
-	ctx->status = (flags & HASH_LAST) ?
-			(HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
-			HASH_CTX_STS_PROCESSING;
-
-	/* Advance byte counter */
-	ctx->total_length += len;
-
-	/*
-	 * If there is anything currently buffered in the extra blocks,
-	 * append to it until it contains a whole block.
-	 * Or if the user's buffer contains less than a whole block,
-	 * append as much as possible to the extra block.
-	 */
-	if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) {
-		/* Compute how many bytes to copy from user buffer into extra
-		 * block
-		 */
-		uint32_t copy_len = SHA512_BLOCK_SIZE -
-					ctx->partial_block_buffer_length;
-		if (len < copy_len)
-			copy_len = len;
-
-		if (copy_len) {
-			/* Copy and update relevant pointers and counters */
-			memcpy
-		(&ctx->partial_block_buffer[ctx->partial_block_buffer_length],
-				buffer, copy_len);
-
-			ctx->partial_block_buffer_length += copy_len;
-			ctx->incoming_buffer = (const void *)
-					((const char *)buffer + copy_len);
-			ctx->incoming_buffer_length = len - copy_len;
-		}
-
-		/* The extra block should never contain more than 1 block
-		 * here
-		 */
-		assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE);
-
-		/* If the extra block buffer contains exactly 1 block, it can
-		 * be hashed.
-		 */
-		if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) {
-			ctx->partial_block_buffer_length = 0;
-
-			ctx->job.buffer = ctx->partial_block_buffer;
-			ctx->job.len = 1;
-			ctx = (struct sha512_hash_ctx *)
-				sha512_job_mgr_submit(&mgr->mgr, &ctx->job);
-		}
-	}
-
-	ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
-unlock:
-	spin_unlock_irqrestore(&cstate->work_lock, irqflags);
-	return ctx;
-}
-
-static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate)
-{
-	struct sha512_ctx_mgr *mgr;
-	struct sha512_hash_ctx *ctx;
-	unsigned long flags;
-
-	mgr = cstate->mgr;
-	spin_lock_irqsave(&cstate->work_lock, flags);
-	while (1) {
-		ctx = (struct sha512_hash_ctx *)
-					sha512_job_mgr_flush(&mgr->mgr);
-
-		/* If flush returned 0, there are no more jobs in flight. */
-		if (!ctx)
-			break;
-
-		/*
-		 * If flush returned a job, resubmit the job to finish
-		 * processing.
-		 */
-		ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
-
-		/*
-		 * If sha512_ctx_mgr_resubmit returned a job, it is ready to
-		 * be returned. Otherwise, all jobs currently being managed by
-		 * the sha512_ctx_mgr still need processing. Loop.
-		 */
-		if (ctx)
-			break;
-	}
-	spin_unlock_irqrestore(&cstate->work_lock, flags);
-	return ctx;
-}
-
-static int sha512_mb_init(struct ahash_request *areq)
-{
-	struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	hash_ctx_init(sctx);
-	sctx->job.result_digest[0] = SHA512_H0;
-	sctx->job.result_digest[1] = SHA512_H1;
-	sctx->job.result_digest[2] = SHA512_H2;
-	sctx->job.result_digest[3] = SHA512_H3;
-	sctx->job.result_digest[4] = SHA512_H4;
-	sctx->job.result_digest[5] = SHA512_H5;
-	sctx->job.result_digest[6] = SHA512_H6;
-	sctx->job.result_digest[7] = SHA512_H7;
-	sctx->total_length = 0;
-	sctx->partial_block_buffer_length = 0;
-	sctx->status = HASH_CTX_STS_IDLE;
-
-	return 0;
-}
-
-static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx)
-{
-	int	i;
-	struct	sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq);
-	__be64	*dst = (__be64 *) rctx->out;
-
-	for (i = 0; i < 8; ++i)
-		dst[i] = cpu_to_be64(sctx->job.result_digest[i]);
-
-	return 0;
-}
-
-static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx,
-			struct mcryptd_alg_cstate *cstate, bool flush)
-{
-	int	flag = HASH_UPDATE;
-	int	nbytes, err = 0;
-	struct mcryptd_hash_request_ctx *rctx = *ret_rctx;
-	struct sha512_hash_ctx *sha_ctx;
-
-	/* more work ? */
-	while (!(rctx->flag & HASH_DONE)) {
-		nbytes = crypto_ahash_walk_done(&rctx->walk, 0);
-		if (nbytes < 0) {
-			err = nbytes;
-			goto out;
-		}
-		/* check if the walk is done */
-		if (crypto_ahash_walk_last(&rctx->walk)) {
-			rctx->flag |= HASH_DONE;
-			if (rctx->flag & HASH_FINAL)
-				flag |= HASH_LAST;
-
-		}
-		sha_ctx = (struct sha512_hash_ctx *)
-						ahash_request_ctx(&rctx->areq);
-		kernel_fpu_begin();
-		sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx,
-						rctx->walk.data, nbytes, flag);
-		if (!sha_ctx) {
-			if (flush)
-				sha_ctx = sha512_ctx_mgr_flush(cstate);
-		}
-		kernel_fpu_end();
-		if (sha_ctx)
-			rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		else {
-			rctx = NULL;
-			goto out;
-		}
-	}
-
-	/* copy the results */
-	if (rctx->flag & HASH_FINAL)
-		sha512_mb_set_results(rctx);
-
-out:
-	*ret_rctx = rctx;
-	return err;
-}
-
-static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
-			    struct mcryptd_alg_cstate *cstate,
-			    int err)
-{
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha512_hash_ctx *sha_ctx;
-	struct mcryptd_hash_request_ctx *req_ctx;
-	int ret;
-	unsigned long flags;
-
-	/* remove from work list */
-	spin_lock_irqsave(&cstate->work_lock, flags);
-	list_del(&rctx->waiter);
-	spin_unlock_irqrestore(&cstate->work_lock, flags);
-
-	if (irqs_disabled())
-		rctx->complete(&req->base, err);
-	else {
-		local_bh_disable();
-		rctx->complete(&req->base, err);
-		local_bh_enable();
-	}
-
-	/* check to see if there are other jobs that are done */
-	sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
-	while (sha_ctx) {
-		req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		ret = sha_finish_walk(&req_ctx, cstate, false);
-		if (req_ctx) {
-			spin_lock_irqsave(&cstate->work_lock, flags);
-			list_del(&req_ctx->waiter);
-			spin_unlock_irqrestore(&cstate->work_lock, flags);
-
-			req = cast_mcryptd_ctx_to_req(req_ctx);
-			if (irqs_disabled())
-				req_ctx->complete(&req->base, ret);
-			else {
-				local_bh_disable();
-				req_ctx->complete(&req->base, ret);
-				local_bh_enable();
-			}
-		}
-		sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate);
-	}
-
-	return 0;
-}
-
-static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx,
-			     struct mcryptd_alg_cstate *cstate)
-{
-	unsigned long next_flush;
-	unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL);
-	unsigned long flags;
-
-	/* initialize tag */
-	rctx->tag.arrival = jiffies;    /* tag the arrival time */
-	rctx->tag.seq_num = cstate->next_seq_num++;
-	next_flush = rctx->tag.arrival + delay;
-	rctx->tag.expire = next_flush;
-
-	spin_lock_irqsave(&cstate->work_lock, flags);
-	list_add_tail(&rctx->waiter, &cstate->work_list);
-	spin_unlock_irqrestore(&cstate->work_lock, flags);
-
-	mcryptd_arm_flusher(cstate, delay);
-}
-
-static int sha512_mb_update(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-			container_of(areq, struct mcryptd_hash_request_ctx,
-									areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha512_hash_ctx *sha_ctx;
-	int ret = 0, nbytes;
-
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk))
-		rctx->flag |= HASH_DONE;
-
-	/* submit */
-	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
-	sha512_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
-							nbytes, HASH_UPDATE);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha512_mb_finup(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-			container_of(areq, struct mcryptd_hash_request_ctx,
-									areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
-	struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx);
-	struct sha512_hash_ctx *sha_ctx;
-	int ret = 0, flag = HASH_UPDATE, nbytes;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	nbytes = crypto_ahash_walk_first(req, &rctx->walk);
-
-	if (nbytes < 0) {
-		ret = nbytes;
-		goto done;
-	}
-
-	if (crypto_ahash_walk_last(&rctx->walk)) {
-		rctx->flag |= HASH_DONE;
-		flag = HASH_LAST;
-	}
-
-	/* submit */
-	rctx->flag |= HASH_FINAL;
-	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
-	sha512_mb_add_list(rctx, cstate);
-
-	kernel_fpu_begin();
-	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data,
-								nbytes, flag);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha512_mb_final(struct ahash_request *areq)
-{
-	struct mcryptd_hash_request_ctx *rctx =
-			container_of(areq, struct mcryptd_hash_request_ctx,
-									areq);
-	struct mcryptd_alg_cstate *cstate =
-				this_cpu_ptr(sha512_mb_alg_state.alg_cstate);
-
-	struct sha512_hash_ctx *sha_ctx;
-	int ret = 0;
-	u8 data;
-
-	/* sanity check */
-	if (rctx->tag.cpu != smp_processor_id()) {
-		pr_err("mcryptd error: cpu clash\n");
-		goto done;
-	}
-
-	/* need to init context */
-	req_ctx_init(rctx, areq);
-
-	rctx->flag |= HASH_DONE | HASH_FINAL;
-
-	sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq);
-	/* flag HASH_FINAL and 0 data size */
-	sha512_mb_add_list(rctx, cstate);
-	kernel_fpu_begin();
-	sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST);
-	kernel_fpu_end();
-
-	/* check if anything is returned */
-	if (!sha_ctx)
-		return -EINPROGRESS;
-
-	if (sha_ctx->error) {
-		ret = sha_ctx->error;
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		goto done;
-	}
-
-	rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-	ret = sha_finish_walk(&rctx, cstate, false);
-	if (!rctx)
-		return -EINPROGRESS;
-done:
-	sha_complete_job(rctx, cstate, ret);
-	return ret;
-}
-
-static int sha512_mb_export(struct ahash_request *areq, void *out)
-{
-	struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(out, sctx, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha512_mb_import(struct ahash_request *areq, const void *in)
-{
-	struct sha512_hash_ctx *sctx = ahash_request_ctx(areq);
-
-	memcpy(sctx, in, sizeof(*sctx));
-
-	return 0;
-}
-
-static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm)
-{
-	struct mcryptd_ahash *mcryptd_tfm;
-	struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct mcryptd_hash_ctx *mctx;
-
-	mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb",
-						CRYPTO_ALG_INTERNAL,
-						CRYPTO_ALG_INTERNAL);
-	if (IS_ERR(mcryptd_tfm))
-		return PTR_ERR(mcryptd_tfm);
-	mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
-	mctx->alg_state = &sha512_mb_alg_state;
-	ctx->mcryptd_tfm = mcryptd_tfm;
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				sizeof(struct ahash_request) +
-				crypto_ahash_reqsize(&mcryptd_tfm->base));
-
-	return 0;
-}
-
-static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm)
-{
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				sizeof(struct ahash_request) +
-				sizeof(struct sha512_hash_ctx));
-
-	return 0;
-}
-
-static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm)
-{
-	struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	mcryptd_free_ahash(ctx->mcryptd_tfm);
-}
-
-static struct ahash_alg sha512_mb_areq_alg = {
-	.init		=	sha512_mb_init,
-	.update		=	sha512_mb_update,
-	.final		=	sha512_mb_final,
-	.finup		=	sha512_mb_finup,
-	.export		=	sha512_mb_export,
-	.import		=	sha512_mb_import,
-	.halg		=	{
-	.digestsize	=	SHA512_DIGEST_SIZE,
-	.statesize	=	sizeof(struct sha512_hash_ctx),
-	.base		=	{
-			.cra_name	 = "__sha512-mb",
-			.cra_driver_name = "__intel_sha512-mb",
-			.cra_priority	 = 100,
-			/*
-			 * use ASYNC flag as some buffers in multi-buffer
-			 * algo may not have completed before hashing thread
-			 * sleep
-			 */
-			.cra_flags	= CRYPTO_ALG_ASYNC |
-					  CRYPTO_ALG_INTERNAL,
-			.cra_blocksize	= SHA512_BLOCK_SIZE,
-			.cra_module	= THIS_MODULE,
-			.cra_list	= LIST_HEAD_INIT
-					(sha512_mb_areq_alg.halg.base.cra_list),
-			.cra_init	= sha512_mb_areq_init_tfm,
-			.cra_exit	= sha512_mb_areq_exit_tfm,
-			.cra_ctxsize	= sizeof(struct sha512_hash_ctx),
-		}
-	}
-};
-
-static int sha512_mb_async_init(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_init(mcryptd_req);
-}
-
-static int sha512_mb_async_update(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_update(mcryptd_req);
-}
-
-static int sha512_mb_async_finup(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_finup(mcryptd_req);
-}
-
-static int sha512_mb_async_final(struct ahash_request *req)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_final(mcryptd_req);
-}
-
-static int sha512_mb_async_digest(struct ahash_request *req)
-{
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_digest(mcryptd_req);
-}
-
-static int sha512_mb_async_export(struct ahash_request *req, void *out)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	return crypto_ahash_export(mcryptd_req, out);
-}
-
-static int sha512_mb_async_import(struct ahash_request *req, const void *in)
-{
-	struct ahash_request *mcryptd_req = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm;
-	struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm);
-	struct mcryptd_hash_request_ctx *rctx;
-	struct ahash_request *areq;
-
-	memcpy(mcryptd_req, req, sizeof(*req));
-	ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base);
-	rctx = ahash_request_ctx(mcryptd_req);
-
-	areq = &rctx->areq;
-
-	ahash_request_set_tfm(areq, child);
-	ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP,
-					rctx->complete, req);
-
-	return crypto_ahash_import(mcryptd_req, in);
-}
-
-static struct ahash_alg sha512_mb_async_alg = {
-	.init           = sha512_mb_async_init,
-	.update         = sha512_mb_async_update,
-	.final          = sha512_mb_async_final,
-	.finup          = sha512_mb_async_finup,
-	.digest         = sha512_mb_async_digest,
-	.export		= sha512_mb_async_export,
-	.import		= sha512_mb_async_import,
-	.halg = {
-		.digestsize     = SHA512_DIGEST_SIZE,
-		.statesize      = sizeof(struct sha512_hash_ctx),
-		.base = {
-			.cra_name               = "sha512",
-			.cra_driver_name        = "sha512_mb",
-			/*
-			 * Low priority, since with few concurrent hash requests
-			 * this is extremely slow due to the flush delay.  Users
-			 * whose workloads would benefit from this can request
-			 * it explicitly by driver name, or can increase its
-			 * priority at runtime using NETLINK_CRYPTO.
-			 */
-			.cra_priority           = 50,
-			.cra_flags              = CRYPTO_ALG_ASYNC,
-			.cra_blocksize          = SHA512_BLOCK_SIZE,
-			.cra_module             = THIS_MODULE,
-			.cra_list               = LIST_HEAD_INIT
-				(sha512_mb_async_alg.halg.base.cra_list),
-			.cra_init               = sha512_mb_async_init_tfm,
-			.cra_exit               = sha512_mb_async_exit_tfm,
-			.cra_ctxsize		= sizeof(struct sha512_mb_ctx),
-			.cra_alignmask		= 0,
-		},
-	},
-};
-
-static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate)
-{
-	struct mcryptd_hash_request_ctx *rctx;
-	unsigned long cur_time;
-	unsigned long next_flush = 0;
-	struct sha512_hash_ctx *sha_ctx;
-
-
-	cur_time = jiffies;
-
-	while (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		if time_before(cur_time, rctx->tag.expire)
-			break;
-		kernel_fpu_begin();
-		sha_ctx = (struct sha512_hash_ctx *)
-					sha512_ctx_mgr_flush(cstate);
-		kernel_fpu_end();
-		if (!sha_ctx) {
-			pr_err("sha512_mb error: nothing got flushed for"
-							" non-empty list\n");
-			break;
-		}
-		rctx = cast_hash_to_mcryptd_ctx(sha_ctx);
-		sha_finish_walk(&rctx, cstate, true);
-		sha_complete_job(rctx, cstate, 0);
-	}
-
-	if (!list_empty(&cstate->work_list)) {
-		rctx = list_entry(cstate->work_list.next,
-				struct mcryptd_hash_request_ctx, waiter);
-		/* get the hash context and then flush time */
-		next_flush = rctx->tag.expire;
-		mcryptd_arm_flusher(cstate, get_delay(next_flush));
-	}
-	return next_flush;
-}
-
-static int __init sha512_mb_mod_init(void)
-{
-
-	int cpu;
-	int err;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	/* check for dependent cpu features */
-	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
-	    !boot_cpu_has(X86_FEATURE_BMI2))
-		return -ENODEV;
-
-	/* initialize multibuffer structures */
-	sha512_mb_alg_state.alg_cstate =
-				alloc_percpu(struct mcryptd_alg_cstate);
-
-	sha512_job_mgr_init = sha512_mb_mgr_init_avx2;
-	sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2;
-	sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2;
-	sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2;
-
-	if (!sha512_mb_alg_state.alg_cstate)
-		return -ENOMEM;
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
-		cpu_state->next_flush = 0;
-		cpu_state->next_seq_num = 0;
-		cpu_state->flusher_engaged = false;
-		INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher);
-		cpu_state->cpu = cpu;
-		cpu_state->alg_state = &sha512_mb_alg_state;
-		cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr),
-								GFP_KERNEL);
-		if (!cpu_state->mgr)
-			goto err2;
-		sha512_ctx_mgr_init(cpu_state->mgr);
-		INIT_LIST_HEAD(&cpu_state->work_list);
-		spin_lock_init(&cpu_state->work_lock);
-	}
-	sha512_mb_alg_state.flusher = &sha512_mb_flusher;
-
-	err = crypto_register_ahash(&sha512_mb_areq_alg);
-	if (err)
-		goto err2;
-	err = crypto_register_ahash(&sha512_mb_async_alg);
-	if (err)
-		goto err1;
-
-
-	return 0;
-err1:
-	crypto_unregister_ahash(&sha512_mb_areq_alg);
-err2:
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha512_mb_alg_state.alg_cstate);
-	return -ENODEV;
-}
-
-static void __exit sha512_mb_mod_fini(void)
-{
-	int cpu;
-	struct mcryptd_alg_cstate *cpu_state;
-
-	crypto_unregister_ahash(&sha512_mb_async_alg);
-	crypto_unregister_ahash(&sha512_mb_areq_alg);
-	for_each_possible_cpu(cpu) {
-		cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu);
-		kfree(cpu_state->mgr);
-	}
-	free_percpu(sha512_mb_alg_state.alg_cstate);
-}
-
-module_init(sha512_mb_mod_init);
-module_exit(sha512_mb_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated");
-
-MODULE_ALIAS("sha512");
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
deleted file mode 100644
index e5c465bd821e..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Header file for multi buffer SHA512 context
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef _SHA_MB_CTX_INTERNAL_H
-#define _SHA_MB_CTX_INTERNAL_H
-
-#include "sha512_mb_mgr.h"
-
-#define HASH_UPDATE          0x00
-#define HASH_LAST            0x01
-#define HASH_DONE            0x02
-#define HASH_FINAL           0x04
-
-#define HASH_CTX_STS_IDLE       0x00
-#define HASH_CTX_STS_PROCESSING 0x01
-#define HASH_CTX_STS_LAST       0x02
-#define HASH_CTX_STS_COMPLETE   0x04
-
-enum hash_ctx_error {
-	HASH_CTX_ERROR_NONE               =  0,
-	HASH_CTX_ERROR_INVALID_FLAGS      = -1,
-	HASH_CTX_ERROR_ALREADY_PROCESSING = -2,
-	HASH_CTX_ERROR_ALREADY_COMPLETED  = -3,
-};
-
-#define hash_ctx_user_data(ctx)  ((ctx)->user_data)
-#define hash_ctx_digest(ctx)     ((ctx)->job.result_digest)
-#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
-#define hash_ctx_complete(ctx)   ((ctx)->status == HASH_CTX_STS_COMPLETE)
-#define hash_ctx_status(ctx)     ((ctx)->status)
-#define hash_ctx_error(ctx)      ((ctx)->error)
-#define hash_ctx_init(ctx) \
-	do { \
-		(ctx)->error = HASH_CTX_ERROR_NONE; \
-		(ctx)->status = HASH_CTX_STS_COMPLETE; \
-	} while (0)
-
-/* Hash Constants and Typedefs */
-#define SHA512_DIGEST_LENGTH          8
-#define SHA512_LOG2_BLOCK_SIZE        7
-
-#define SHA512_PADLENGTHFIELD_SIZE    16
-
-#ifdef SHA_MB_DEBUG
-#define assert(expr) \
-do { \
-	if (unlikely(!(expr))) { \
-		printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
-		#expr, __FILE__, __func__, __LINE__); \
-	} \
-} while (0)
-#else
-#define assert(expr) do {} while (0)
-#endif
-
-struct sha512_ctx_mgr {
-	struct sha512_mb_mgr mgr;
-};
-
-/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */
-
-struct sha512_hash_ctx {
-	/* Must be at struct offset 0 */
-	struct job_sha512       job;
-	/* status flag */
-	int status;
-	/* error flag */
-	int error;
-
-	uint64_t        total_length;
-	const void      *incoming_buffer;
-	uint32_t        incoming_buffer_length;
-	uint8_t         partial_block_buffer[SHA512_BLOCK_SIZE * 2];
-	uint32_t        partial_block_buffer_length;
-	void            *user_data;
-};
-
-#endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
deleted file mode 100644
index 178f17eef382..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Header file for multi buffer SHA512 algorithm manager
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __SHA_MB_MGR_H
-#define __SHA_MB_MGR_H
-
-#include <linux/types.h>
-
-#define NUM_SHA512_DIGEST_WORDS 8
-
-enum job_sts {STS_UNKNOWN = 0,
-	STS_BEING_PROCESSED = 1,
-	STS_COMPLETED =       2,
-	STS_INTERNAL_ERROR = 3,
-	STS_ERROR = 4
-};
-
-struct job_sha512 {
-	u8  *buffer;
-	u64  len;
-	u64  result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32);
-	enum job_sts status;
-	void   *user_data;
-};
-
-struct sha512_args_x4 {
-	uint64_t        digest[8][4];
-	uint8_t         *data_ptr[4];
-};
-
-struct sha512_lane_data {
-	struct job_sha512 *job_in_lane;
-};
-
-struct sha512_mb_mgr {
-	struct sha512_args_x4 args;
-
-	uint64_t lens[4];
-
-	/* each byte is index (0...7) of unused lanes */
-	uint64_t unused_lanes;
-	/* byte 4 is set to FF as a flag */
-	struct sha512_lane_data ldata[4];
-};
-
-#define SHA512_MB_MGR_NUM_LANES_AVX2 4
-
-void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state);
-struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state,
-						struct job_sha512 *job);
-struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state);
-struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state);
-
-#endif
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
deleted file mode 100644
index cf2636d4c9ba..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Header file for multi buffer SHA256 algorithm data structure
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of version 2 of the GNU General Public License as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful, but
- *  WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- *  Contact Information:
- *      Megha Dey <megha.dey@linux.intel.com>
- *
- *  BSD LICENSE
- *
- *  Copyright(c) 2016 Intel Corporation.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above copyright
- *      notice, this list of conditions and the following disclaimer in
- *      the documentation and/or other materials provided with the
- *      distribution.
- *    * Neither the name of Intel Corporation nor the names of its
- *      contributors may be used to endorse or promote products derived
- *      from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# Macros for defining data structures
-
-# Usage example
-
-#START_FIELDS   # JOB_AES
-###     name            size    align
-#FIELD  _plaintext,     8,      8       # pointer to plaintext
-#FIELD  _ciphertext,    8,      8       # pointer to ciphertext
-#FIELD  _IV,            16,     8       # IV
-#FIELD  _keys,          8,      8       # pointer to keys
-#FIELD  _len,           4,      4       # length in bytes
-#FIELD  _status,        4,      4       # status enumeration
-#FIELD  _user_data,     8,      8       # pointer to user data
-#UNION  _union,         size1,  align1, \
-#                       size2,  align2, \
-#                       size3,  align3, \
-#                       ...
-#END_FIELDS
-#%assign _JOB_AES_size  _FIELD_OFFSET
-#%assign _JOB_AES_align _STRUCT_ALIGN
-
-#########################################################################
-
-# Alternate "struc-like" syntax:
-#       STRUCT job_aes2
-#       RES_Q   .plaintext,     1
-#       RES_Q   .ciphertext,    1
-#       RES_DQ  .IV,            1
-#       RES_B   .nested,        _JOB_AES_SIZE, _JOB_AES_ALIGN
-#       RES_U   .union,         size1, align1, \
-#                               size2, align2, \
-#                               ...
-#       ENDSTRUCT
-#       # Following only needed if nesting
-#       %assign job_aes2_size   _FIELD_OFFSET
-#       %assign job_aes2_align  _STRUCT_ALIGN
-#
-# RES_* macros take a name, a count and an optional alignment.
-# The count in in terms of the base size of the macro, and the
-# default alignment is the base size.
-# The macros are:
-# Macro    Base size
-# RES_B     1
-# RES_W     2
-# RES_D     4
-# RES_Q     8
-# RES_DQ   16
-# RES_Y    32
-# RES_Z    64
-#
-# RES_U defines a union. It's arguments are a name and two or more
-# pairs of "size, alignment"
-#
-# The two assigns are only needed if this structure is being nested
-# within another. Even if the assigns are not done, one can still use
-# STRUCT_NAME_size as the size of the structure.
-#
-# Note that for nesting, you still need to assign to STRUCT_NAME_size.
-#
-# The differences between this and using "struc" directly are that each
-# type is implicitly aligned to its natural length (although this can be
-# over-ridden with an explicit third parameter), and that the structure
-# is padded at the end to its overall alignment.
-#
-
-#########################################################################
-
-#ifndef _DATASTRUCT_ASM_
-#define _DATASTRUCT_ASM_
-
-#define PTR_SZ                  8
-#define SHA512_DIGEST_WORD_SIZE 8
-#define SHA512_MB_MGR_NUM_LANES_AVX2 4
-#define NUM_SHA512_DIGEST_WORDS 8
-#define SZ4                     4*SHA512_DIGEST_WORD_SIZE
-#define ROUNDS                  80*SZ4
-#define SHA512_DIGEST_ROW_SIZE  (SHA512_MB_MGR_NUM_LANES_AVX2 * 8)
-
-# START_FIELDS
-.macro START_FIELDS
- _FIELD_OFFSET = 0
- _STRUCT_ALIGN = 0
-.endm
-
-# FIELD name size align
-.macro FIELD name size align
- _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
- \name  = _FIELD_OFFSET
- _FIELD_OFFSET = _FIELD_OFFSET + (\size)
-.if (\align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = \align
-.endif
-.endm
-
-# END_FIELDS
-.macro END_FIELDS
- _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
-.endm
-
-.macro STRUCT p1
-START_FIELDS
-.struc \p1
-.endm
-
-.macro ENDSTRUCT
- tmp = _FIELD_OFFSET
- END_FIELDS
- tmp = (_FIELD_OFFSET - ##tmp)
-.if (tmp > 0)
-        .lcomm  tmp
-.endm
-
-## RES_int name size align
-.macro RES_int p1 p2 p3
- name = \p1
- size = \p2
- align = .\p3
-
- _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1))
-.align align
-.lcomm name size
- _FIELD_OFFSET = _FIELD_OFFSET + (size)
-.if (align > _STRUCT_ALIGN)
- _STRUCT_ALIGN = align
-.endif
-.endm
-
-# macro RES_B name, size [, align]
-.macro RES_B _name, _size, _align=1
-RES_int _name _size _align
-.endm
-
-# macro RES_W name, size [, align]
-.macro RES_W _name, _size, _align=2
-RES_int _name 2*(_size) _align
-.endm
-
-# macro RES_D name, size [, align]
-.macro RES_D _name, _size, _align=4
-RES_int _name 4*(_size) _align
-.endm
-
-# macro RES_Q name, size [, align]
-.macro RES_Q _name, _size, _align=8
-RES_int _name 8*(_size) _align
-.endm
-
-# macro RES_DQ name, size [, align]
-.macro RES_DQ _name, _size, _align=16
-RES_int _name 16*(_size) _align
-.endm
-
-# macro RES_Y name, size [, align]
-.macro RES_Y _name, _size, _align=32
-RES_int _name 32*(_size) _align
-.endm
-
-# macro RES_Z name, size [, align]
-.macro RES_Z _name, _size, _align=64
-RES_int _name 64*(_size) _align
-.endm
-
-#endif
-
-###################################################################
-### Define SHA512 Out Of Order Data Structures
-###################################################################
-
-START_FIELDS    # LANE_DATA
-###     name            size    align
-FIELD   _job_in_lane,   8,      8       # pointer to job object
-END_FIELDS
-
- _LANE_DATA_size = _FIELD_OFFSET
- _LANE_DATA_align = _STRUCT_ALIGN
-
-####################################################################
-
-START_FIELDS    # SHA512_ARGS_X4
-###     name            size    align
-FIELD   _digest,        8*8*4,  4      # transposed digest
-FIELD   _data_ptr,      8*4,    8       # array of pointers to data
-END_FIELDS
-
- _SHA512_ARGS_X4_size  =  _FIELD_OFFSET
- _SHA512_ARGS_X4_align =  _STRUCT_ALIGN
-
-#####################################################################
-
-START_FIELDS    # MB_MGR
-###     name            size    align
-FIELD   _args,          _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align
-FIELD   _lens,          8*4,    8
-FIELD   _unused_lanes,  8,      8
-FIELD   _ldata,         _LANE_DATA_size*4, _LANE_DATA_align
-END_FIELDS
-
- _MB_MGR_size  =  _FIELD_OFFSET
- _MB_MGR_align =  _STRUCT_ALIGN
-
-_args_digest = _args + _digest
-_args_data_ptr = _args + _data_ptr
-
-#######################################################################
-
-#######################################################################
-#### Define constants
-#######################################################################
-
-#define STS_UNKNOWN             0
-#define STS_BEING_PROCESSED     1
-#define STS_COMPLETED           2
-
-#######################################################################
-#### Define JOB_SHA512 structure
-#######################################################################
-
-START_FIELDS    # JOB_SHA512
-###     name                            size    align
-FIELD   _buffer,                        8,      8       # pointer to buffer
-FIELD   _len,                           8,      8       # length in bytes
-FIELD   _result_digest,                 8*8,    32      # Digest (output)
-FIELD   _status,                        4,      4
-FIELD   _user_data,                     8,      8
-END_FIELDS
-
- _JOB_SHA512_size = _FIELD_OFFSET
- _JOB_SHA512_align = _STRUCT_ALIGN
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
deleted file mode 100644
index 7c629caebc05..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * Flush routine for SHA512 multibuffer
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *     Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-.extern sha512_x4_avx2
-
-# LINUX register definitions
-#define arg1    %rdi
-#define arg2    %rsi
-
-# idx needs to be other than arg1, arg2, rbx, r12
-#define idx     %rdx
-
-# Common definitions
-#define state   arg1
-#define job     arg2
-#define len2    arg2
-
-#define unused_lanes    %rbx
-#define lane_data       %rbx
-#define tmp2            %rbx
-
-#define job_rax         %rax
-#define tmp1            %rax
-#define size_offset     %rax
-#define tmp             %rax
-#define start_offset    %rax
-
-#define tmp3            arg1
-
-#define extra_blocks    arg2
-#define p               arg2
-
-#define tmp4            %r8
-#define lens0           %r8
-
-#define lens1           %r9
-#define lens2           %r10
-#define lens3           %r11
-
-.macro LABEL prefix n
-\prefix\n\():
-.endm
-
-.macro JNE_SKIP i
-jne     skip_\i
-.endm
-
-.altmacro
-.macro SET_OFFSET _offset
-offset = \_offset
-.endm
-.noaltmacro
-
-# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state)
-# arg 1 : rcx : state
-ENTRY(sha512_mb_mgr_flush_avx2)
-	FRAME_BEGIN
-	push	%rbx
-
-	# If bit (32+3) is set, then all lanes are empty
-	mov     _unused_lanes(state), unused_lanes
-        bt      $32+7, unused_lanes
-        jc      return_null
-
-        # find a lane with a non-null job
-	xor     idx, idx
-        offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane)
-        cmpq    $0, offset(state)
-        cmovne  one(%rip), idx
-        offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane)
-        cmpq    $0, offset(state)
-        cmovne  two(%rip), idx
-        offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane)
-        cmpq    $0, offset(state)
-        cmovne  three(%rip), idx
-
-        # copy idx to empty lanes
-copy_lane_data:
-	offset =  (_args + _data_ptr)
-        mov     offset(state,idx,8), tmp
-
-        I = 0
-.rep 4
-	offset =  (_ldata + I * _LANE_DATA_size + _job_in_lane)
-        cmpq    $0, offset(state)
-.altmacro
-        JNE_SKIP %I
-        offset =  (_args + _data_ptr + 8*I)
-        mov     tmp, offset(state)
-        offset =  (_lens + 8*I +4)
-        movl    $0xFFFFFFFF, offset(state)
-LABEL skip_ %I
-        I = (I+1)
-.noaltmacro
-.endr
-
-        # Find min length
-        mov     _lens + 0*8(state),lens0
-        mov     lens0,idx
-        mov     _lens + 1*8(state),lens1
-        cmp     idx,lens1
-        cmovb   lens1,idx
-        mov     _lens + 2*8(state),lens2
-        cmp     idx,lens2
-        cmovb   lens2,idx
-        mov     _lens + 3*8(state),lens3
-        cmp     idx,lens3
-        cmovb   lens3,idx
-        mov     idx,len2
-        and     $0xF,idx
-        and     $~0xFF,len2
-	jz      len_is_0
-
-        sub     len2, lens0
-        sub     len2, lens1
-        sub     len2, lens2
-        sub     len2, lens3
-        shr     $32,len2
-        mov     lens0, _lens + 0*8(state)
-        mov     lens1, _lens + 1*8(state)
-        mov     lens2, _lens + 2*8(state)
-        mov     lens3, _lens + 3*8(state)
-
-        # "state" and "args" are the same address, arg1
-        # len is arg2
-        call    sha512_x4_avx2
-        # state and idx are intact
-
-len_is_0:
-        # process completed job "idx"
-	imul    $_LANE_DATA_size, idx, lane_data
-        lea     _ldata(state, lane_data), lane_data
-
-        mov     _job_in_lane(lane_data), job_rax
-        movq    $0,  _job_in_lane(lane_data)
-        movl    $STS_COMPLETED, _status(job_rax)
-        mov     _unused_lanes(state), unused_lanes
-        shl     $8, unused_lanes
-        or      idx, unused_lanes
-        mov     unused_lanes, _unused_lanes(state)
-
-	movl    $0xFFFFFFFF, _lens+4(state,  idx, 8)
-
-	vmovq _args_digest+0*32(state, idx, 8), %xmm0
-        vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
-	vmovq _args_digest+2*32(state, idx, 8), %xmm1
-        vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
-	vmovq _args_digest+4*32(state, idx, 8), %xmm2
-        vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
-	vmovq _args_digest+6*32(state, idx, 8), %xmm3
-	vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
-
-	vmovdqu %xmm0, _result_digest(job_rax)
-	vmovdqu %xmm1, _result_digest+1*16(job_rax)
-	vmovdqu %xmm2, _result_digest+2*16(job_rax)
-	vmovdqu %xmm3, _result_digest+3*16(job_rax)
-
-return:
-	pop	%rbx
-	FRAME_END
-        ret
-
-return_null:
-        xor     job_rax, job_rax
-        jmp     return
-ENDPROC(sha512_mb_mgr_flush_avx2)
-.align 16
-
-ENTRY(sha512_mb_mgr_get_comp_job_avx2)
-        push    %rbx
-
-	mov     _unused_lanes(state), unused_lanes
-        bt      $(32+7), unused_lanes
-        jc      .return_null
-
-        # Find min length
-        mov     _lens(state),lens0
-        mov     lens0,idx
-        mov     _lens+1*8(state),lens1
-        cmp     idx,lens1
-        cmovb   lens1,idx
-        mov     _lens+2*8(state),lens2
-        cmp     idx,lens2
-        cmovb   lens2,idx
-        mov     _lens+3*8(state),lens3
-        cmp     idx,lens3
-        cmovb   lens3,idx
-        test    $~0xF,idx
-        jnz     .return_null
-        and     $0xF,idx
-
-        #process completed job "idx"
-	imul    $_LANE_DATA_size, idx, lane_data
-        lea     _ldata(state, lane_data), lane_data
-
-        mov     _job_in_lane(lane_data), job_rax
-        movq    $0,  _job_in_lane(lane_data)
-        movl    $STS_COMPLETED, _status(job_rax)
-        mov     _unused_lanes(state), unused_lanes
-        shl     $8, unused_lanes
-        or      idx, unused_lanes
-        mov     unused_lanes, _unused_lanes(state)
-
-        movl    $0xFFFFFFFF, _lens+4(state,  idx, 8)
-
-	vmovq   _args_digest(state, idx, 8), %xmm0
-        vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0
-	vmovq    _args_digest+2*32(state, idx, 8), %xmm1
-        vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1
-	vmovq    _args_digest+4*32(state, idx, 8), %xmm2
-        vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2
-        vmovq    _args_digest+6*32(state, idx, 8), %xmm3
-        vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3
-
-	vmovdqu %xmm0, _result_digest+0*16(job_rax)
-	vmovdqu %xmm1, _result_digest+1*16(job_rax)
-	vmovdqu %xmm2, _result_digest+2*16(job_rax)
-	vmovdqu %xmm3, _result_digest+3*16(job_rax)
-
-	pop     %rbx
-
-        ret
-
-.return_null:
-        xor     job_rax, job_rax
-	pop     %rbx
-        ret
-ENDPROC(sha512_mb_mgr_get_comp_job_avx2)
-
-.section	.rodata.cst8.one, "aM", @progbits, 8
-.align 8
-one:
-.quad  1
-
-.section	.rodata.cst8.two, "aM", @progbits, 8
-.align 8
-two:
-.quad  2
-
-.section	.rodata.cst8.three, "aM", @progbits, 8
-.align 8
-three:
-.quad  3
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
deleted file mode 100644
index d08805032f01..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Initialization code for multi buffer SHA256 algorithm for AVX2
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *     Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "sha512_mb_mgr.h"
-
-void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
-{
-	unsigned int j;
-
-	/* initially all lanes are unused */
-	state->lens[0] = 0xFFFFFFFF00000000;
-	state->lens[1] = 0xFFFFFFFF00000001;
-	state->lens[2] = 0xFFFFFFFF00000002;
-	state->lens[3] = 0xFFFFFFFF00000003;
-
-	state->unused_lanes = 0xFF03020100;
-	for (j = 0; j < 4; j++)
-		state->ldata[j].job_in_lane = NULL;
-}
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
deleted file mode 100644
index 4ba709ba78e5..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Buffer submit code for multi buffer SHA512 algorithm
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *     Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-.extern sha512_x4_avx2
-
-#define arg1    %rdi
-#define arg2    %rsi
-
-#define idx             %rdx
-#define last_len        %rdx
-
-#define size_offset     %rcx
-#define tmp2            %rcx
-
-# Common definitions
-#define state   arg1
-#define job     arg2
-#define len2    arg2
-#define p2      arg2
-
-#define p               %r11
-#define start_offset    %r11
-
-#define unused_lanes    %rbx
-
-#define job_rax         %rax
-#define len             %rax
-
-#define lane            %r12
-#define tmp3            %r12
-#define lens3           %r12
-
-#define extra_blocks    %r8
-#define lens0           %r8
-
-#define tmp             %r9
-#define lens1           %r9
-
-#define lane_data       %r10
-#define lens2           %r10
-
-#define DWORD_len %eax
-
-# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job)
-# arg 1 : rcx : state
-# arg 2 : rdx : job
-ENTRY(sha512_mb_mgr_submit_avx2)
-	FRAME_BEGIN
-	push	%rbx
-	push	%r12
-
-        mov     _unused_lanes(state), unused_lanes
-        movzb     %bl,lane
-        shr     $8, unused_lanes
-        imul    $_LANE_DATA_size, lane,lane_data
-        movl    $STS_BEING_PROCESSED, _status(job)
-	lea     _ldata(state, lane_data), lane_data
-        mov     unused_lanes, _unused_lanes(state)
-        movl    _len(job),  DWORD_len
-
-	mov     job, _job_in_lane(lane_data)
-        movl    DWORD_len,_lens+4(state , lane, 8)
-
-	# Load digest words from result_digest
-	vmovdqu	_result_digest+0*16(job), %xmm0
-	vmovdqu _result_digest+1*16(job), %xmm1
-	vmovdqu	_result_digest+2*16(job), %xmm2
-        vmovdqu	_result_digest+3*16(job), %xmm3
-
-	vmovq    %xmm0, _args_digest(state, lane, 8)
-	vpextrq  $1, %xmm0, _args_digest+1*32(state , lane, 8)
-	vmovq    %xmm1, _args_digest+2*32(state , lane, 8)
-	vpextrq  $1, %xmm1, _args_digest+3*32(state , lane, 8)
-	vmovq    %xmm2, _args_digest+4*32(state , lane, 8)
-	vpextrq  $1, %xmm2, _args_digest+5*32(state , lane, 8)
-	vmovq    %xmm3, _args_digest+6*32(state , lane, 8)
-	vpextrq  $1, %xmm3, _args_digest+7*32(state , lane, 8)
-
-	mov     _buffer(job), p
-	mov     p, _args_data_ptr(state, lane, 8)
-
-	cmp     $0xFF, unused_lanes
-	jne     return_null
-
-start_loop:
-
-	# Find min length
-	mov     _lens+0*8(state),lens0
-	mov     lens0,idx
-	mov     _lens+1*8(state),lens1
-	cmp     idx,lens1
-	cmovb   lens1, idx
-	mov     _lens+2*8(state),lens2
-	cmp     idx,lens2
-	cmovb   lens2,idx
-	mov     _lens+3*8(state),lens3
-	cmp     idx,lens3
-	cmovb   lens3,idx
-	mov     idx,len2
-	and     $0xF,idx
-	and     $~0xFF,len2
-	jz      len_is_0
-
-	sub     len2,lens0
-	sub     len2,lens1
-	sub     len2,lens2
-	sub     len2,lens3
-	shr     $32,len2
-	mov     lens0, _lens + 0*8(state)
-	mov     lens1, _lens + 1*8(state)
-	mov     lens2, _lens + 2*8(state)
-	mov     lens3, _lens + 3*8(state)
-
-	# "state" and "args" are the same address, arg1
-	# len is arg2
-	call    sha512_x4_avx2
-	# state and idx are intact
-
-len_is_0:
-
-	# process completed job "idx"
-	imul    $_LANE_DATA_size, idx, lane_data
-	lea     _ldata(state, lane_data), lane_data
-
-	mov     _job_in_lane(lane_data), job_rax
-	mov     _unused_lanes(state), unused_lanes
-	movq    $0, _job_in_lane(lane_data)
-	movl    $STS_COMPLETED, _status(job_rax)
-	shl     $8, unused_lanes
-	or      idx, unused_lanes
-	mov     unused_lanes, _unused_lanes(state)
-
-	movl	$0xFFFFFFFF,_lens+4(state,idx,8)
-	vmovq    _args_digest+0*32(state , idx, 8), %xmm0
-	vpinsrq  $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0
-	vmovq    _args_digest+2*32(state , idx, 8), %xmm1
-	vpinsrq  $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1
-	vmovq    _args_digest+4*32(state , idx, 8), %xmm2
-	vpinsrq  $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2
-	vmovq    _args_digest+6*32(state , idx, 8), %xmm3
-	vpinsrq  $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3
-
-	vmovdqu  %xmm0, _result_digest + 0*16(job_rax)
-	vmovdqu  %xmm1, _result_digest + 1*16(job_rax)
-	vmovdqu  %xmm2, _result_digest + 2*16(job_rax)
-	vmovdqu  %xmm3, _result_digest + 3*16(job_rax)
-
-return:
-	pop	%r12
-	pop	%rbx
-	FRAME_END
-	ret
-
-return_null:
-	xor     job_rax, job_rax
-	jmp     return
-ENDPROC(sha512_mb_mgr_submit_avx2)
-
-/* UNUSED?
-.section	.rodata.cst16, "aM", @progbits, 16
-.align 16
-H0:     .int  0x6a09e667
-H1:     .int  0xbb67ae85
-H2:     .int  0x3c6ef372
-H3:     .int  0xa54ff53a
-H4:     .int  0x510e527f
-H5:     .int  0x9b05688c
-H6:     .int  0x1f83d9ab
-H7:     .int  0x5be0cd19
-*/
diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
deleted file mode 100644
index e22e907643a6..000000000000
--- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S
+++ /dev/null
@@ -1,531 +0,0 @@
-/*
- * Multi-buffer SHA512 algorithm hash compute routine
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Contact Information:
- *     Megha Dey <megha.dey@linux.intel.com>
- *
- * BSD LICENSE
- *
- * Copyright(c) 2016 Intel Corporation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-# code to compute quad SHA512 using AVX2
-# use YMMs to tackle the larger digest size
-# outer calling routine takes care of save and restore of XMM registers
-# Logic designed/laid out by JDG
-
-# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15
-# Stack must be aligned to 32 bytes before call
-# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12
-# Linux preserves: rcx rdx rdi rbp r13 r14 r15
-# clobbers ymm0-15
-
-#include <linux/linkage.h>
-#include "sha512_mb_mgr_datastruct.S"
-
-arg1 = %rdi
-arg2 = %rsi
-
-# Common definitions
-STATE = arg1
-INP_SIZE = arg2
-
-IDX = %rax
-ROUND = %rbx
-TBL = %r8
-
-inp0 = %r9
-inp1 = %r10
-inp2 = %r11
-inp3 = %r12
-
-a = %ymm0
-b = %ymm1
-c = %ymm2
-d = %ymm3
-e = %ymm4
-f = %ymm5
-g = %ymm6
-h = %ymm7
-
-a0 = %ymm8
-a1 = %ymm9
-a2 = %ymm10
-
-TT0 = %ymm14
-TT1 = %ymm13
-TT2 = %ymm12
-TT3 = %ymm11
-TT4 = %ymm10
-TT5 = %ymm9
-
-T1 = %ymm14
-TMP = %ymm15
-
-# Define stack usage
-STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24
-
-#define VMOVPD	vmovupd
-_digest = SZ4*16
-
-# transpose r0, r1, r2, r3, t0, t1
-# "transpose" data in {r0..r3} using temps {t0..t3}
-# Input looks like: {r0 r1 r2 r3}
-# r0 = {a7 a6 a5 a4 a3 a2 a1 a0}
-# r1 = {b7 b6 b5 b4 b3 b2 b1 b0}
-# r2 = {c7 c6 c5 c4 c3 c2 c1 c0}
-# r3 = {d7 d6 d5 d4 d3 d2 d1 d0}
-#
-# output looks like: {t0 r1 r0 r3}
-# t0 = {d1 d0 c1 c0 b1 b0 a1 a0}
-# r1 = {d3 d2 c3 c2 b3 b2 a3 a2}
-# r0 = {d5 d4 c5 c4 b5 b4 a5 a4}
-# r3 = {d7 d6 c7 c6 b7 b6 a7 a6}
-
-.macro TRANSPOSE r0 r1 r2 r3 t0 t1
-	vshufps  $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4   b1 b0 a1 a0}
-        vshufps  $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6   b3 b2 a3 a2}
-        vshufps  $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4   d1 d0 c1 c0}
-        vshufps  $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6   d3 d2 c3 c2}
-
-	vperm2f128      $0x20, \r2, \r0, \r1  # h6...a6
-        vperm2f128      $0x31, \r2, \r0, \r3  # h2...a2
-        vperm2f128      $0x31, \t1, \t0, \r0  # h5...a5
-        vperm2f128      $0x20, \t1, \t0, \t0  # h1...a1
-.endm
-
-.macro ROTATE_ARGS
-TMP_ = h
-h = g
-g = f
-f = e
-e = d
-d = c
-c = b
-b = a
-a = TMP_
-.endm
-
-# PRORQ reg, imm, tmp
-# packed-rotate-right-double
-# does a rotate by doing two shifts and an or
-.macro _PRORQ reg imm tmp
-	vpsllq	$(64-\imm),\reg,\tmp
-	vpsrlq	$\imm,\reg, \reg
-	vpor	\tmp,\reg, \reg
-.endm
-
-# non-destructive
-# PRORQ_nd reg, imm, tmp, src
-.macro _PRORQ_nd reg imm tmp src
-	vpsllq	$(64-\imm), \src, \tmp
-	vpsrlq	$\imm, \src, \reg
-	vpor	\tmp, \reg, \reg
-.endm
-
-# PRORQ dst/src, amt
-.macro PRORQ reg imm
-	_PRORQ	\reg, \imm, TMP
-.endm
-
-# PRORQ_nd dst, src, amt
-.macro PRORQ_nd reg tmp imm
-	_PRORQ_nd	\reg, \imm, TMP, \tmp
-.endm
-
-#; arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_00_15 _T1 i
-	PRORQ_nd a0, e, (18-14)	# sig1: a0 = (e >> 4)
-
-	vpxor   g, f, a2        # ch: a2 = f^g
-        vpand   e,a2, a2                # ch: a2 = (f^g)&e
-        vpxor   g, a2, a2               # a2 = ch
-
-        PRORQ_nd        a1,e,41         # sig1: a1 = (e >> 25)
-
-        offset = SZ4*(\i & 0xf)
-        vmovdqu \_T1,offset(%rsp)
-        vpaddq  (TBL,ROUND,1), \_T1, \_T1       # T1 = W + K
-        vpxor   e,a0, a0        # sig1: a0 = e ^ (e >> 5)
-        PRORQ   a0, 14           # sig1: a0 = (e >> 6) ^ (e >> 11)
-        vpaddq  a2, h, h        # h = h + ch
-        PRORQ_nd        a2,a,6  # sig0: a2 = (a >> 11)
-        vpaddq  \_T1,h, h       # h = h + ch + W + K
-        vpxor   a1, a0, a0      # a0 = sigma1
-	vmovdqu a,\_T1
-        PRORQ_nd        a1,a,39 # sig0: a1 = (a >> 22)
-        vpxor   c, \_T1, \_T1      # maj: T1 = a^c
-        add     $SZ4, ROUND     # ROUND++
-        vpand   b, \_T1, \_T1   # maj: T1 = (a^c)&b
-        vpaddq  a0, h, h
-        vpaddq  h, d, d
-        vpxor   a, a2, a2       # sig0: a2 = a ^ (a >> 11)
-        PRORQ   a2,28            # sig0: a2 = (a >> 2) ^ (a >> 13)
-        vpxor   a1, a2, a2      # a2 = sig0
-        vpand   c, a, a1        # maj: a1 = a&c
-        vpor    \_T1, a1, a1    # a1 = maj
-        vpaddq  a1, h, h        # h = h + ch + W + K + maj
-        vpaddq  a2, h, h        # h = h + ch + W + K + maj + sigma0
-        ROTATE_ARGS
-.endm
-
-
-#; arguments passed implicitly in preprocessor symbols i, a...h
-.macro ROUND_16_XX _T1 i
-	vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1
-        vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1
-        vmovdqu \_T1, a0
-        PRORQ   \_T1,7
-        vmovdqu a1, a2
-        PRORQ   a1,42
-        vpxor   a0, \_T1, \_T1
-        PRORQ   \_T1, 1
-        vpxor   a2, a1, a1
-        PRORQ   a1, 19
-        vpsrlq  $7, a0, a0
-        vpxor   a0, \_T1, \_T1
-        vpsrlq  $6, a2, a2
-        vpxor   a2, a1, a1
-        vpaddq  SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1
-        vpaddq  SZ4*((\i-7)&0xf)(%rsp), a1, a1
-        vpaddq  a1, \_T1, \_T1
-
-        ROUND_00_15 \_T1,\i
-.endm
-
-
-# void sha512_x4_avx2(void *STATE, const int INP_SIZE)
-# arg 1 : STATE    : pointer to input data
-# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1)
-ENTRY(sha512_x4_avx2)
-	# general registers preserved in outer calling routine
-	# outer calling routine saves all the XMM registers
-	# save callee-saved clobbered registers to comply with C function ABI
-	push    %r12
-	push    %r13
-	push    %r14
-	push    %r15
-
-	sub     $STACK_SPACE1, %rsp
-
-        # Load the pre-transposed incoming digest.
-        vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a
-        vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b
-        vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c
-        vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d
-        vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e
-        vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f
-        vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g
-        vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h
-
-        lea     K512_4(%rip),TBL
-
-        # load the address of each of the 4 message lanes
-        # getting ready to transpose input onto stack
-        mov     _data_ptr+0*PTR_SZ(STATE),inp0
-        mov     _data_ptr+1*PTR_SZ(STATE),inp1
-        mov     _data_ptr+2*PTR_SZ(STATE),inp2
-        mov     _data_ptr+3*PTR_SZ(STATE),inp3
-
-        xor     IDX, IDX
-lloop:
-        xor     ROUND, ROUND
-
-	# save old digest
-        vmovdqu a, _digest(%rsp)
-        vmovdqu b, _digest+1*SZ4(%rsp)
-        vmovdqu c, _digest+2*SZ4(%rsp)
-        vmovdqu d, _digest+3*SZ4(%rsp)
-        vmovdqu e, _digest+4*SZ4(%rsp)
-        vmovdqu f, _digest+5*SZ4(%rsp)
-        vmovdqu g, _digest+6*SZ4(%rsp)
-        vmovdqu h, _digest+7*SZ4(%rsp)
-        i = 0
-.rep 4
-	vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP
-        VMOVPD  i*32(inp0, IDX), TT2
-        VMOVPD  i*32(inp1, IDX), TT1
-        VMOVPD  i*32(inp2, IDX), TT4
-        VMOVPD  i*32(inp3, IDX), TT3
-	TRANSPOSE	TT2, TT1, TT4, TT3, TT0, TT5
-	vpshufb	TMP, TT0, TT0
-	vpshufb	TMP, TT1, TT1
-	vpshufb	TMP, TT2, TT2
-	vpshufb	TMP, TT3, TT3
-	ROUND_00_15	TT0,(i*4+0)
-	ROUND_00_15	TT1,(i*4+1)
-	ROUND_00_15	TT2,(i*4+2)
-	ROUND_00_15	TT3,(i*4+3)
-	i = (i+1)
-.endr
-        add     $128, IDX
-
-        i = (i*4)
-
-        jmp     Lrounds_16_xx
-.align 16
-Lrounds_16_xx:
-.rep 16
-        ROUND_16_XX     T1, i
-        i = (i+1)
-.endr
-        cmp     $0xa00,ROUND
-        jb      Lrounds_16_xx
-
-	# add old digest
-        vpaddq  _digest(%rsp), a, a
-        vpaddq  _digest+1*SZ4(%rsp), b, b
-        vpaddq  _digest+2*SZ4(%rsp), c, c
-        vpaddq  _digest+3*SZ4(%rsp), d, d
-        vpaddq  _digest+4*SZ4(%rsp), e, e
-        vpaddq  _digest+5*SZ4(%rsp), f, f
-        vpaddq  _digest+6*SZ4(%rsp), g, g
-        vpaddq  _digest+7*SZ4(%rsp), h, h
-
-        sub     $1, INP_SIZE  # unit is blocks
-        jne     lloop
-
-        # write back to memory (state object) the transposed digest
-        vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE)
-        vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE)
-
-	# update input data pointers
-	add     IDX, inp0
-        mov     inp0, _data_ptr+0*PTR_SZ(STATE)
-        add     IDX, inp1
-        mov     inp1, _data_ptr+1*PTR_SZ(STATE)
-        add     IDX, inp2
-        mov     inp2, _data_ptr+2*PTR_SZ(STATE)
-        add     IDX, inp3
-        mov     inp3, _data_ptr+3*PTR_SZ(STATE)
-
-	#;;;;;;;;;;;;;;;
-	#; Postamble
-	add $STACK_SPACE1, %rsp
-	# restore callee-saved clobbered registers
-
-	pop     %r15
-	pop     %r14
-	pop     %r13
-	pop     %r12
-
-	# outer calling routine restores XMM and other GP registers
-	ret
-ENDPROC(sha512_x4_avx2)
-
-.section	.rodata.K512_4, "a", @progbits
-.align 64
-K512_4:
-	.octa 0x428a2f98d728ae22428a2f98d728ae22,\
-		0x428a2f98d728ae22428a2f98d728ae22
-	.octa 0x7137449123ef65cd7137449123ef65cd,\
-		0x7137449123ef65cd7137449123ef65cd
-	.octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\
-		0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f
-	.octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\
-		0xe9b5dba58189dbbce9b5dba58189dbbc
-	.octa 0x3956c25bf348b5383956c25bf348b538,\
-		0x3956c25bf348b5383956c25bf348b538
-	.octa 0x59f111f1b605d01959f111f1b605d019,\
-		0x59f111f1b605d01959f111f1b605d019
-	.octa 0x923f82a4af194f9b923f82a4af194f9b,\
-		0x923f82a4af194f9b923f82a4af194f9b
-	.octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\
-		0xab1c5ed5da6d8118ab1c5ed5da6d8118
-	.octa 0xd807aa98a3030242d807aa98a3030242,\
-		0xd807aa98a3030242d807aa98a3030242
-	.octa 0x12835b0145706fbe12835b0145706fbe,\
-		0x12835b0145706fbe12835b0145706fbe
-	.octa 0x243185be4ee4b28c243185be4ee4b28c,\
-		0x243185be4ee4b28c243185be4ee4b28c
-	.octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\
-		0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2
-	.octa 0x72be5d74f27b896f72be5d74f27b896f,\
-		0x72be5d74f27b896f72be5d74f27b896f
-	.octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\
-		0x80deb1fe3b1696b180deb1fe3b1696b1
-	.octa 0x9bdc06a725c712359bdc06a725c71235,\
-		0x9bdc06a725c712359bdc06a725c71235
-	.octa 0xc19bf174cf692694c19bf174cf692694,\
-		0xc19bf174cf692694c19bf174cf692694
-	.octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\
-		0xe49b69c19ef14ad2e49b69c19ef14ad2
-	.octa 0xefbe4786384f25e3efbe4786384f25e3,\
-		0xefbe4786384f25e3efbe4786384f25e3
-	.octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\
-		0x0fc19dc68b8cd5b50fc19dc68b8cd5b5
-	.octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\
-		0x240ca1cc77ac9c65240ca1cc77ac9c65
-	.octa 0x2de92c6f592b02752de92c6f592b0275,\
-		0x2de92c6f592b02752de92c6f592b0275
-	.octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\
-		0x4a7484aa6ea6e4834a7484aa6ea6e483
-	.octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\
-		0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4
-	.octa 0x76f988da831153b576f988da831153b5,\
-		0x76f988da831153b576f988da831153b5
-	.octa 0x983e5152ee66dfab983e5152ee66dfab,\
-		0x983e5152ee66dfab983e5152ee66dfab
-	.octa 0xa831c66d2db43210a831c66d2db43210,\
-		0xa831c66d2db43210a831c66d2db43210
-	.octa 0xb00327c898fb213fb00327c898fb213f,\
-		0xb00327c898fb213fb00327c898fb213f
-	.octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\
-		0xbf597fc7beef0ee4bf597fc7beef0ee4
-	.octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\
-		0xc6e00bf33da88fc2c6e00bf33da88fc2
-	.octa 0xd5a79147930aa725d5a79147930aa725,\
-		0xd5a79147930aa725d5a79147930aa725
-	.octa 0x06ca6351e003826f06ca6351e003826f,\
-		0x06ca6351e003826f06ca6351e003826f
-	.octa 0x142929670a0e6e70142929670a0e6e70,\
-		0x142929670a0e6e70142929670a0e6e70
-	.octa 0x27b70a8546d22ffc27b70a8546d22ffc,\
-		0x27b70a8546d22ffc27b70a8546d22ffc
-	.octa 0x2e1b21385c26c9262e1b21385c26c926,\
-		0x2e1b21385c26c9262e1b21385c26c926
-	.octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\
-		0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed
-	.octa 0x53380d139d95b3df53380d139d95b3df,\
-		0x53380d139d95b3df53380d139d95b3df
-	.octa 0x650a73548baf63de650a73548baf63de,\
-		0x650a73548baf63de650a73548baf63de
-	.octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\
-		0x766a0abb3c77b2a8766a0abb3c77b2a8
-	.octa 0x81c2c92e47edaee681c2c92e47edaee6,\
-		0x81c2c92e47edaee681c2c92e47edaee6
-	.octa 0x92722c851482353b92722c851482353b,\
-		0x92722c851482353b92722c851482353b
-	.octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\
-		0xa2bfe8a14cf10364a2bfe8a14cf10364
-	.octa 0xa81a664bbc423001a81a664bbc423001,\
-		0xa81a664bbc423001a81a664bbc423001
-	.octa 0xc24b8b70d0f89791c24b8b70d0f89791,\
-		0xc24b8b70d0f89791c24b8b70d0f89791
-	.octa 0xc76c51a30654be30c76c51a30654be30,\
-		0xc76c51a30654be30c76c51a30654be30
-	.octa 0xd192e819d6ef5218d192e819d6ef5218,\
-		0xd192e819d6ef5218d192e819d6ef5218
-	.octa 0xd69906245565a910d69906245565a910,\
-		0xd69906245565a910d69906245565a910
-	.octa 0xf40e35855771202af40e35855771202a,\
-		0xf40e35855771202af40e35855771202a
-	.octa 0x106aa07032bbd1b8106aa07032bbd1b8,\
-		0x106aa07032bbd1b8106aa07032bbd1b8
-	.octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\
-		0x19a4c116b8d2d0c819a4c116b8d2d0c8
-	.octa 0x1e376c085141ab531e376c085141ab53,\
-		0x1e376c085141ab531e376c085141ab53
-	.octa 0x2748774cdf8eeb992748774cdf8eeb99,\
-		0x2748774cdf8eeb992748774cdf8eeb99
-	.octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\
-		0x34b0bcb5e19b48a834b0bcb5e19b48a8
-	.octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\
-		0x391c0cb3c5c95a63391c0cb3c5c95a63
-	.octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\
-		0x4ed8aa4ae3418acb4ed8aa4ae3418acb
-	.octa 0x5b9cca4f7763e3735b9cca4f7763e373,\
-		0x5b9cca4f7763e3735b9cca4f7763e373
-	.octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\
-		0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3
-	.octa 0x748f82ee5defb2fc748f82ee5defb2fc,\
-		0x748f82ee5defb2fc748f82ee5defb2fc
-	.octa 0x78a5636f43172f6078a5636f43172f60,\
-		0x78a5636f43172f6078a5636f43172f60
-	.octa 0x84c87814a1f0ab7284c87814a1f0ab72,\
-		0x84c87814a1f0ab7284c87814a1f0ab72
-	.octa 0x8cc702081a6439ec8cc702081a6439ec,\
-		0x8cc702081a6439ec8cc702081a6439ec
-	.octa 0x90befffa23631e2890befffa23631e28,\
-		0x90befffa23631e2890befffa23631e28
-	.octa 0xa4506cebde82bde9a4506cebde82bde9,\
-		0xa4506cebde82bde9a4506cebde82bde9
-	.octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\
-		0xbef9a3f7b2c67915bef9a3f7b2c67915
-	.octa 0xc67178f2e372532bc67178f2e372532b,\
-		0xc67178f2e372532bc67178f2e372532b
-	.octa 0xca273eceea26619cca273eceea26619c,\
-		0xca273eceea26619cca273eceea26619c
-	.octa 0xd186b8c721c0c207d186b8c721c0c207,\
-		0xd186b8c721c0c207d186b8c721c0c207
-	.octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\
-		0xeada7dd6cde0eb1eeada7dd6cde0eb1e
-	.octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\
-		0xf57d4f7fee6ed178f57d4f7fee6ed178
-	.octa 0x06f067aa72176fba06f067aa72176fba,\
-		0x06f067aa72176fba06f067aa72176fba
-	.octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\
-		0x0a637dc5a2c898a60a637dc5a2c898a6
-	.octa 0x113f9804bef90dae113f9804bef90dae,\
-		0x113f9804bef90dae113f9804bef90dae
-	.octa 0x1b710b35131c471b1b710b35131c471b,\
-		0x1b710b35131c471b1b710b35131c471b
-	.octa 0x28db77f523047d8428db77f523047d84,\
-		0x28db77f523047d8428db77f523047d84
-	.octa 0x32caab7b40c7249332caab7b40c72493,\
-		0x32caab7b40c7249332caab7b40c72493
-	.octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\
-		0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc
-	.octa 0x431d67c49c100d4c431d67c49c100d4c,\
-		0x431d67c49c100d4c431d67c49c100d4c
-	.octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\
-		0x4cc5d4becb3e42b64cc5d4becb3e42b6
-	.octa 0x597f299cfc657e2a597f299cfc657e2a,\
-		0x597f299cfc657e2a597f299cfc657e2a
-	.octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\
-		0x5fcb6fab3ad6faec5fcb6fab3ad6faec
-	.octa 0x6c44198c4a4758176c44198c4a475817,\
-		0x6c44198c4a4758176c44198c4a475817
-
-.section	.rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32
-.align 32
-PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
-                         .octa 0x18191a1b1c1d1e1f1011121314151617