diff options
Diffstat (limited to 'arch/x86/crypto/sha512-mb')
-rw-r--r-- | arch/x86/crypto/sha512-mb/Makefile | 12 | ||||
-rw-r--r-- | arch/x86/crypto/sha512-mb/sha512_mb.c | 1047 | ||||
-rw-r--r-- | arch/x86/crypto/sha512-mb/sha512_mb_ctx.h | 128 | ||||
-rw-r--r-- | arch/x86/crypto/sha512-mb/sha512_mb_mgr.h | 104 | ||||
-rw-r--r-- | arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S | 281 | ||||
-rw-r--r-- | arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S | 297 | ||||
-rw-r--r-- | arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c | 69 | ||||
-rw-r--r-- | arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S | 224 | ||||
-rw-r--r-- | arch/x86/crypto/sha512-mb/sha512_x4_avx2.S | 531 |
9 files changed, 0 insertions, 2693 deletions
diff --git a/arch/x86/crypto/sha512-mb/Makefile b/arch/x86/crypto/sha512-mb/Makefile deleted file mode 100644 index 90f1ef69152e..000000000000 --- a/arch/x86/crypto/sha512-mb/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Arch-specific CryptoAPI modules. -# - -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ - $(comma)4)$(comma)%ymm2,yes,no) -ifeq ($(avx2_supported),yes) - obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o - sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \ - sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o -endif diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c deleted file mode 100644 index 26b85678012d..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb.c +++ /dev/null @@ -1,1047 +0,0 @@ -/* - * Multi buffer SHA512 algorithm Glue Code - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/cryptohash.h> -#include <linux/types.h> -#include <linux/list.h> -#include <crypto/scatterwalk.h> -#include <crypto/sha.h> -#include <crypto/mcryptd.h> -#include <crypto/crypto_wq.h> -#include <asm/byteorder.h> -#include <linux/hardirq.h> -#include <asm/fpu/api.h> -#include "sha512_mb_ctx.h" - -#define FLUSH_INTERVAL 1000 /* in usec */ - -static struct mcryptd_alg_state sha512_mb_alg_state; - -struct sha512_mb_ctx { - struct mcryptd_ahash *mcryptd_tfm; -}; - -static inline struct mcryptd_hash_request_ctx - *cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx) -{ - struct ahash_request *areq; - - areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); - return container_of(areq, struct mcryptd_hash_request_ctx, areq); -} - -static inline struct ahash_request - *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) -{ - return container_of((void *) ctx, struct ahash_request, __ctx); -} - -static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, - struct ahash_request *areq) -{ - rctx->flag = HASH_UPDATE; -} - -static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state); -static asmlinkage struct job_sha512* (*sha512_job_mgr_submit) - (struct sha512_mb_mgr *state, - struct job_sha512 *job); -static asmlinkage struct job_sha512* (*sha512_job_mgr_flush) - (struct sha512_mb_mgr *state); -static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job) - (struct sha512_mb_mgr *state); - -inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2], - uint64_t total_len) -{ - uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1); - - memset(&padblock[i], 0, SHA512_BLOCK_SIZE); - padblock[i] = 0x80; - - i += ((SHA512_BLOCK_SIZE - 1) & - (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1))) - + 1 + SHA512_PADLENGTHFIELD_SIZE; - -#if SHA512_PADLENGTHFIELD_SIZE == 16 - *((uint64_t *) &padblock[i - 16]) = 0; -#endif - - *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); - - /* Number of extra blocks to hash */ - return i >> SHA512_LOG2_BLOCK_SIZE; -} - -static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit - (struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx) -{ - while (ctx) { - if (ctx->status & HASH_CTX_STS_COMPLETE) { - /* Clear PROCESSING bit */ - ctx->status = HASH_CTX_STS_COMPLETE; - return ctx; - } - - /* - * If the extra blocks are empty, begin hashing what remains - * in the user's buffer. - */ - if (ctx->partial_block_buffer_length == 0 && - ctx->incoming_buffer_length) { - - const void *buffer = ctx->incoming_buffer; - uint32_t len = ctx->incoming_buffer_length; - uint32_t copy_len; - - /* - * Only entire blocks can be hashed. - * Copy remainder to extra blocks buffer. - */ - copy_len = len & (SHA512_BLOCK_SIZE-1); - - if (copy_len) { - len -= copy_len; - memcpy(ctx->partial_block_buffer, - ((const char *) buffer + len), - copy_len); - ctx->partial_block_buffer_length = copy_len; - } - - ctx->incoming_buffer_length = 0; - - /* len should be a multiple of the block size now */ - assert((len % SHA512_BLOCK_SIZE) == 0); - - /* Set len to the number of blocks to be hashed */ - len >>= SHA512_LOG2_BLOCK_SIZE; - - if (len) { - - ctx->job.buffer = (uint8_t *) buffer; - ctx->job.len = len; - ctx = (struct sha512_hash_ctx *) - sha512_job_mgr_submit(&mgr->mgr, - &ctx->job); - continue; - } - } - - /* - * If the extra blocks are not empty, then we are - * either on the last block(s) or we need more - * user input before continuing. - */ - if (ctx->status & HASH_CTX_STS_LAST) { - - uint8_t *buf = ctx->partial_block_buffer; - uint32_t n_extra_blocks = - sha512_pad(buf, ctx->total_length); - - ctx->status = (HASH_CTX_STS_PROCESSING | - HASH_CTX_STS_COMPLETE); - ctx->job.buffer = buf; - ctx->job.len = (uint32_t) n_extra_blocks; - ctx = (struct sha512_hash_ctx *) - sha512_job_mgr_submit(&mgr->mgr, &ctx->job); - continue; - } - - if (ctx) - ctx->status = HASH_CTX_STS_IDLE; - return ctx; - } - - return NULL; -} - -static struct sha512_hash_ctx - *sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate) -{ - /* - * If get_comp_job returns NULL, there are no jobs complete. - * If get_comp_job returns a job, verify that it is safe to return to - * the user. - * If it is not ready, resubmit the job to finish processing. - * If sha512_ctx_mgr_resubmit returned a job, it is ready to be - * returned. - * Otherwise, all jobs currently being managed by the hash_ctx_mgr - * still need processing. - */ - struct sha512_ctx_mgr *mgr; - struct sha512_hash_ctx *ctx; - unsigned long flags; - - mgr = cstate->mgr; - spin_lock_irqsave(&cstate->work_lock, flags); - ctx = (struct sha512_hash_ctx *) - sha512_job_mgr_get_comp_job(&mgr->mgr); - ctx = sha512_ctx_mgr_resubmit(mgr, ctx); - spin_unlock_irqrestore(&cstate->work_lock, flags); - return ctx; -} - -static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr) -{ - sha512_job_mgr_init(&mgr->mgr); -} - -static struct sha512_hash_ctx - *sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate, - struct sha512_hash_ctx *ctx, - const void *buffer, - uint32_t len, - int flags) -{ - struct sha512_ctx_mgr *mgr; - unsigned long irqflags; - - mgr = cstate->mgr; - spin_lock_irqsave(&cstate->work_lock, irqflags); - if (flags & ~(HASH_UPDATE | HASH_LAST)) { - /* User should not pass anything other than UPDATE or LAST */ - ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; - goto unlock; - } - - if (ctx->status & HASH_CTX_STS_PROCESSING) { - /* Cannot submit to a currently processing job. */ - ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; - goto unlock; - } - - if (ctx->status & HASH_CTX_STS_COMPLETE) { - /* Cannot update a finished job. */ - ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; - goto unlock; - } - - /* - * If we made it here, there were no errors during this call to - * submit - */ - ctx->error = HASH_CTX_ERROR_NONE; - - /* Store buffer ptr info from user */ - ctx->incoming_buffer = buffer; - ctx->incoming_buffer_length = len; - - /* - * Store the user's request flags and mark this ctx as currently being - * processed. - */ - ctx->status = (flags & HASH_LAST) ? - (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : - HASH_CTX_STS_PROCESSING; - - /* Advance byte counter */ - ctx->total_length += len; - - /* - * If there is anything currently buffered in the extra blocks, - * append to it until it contains a whole block. - * Or if the user's buffer contains less than a whole block, - * append as much as possible to the extra block. - */ - if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) { - /* Compute how many bytes to copy from user buffer into extra - * block - */ - uint32_t copy_len = SHA512_BLOCK_SIZE - - ctx->partial_block_buffer_length; - if (len < copy_len) - copy_len = len; - - if (copy_len) { - /* Copy and update relevant pointers and counters */ - memcpy - (&ctx->partial_block_buffer[ctx->partial_block_buffer_length], - buffer, copy_len); - - ctx->partial_block_buffer_length += copy_len; - ctx->incoming_buffer = (const void *) - ((const char *)buffer + copy_len); - ctx->incoming_buffer_length = len - copy_len; - } - - /* The extra block should never contain more than 1 block - * here - */ - assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE); - - /* If the extra block buffer contains exactly 1 block, it can - * be hashed. - */ - if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) { - ctx->partial_block_buffer_length = 0; - - ctx->job.buffer = ctx->partial_block_buffer; - ctx->job.len = 1; - ctx = (struct sha512_hash_ctx *) - sha512_job_mgr_submit(&mgr->mgr, &ctx->job); - } - } - - ctx = sha512_ctx_mgr_resubmit(mgr, ctx); -unlock: - spin_unlock_irqrestore(&cstate->work_lock, irqflags); - return ctx; -} - -static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate) -{ - struct sha512_ctx_mgr *mgr; - struct sha512_hash_ctx *ctx; - unsigned long flags; - - mgr = cstate->mgr; - spin_lock_irqsave(&cstate->work_lock, flags); - while (1) { - ctx = (struct sha512_hash_ctx *) - sha512_job_mgr_flush(&mgr->mgr); - - /* If flush returned 0, there are no more jobs in flight. */ - if (!ctx) - break; - - /* - * If flush returned a job, resubmit the job to finish - * processing. - */ - ctx = sha512_ctx_mgr_resubmit(mgr, ctx); - - /* - * If sha512_ctx_mgr_resubmit returned a job, it is ready to - * be returned. Otherwise, all jobs currently being managed by - * the sha512_ctx_mgr still need processing. Loop. - */ - if (ctx) - break; - } - spin_unlock_irqrestore(&cstate->work_lock, flags); - return ctx; -} - -static int sha512_mb_init(struct ahash_request *areq) -{ - struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); - - hash_ctx_init(sctx); - sctx->job.result_digest[0] = SHA512_H0; - sctx->job.result_digest[1] = SHA512_H1; - sctx->job.result_digest[2] = SHA512_H2; - sctx->job.result_digest[3] = SHA512_H3; - sctx->job.result_digest[4] = SHA512_H4; - sctx->job.result_digest[5] = SHA512_H5; - sctx->job.result_digest[6] = SHA512_H6; - sctx->job.result_digest[7] = SHA512_H7; - sctx->total_length = 0; - sctx->partial_block_buffer_length = 0; - sctx->status = HASH_CTX_STS_IDLE; - - return 0; -} - -static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx) -{ - int i; - struct sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); - __be64 *dst = (__be64 *) rctx->out; - - for (i = 0; i < 8; ++i) - dst[i] = cpu_to_be64(sctx->job.result_digest[i]); - - return 0; -} - -static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, - struct mcryptd_alg_cstate *cstate, bool flush) -{ - int flag = HASH_UPDATE; - int nbytes, err = 0; - struct mcryptd_hash_request_ctx *rctx = *ret_rctx; - struct sha512_hash_ctx *sha_ctx; - - /* more work ? */ - while (!(rctx->flag & HASH_DONE)) { - nbytes = crypto_ahash_walk_done(&rctx->walk, 0); - if (nbytes < 0) { - err = nbytes; - goto out; - } - /* check if the walk is done */ - if (crypto_ahash_walk_last(&rctx->walk)) { - rctx->flag |= HASH_DONE; - if (rctx->flag & HASH_FINAL) - flag |= HASH_LAST; - - } - sha_ctx = (struct sha512_hash_ctx *) - ahash_request_ctx(&rctx->areq); - kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, - rctx->walk.data, nbytes, flag); - if (!sha_ctx) { - if (flush) - sha_ctx = sha512_ctx_mgr_flush(cstate); - } - kernel_fpu_end(); - if (sha_ctx) - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - else { - rctx = NULL; - goto out; - } - } - - /* copy the results */ - if (rctx->flag & HASH_FINAL) - sha512_mb_set_results(rctx); - -out: - *ret_rctx = rctx; - return err; -} - -static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, - struct mcryptd_alg_cstate *cstate, - int err) -{ - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); - struct sha512_hash_ctx *sha_ctx; - struct mcryptd_hash_request_ctx *req_ctx; - int ret; - unsigned long flags; - - /* remove from work list */ - spin_lock_irqsave(&cstate->work_lock, flags); - list_del(&rctx->waiter); - spin_unlock_irqrestore(&cstate->work_lock, flags); - - if (irqs_disabled()) - rctx->complete(&req->base, err); - else { - local_bh_disable(); - rctx->complete(&req->base, err); - local_bh_enable(); - } - - /* check to see if there are other jobs that are done */ - sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate); - while (sha_ctx) { - req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&req_ctx, cstate, false); - if (req_ctx) { - spin_lock_irqsave(&cstate->work_lock, flags); - list_del(&req_ctx->waiter); - spin_unlock_irqrestore(&cstate->work_lock, flags); - - req = cast_mcryptd_ctx_to_req(req_ctx); - if (irqs_disabled()) - req_ctx->complete(&req->base, ret); - else { - local_bh_disable(); - req_ctx->complete(&req->base, ret); - local_bh_enable(); - } - } - sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate); - } - - return 0; -} - -static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx, - struct mcryptd_alg_cstate *cstate) -{ - unsigned long next_flush; - unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); - unsigned long flags; - - /* initialize tag */ - rctx->tag.arrival = jiffies; /* tag the arrival time */ - rctx->tag.seq_num = cstate->next_seq_num++; - next_flush = rctx->tag.arrival + delay; - rctx->tag.expire = next_flush; - - spin_lock_irqsave(&cstate->work_lock, flags); - list_add_tail(&rctx->waiter, &cstate->work_list); - spin_unlock_irqrestore(&cstate->work_lock, flags); - - mcryptd_arm_flusher(cstate, delay); -} - -static int sha512_mb_update(struct ahash_request *areq) -{ - struct mcryptd_hash_request_ctx *rctx = - container_of(areq, struct mcryptd_hash_request_ctx, - areq); - struct mcryptd_alg_cstate *cstate = - this_cpu_ptr(sha512_mb_alg_state.alg_cstate); - - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); - struct sha512_hash_ctx *sha_ctx; - int ret = 0, nbytes; - - - /* sanity check */ - if (rctx->tag.cpu != smp_processor_id()) { - pr_err("mcryptd error: cpu clash\n"); - goto done; - } - - /* need to init context */ - req_ctx_init(rctx, areq); - - nbytes = crypto_ahash_walk_first(req, &rctx->walk); - - if (nbytes < 0) { - ret = nbytes; - goto done; - } - - if (crypto_ahash_walk_last(&rctx->walk)) - rctx->flag |= HASH_DONE; - - /* submit */ - sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); - sha512_mb_add_list(rctx, cstate); - kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, - nbytes, HASH_UPDATE); - kernel_fpu_end(); - - /* check if anything is returned */ - if (!sha_ctx) - return -EINPROGRESS; - - if (sha_ctx->error) { - ret = sha_ctx->error; - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - goto done; - } - - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&rctx, cstate, false); - - if (!rctx) - return -EINPROGRESS; -done: - sha_complete_job(rctx, cstate, ret); - return ret; -} - -static int sha512_mb_finup(struct ahash_request *areq) -{ - struct mcryptd_hash_request_ctx *rctx = - container_of(areq, struct mcryptd_hash_request_ctx, - areq); - struct mcryptd_alg_cstate *cstate = - this_cpu_ptr(sha512_mb_alg_state.alg_cstate); - - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); - struct sha512_hash_ctx *sha_ctx; - int ret = 0, flag = HASH_UPDATE, nbytes; - - /* sanity check */ - if (rctx->tag.cpu != smp_processor_id()) { - pr_err("mcryptd error: cpu clash\n"); - goto done; - } - - /* need to init context */ - req_ctx_init(rctx, areq); - - nbytes = crypto_ahash_walk_first(req, &rctx->walk); - - if (nbytes < 0) { - ret = nbytes; - goto done; - } - - if (crypto_ahash_walk_last(&rctx->walk)) { - rctx->flag |= HASH_DONE; - flag = HASH_LAST; - } - - /* submit */ - rctx->flag |= HASH_FINAL; - sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); - sha512_mb_add_list(rctx, cstate); - - kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, - nbytes, flag); - kernel_fpu_end(); - - /* check if anything is returned */ - if (!sha_ctx) - return -EINPROGRESS; - - if (sha_ctx->error) { - ret = sha_ctx->error; - goto done; - } - - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&rctx, cstate, false); - if (!rctx) - return -EINPROGRESS; -done: - sha_complete_job(rctx, cstate, ret); - return ret; -} - -static int sha512_mb_final(struct ahash_request *areq) -{ - struct mcryptd_hash_request_ctx *rctx = - container_of(areq, struct mcryptd_hash_request_ctx, - areq); - struct mcryptd_alg_cstate *cstate = - this_cpu_ptr(sha512_mb_alg_state.alg_cstate); - - struct sha512_hash_ctx *sha_ctx; - int ret = 0; - u8 data; - - /* sanity check */ - if (rctx->tag.cpu != smp_processor_id()) { - pr_err("mcryptd error: cpu clash\n"); - goto done; - } - - /* need to init context */ - req_ctx_init(rctx, areq); - - rctx->flag |= HASH_DONE | HASH_FINAL; - - sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); - /* flag HASH_FINAL and 0 data size */ - sha512_mb_add_list(rctx, cstate); - kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST); - kernel_fpu_end(); - - /* check if anything is returned */ - if (!sha_ctx) - return -EINPROGRESS; - - if (sha_ctx->error) { - ret = sha_ctx->error; - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - goto done; - } - - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&rctx, cstate, false); - if (!rctx) - return -EINPROGRESS; -done: - sha_complete_job(rctx, cstate, ret); - return ret; -} - -static int sha512_mb_export(struct ahash_request *areq, void *out) -{ - struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); - - memcpy(out, sctx, sizeof(*sctx)); - - return 0; -} - -static int sha512_mb_import(struct ahash_request *areq, const void *in) -{ - struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); - - memcpy(sctx, in, sizeof(*sctx)); - - return 0; -} - -static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm) -{ - struct mcryptd_ahash *mcryptd_tfm; - struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); - struct mcryptd_hash_ctx *mctx; - - mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb", - CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(mcryptd_tfm)) - return PTR_ERR(mcryptd_tfm); - mctx = crypto_ahash_ctx(&mcryptd_tfm->base); - mctx->alg_state = &sha512_mb_alg_state; - ctx->mcryptd_tfm = mcryptd_tfm; - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct ahash_request) + - crypto_ahash_reqsize(&mcryptd_tfm->base)); - - return 0; -} - -static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm) -{ - struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); - - mcryptd_free_ahash(ctx->mcryptd_tfm); -} - -static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm) -{ - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct ahash_request) + - sizeof(struct sha512_hash_ctx)); - - return 0; -} - -static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm) -{ - struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); - - mcryptd_free_ahash(ctx->mcryptd_tfm); -} - -static struct ahash_alg sha512_mb_areq_alg = { - .init = sha512_mb_init, - .update = sha512_mb_update, - .final = sha512_mb_final, - .finup = sha512_mb_finup, - .export = sha512_mb_export, - .import = sha512_mb_import, - .halg = { - .digestsize = SHA512_DIGEST_SIZE, - .statesize = sizeof(struct sha512_hash_ctx), - .base = { - .cra_name = "__sha512-mb", - .cra_driver_name = "__intel_sha512-mb", - .cra_priority = 100, - /* - * use ASYNC flag as some buffers in multi-buffer - * algo may not have completed before hashing thread - * sleep - */ - .cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SHA512_BLOCK_SIZE, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT - (sha512_mb_areq_alg.halg.base.cra_list), - .cra_init = sha512_mb_areq_init_tfm, - .cra_exit = sha512_mb_areq_exit_tfm, - .cra_ctxsize = sizeof(struct sha512_hash_ctx), - } - } -}; - -static int sha512_mb_async_init(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_init(mcryptd_req); -} - -static int sha512_mb_async_update(struct ahash_request *req) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_update(mcryptd_req); -} - -static int sha512_mb_async_finup(struct ahash_request *req) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_finup(mcryptd_req); -} - -static int sha512_mb_async_final(struct ahash_request *req) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_final(mcryptd_req); -} - -static int sha512_mb_async_digest(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_digest(mcryptd_req); -} - -static int sha512_mb_async_export(struct ahash_request *req, void *out) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_export(mcryptd_req, out); -} - -static int sha512_mb_async_import(struct ahash_request *req, const void *in) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); - struct mcryptd_hash_request_ctx *rctx; - struct ahash_request *areq; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - rctx = ahash_request_ctx(mcryptd_req); - - areq = &rctx->areq; - - ahash_request_set_tfm(areq, child); - ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, - rctx->complete, req); - - return crypto_ahash_import(mcryptd_req, in); -} - -static struct ahash_alg sha512_mb_async_alg = { - .init = sha512_mb_async_init, - .update = sha512_mb_async_update, - .final = sha512_mb_async_final, - .finup = sha512_mb_async_finup, - .digest = sha512_mb_async_digest, - .export = sha512_mb_async_export, - .import = sha512_mb_async_import, - .halg = { - .digestsize = SHA512_DIGEST_SIZE, - .statesize = sizeof(struct sha512_hash_ctx), - .base = { - .cra_name = "sha512", - .cra_driver_name = "sha512_mb", - /* - * Low priority, since with few concurrent hash requests - * this is extremely slow due to the flush delay. Users - * whose workloads would benefit from this can request - * it explicitly by driver name, or can increase its - * priority at runtime using NETLINK_CRYPTO. - */ - .cra_priority = 50, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = SHA512_BLOCK_SIZE, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT - (sha512_mb_async_alg.halg.base.cra_list), - .cra_init = sha512_mb_async_init_tfm, - .cra_exit = sha512_mb_async_exit_tfm, - .cra_ctxsize = sizeof(struct sha512_mb_ctx), - .cra_alignmask = 0, - }, - }, -}; - -static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate) -{ - struct mcryptd_hash_request_ctx *rctx; - unsigned long cur_time; - unsigned long next_flush = 0; - struct sha512_hash_ctx *sha_ctx; - - - cur_time = jiffies; - - while (!list_empty(&cstate->work_list)) { - rctx = list_entry(cstate->work_list.next, - struct mcryptd_hash_request_ctx, waiter); - if time_before(cur_time, rctx->tag.expire) - break; - kernel_fpu_begin(); - sha_ctx = (struct sha512_hash_ctx *) - sha512_ctx_mgr_flush(cstate); - kernel_fpu_end(); - if (!sha_ctx) { - pr_err("sha512_mb error: nothing got flushed for" - " non-empty list\n"); - break; - } - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - sha_finish_walk(&rctx, cstate, true); - sha_complete_job(rctx, cstate, 0); - } - - if (!list_empty(&cstate->work_list)) { - rctx = list_entry(cstate->work_list.next, - struct mcryptd_hash_request_ctx, waiter); - /* get the hash context and then flush time */ - next_flush = rctx->tag.expire; - mcryptd_arm_flusher(cstate, get_delay(next_flush)); - } - return next_flush; -} - -static int __init sha512_mb_mod_init(void) -{ - - int cpu; - int err; - struct mcryptd_alg_cstate *cpu_state; - - /* check for dependent cpu features */ - if (!boot_cpu_has(X86_FEATURE_AVX2) || - !boot_cpu_has(X86_FEATURE_BMI2)) - return -ENODEV; - - /* initialize multibuffer structures */ - sha512_mb_alg_state.alg_cstate = - alloc_percpu(struct mcryptd_alg_cstate); - - sha512_job_mgr_init = sha512_mb_mgr_init_avx2; - sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2; - sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2; - sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2; - - if (!sha512_mb_alg_state.alg_cstate) - return -ENOMEM; - for_each_possible_cpu(cpu) { - cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); - cpu_state->next_flush = 0; - cpu_state->next_seq_num = 0; - cpu_state->flusher_engaged = false; - INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); - cpu_state->cpu = cpu; - cpu_state->alg_state = &sha512_mb_alg_state; - cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr), - GFP_KERNEL); - if (!cpu_state->mgr) - goto err2; - sha512_ctx_mgr_init(cpu_state->mgr); - INIT_LIST_HEAD(&cpu_state->work_list); - spin_lock_init(&cpu_state->work_lock); - } - sha512_mb_alg_state.flusher = &sha512_mb_flusher; - - err = crypto_register_ahash(&sha512_mb_areq_alg); - if (err) - goto err2; - err = crypto_register_ahash(&sha512_mb_async_alg); - if (err) - goto err1; - - - return 0; -err1: - crypto_unregister_ahash(&sha512_mb_areq_alg); -err2: - for_each_possible_cpu(cpu) { - cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); - kfree(cpu_state->mgr); - } - free_percpu(sha512_mb_alg_state.alg_cstate); - return -ENODEV; -} - -static void __exit sha512_mb_mod_fini(void) -{ - int cpu; - struct mcryptd_alg_cstate *cpu_state; - - crypto_unregister_ahash(&sha512_mb_async_alg); - crypto_unregister_ahash(&sha512_mb_areq_alg); - for_each_possible_cpu(cpu) { - cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); - kfree(cpu_state->mgr); - } - free_percpu(sha512_mb_alg_state.alg_cstate); -} - -module_init(sha512_mb_mod_init); -module_exit(sha512_mb_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated"); - -MODULE_ALIAS("sha512"); diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h deleted file mode 100644 index e5c465bd821e..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Header file for multi buffer SHA512 context - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _SHA_MB_CTX_INTERNAL_H -#define _SHA_MB_CTX_INTERNAL_H - -#include "sha512_mb_mgr.h" - -#define HASH_UPDATE 0x00 -#define HASH_LAST 0x01 -#define HASH_DONE 0x02 -#define HASH_FINAL 0x04 - -#define HASH_CTX_STS_IDLE 0x00 -#define HASH_CTX_STS_PROCESSING 0x01 -#define HASH_CTX_STS_LAST 0x02 -#define HASH_CTX_STS_COMPLETE 0x04 - -enum hash_ctx_error { - HASH_CTX_ERROR_NONE = 0, - HASH_CTX_ERROR_INVALID_FLAGS = -1, - HASH_CTX_ERROR_ALREADY_PROCESSING = -2, - HASH_CTX_ERROR_ALREADY_COMPLETED = -3, -}; - -#define hash_ctx_user_data(ctx) ((ctx)->user_data) -#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) -#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) -#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) -#define hash_ctx_status(ctx) ((ctx)->status) -#define hash_ctx_error(ctx) ((ctx)->error) -#define hash_ctx_init(ctx) \ - do { \ - (ctx)->error = HASH_CTX_ERROR_NONE; \ - (ctx)->status = HASH_CTX_STS_COMPLETE; \ - } while (0) - -/* Hash Constants and Typedefs */ -#define SHA512_DIGEST_LENGTH 8 -#define SHA512_LOG2_BLOCK_SIZE 7 - -#define SHA512_PADLENGTHFIELD_SIZE 16 - -#ifdef SHA_MB_DEBUG -#define assert(expr) \ -do { \ - if (unlikely(!(expr))) { \ - printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ - #expr, __FILE__, __func__, __LINE__); \ - } \ -} while (0) -#else -#define assert(expr) do {} while (0) -#endif - -struct sha512_ctx_mgr { - struct sha512_mb_mgr mgr; -}; - -/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */ - -struct sha512_hash_ctx { - /* Must be at struct offset 0 */ - struct job_sha512 job; - /* status flag */ - int status; - /* error flag */ - int error; - - uint64_t total_length; - const void *incoming_buffer; - uint32_t incoming_buffer_length; - uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2]; - uint32_t partial_block_buffer_length; - void *user_data; -}; - -#endif diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h deleted file mode 100644 index 178f17eef382..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Header file for multi buffer SHA512 algorithm manager - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __SHA_MB_MGR_H -#define __SHA_MB_MGR_H - -#include <linux/types.h> - -#define NUM_SHA512_DIGEST_WORDS 8 - -enum job_sts {STS_UNKNOWN = 0, - STS_BEING_PROCESSED = 1, - STS_COMPLETED = 2, - STS_INTERNAL_ERROR = 3, - STS_ERROR = 4 -}; - -struct job_sha512 { - u8 *buffer; - u64 len; - u64 result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32); - enum job_sts status; - void *user_data; -}; - -struct sha512_args_x4 { - uint64_t digest[8][4]; - uint8_t *data_ptr[4]; -}; - -struct sha512_lane_data { - struct job_sha512 *job_in_lane; -}; - -struct sha512_mb_mgr { - struct sha512_args_x4 args; - - uint64_t lens[4]; - - /* each byte is index (0...7) of unused lanes */ - uint64_t unused_lanes; - /* byte 4 is set to FF as a flag */ - struct sha512_lane_data ldata[4]; -}; - -#define SHA512_MB_MGR_NUM_LANES_AVX2 4 - -void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state); -struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state, - struct job_sha512 *job); -struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state); -struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state); - -#endif diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S deleted file mode 100644 index cf2636d4c9ba..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Header file for multi buffer SHA256 algorithm data structure - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -# Macros for defining data structures - -# Usage example - -#START_FIELDS # JOB_AES -### name size align -#FIELD _plaintext, 8, 8 # pointer to plaintext -#FIELD _ciphertext, 8, 8 # pointer to ciphertext -#FIELD _IV, 16, 8 # IV -#FIELD _keys, 8, 8 # pointer to keys -#FIELD _len, 4, 4 # length in bytes -#FIELD _status, 4, 4 # status enumeration -#FIELD _user_data, 8, 8 # pointer to user data -#UNION _union, size1, align1, \ -# size2, align2, \ -# size3, align3, \ -# ... -#END_FIELDS -#%assign _JOB_AES_size _FIELD_OFFSET -#%assign _JOB_AES_align _STRUCT_ALIGN - -######################################################################### - -# Alternate "struc-like" syntax: -# STRUCT job_aes2 -# RES_Q .plaintext, 1 -# RES_Q .ciphertext, 1 -# RES_DQ .IV, 1 -# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN -# RES_U .union, size1, align1, \ -# size2, align2, \ -# ... -# ENDSTRUCT -# # Following only needed if nesting -# %assign job_aes2_size _FIELD_OFFSET -# %assign job_aes2_align _STRUCT_ALIGN -# -# RES_* macros take a name, a count and an optional alignment. -# The count in in terms of the base size of the macro, and the -# default alignment is the base size. -# The macros are: -# Macro Base size -# RES_B 1 -# RES_W 2 -# RES_D 4 -# RES_Q 8 -# RES_DQ 16 -# RES_Y 32 -# RES_Z 64 -# -# RES_U defines a union. It's arguments are a name and two or more -# pairs of "size, alignment" -# -# The two assigns are only needed if this structure is being nested -# within another. Even if the assigns are not done, one can still use -# STRUCT_NAME_size as the size of the structure. -# -# Note that for nesting, you still need to assign to STRUCT_NAME_size. -# -# The differences between this and using "struc" directly are that each -# type is implicitly aligned to its natural length (although this can be -# over-ridden with an explicit third parameter), and that the structure -# is padded at the end to its overall alignment. -# - -######################################################################### - -#ifndef _DATASTRUCT_ASM_ -#define _DATASTRUCT_ASM_ - -#define PTR_SZ 8 -#define SHA512_DIGEST_WORD_SIZE 8 -#define SHA512_MB_MGR_NUM_LANES_AVX2 4 -#define NUM_SHA512_DIGEST_WORDS 8 -#define SZ4 4*SHA512_DIGEST_WORD_SIZE -#define ROUNDS 80*SZ4 -#define SHA512_DIGEST_ROW_SIZE (SHA512_MB_MGR_NUM_LANES_AVX2 * 8) - -# START_FIELDS -.macro START_FIELDS - _FIELD_OFFSET = 0 - _STRUCT_ALIGN = 0 -.endm - -# FIELD name size align -.macro FIELD name size align - _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) - \name = _FIELD_OFFSET - _FIELD_OFFSET = _FIELD_OFFSET + (\size) -.if (\align > _STRUCT_ALIGN) - _STRUCT_ALIGN = \align -.endif -.endm - -# END_FIELDS -.macro END_FIELDS - _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) -.endm - -.macro STRUCT p1 -START_FIELDS -.struc \p1 -.endm - -.macro ENDSTRUCT - tmp = _FIELD_OFFSET - END_FIELDS - tmp = (_FIELD_OFFSET - ##tmp) -.if (tmp > 0) - .lcomm tmp -.endm - -## RES_int name size align -.macro RES_int p1 p2 p3 - name = \p1 - size = \p2 - align = .\p3 - - _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) -.align align -.lcomm name size - _FIELD_OFFSET = _FIELD_OFFSET + (size) -.if (align > _STRUCT_ALIGN) - _STRUCT_ALIGN = align -.endif -.endm - -# macro RES_B name, size [, align] -.macro RES_B _name, _size, _align=1 -RES_int _name _size _align -.endm - -# macro RES_W name, size [, align] -.macro RES_W _name, _size, _align=2 -RES_int _name 2*(_size) _align -.endm - -# macro RES_D name, size [, align] -.macro RES_D _name, _size, _align=4 -RES_int _name 4*(_size) _align -.endm - -# macro RES_Q name, size [, align] -.macro RES_Q _name, _size, _align=8 -RES_int _name 8*(_size) _align -.endm - -# macro RES_DQ name, size [, align] -.macro RES_DQ _name, _size, _align=16 -RES_int _name 16*(_size) _align -.endm - -# macro RES_Y name, size [, align] -.macro RES_Y _name, _size, _align=32 -RES_int _name 32*(_size) _align -.endm - -# macro RES_Z name, size [, align] -.macro RES_Z _name, _size, _align=64 -RES_int _name 64*(_size) _align -.endm - -#endif - -################################################################### -### Define SHA512 Out Of Order Data Structures -################################################################### - -START_FIELDS # LANE_DATA -### name size align -FIELD _job_in_lane, 8, 8 # pointer to job object -END_FIELDS - - _LANE_DATA_size = _FIELD_OFFSET - _LANE_DATA_align = _STRUCT_ALIGN - -#################################################################### - -START_FIELDS # SHA512_ARGS_X4 -### name size align -FIELD _digest, 8*8*4, 4 # transposed digest -FIELD _data_ptr, 8*4, 8 # array of pointers to data -END_FIELDS - - _SHA512_ARGS_X4_size = _FIELD_OFFSET - _SHA512_ARGS_X4_align = _STRUCT_ALIGN - -##################################################################### - -START_FIELDS # MB_MGR -### name size align -FIELD _args, _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align -FIELD _lens, 8*4, 8 -FIELD _unused_lanes, 8, 8 -FIELD _ldata, _LANE_DATA_size*4, _LANE_DATA_align -END_FIELDS - - _MB_MGR_size = _FIELD_OFFSET - _MB_MGR_align = _STRUCT_ALIGN - -_args_digest = _args + _digest -_args_data_ptr = _args + _data_ptr - -####################################################################### - -####################################################################### -#### Define constants -####################################################################### - -#define STS_UNKNOWN 0 -#define STS_BEING_PROCESSED 1 -#define STS_COMPLETED 2 - -####################################################################### -#### Define JOB_SHA512 structure -####################################################################### - -START_FIELDS # JOB_SHA512 -### name size align -FIELD _buffer, 8, 8 # pointer to buffer -FIELD _len, 8, 8 # length in bytes -FIELD _result_digest, 8*8, 32 # Digest (output) -FIELD _status, 4, 4 -FIELD _user_data, 8, 8 -END_FIELDS - - _JOB_SHA512_size = _FIELD_OFFSET - _JOB_SHA512_align = _STRUCT_ALIGN diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S deleted file mode 100644 index 7c629caebc05..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S +++ /dev/null @@ -1,297 +0,0 @@ -/* - * Flush routine for SHA512 multibuffer - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/linkage.h> -#include <asm/frame.h> -#include "sha512_mb_mgr_datastruct.S" - -.extern sha512_x4_avx2 - -# LINUX register definitions -#define arg1 %rdi -#define arg2 %rsi - -# idx needs to be other than arg1, arg2, rbx, r12 -#define idx %rdx - -# Common definitions -#define state arg1 -#define job arg2 -#define len2 arg2 - -#define unused_lanes %rbx -#define lane_data %rbx -#define tmp2 %rbx - -#define job_rax %rax -#define tmp1 %rax -#define size_offset %rax -#define tmp %rax -#define start_offset %rax - -#define tmp3 arg1 - -#define extra_blocks arg2 -#define p arg2 - -#define tmp4 %r8 -#define lens0 %r8 - -#define lens1 %r9 -#define lens2 %r10 -#define lens3 %r11 - -.macro LABEL prefix n -\prefix\n\(): -.endm - -.macro JNE_SKIP i -jne skip_\i -.endm - -.altmacro -.macro SET_OFFSET _offset -offset = \_offset -.endm -.noaltmacro - -# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state) -# arg 1 : rcx : state -ENTRY(sha512_mb_mgr_flush_avx2) - FRAME_BEGIN - push %rbx - - # If bit (32+3) is set, then all lanes are empty - mov _unused_lanes(state), unused_lanes - bt $32+7, unused_lanes - jc return_null - - # find a lane with a non-null job - xor idx, idx - offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne one(%rip), idx - offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne two(%rip), idx - offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne three(%rip), idx - - # copy idx to empty lanes -copy_lane_data: - offset = (_args + _data_ptr) - mov offset(state,idx,8), tmp - - I = 0 -.rep 4 - offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) -.altmacro - JNE_SKIP %I - offset = (_args + _data_ptr + 8*I) - mov tmp, offset(state) - offset = (_lens + 8*I +4) - movl $0xFFFFFFFF, offset(state) -LABEL skip_ %I - I = (I+1) -.noaltmacro -.endr - - # Find min length - mov _lens + 0*8(state),lens0 - mov lens0,idx - mov _lens + 1*8(state),lens1 - cmp idx,lens1 - cmovb lens1,idx - mov _lens + 2*8(state),lens2 - cmp idx,lens2 - cmovb lens2,idx - mov _lens + 3*8(state),lens3 - cmp idx,lens3 - cmovb lens3,idx - mov idx,len2 - and $0xF,idx - and $~0xFF,len2 - jz len_is_0 - - sub len2, lens0 - sub len2, lens1 - sub len2, lens2 - sub len2, lens3 - shr $32,len2 - mov lens0, _lens + 0*8(state) - mov lens1, _lens + 1*8(state) - mov lens2, _lens + 2*8(state) - mov lens3, _lens + 3*8(state) - - # "state" and "args" are the same address, arg1 - # len is arg2 - call sha512_x4_avx2 - # state and idx are intact - -len_is_0: - # process completed job "idx" - imul $_LANE_DATA_size, idx, lane_data - lea _ldata(state, lane_data), lane_data - - mov _job_in_lane(lane_data), job_rax - movq $0, _job_in_lane(lane_data) - movl $STS_COMPLETED, _status(job_rax) - mov _unused_lanes(state), unused_lanes - shl $8, unused_lanes - or idx, unused_lanes - mov unused_lanes, _unused_lanes(state) - - movl $0xFFFFFFFF, _lens+4(state, idx, 8) - - vmovq _args_digest+0*32(state, idx, 8), %xmm0 - vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0 - vmovq _args_digest+2*32(state, idx, 8), %xmm1 - vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1 - vmovq _args_digest+4*32(state, idx, 8), %xmm2 - vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2 - vmovq _args_digest+6*32(state, idx, 8), %xmm3 - vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3 - - vmovdqu %xmm0, _result_digest(job_rax) - vmovdqu %xmm1, _result_digest+1*16(job_rax) - vmovdqu %xmm2, _result_digest+2*16(job_rax) - vmovdqu %xmm3, _result_digest+3*16(job_rax) - -return: - pop %rbx - FRAME_END - ret - -return_null: - xor job_rax, job_rax - jmp return -ENDPROC(sha512_mb_mgr_flush_avx2) -.align 16 - -ENTRY(sha512_mb_mgr_get_comp_job_avx2) - push %rbx - - mov _unused_lanes(state), unused_lanes - bt $(32+7), unused_lanes - jc .return_null - - # Find min length - mov _lens(state),lens0 - mov lens0,idx - mov _lens+1*8(state),lens1 - cmp idx,lens1 - cmovb lens1,idx - mov _lens+2*8(state),lens2 - cmp idx,lens2 - cmovb lens2,idx - mov _lens+3*8(state),lens3 - cmp idx,lens3 - cmovb lens3,idx - test $~0xF,idx - jnz .return_null - and $0xF,idx - - #process completed job "idx" - imul $_LANE_DATA_size, idx, lane_data - lea _ldata(state, lane_data), lane_data - - mov _job_in_lane(lane_data), job_rax - movq $0, _job_in_lane(lane_data) - movl $STS_COMPLETED, _status(job_rax) - mov _unused_lanes(state), unused_lanes - shl $8, unused_lanes - or idx, unused_lanes - mov unused_lanes, _unused_lanes(state) - - movl $0xFFFFFFFF, _lens+4(state, idx, 8) - - vmovq _args_digest(state, idx, 8), %xmm0 - vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0 - vmovq _args_digest+2*32(state, idx, 8), %xmm1 - vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1 - vmovq _args_digest+4*32(state, idx, 8), %xmm2 - vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2 - vmovq _args_digest+6*32(state, idx, 8), %xmm3 - vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3 - - vmovdqu %xmm0, _result_digest+0*16(job_rax) - vmovdqu %xmm1, _result_digest+1*16(job_rax) - vmovdqu %xmm2, _result_digest+2*16(job_rax) - vmovdqu %xmm3, _result_digest+3*16(job_rax) - - pop %rbx - - ret - -.return_null: - xor job_rax, job_rax - pop %rbx - ret -ENDPROC(sha512_mb_mgr_get_comp_job_avx2) - -.section .rodata.cst8.one, "aM", @progbits, 8 -.align 8 -one: -.quad 1 - -.section .rodata.cst8.two, "aM", @progbits, 8 -.align 8 -two: -.quad 2 - -.section .rodata.cst8.three, "aM", @progbits, 8 -.align 8 -three: -.quad 3 diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c deleted file mode 100644 index d08805032f01..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Initialization code for multi buffer SHA256 algorithm for AVX2 - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "sha512_mb_mgr.h" - -void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state) -{ - unsigned int j; - - /* initially all lanes are unused */ - state->lens[0] = 0xFFFFFFFF00000000; - state->lens[1] = 0xFFFFFFFF00000001; - state->lens[2] = 0xFFFFFFFF00000002; - state->lens[3] = 0xFFFFFFFF00000003; - - state->unused_lanes = 0xFF03020100; - for (j = 0; j < 4; j++) - state->ldata[j].job_in_lane = NULL; -} diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S deleted file mode 100644 index 4ba709ba78e5..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Buffer submit code for multi buffer SHA512 algorithm - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/linkage.h> -#include <asm/frame.h> -#include "sha512_mb_mgr_datastruct.S" - -.extern sha512_x4_avx2 - -#define arg1 %rdi -#define arg2 %rsi - -#define idx %rdx -#define last_len %rdx - -#define size_offset %rcx -#define tmp2 %rcx - -# Common definitions -#define state arg1 -#define job arg2 -#define len2 arg2 -#define p2 arg2 - -#define p %r11 -#define start_offset %r11 - -#define unused_lanes %rbx - -#define job_rax %rax -#define len %rax - -#define lane %r12 -#define tmp3 %r12 -#define lens3 %r12 - -#define extra_blocks %r8 -#define lens0 %r8 - -#define tmp %r9 -#define lens1 %r9 - -#define lane_data %r10 -#define lens2 %r10 - -#define DWORD_len %eax - -# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job) -# arg 1 : rcx : state -# arg 2 : rdx : job -ENTRY(sha512_mb_mgr_submit_avx2) - FRAME_BEGIN - push %rbx - push %r12 - - mov _unused_lanes(state), unused_lanes - movzb %bl,lane - shr $8, unused_lanes - imul $_LANE_DATA_size, lane,lane_data - movl $STS_BEING_PROCESSED, _status(job) - lea _ldata(state, lane_data), lane_data - mov unused_lanes, _unused_lanes(state) - movl _len(job), DWORD_len - - mov job, _job_in_lane(lane_data) - movl DWORD_len,_lens+4(state , lane, 8) - - # Load digest words from result_digest - vmovdqu _result_digest+0*16(job), %xmm0 - vmovdqu _result_digest+1*16(job), %xmm1 - vmovdqu _result_digest+2*16(job), %xmm2 - vmovdqu _result_digest+3*16(job), %xmm3 - - vmovq %xmm0, _args_digest(state, lane, 8) - vpextrq $1, %xmm0, _args_digest+1*32(state , lane, 8) - vmovq %xmm1, _args_digest+2*32(state , lane, 8) - vpextrq $1, %xmm1, _args_digest+3*32(state , lane, 8) - vmovq %xmm2, _args_digest+4*32(state , lane, 8) - vpextrq $1, %xmm2, _args_digest+5*32(state , lane, 8) - vmovq %xmm3, _args_digest+6*32(state , lane, 8) - vpextrq $1, %xmm3, _args_digest+7*32(state , lane, 8) - - mov _buffer(job), p - mov p, _args_data_ptr(state, lane, 8) - - cmp $0xFF, unused_lanes - jne return_null - -start_loop: - - # Find min length - mov _lens+0*8(state),lens0 - mov lens0,idx - mov _lens+1*8(state),lens1 - cmp idx,lens1 - cmovb lens1, idx - mov _lens+2*8(state),lens2 - cmp idx,lens2 - cmovb lens2,idx - mov _lens+3*8(state),lens3 - cmp idx,lens3 - cmovb lens3,idx - mov idx,len2 - and $0xF,idx - and $~0xFF,len2 - jz len_is_0 - - sub len2,lens0 - sub len2,lens1 - sub len2,lens2 - sub len2,lens3 - shr $32,len2 - mov lens0, _lens + 0*8(state) - mov lens1, _lens + 1*8(state) - mov lens2, _lens + 2*8(state) - mov lens3, _lens + 3*8(state) - - # "state" and "args" are the same address, arg1 - # len is arg2 - call sha512_x4_avx2 - # state and idx are intact - -len_is_0: - - # process completed job "idx" - imul $_LANE_DATA_size, idx, lane_data - lea _ldata(state, lane_data), lane_data - - mov _job_in_lane(lane_data), job_rax - mov _unused_lanes(state), unused_lanes - movq $0, _job_in_lane(lane_data) - movl $STS_COMPLETED, _status(job_rax) - shl $8, unused_lanes - or idx, unused_lanes - mov unused_lanes, _unused_lanes(state) - - movl $0xFFFFFFFF,_lens+4(state,idx,8) - vmovq _args_digest+0*32(state , idx, 8), %xmm0 - vpinsrq $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0 - vmovq _args_digest+2*32(state , idx, 8), %xmm1 - vpinsrq $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1 - vmovq _args_digest+4*32(state , idx, 8), %xmm2 - vpinsrq $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2 - vmovq _args_digest+6*32(state , idx, 8), %xmm3 - vpinsrq $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3 - - vmovdqu %xmm0, _result_digest + 0*16(job_rax) - vmovdqu %xmm1, _result_digest + 1*16(job_rax) - vmovdqu %xmm2, _result_digest + 2*16(job_rax) - vmovdqu %xmm3, _result_digest + 3*16(job_rax) - -return: - pop %r12 - pop %rbx - FRAME_END - ret - -return_null: - xor job_rax, job_rax - jmp return -ENDPROC(sha512_mb_mgr_submit_avx2) - -/* UNUSED? -.section .rodata.cst16, "aM", @progbits, 16 -.align 16 -H0: .int 0x6a09e667 -H1: .int 0xbb67ae85 -H2: .int 0x3c6ef372 -H3: .int 0xa54ff53a -H4: .int 0x510e527f -H5: .int 0x9b05688c -H6: .int 0x1f83d9ab -H7: .int 0x5be0cd19 -*/ diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S deleted file mode 100644 index e22e907643a6..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S +++ /dev/null @@ -1,531 +0,0 @@ -/* - * Multi-buffer SHA512 algorithm hash compute routine - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -# code to compute quad SHA512 using AVX2 -# use YMMs to tackle the larger digest size -# outer calling routine takes care of save and restore of XMM registers -# Logic designed/laid out by JDG - -# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15 -# Stack must be aligned to 32 bytes before call -# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12 -# Linux preserves: rcx rdx rdi rbp r13 r14 r15 -# clobbers ymm0-15 - -#include <linux/linkage.h> -#include "sha512_mb_mgr_datastruct.S" - -arg1 = %rdi -arg2 = %rsi - -# Common definitions -STATE = arg1 -INP_SIZE = arg2 - -IDX = %rax -ROUND = %rbx -TBL = %r8 - -inp0 = %r9 -inp1 = %r10 -inp2 = %r11 -inp3 = %r12 - -a = %ymm0 -b = %ymm1 -c = %ymm2 -d = %ymm3 -e = %ymm4 -f = %ymm5 -g = %ymm6 -h = %ymm7 - -a0 = %ymm8 -a1 = %ymm9 -a2 = %ymm10 - -TT0 = %ymm14 -TT1 = %ymm13 -TT2 = %ymm12 -TT3 = %ymm11 -TT4 = %ymm10 -TT5 = %ymm9 - -T1 = %ymm14 -TMP = %ymm15 - -# Define stack usage -STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24 - -#define VMOVPD vmovupd -_digest = SZ4*16 - -# transpose r0, r1, r2, r3, t0, t1 -# "transpose" data in {r0..r3} using temps {t0..t3} -# Input looks like: {r0 r1 r2 r3} -# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} -# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} -# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} -# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} -# -# output looks like: {t0 r1 r0 r3} -# t0 = {d1 d0 c1 c0 b1 b0 a1 a0} -# r1 = {d3 d2 c3 c2 b3 b2 a3 a2} -# r0 = {d5 d4 c5 c4 b5 b4 a5 a4} -# r3 = {d7 d6 c7 c6 b7 b6 a7 a6} - -.macro TRANSPOSE r0 r1 r2 r3 t0 t1 - vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} - vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} - vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} - vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} - - vperm2f128 $0x20, \r2, \r0, \r1 # h6...a6 - vperm2f128 $0x31, \r2, \r0, \r3 # h2...a2 - vperm2f128 $0x31, \t1, \t0, \r0 # h5...a5 - vperm2f128 $0x20, \t1, \t0, \t0 # h1...a1 -.endm - -.macro ROTATE_ARGS -TMP_ = h -h = g -g = f -f = e -e = d -d = c -c = b -b = a -a = TMP_ -.endm - -# PRORQ reg, imm, tmp -# packed-rotate-right-double -# does a rotate by doing two shifts and an or -.macro _PRORQ reg imm tmp - vpsllq $(64-\imm),\reg,\tmp - vpsrlq $\imm,\reg, \reg - vpor \tmp,\reg, \reg -.endm - -# non-destructive -# PRORQ_nd reg, imm, tmp, src -.macro _PRORQ_nd reg imm tmp src - vpsllq $(64-\imm), \src, \tmp - vpsrlq $\imm, \src, \reg - vpor \tmp, \reg, \reg -.endm - -# PRORQ dst/src, amt -.macro PRORQ reg imm - _PRORQ \reg, \imm, TMP -.endm - -# PRORQ_nd dst, src, amt -.macro PRORQ_nd reg tmp imm - _PRORQ_nd \reg, \imm, TMP, \tmp -.endm - -#; arguments passed implicitly in preprocessor symbols i, a...h -.macro ROUND_00_15 _T1 i - PRORQ_nd a0, e, (18-14) # sig1: a0 = (e >> 4) - - vpxor g, f, a2 # ch: a2 = f^g - vpand e,a2, a2 # ch: a2 = (f^g)&e - vpxor g, a2, a2 # a2 = ch - - PRORQ_nd a1,e,41 # sig1: a1 = (e >> 25) - - offset = SZ4*(\i & 0xf) - vmovdqu \_T1,offset(%rsp) - vpaddq (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K - vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5) - PRORQ a0, 14 # sig1: a0 = (e >> 6) ^ (e >> 11) - vpaddq a2, h, h # h = h + ch - PRORQ_nd a2,a,6 # sig0: a2 = (a >> 11) - vpaddq \_T1,h, h # h = h + ch + W + K - vpxor a1, a0, a0 # a0 = sigma1 - vmovdqu a,\_T1 - PRORQ_nd a1,a,39 # sig0: a1 = (a >> 22) - vpxor c, \_T1, \_T1 # maj: T1 = a^c - add $SZ4, ROUND # ROUND++ - vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b - vpaddq a0, h, h - vpaddq h, d, d - vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11) - PRORQ a2,28 # sig0: a2 = (a >> 2) ^ (a >> 13) - vpxor a1, a2, a2 # a2 = sig0 - vpand c, a, a1 # maj: a1 = a&c - vpor \_T1, a1, a1 # a1 = maj - vpaddq a1, h, h # h = h + ch + W + K + maj - vpaddq a2, h, h # h = h + ch + W + K + maj + sigma0 - ROTATE_ARGS -.endm - - -#; arguments passed implicitly in preprocessor symbols i, a...h -.macro ROUND_16_XX _T1 i - vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1 - vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1 - vmovdqu \_T1, a0 - PRORQ \_T1,7 - vmovdqu a1, a2 - PRORQ a1,42 - vpxor a0, \_T1, \_T1 - PRORQ \_T1, 1 - vpxor a2, a1, a1 - PRORQ a1, 19 - vpsrlq $7, a0, a0 - vpxor a0, \_T1, \_T1 - vpsrlq $6, a2, a2 - vpxor a2, a1, a1 - vpaddq SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1 - vpaddq SZ4*((\i-7)&0xf)(%rsp), a1, a1 - vpaddq a1, \_T1, \_T1 - - ROUND_00_15 \_T1,\i -.endm - - -# void sha512_x4_avx2(void *STATE, const int INP_SIZE) -# arg 1 : STATE : pointer to input data -# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1) -ENTRY(sha512_x4_avx2) - # general registers preserved in outer calling routine - # outer calling routine saves all the XMM registers - # save callee-saved clobbered registers to comply with C function ABI - push %r12 - push %r13 - push %r14 - push %r15 - - sub $STACK_SPACE1, %rsp - - # Load the pre-transposed incoming digest. - vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a - vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b - vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c - vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d - vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e - vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f - vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g - vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h - - lea K512_4(%rip),TBL - - # load the address of each of the 4 message lanes - # getting ready to transpose input onto stack - mov _data_ptr+0*PTR_SZ(STATE),inp0 - mov _data_ptr+1*PTR_SZ(STATE),inp1 - mov _data_ptr+2*PTR_SZ(STATE),inp2 - mov _data_ptr+3*PTR_SZ(STATE),inp3 - - xor IDX, IDX -lloop: - xor ROUND, ROUND - - # save old digest - vmovdqu a, _digest(%rsp) - vmovdqu b, _digest+1*SZ4(%rsp) - vmovdqu c, _digest+2*SZ4(%rsp) - vmovdqu d, _digest+3*SZ4(%rsp) - vmovdqu e, _digest+4*SZ4(%rsp) - vmovdqu f, _digest+5*SZ4(%rsp) - vmovdqu g, _digest+6*SZ4(%rsp) - vmovdqu h, _digest+7*SZ4(%rsp) - i = 0 -.rep 4 - vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP - VMOVPD i*32(inp0, IDX), TT2 - VMOVPD i*32(inp1, IDX), TT1 - VMOVPD i*32(inp2, IDX), TT4 - VMOVPD i*32(inp3, IDX), TT3 - TRANSPOSE TT2, TT1, TT4, TT3, TT0, TT5 - vpshufb TMP, TT0, TT0 - vpshufb TMP, TT1, TT1 - vpshufb TMP, TT2, TT2 - vpshufb TMP, TT3, TT3 - ROUND_00_15 TT0,(i*4+0) - ROUND_00_15 TT1,(i*4+1) - ROUND_00_15 TT2,(i*4+2) - ROUND_00_15 TT3,(i*4+3) - i = (i+1) -.endr - add $128, IDX - - i = (i*4) - - jmp Lrounds_16_xx -.align 16 -Lrounds_16_xx: -.rep 16 - ROUND_16_XX T1, i - i = (i+1) -.endr - cmp $0xa00,ROUND - jb Lrounds_16_xx - - # add old digest - vpaddq _digest(%rsp), a, a - vpaddq _digest+1*SZ4(%rsp), b, b - vpaddq _digest+2*SZ4(%rsp), c, c - vpaddq _digest+3*SZ4(%rsp), d, d - vpaddq _digest+4*SZ4(%rsp), e, e - vpaddq _digest+5*SZ4(%rsp), f, f - vpaddq _digest+6*SZ4(%rsp), g, g - vpaddq _digest+7*SZ4(%rsp), h, h - - sub $1, INP_SIZE # unit is blocks - jne lloop - - # write back to memory (state object) the transposed digest - vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE) - vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE) - vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE) - vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE) - vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE) - vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE) - vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE) - vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE) - - # update input data pointers - add IDX, inp0 - mov inp0, _data_ptr+0*PTR_SZ(STATE) - add IDX, inp1 - mov inp1, _data_ptr+1*PTR_SZ(STATE) - add IDX, inp2 - mov inp2, _data_ptr+2*PTR_SZ(STATE) - add IDX, inp3 - mov inp3, _data_ptr+3*PTR_SZ(STATE) - - #;;;;;;;;;;;;;;; - #; Postamble - add $STACK_SPACE1, %rsp - # restore callee-saved clobbered registers - - pop %r15 - pop %r14 - pop %r13 - pop %r12 - - # outer calling routine restores XMM and other GP registers - ret -ENDPROC(sha512_x4_avx2) - -.section .rodata.K512_4, "a", @progbits -.align 64 -K512_4: - .octa 0x428a2f98d728ae22428a2f98d728ae22,\ - 0x428a2f98d728ae22428a2f98d728ae22 - .octa 0x7137449123ef65cd7137449123ef65cd,\ - 0x7137449123ef65cd7137449123ef65cd - .octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\ - 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f - .octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\ - 0xe9b5dba58189dbbce9b5dba58189dbbc - .octa 0x3956c25bf348b5383956c25bf348b538,\ - 0x3956c25bf348b5383956c25bf348b538 - .octa 0x59f111f1b605d01959f111f1b605d019,\ - 0x59f111f1b605d01959f111f1b605d019 - .octa 0x923f82a4af194f9b923f82a4af194f9b,\ - 0x923f82a4af194f9b923f82a4af194f9b - .octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\ - 0xab1c5ed5da6d8118ab1c5ed5da6d8118 - .octa 0xd807aa98a3030242d807aa98a3030242,\ - 0xd807aa98a3030242d807aa98a3030242 - .octa 0x12835b0145706fbe12835b0145706fbe,\ - 0x12835b0145706fbe12835b0145706fbe - .octa 0x243185be4ee4b28c243185be4ee4b28c,\ - 0x243185be4ee4b28c243185be4ee4b28c - .octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\ - 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2 - .octa 0x72be5d74f27b896f72be5d74f27b896f,\ - 0x72be5d74f27b896f72be5d74f27b896f - .octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\ - 0x80deb1fe3b1696b180deb1fe3b1696b1 - .octa 0x9bdc06a725c712359bdc06a725c71235,\ - 0x9bdc06a725c712359bdc06a725c71235 - .octa 0xc19bf174cf692694c19bf174cf692694,\ - 0xc19bf174cf692694c19bf174cf692694 - .octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\ - 0xe49b69c19ef14ad2e49b69c19ef14ad2 - .octa 0xefbe4786384f25e3efbe4786384f25e3,\ - 0xefbe4786384f25e3efbe4786384f25e3 - .octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\ - 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5 - .octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\ - 0x240ca1cc77ac9c65240ca1cc77ac9c65 - .octa 0x2de92c6f592b02752de92c6f592b0275,\ - 0x2de92c6f592b02752de92c6f592b0275 - .octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\ - 0x4a7484aa6ea6e4834a7484aa6ea6e483 - .octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\ - 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4 - .octa 0x76f988da831153b576f988da831153b5,\ - 0x76f988da831153b576f988da831153b5 - .octa 0x983e5152ee66dfab983e5152ee66dfab,\ - 0x983e5152ee66dfab983e5152ee66dfab - .octa 0xa831c66d2db43210a831c66d2db43210,\ - 0xa831c66d2db43210a831c66d2db43210 - .octa 0xb00327c898fb213fb00327c898fb213f,\ - 0xb00327c898fb213fb00327c898fb213f - .octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\ - 0xbf597fc7beef0ee4bf597fc7beef0ee4 - .octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\ - 0xc6e00bf33da88fc2c6e00bf33da88fc2 - .octa 0xd5a79147930aa725d5a79147930aa725,\ - 0xd5a79147930aa725d5a79147930aa725 - .octa 0x06ca6351e003826f06ca6351e003826f,\ - 0x06ca6351e003826f06ca6351e003826f - .octa 0x142929670a0e6e70142929670a0e6e70,\ - 0x142929670a0e6e70142929670a0e6e70 - .octa 0x27b70a8546d22ffc27b70a8546d22ffc,\ - 0x27b70a8546d22ffc27b70a8546d22ffc - .octa 0x2e1b21385c26c9262e1b21385c26c926,\ - 0x2e1b21385c26c9262e1b21385c26c926 - .octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\ - 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed - .octa 0x53380d139d95b3df53380d139d95b3df,\ - 0x53380d139d95b3df53380d139d95b3df - .octa 0x650a73548baf63de650a73548baf63de,\ - 0x650a73548baf63de650a73548baf63de - .octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\ - 0x766a0abb3c77b2a8766a0abb3c77b2a8 - .octa 0x81c2c92e47edaee681c2c92e47edaee6,\ - 0x81c2c92e47edaee681c2c92e47edaee6 - .octa 0x92722c851482353b92722c851482353b,\ - 0x92722c851482353b92722c851482353b - .octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\ - 0xa2bfe8a14cf10364a2bfe8a14cf10364 - .octa 0xa81a664bbc423001a81a664bbc423001,\ - 0xa81a664bbc423001a81a664bbc423001 - .octa 0xc24b8b70d0f89791c24b8b70d0f89791,\ - 0xc24b8b70d0f89791c24b8b70d0f89791 - .octa 0xc76c51a30654be30c76c51a30654be30,\ - 0xc76c51a30654be30c76c51a30654be30 - .octa 0xd192e819d6ef5218d192e819d6ef5218,\ - 0xd192e819d6ef5218d192e819d6ef5218 - .octa 0xd69906245565a910d69906245565a910,\ - 0xd69906245565a910d69906245565a910 - .octa 0xf40e35855771202af40e35855771202a,\ - 0xf40e35855771202af40e35855771202a - .octa 0x106aa07032bbd1b8106aa07032bbd1b8,\ - 0x106aa07032bbd1b8106aa07032bbd1b8 - .octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\ - 0x19a4c116b8d2d0c819a4c116b8d2d0c8 - .octa 0x1e376c085141ab531e376c085141ab53,\ - 0x1e376c085141ab531e376c085141ab53 - .octa 0x2748774cdf8eeb992748774cdf8eeb99,\ - 0x2748774cdf8eeb992748774cdf8eeb99 - .octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\ - 0x34b0bcb5e19b48a834b0bcb5e19b48a8 - .octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\ - 0x391c0cb3c5c95a63391c0cb3c5c95a63 - .octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\ - 0x4ed8aa4ae3418acb4ed8aa4ae3418acb - .octa 0x5b9cca4f7763e3735b9cca4f7763e373,\ - 0x5b9cca4f7763e3735b9cca4f7763e373 - .octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\ - 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3 - .octa 0x748f82ee5defb2fc748f82ee5defb2fc,\ - 0x748f82ee5defb2fc748f82ee5defb2fc - .octa 0x78a5636f43172f6078a5636f43172f60,\ - 0x78a5636f43172f6078a5636f43172f60 - .octa 0x84c87814a1f0ab7284c87814a1f0ab72,\ - 0x84c87814a1f0ab7284c87814a1f0ab72 - .octa 0x8cc702081a6439ec8cc702081a6439ec,\ - 0x8cc702081a6439ec8cc702081a6439ec - .octa 0x90befffa23631e2890befffa23631e28,\ - 0x90befffa23631e2890befffa23631e28 - .octa 0xa4506cebde82bde9a4506cebde82bde9,\ - 0xa4506cebde82bde9a4506cebde82bde9 - .octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\ - 0xbef9a3f7b2c67915bef9a3f7b2c67915 - .octa 0xc67178f2e372532bc67178f2e372532b,\ - 0xc67178f2e372532bc67178f2e372532b - .octa 0xca273eceea26619cca273eceea26619c,\ - 0xca273eceea26619cca273eceea26619c - .octa 0xd186b8c721c0c207d186b8c721c0c207,\ - 0xd186b8c721c0c207d186b8c721c0c207 - .octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\ - 0xeada7dd6cde0eb1eeada7dd6cde0eb1e - .octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\ - 0xf57d4f7fee6ed178f57d4f7fee6ed178 - .octa 0x06f067aa72176fba06f067aa72176fba,\ - 0x06f067aa72176fba06f067aa72176fba - .octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\ - 0x0a637dc5a2c898a60a637dc5a2c898a6 - .octa 0x113f9804bef90dae113f9804bef90dae,\ - 0x113f9804bef90dae113f9804bef90dae - .octa 0x1b710b35131c471b1b710b35131c471b,\ - 0x1b710b35131c471b1b710b35131c471b - .octa 0x28db77f523047d8428db77f523047d84,\ - 0x28db77f523047d8428db77f523047d84 - .octa 0x32caab7b40c7249332caab7b40c72493,\ - 0x32caab7b40c7249332caab7b40c72493 - .octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\ - 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc - .octa 0x431d67c49c100d4c431d67c49c100d4c,\ - 0x431d67c49c100d4c431d67c49c100d4c - .octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\ - 0x4cc5d4becb3e42b64cc5d4becb3e42b6 - .octa 0x597f299cfc657e2a597f299cfc657e2a,\ - 0x597f299cfc657e2a597f299cfc657e2a - .octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\ - 0x5fcb6fab3ad6faec5fcb6fab3ad6faec - .octa 0x6c44198c4a4758176c44198c4a475817,\ - 0x6c44198c4a4758176c44198c4a475817 - -.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 -.align 32 -PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607 - .octa 0x18191a1b1c1d1e1f1011121314151617 |