diff options
author | Ingo Molnar <mingo@kernel.org> | 2018-10-29 09:12:34 +0300 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2018-10-29 09:12:34 +0300 |
commit | 97ec37c57dd411d0815455bca07166411c0da1df (patch) | |
tree | d64850c43778d15c137772957788093a8b7d032c /arch/x86 | |
parent | ace6485a03266cc3c198ce8e927a1ce0ce139699 (diff) | |
parent | b59dfdaef173677b0b7e10f375226c0a1114fd20 (diff) | |
download | linux-97ec37c57dd411d0815455bca07166411c0da1df.tar.xz |
Merge branch 'linus' into x86/urgent, to refresh the tree
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
68 files changed, 2614 insertions, 9823 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index a450ad573dcb..a4b0007a54e1 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -60,9 +60,6 @@ endif ifeq ($(avx2_supported),yes) obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o - obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/ - obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/ - obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/ obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o endif @@ -106,7 +103,7 @@ ifeq ($(avx2_supported),yes) morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o endif -aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o +aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index acbe7e8336d8..661f7daf43da 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -102,9 +102,6 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); -int crypto_fpu_init(void); -void crypto_fpu_exit(void); - #define AVX_GEN2_OPTSIZE 640 #define AVX_GEN4_OPTSIZE 4096 @@ -817,7 +814,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, /* Linearize assoc, if not already linear */ if (req->src->length >= assoclen && req->src->length && (!PageHighMem(sg_page(req->src)) || - req->src->offset + req->src->length < PAGE_SIZE)) { + req->src->offset + req->src->length <= PAGE_SIZE)) { scatterwalk_start(&assoc_sg_walk, req->src); assoc = scatterwalk_map(&assoc_sg_walk); } else { @@ -1253,22 +1250,6 @@ static struct skcipher_alg aesni_skciphers[] = { static struct simd_skcipher_alg *aesni_simd_skciphers[ARRAY_SIZE(aesni_skciphers)]; -static struct { - const char *algname; - const char *drvname; - const char *basename; - struct simd_skcipher_alg *simd; -} aesni_simd_skciphers2[] = { -#if (defined(MODULE) && IS_ENABLED(CONFIG_CRYPTO_PCBC)) || \ - IS_BUILTIN(CONFIG_CRYPTO_PCBC) - { - .algname = "pcbc(aes)", - .drvname = "pcbc-aes-aesni", - .basename = "fpu(pcbc(__aes-aesni))", - }, -#endif -}; - #ifdef CONFIG_X86_64 static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key, unsigned int key_len) @@ -1422,10 +1403,6 @@ static void aesni_free_simds(void) for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) && aesni_simd_skciphers[i]; i++) simd_skcipher_free(aesni_simd_skciphers[i]); - - for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) - if (aesni_simd_skciphers2[i].simd) - simd_skcipher_free(aesni_simd_skciphers2[i].simd); } static int __init aesni_init(void) @@ -1469,13 +1446,9 @@ static int __init aesni_init(void) #endif #endif - err = crypto_fpu_init(); - if (err) - return err; - err = crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); if (err) - goto fpu_exit; + return err; err = crypto_register_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers)); @@ -1499,18 +1472,6 @@ static int __init aesni_init(void) aesni_simd_skciphers[i] = simd; } - for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) { - algname = aesni_simd_skciphers2[i].algname; - drvname = aesni_simd_skciphers2[i].drvname; - basename = aesni_simd_skciphers2[i].basename; - simd = simd_skcipher_create_compat(algname, drvname, basename); - err = PTR_ERR(simd); - if (IS_ERR(simd)) - continue; - - aesni_simd_skciphers2[i].simd = simd; - } - return 0; unregister_simds: @@ -1521,8 +1482,6 @@ unregister_skciphers: ARRAY_SIZE(aesni_skciphers)); unregister_algs: crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); -fpu_exit: - crypto_fpu_exit(); return err; } @@ -1533,8 +1492,6 @@ static void __exit aesni_exit(void) crypto_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers)); crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); - - crypto_fpu_exit(); } late_initcall(aesni_init); diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c deleted file mode 100644 index 406680476c52..000000000000 --- a/arch/x86/crypto/fpu.c +++ /dev/null @@ -1,207 +0,0 @@ -/* - * FPU: Wrapper for blkcipher touching fpu - * - * Copyright (c) Intel Corp. - * Author: Huang Ying <ying.huang@intel.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#include <crypto/internal/skcipher.h> -#include <linux/err.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <asm/fpu/api.h> - -struct crypto_fpu_ctx { - struct crypto_skcipher *child; -}; - -static int crypto_fpu_setkey(struct crypto_skcipher *parent, const u8 *key, - unsigned int keylen) -{ - struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(parent); - struct crypto_skcipher *child = ctx->child; - int err; - - crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); - err = crypto_skcipher_setkey(child, key, keylen); - crypto_skcipher_set_flags(parent, crypto_skcipher_get_flags(child) & - CRYPTO_TFM_RES_MASK); - return err; -} - -static int crypto_fpu_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_skcipher *child = ctx->child; - SKCIPHER_REQUEST_ON_STACK(subreq, child); - int err; - - skcipher_request_set_tfm(subreq, child); - skcipher_request_set_callback(subreq, 0, NULL, NULL); - skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, - req->iv); - - kernel_fpu_begin(); - err = crypto_skcipher_encrypt(subreq); - kernel_fpu_end(); - - skcipher_request_zero(subreq); - return err; -} - -static int crypto_fpu_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_skcipher *child = ctx->child; - SKCIPHER_REQUEST_ON_STACK(subreq, child); - int err; - - skcipher_request_set_tfm(subreq, child); - skcipher_request_set_callback(subreq, 0, NULL, NULL); - skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, - req->iv); - - kernel_fpu_begin(); - err = crypto_skcipher_decrypt(subreq); - kernel_fpu_end(); - - skcipher_request_zero(subreq); - return err; -} - -static int crypto_fpu_init_tfm(struct crypto_skcipher *tfm) -{ - struct skcipher_instance *inst = skcipher_alg_instance(tfm); - struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_skcipher_spawn *spawn; - struct crypto_skcipher *cipher; - - spawn = skcipher_instance_ctx(inst); - cipher = crypto_spawn_skcipher(spawn); - if (IS_ERR(cipher)) - return PTR_ERR(cipher); - - ctx->child = cipher; - - return 0; -} - -static void crypto_fpu_exit_tfm(struct crypto_skcipher *tfm) -{ - struct crypto_fpu_ctx *ctx = crypto_skcipher_ctx(tfm); - - crypto_free_skcipher(ctx->child); -} - -static void crypto_fpu_free(struct skcipher_instance *inst) -{ - crypto_drop_skcipher(skcipher_instance_ctx(inst)); - kfree(inst); -} - -static int crypto_fpu_create(struct crypto_template *tmpl, struct rtattr **tb) -{ - struct crypto_skcipher_spawn *spawn; - struct skcipher_instance *inst; - struct crypto_attr_type *algt; - struct skcipher_alg *alg; - const char *cipher_name; - int err; - - algt = crypto_get_attr_type(tb); - if (IS_ERR(algt)) - return PTR_ERR(algt); - - if ((algt->type ^ (CRYPTO_ALG_INTERNAL | CRYPTO_ALG_TYPE_SKCIPHER)) & - algt->mask) - return -EINVAL; - - if (!(algt->mask & CRYPTO_ALG_INTERNAL)) - return -EINVAL; - - cipher_name = crypto_attr_alg_name(tb[1]); - if (IS_ERR(cipher_name)) - return PTR_ERR(cipher_name); - - inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); - if (!inst) - return -ENOMEM; - - spawn = skcipher_instance_ctx(inst); - - crypto_set_skcipher_spawn(spawn, skcipher_crypto_instance(inst)); - err = crypto_grab_skcipher(spawn, cipher_name, CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL | CRYPTO_ALG_ASYNC); - if (err) - goto out_free_inst; - - alg = crypto_skcipher_spawn_alg(spawn); - - err = crypto_inst_setname(skcipher_crypto_instance(inst), "fpu", - &alg->base); - if (err) - goto out_drop_skcipher; - - inst->alg.base.cra_flags = CRYPTO_ALG_INTERNAL; - inst->alg.base.cra_priority = alg->base.cra_priority; - inst->alg.base.cra_blocksize = alg->base.cra_blocksize; - inst->alg.base.cra_alignmask = alg->base.cra_alignmask; - - inst->alg.ivsize = crypto_skcipher_alg_ivsize(alg); - inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg); - inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(alg); - - inst->alg.base.cra_ctxsize = sizeof(struct crypto_fpu_ctx); - - inst->alg.init = crypto_fpu_init_tfm; - inst->alg.exit = crypto_fpu_exit_tfm; - - inst->alg.setkey = crypto_fpu_setkey; - inst->alg.encrypt = crypto_fpu_encrypt; - inst->alg.decrypt = crypto_fpu_decrypt; - - inst->free = crypto_fpu_free; - - err = skcipher_register_instance(tmpl, inst); - if (err) - goto out_drop_skcipher; - -out: - return err; - -out_drop_skcipher: - crypto_drop_skcipher(spawn); -out_free_inst: - kfree(inst); - goto out; -} - -static struct crypto_template crypto_fpu_tmpl = { - .name = "fpu", - .create = crypto_fpu_create, - .module = THIS_MODULE, -}; - -int __init crypto_fpu_init(void) -{ - return crypto_register_template(&crypto_fpu_tmpl); -} - -void crypto_fpu_exit(void) -{ - crypto_unregister_template(&crypto_fpu_tmpl); -} - -MODULE_ALIAS_CRYPTO("fpu"); diff --git a/arch/x86/crypto/sha1-mb/Makefile b/arch/x86/crypto/sha1-mb/Makefile deleted file mode 100644 index 815ded3ba90e..000000000000 --- a/arch/x86/crypto/sha1-mb/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Arch-specific CryptoAPI modules. -# - -OBJECT_FILES_NON_STANDARD := y - -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ - $(comma)4)$(comma)%ymm2,yes,no) -ifeq ($(avx2_supported),yes) - obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb.o - sha1-mb-y := sha1_mb.o sha1_mb_mgr_flush_avx2.o \ - sha1_mb_mgr_init_avx2.o sha1_mb_mgr_submit_avx2.o sha1_x8_avx2.o -endif diff --git a/arch/x86/crypto/sha1-mb/sha1_mb.c b/arch/x86/crypto/sha1-mb/sha1_mb.c deleted file mode 100644 index b93805664c1d..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_mb.c +++ /dev/null @@ -1,1011 +0,0 @@ -/* - * Multi buffer SHA1 algorithm Glue Code - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Tim Chen <tim.c.chen@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2014 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/cryptohash.h> -#include <linux/types.h> -#include <linux/list.h> -#include <crypto/scatterwalk.h> -#include <crypto/sha.h> -#include <crypto/mcryptd.h> -#include <crypto/crypto_wq.h> -#include <asm/byteorder.h> -#include <linux/hardirq.h> -#include <asm/fpu/api.h> -#include "sha1_mb_ctx.h" - -#define FLUSH_INTERVAL 1000 /* in usec */ - -static struct mcryptd_alg_state sha1_mb_alg_state; - -struct sha1_mb_ctx { - struct mcryptd_ahash *mcryptd_tfm; -}; - -static inline struct mcryptd_hash_request_ctx - *cast_hash_to_mcryptd_ctx(struct sha1_hash_ctx *hash_ctx) -{ - struct ahash_request *areq; - - areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); - return container_of(areq, struct mcryptd_hash_request_ctx, areq); -} - -static inline struct ahash_request - *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) -{ - return container_of((void *) ctx, struct ahash_request, __ctx); -} - -static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, - struct ahash_request *areq) -{ - rctx->flag = HASH_UPDATE; -} - -static asmlinkage void (*sha1_job_mgr_init)(struct sha1_mb_mgr *state); -static asmlinkage struct job_sha1* (*sha1_job_mgr_submit) - (struct sha1_mb_mgr *state, struct job_sha1 *job); -static asmlinkage struct job_sha1* (*sha1_job_mgr_flush) - (struct sha1_mb_mgr *state); -static asmlinkage struct job_sha1* (*sha1_job_mgr_get_comp_job) - (struct sha1_mb_mgr *state); - -static inline uint32_t sha1_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], - uint64_t total_len) -{ - uint32_t i = total_len & (SHA1_BLOCK_SIZE - 1); - - memset(&padblock[i], 0, SHA1_BLOCK_SIZE); - padblock[i] = 0x80; - - i += ((SHA1_BLOCK_SIZE - 1) & - (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1))) - + 1 + SHA1_PADLENGTHFIELD_SIZE; - -#if SHA1_PADLENGTHFIELD_SIZE == 16 - *((uint64_t *) &padblock[i - 16]) = 0; -#endif - - *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); - - /* Number of extra blocks to hash */ - return i >> SHA1_LOG2_BLOCK_SIZE; -} - -static struct sha1_hash_ctx *sha1_ctx_mgr_resubmit(struct sha1_ctx_mgr *mgr, - struct sha1_hash_ctx *ctx) -{ - while (ctx) { - if (ctx->status & HASH_CTX_STS_COMPLETE) { - /* Clear PROCESSING bit */ - ctx->status = HASH_CTX_STS_COMPLETE; - return ctx; - } - - /* - * If the extra blocks are empty, begin hashing what remains - * in the user's buffer. - */ - if (ctx->partial_block_buffer_length == 0 && - ctx->incoming_buffer_length) { - - const void *buffer = ctx->incoming_buffer; - uint32_t len = ctx->incoming_buffer_length; - uint32_t copy_len; - - /* - * Only entire blocks can be hashed. - * Copy remainder to extra blocks buffer. - */ - copy_len = len & (SHA1_BLOCK_SIZE-1); - - if (copy_len) { - len -= copy_len; - memcpy(ctx->partial_block_buffer, - ((const char *) buffer + len), - copy_len); - ctx->partial_block_buffer_length = copy_len; - } - - ctx->incoming_buffer_length = 0; - - /* len should be a multiple of the block size now */ - assert((len % SHA1_BLOCK_SIZE) == 0); - - /* Set len to the number of blocks to be hashed */ - len >>= SHA1_LOG2_BLOCK_SIZE; - - if (len) { - - ctx->job.buffer = (uint8_t *) buffer; - ctx->job.len = len; - ctx = (struct sha1_hash_ctx *)sha1_job_mgr_submit(&mgr->mgr, - &ctx->job); - continue; - } - } - - /* - * If the extra blocks are not empty, then we are - * either on the last block(s) or we need more - * user input before continuing. - */ - if (ctx->status & HASH_CTX_STS_LAST) { - - uint8_t *buf = ctx->partial_block_buffer; - uint32_t n_extra_blocks = - sha1_pad(buf, ctx->total_length); - - ctx->status = (HASH_CTX_STS_PROCESSING | - HASH_CTX_STS_COMPLETE); - ctx->job.buffer = buf; - ctx->job.len = (uint32_t) n_extra_blocks; - ctx = (struct sha1_hash_ctx *) - sha1_job_mgr_submit(&mgr->mgr, &ctx->job); - continue; - } - - ctx->status = HASH_CTX_STS_IDLE; - return ctx; - } - - return NULL; -} - -static struct sha1_hash_ctx - *sha1_ctx_mgr_get_comp_ctx(struct sha1_ctx_mgr *mgr) -{ - /* - * If get_comp_job returns NULL, there are no jobs complete. - * If get_comp_job returns a job, verify that it is safe to return to - * the user. - * If it is not ready, resubmit the job to finish processing. - * If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. - * Otherwise, all jobs currently being managed by the hash_ctx_mgr - * still need processing. - */ - struct sha1_hash_ctx *ctx; - - ctx = (struct sha1_hash_ctx *) sha1_job_mgr_get_comp_job(&mgr->mgr); - return sha1_ctx_mgr_resubmit(mgr, ctx); -} - -static void sha1_ctx_mgr_init(struct sha1_ctx_mgr *mgr) -{ - sha1_job_mgr_init(&mgr->mgr); -} - -static struct sha1_hash_ctx *sha1_ctx_mgr_submit(struct sha1_ctx_mgr *mgr, - struct sha1_hash_ctx *ctx, - const void *buffer, - uint32_t len, - int flags) -{ - if (flags & ~(HASH_UPDATE | HASH_LAST)) { - /* User should not pass anything other than UPDATE or LAST */ - ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; - return ctx; - } - - if (ctx->status & HASH_CTX_STS_PROCESSING) { - /* Cannot submit to a currently processing job. */ - ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; - return ctx; - } - - if (ctx->status & HASH_CTX_STS_COMPLETE) { - /* Cannot update a finished job. */ - ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; - return ctx; - } - - /* - * If we made it here, there were no errors during this call to - * submit - */ - ctx->error = HASH_CTX_ERROR_NONE; - - /* Store buffer ptr info from user */ - ctx->incoming_buffer = buffer; - ctx->incoming_buffer_length = len; - - /* - * Store the user's request flags and mark this ctx as currently - * being processed. - */ - ctx->status = (flags & HASH_LAST) ? - (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : - HASH_CTX_STS_PROCESSING; - - /* Advance byte counter */ - ctx->total_length += len; - - /* - * If there is anything currently buffered in the extra blocks, - * append to it until it contains a whole block. - * Or if the user's buffer contains less than a whole block, - * append as much as possible to the extra block. - */ - if (ctx->partial_block_buffer_length || len < SHA1_BLOCK_SIZE) { - /* - * Compute how many bytes to copy from user buffer into - * extra block - */ - uint32_t copy_len = SHA1_BLOCK_SIZE - - ctx->partial_block_buffer_length; - if (len < copy_len) - copy_len = len; - - if (copy_len) { - /* Copy and update relevant pointers and counters */ - memcpy(&ctx->partial_block_buffer[ctx->partial_block_buffer_length], - buffer, copy_len); - - ctx->partial_block_buffer_length += copy_len; - ctx->incoming_buffer = (const void *) - ((const char *)buffer + copy_len); - ctx->incoming_buffer_length = len - copy_len; - } - - /* - * The extra block should never contain more than 1 block - * here - */ - assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE); - - /* - * If the extra block buffer contains exactly 1 block, it can - * be hashed. - */ - if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) { - ctx->partial_block_buffer_length = 0; - - ctx->job.buffer = ctx->partial_block_buffer; - ctx->job.len = 1; - ctx = (struct sha1_hash_ctx *) - sha1_job_mgr_submit(&mgr->mgr, &ctx->job); - } - } - - return sha1_ctx_mgr_resubmit(mgr, ctx); -} - -static struct sha1_hash_ctx *sha1_ctx_mgr_flush(struct sha1_ctx_mgr *mgr) -{ - struct sha1_hash_ctx *ctx; - - while (1) { - ctx = (struct sha1_hash_ctx *) sha1_job_mgr_flush(&mgr->mgr); - - /* If flush returned 0, there are no more jobs in flight. */ - if (!ctx) - return NULL; - - /* - * If flush returned a job, resubmit the job to finish - * processing. - */ - ctx = sha1_ctx_mgr_resubmit(mgr, ctx); - - /* - * If sha1_ctx_mgr_resubmit returned a job, it is ready to be - * returned. Otherwise, all jobs currently being managed by the - * sha1_ctx_mgr still need processing. Loop. - */ - if (ctx) - return ctx; - } -} - -static int sha1_mb_init(struct ahash_request *areq) -{ - struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); - - hash_ctx_init(sctx); - sctx->job.result_digest[0] = SHA1_H0; - sctx->job.result_digest[1] = SHA1_H1; - sctx->job.result_digest[2] = SHA1_H2; - sctx->job.result_digest[3] = SHA1_H3; - sctx->job.result_digest[4] = SHA1_H4; - sctx->total_length = 0; - sctx->partial_block_buffer_length = 0; - sctx->status = HASH_CTX_STS_IDLE; - - return 0; -} - -static int sha1_mb_set_results(struct mcryptd_hash_request_ctx *rctx) -{ - int i; - struct sha1_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); - __be32 *dst = (__be32 *) rctx->out; - - for (i = 0; i < 5; ++i) - dst[i] = cpu_to_be32(sctx->job.result_digest[i]); - - return 0; -} - -static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, - struct mcryptd_alg_cstate *cstate, bool flush) -{ - int flag = HASH_UPDATE; - int nbytes, err = 0; - struct mcryptd_hash_request_ctx *rctx = *ret_rctx; - struct sha1_hash_ctx *sha_ctx; - - /* more work ? */ - while (!(rctx->flag & HASH_DONE)) { - nbytes = crypto_ahash_walk_done(&rctx->walk, 0); - if (nbytes < 0) { - err = nbytes; - goto out; - } - /* check if the walk is done */ - if (crypto_ahash_walk_last(&rctx->walk)) { - rctx->flag |= HASH_DONE; - if (rctx->flag & HASH_FINAL) - flag |= HASH_LAST; - - } - sha_ctx = (struct sha1_hash_ctx *) - ahash_request_ctx(&rctx->areq); - kernel_fpu_begin(); - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, - rctx->walk.data, nbytes, flag); - if (!sha_ctx) { - if (flush) - sha_ctx = sha1_ctx_mgr_flush(cstate->mgr); - } - kernel_fpu_end(); - if (sha_ctx) - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - else { - rctx = NULL; - goto out; - } - } - - /* copy the results */ - if (rctx->flag & HASH_FINAL) - sha1_mb_set_results(rctx); - -out: - *ret_rctx = rctx; - return err; -} - -static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, - struct mcryptd_alg_cstate *cstate, - int err) -{ - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); - struct sha1_hash_ctx *sha_ctx; - struct mcryptd_hash_request_ctx *req_ctx; - int ret; - - /* remove from work list */ - spin_lock(&cstate->work_lock); - list_del(&rctx->waiter); - spin_unlock(&cstate->work_lock); - - if (irqs_disabled()) - rctx->complete(&req->base, err); - else { - local_bh_disable(); - rctx->complete(&req->base, err); - local_bh_enable(); - } - - /* check to see if there are other jobs that are done */ - sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr); - while (sha_ctx) { - req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&req_ctx, cstate, false); - if (req_ctx) { - spin_lock(&cstate->work_lock); - list_del(&req_ctx->waiter); - spin_unlock(&cstate->work_lock); - - req = cast_mcryptd_ctx_to_req(req_ctx); - if (irqs_disabled()) - req_ctx->complete(&req->base, ret); - else { - local_bh_disable(); - req_ctx->complete(&req->base, ret); - local_bh_enable(); - } - } - sha_ctx = sha1_ctx_mgr_get_comp_ctx(cstate->mgr); - } - - return 0; -} - -static void sha1_mb_add_list(struct mcryptd_hash_request_ctx *rctx, - struct mcryptd_alg_cstate *cstate) -{ - unsigned long next_flush; - unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); - - /* initialize tag */ - rctx->tag.arrival = jiffies; /* tag the arrival time */ - rctx->tag.seq_num = cstate->next_seq_num++; - next_flush = rctx->tag.arrival + delay; - rctx->tag.expire = next_flush; - - spin_lock(&cstate->work_lock); - list_add_tail(&rctx->waiter, &cstate->work_list); - spin_unlock(&cstate->work_lock); - - mcryptd_arm_flusher(cstate, delay); -} - -static int sha1_mb_update(struct ahash_request *areq) -{ - struct mcryptd_hash_request_ctx *rctx = - container_of(areq, struct mcryptd_hash_request_ctx, areq); - struct mcryptd_alg_cstate *cstate = - this_cpu_ptr(sha1_mb_alg_state.alg_cstate); - - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); - struct sha1_hash_ctx *sha_ctx; - int ret = 0, nbytes; - - - /* sanity check */ - if (rctx->tag.cpu != smp_processor_id()) { - pr_err("mcryptd error: cpu clash\n"); - goto done; - } - - /* need to init context */ - req_ctx_init(rctx, areq); - - nbytes = crypto_ahash_walk_first(req, &rctx->walk); - - if (nbytes < 0) { - ret = nbytes; - goto done; - } - - if (crypto_ahash_walk_last(&rctx->walk)) - rctx->flag |= HASH_DONE; - - /* submit */ - sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); - sha1_mb_add_list(rctx, cstate); - kernel_fpu_begin(); - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, - nbytes, HASH_UPDATE); - kernel_fpu_end(); - - /* check if anything is returned */ - if (!sha_ctx) - return -EINPROGRESS; - - if (sha_ctx->error) { - ret = sha_ctx->error; - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - goto done; - } - - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&rctx, cstate, false); - - if (!rctx) - return -EINPROGRESS; -done: - sha_complete_job(rctx, cstate, ret); - return ret; -} - -static int sha1_mb_finup(struct ahash_request *areq) -{ - struct mcryptd_hash_request_ctx *rctx = - container_of(areq, struct mcryptd_hash_request_ctx, areq); - struct mcryptd_alg_cstate *cstate = - this_cpu_ptr(sha1_mb_alg_state.alg_cstate); - - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); - struct sha1_hash_ctx *sha_ctx; - int ret = 0, flag = HASH_UPDATE, nbytes; - - /* sanity check */ - if (rctx->tag.cpu != smp_processor_id()) { - pr_err("mcryptd error: cpu clash\n"); - goto done; - } - - /* need to init context */ - req_ctx_init(rctx, areq); - - nbytes = crypto_ahash_walk_first(req, &rctx->walk); - - if (nbytes < 0) { - ret = nbytes; - goto done; - } - - if (crypto_ahash_walk_last(&rctx->walk)) { - rctx->flag |= HASH_DONE; - flag = HASH_LAST; - } - - /* submit */ - rctx->flag |= HASH_FINAL; - sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); - sha1_mb_add_list(rctx, cstate); - - kernel_fpu_begin(); - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, - nbytes, flag); - kernel_fpu_end(); - - /* check if anything is returned */ - if (!sha_ctx) - return -EINPROGRESS; - - if (sha_ctx->error) { - ret = sha_ctx->error; - goto done; - } - - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&rctx, cstate, false); - if (!rctx) - return -EINPROGRESS; -done: - sha_complete_job(rctx, cstate, ret); - return ret; -} - -static int sha1_mb_final(struct ahash_request *areq) -{ - struct mcryptd_hash_request_ctx *rctx = - container_of(areq, struct mcryptd_hash_request_ctx, areq); - struct mcryptd_alg_cstate *cstate = - this_cpu_ptr(sha1_mb_alg_state.alg_cstate); - - struct sha1_hash_ctx *sha_ctx; - int ret = 0; - u8 data; - - /* sanity check */ - if (rctx->tag.cpu != smp_processor_id()) { - pr_err("mcryptd error: cpu clash\n"); - goto done; - } - - /* need to init context */ - req_ctx_init(rctx, areq); - - rctx->flag |= HASH_DONE | HASH_FINAL; - - sha_ctx = (struct sha1_hash_ctx *) ahash_request_ctx(areq); - /* flag HASH_FINAL and 0 data size */ - sha1_mb_add_list(rctx, cstate); - kernel_fpu_begin(); - sha_ctx = sha1_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, - HASH_LAST); - kernel_fpu_end(); - - /* check if anything is returned */ - if (!sha_ctx) - return -EINPROGRESS; - - if (sha_ctx->error) { - ret = sha_ctx->error; - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - goto done; - } - - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&rctx, cstate, false); - if (!rctx) - return -EINPROGRESS; -done: - sha_complete_job(rctx, cstate, ret); - return ret; -} - -static int sha1_mb_export(struct ahash_request *areq, void *out) -{ - struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); - - memcpy(out, sctx, sizeof(*sctx)); - - return 0; -} - -static int sha1_mb_import(struct ahash_request *areq, const void *in) -{ - struct sha1_hash_ctx *sctx = ahash_request_ctx(areq); - - memcpy(sctx, in, sizeof(*sctx)); - - return 0; -} - -static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm) -{ - struct mcryptd_ahash *mcryptd_tfm; - struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); - struct mcryptd_hash_ctx *mctx; - - mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", - CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(mcryptd_tfm)) - return PTR_ERR(mcryptd_tfm); - mctx = crypto_ahash_ctx(&mcryptd_tfm->base); - mctx->alg_state = &sha1_mb_alg_state; - ctx->mcryptd_tfm = mcryptd_tfm; - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct ahash_request) + - crypto_ahash_reqsize(&mcryptd_tfm->base)); - - return 0; -} - -static void sha1_mb_async_exit_tfm(struct crypto_tfm *tfm) -{ - struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); - - mcryptd_free_ahash(ctx->mcryptd_tfm); -} - -static int sha1_mb_areq_init_tfm(struct crypto_tfm *tfm) -{ - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct ahash_request) + - sizeof(struct sha1_hash_ctx)); - - return 0; -} - -static void sha1_mb_areq_exit_tfm(struct crypto_tfm *tfm) -{ - struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm); - - mcryptd_free_ahash(ctx->mcryptd_tfm); -} - -static struct ahash_alg sha1_mb_areq_alg = { - .init = sha1_mb_init, - .update = sha1_mb_update, - .final = sha1_mb_final, - .finup = sha1_mb_finup, - .export = sha1_mb_export, - .import = sha1_mb_import, - .halg = { - .digestsize = SHA1_DIGEST_SIZE, - .statesize = sizeof(struct sha1_hash_ctx), - .base = { - .cra_name = "__sha1-mb", - .cra_driver_name = "__intel_sha1-mb", - .cra_priority = 100, - /* - * use ASYNC flag as some buffers in multi-buffer - * algo may not have completed before hashing thread - * sleep - */ - .cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT - (sha1_mb_areq_alg.halg.base.cra_list), - .cra_init = sha1_mb_areq_init_tfm, - .cra_exit = sha1_mb_areq_exit_tfm, - .cra_ctxsize = sizeof(struct sha1_hash_ctx), - } - } -}; - -static int sha1_mb_async_init(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_init(mcryptd_req); -} - -static int sha1_mb_async_update(struct ahash_request *req) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_update(mcryptd_req); -} - -static int sha1_mb_async_finup(struct ahash_request *req) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_finup(mcryptd_req); -} - -static int sha1_mb_async_final(struct ahash_request *req) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_final(mcryptd_req); -} - -static int sha1_mb_async_digest(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_digest(mcryptd_req); -} - -static int sha1_mb_async_export(struct ahash_request *req, void *out) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_export(mcryptd_req, out); -} - -static int sha1_mb_async_import(struct ahash_request *req, const void *in) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha1_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); - struct mcryptd_hash_request_ctx *rctx; - struct ahash_request *areq; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - rctx = ahash_request_ctx(mcryptd_req); - areq = &rctx->areq; - - ahash_request_set_tfm(areq, child); - ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, - rctx->complete, req); - - return crypto_ahash_import(mcryptd_req, in); -} - -static struct ahash_alg sha1_mb_async_alg = { - .init = sha1_mb_async_init, - .update = sha1_mb_async_update, - .final = sha1_mb_async_final, - .finup = sha1_mb_async_finup, - .digest = sha1_mb_async_digest, - .export = sha1_mb_async_export, - .import = sha1_mb_async_import, - .halg = { - .digestsize = SHA1_DIGEST_SIZE, - .statesize = sizeof(struct sha1_hash_ctx), - .base = { - .cra_name = "sha1", - .cra_driver_name = "sha1_mb", - /* - * Low priority, since with few concurrent hash requests - * this is extremely slow due to the flush delay. Users - * whose workloads would benefit from this can request - * it explicitly by driver name, or can increase its - * priority at runtime using NETLINK_CRYPTO. - */ - .cra_priority = 50, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(sha1_mb_async_alg.halg.base.cra_list), - .cra_init = sha1_mb_async_init_tfm, - .cra_exit = sha1_mb_async_exit_tfm, - .cra_ctxsize = sizeof(struct sha1_mb_ctx), - .cra_alignmask = 0, - }, - }, -}; - -static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate) -{ - struct mcryptd_hash_request_ctx *rctx; - unsigned long cur_time; - unsigned long next_flush = 0; - struct sha1_hash_ctx *sha_ctx; - - - cur_time = jiffies; - - while (!list_empty(&cstate->work_list)) { - rctx = list_entry(cstate->work_list.next, - struct mcryptd_hash_request_ctx, waiter); - if (time_before(cur_time, rctx->tag.expire)) - break; - kernel_fpu_begin(); - sha_ctx = (struct sha1_hash_ctx *) - sha1_ctx_mgr_flush(cstate->mgr); - kernel_fpu_end(); - if (!sha_ctx) { - pr_err("sha1_mb error: nothing got flushed for non-empty list\n"); - break; - } - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - sha_finish_walk(&rctx, cstate, true); - sha_complete_job(rctx, cstate, 0); - } - - if (!list_empty(&cstate->work_list)) { - rctx = list_entry(cstate->work_list.next, - struct mcryptd_hash_request_ctx, waiter); - /* get the hash context and then flush time */ - next_flush = rctx->tag.expire; - mcryptd_arm_flusher(cstate, get_delay(next_flush)); - } - return next_flush; -} - -static int __init sha1_mb_mod_init(void) -{ - - int cpu; - int err; - struct mcryptd_alg_cstate *cpu_state; - - /* check for dependent cpu features */ - if (!boot_cpu_has(X86_FEATURE_AVX2) || - !boot_cpu_has(X86_FEATURE_BMI2)) - return -ENODEV; - - /* initialize multibuffer structures */ - sha1_mb_alg_state.alg_cstate = alloc_percpu(struct mcryptd_alg_cstate); - - sha1_job_mgr_init = sha1_mb_mgr_init_avx2; - sha1_job_mgr_submit = sha1_mb_mgr_submit_avx2; - sha1_job_mgr_flush = sha1_mb_mgr_flush_avx2; - sha1_job_mgr_get_comp_job = sha1_mb_mgr_get_comp_job_avx2; - - if (!sha1_mb_alg_state.alg_cstate) - return -ENOMEM; - for_each_possible_cpu(cpu) { - cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); - cpu_state->next_flush = 0; - cpu_state->next_seq_num = 0; - cpu_state->flusher_engaged = false; - INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); - cpu_state->cpu = cpu; - cpu_state->alg_state = &sha1_mb_alg_state; - cpu_state->mgr = kzalloc(sizeof(struct sha1_ctx_mgr), - GFP_KERNEL); - if (!cpu_state->mgr) - goto err2; - sha1_ctx_mgr_init(cpu_state->mgr); - INIT_LIST_HEAD(&cpu_state->work_list); - spin_lock_init(&cpu_state->work_lock); - } - sha1_mb_alg_state.flusher = &sha1_mb_flusher; - - err = crypto_register_ahash(&sha1_mb_areq_alg); - if (err) - goto err2; - err = crypto_register_ahash(&sha1_mb_async_alg); - if (err) - goto err1; - - - return 0; -err1: - crypto_unregister_ahash(&sha1_mb_areq_alg); -err2: - for_each_possible_cpu(cpu) { - cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); - kfree(cpu_state->mgr); - } - free_percpu(sha1_mb_alg_state.alg_cstate); - return -ENODEV; -} - -static void __exit sha1_mb_mod_fini(void) -{ - int cpu; - struct mcryptd_alg_cstate *cpu_state; - - crypto_unregister_ahash(&sha1_mb_async_alg); - crypto_unregister_ahash(&sha1_mb_areq_alg); - for_each_possible_cpu(cpu) { - cpu_state = per_cpu_ptr(sha1_mb_alg_state.alg_cstate, cpu); - kfree(cpu_state->mgr); - } - free_percpu(sha1_mb_alg_state.alg_cstate); -} - -module_init(sha1_mb_mod_init); -module_exit(sha1_mb_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, multi buffer accelerated"); - -MODULE_ALIAS_CRYPTO("sha1"); diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h b/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h deleted file mode 100644 index 9454bd16f9f8..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_mb_ctx.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Header file for multi buffer SHA context - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Tim Chen <tim.c.chen@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2014 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _SHA_MB_CTX_INTERNAL_H -#define _SHA_MB_CTX_INTERNAL_H - -#include "sha1_mb_mgr.h" - -#define HASH_UPDATE 0x00 -#define HASH_LAST 0x01 -#define HASH_DONE 0x02 -#define HASH_FINAL 0x04 - -#define HASH_CTX_STS_IDLE 0x00 -#define HASH_CTX_STS_PROCESSING 0x01 -#define HASH_CTX_STS_LAST 0x02 -#define HASH_CTX_STS_COMPLETE 0x04 - -enum hash_ctx_error { - HASH_CTX_ERROR_NONE = 0, - HASH_CTX_ERROR_INVALID_FLAGS = -1, - HASH_CTX_ERROR_ALREADY_PROCESSING = -2, - HASH_CTX_ERROR_ALREADY_COMPLETED = -3, - -#ifdef HASH_CTX_DEBUG - HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4, -#endif -}; - - -#define hash_ctx_user_data(ctx) ((ctx)->user_data) -#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) -#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) -#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) -#define hash_ctx_status(ctx) ((ctx)->status) -#define hash_ctx_error(ctx) ((ctx)->error) -#define hash_ctx_init(ctx) \ - do { \ - (ctx)->error = HASH_CTX_ERROR_NONE; \ - (ctx)->status = HASH_CTX_STS_COMPLETE; \ - } while (0) - - -/* Hash Constants and Typedefs */ -#define SHA1_DIGEST_LENGTH 5 -#define SHA1_LOG2_BLOCK_SIZE 6 - -#define SHA1_PADLENGTHFIELD_SIZE 8 - -#ifdef SHA_MB_DEBUG -#define assert(expr) \ -do { \ - if (unlikely(!(expr))) { \ - printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ - #expr, __FILE__, __func__, __LINE__); \ - } \ -} while (0) -#else -#define assert(expr) do {} while (0) -#endif - -struct sha1_ctx_mgr { - struct sha1_mb_mgr mgr; -}; - -/* typedef struct sha1_ctx_mgr sha1_ctx_mgr; */ - -struct sha1_hash_ctx { - /* Must be at struct offset 0 */ - struct job_sha1 job; - /* status flag */ - int status; - /* error flag */ - int error; - - uint64_t total_length; - const void *incoming_buffer; - uint32_t incoming_buffer_length; - uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2]; - uint32_t partial_block_buffer_length; - void *user_data; -}; - -#endif diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h b/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h deleted file mode 100644 index 08ad1a9acfd7..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Header file for multi buffer SHA1 algorithm manager - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * James Guilford <james.guilford@intel.com> - * Tim Chen <tim.c.chen@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2014 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef __SHA_MB_MGR_H -#define __SHA_MB_MGR_H - - -#include <linux/types.h> - -#define NUM_SHA1_DIGEST_WORDS 5 - -enum job_sts { STS_UNKNOWN = 0, - STS_BEING_PROCESSED = 1, - STS_COMPLETED = 2, - STS_INTERNAL_ERROR = 3, - STS_ERROR = 4 -}; - -struct job_sha1 { - u8 *buffer; - u32 len; - u32 result_digest[NUM_SHA1_DIGEST_WORDS] __aligned(32); - enum job_sts status; - void *user_data; -}; - -/* SHA1 out-of-order scheduler */ - -/* typedef uint32_t sha1_digest_array[5][8]; */ - -struct sha1_args_x8 { - uint32_t digest[5][8]; - uint8_t *data_ptr[8]; -}; - -struct sha1_lane_data { - struct job_sha1 *job_in_lane; -}; - -struct sha1_mb_mgr { - struct sha1_args_x8 args; - - uint32_t lens[8]; - - /* each byte is index (0...7) of unused lanes */ - uint64_t unused_lanes; - /* byte 4 is set to FF as a flag */ - struct sha1_lane_data ldata[8]; -}; - - -#define SHA1_MB_MGR_NUM_LANES_AVX2 8 - -void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state); -struct job_sha1 *sha1_mb_mgr_submit_avx2(struct sha1_mb_mgr *state, - struct job_sha1 *job); -struct job_sha1 *sha1_mb_mgr_flush_avx2(struct sha1_mb_mgr *state); -struct job_sha1 *sha1_mb_mgr_get_comp_job_avx2(struct sha1_mb_mgr *state); - -#endif diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S deleted file mode 100644 index 86688c6e7a25..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_datastruct.S +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Header file for multi buffer SHA1 algorithm data structure - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * James Guilford <james.guilford@intel.com> - * Tim Chen <tim.c.chen@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2014 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -# Macros for defining data structures - -# Usage example - -#START_FIELDS # JOB_AES -### name size align -#FIELD _plaintext, 8, 8 # pointer to plaintext -#FIELD _ciphertext, 8, 8 # pointer to ciphertext -#FIELD _IV, 16, 8 # IV -#FIELD _keys, 8, 8 # pointer to keys -#FIELD _len, 4, 4 # length in bytes -#FIELD _status, 4, 4 # status enumeration -#FIELD _user_data, 8, 8 # pointer to user data -#UNION _union, size1, align1, \ -# size2, align2, \ -# size3, align3, \ -# ... -#END_FIELDS -#%assign _JOB_AES_size _FIELD_OFFSET -#%assign _JOB_AES_align _STRUCT_ALIGN - -######################################################################### - -# Alternate "struc-like" syntax: -# STRUCT job_aes2 -# RES_Q .plaintext, 1 -# RES_Q .ciphertext, 1 -# RES_DQ .IV, 1 -# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN -# RES_U .union, size1, align1, \ -# size2, align2, \ -# ... -# ENDSTRUCT -# # Following only needed if nesting -# %assign job_aes2_size _FIELD_OFFSET -# %assign job_aes2_align _STRUCT_ALIGN -# -# RES_* macros take a name, a count and an optional alignment. -# The count in in terms of the base size of the macro, and the -# default alignment is the base size. -# The macros are: -# Macro Base size -# RES_B 1 -# RES_W 2 -# RES_D 4 -# RES_Q 8 -# RES_DQ 16 -# RES_Y 32 -# RES_Z 64 -# -# RES_U defines a union. It's arguments are a name and two or more -# pairs of "size, alignment" -# -# The two assigns are only needed if this structure is being nested -# within another. Even if the assigns are not done, one can still use -# STRUCT_NAME_size as the size of the structure. -# -# Note that for nesting, you still need to assign to STRUCT_NAME_size. -# -# The differences between this and using "struc" directly are that each -# type is implicitly aligned to its natural length (although this can be -# over-ridden with an explicit third parameter), and that the structure -# is padded at the end to its overall alignment. -# - -######################################################################### - -#ifndef _SHA1_MB_MGR_DATASTRUCT_ASM_ -#define _SHA1_MB_MGR_DATASTRUCT_ASM_ - -## START_FIELDS -.macro START_FIELDS - _FIELD_OFFSET = 0 - _STRUCT_ALIGN = 0 -.endm - -## FIELD name size align -.macro FIELD name size align - _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) - \name = _FIELD_OFFSET - _FIELD_OFFSET = _FIELD_OFFSET + (\size) -.if (\align > _STRUCT_ALIGN) - _STRUCT_ALIGN = \align -.endif -.endm - -## END_FIELDS -.macro END_FIELDS - _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) -.endm - -######################################################################## - -.macro STRUCT p1 -START_FIELDS -.struc \p1 -.endm - -.macro ENDSTRUCT - tmp = _FIELD_OFFSET - END_FIELDS - tmp = (_FIELD_OFFSET - %%tmp) -.if (tmp > 0) - .lcomm tmp -.endif -.endstruc -.endm - -## RES_int name size align -.macro RES_int p1 p2 p3 - name = \p1 - size = \p2 - align = .\p3 - - _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) -.align align -.lcomm name size - _FIELD_OFFSET = _FIELD_OFFSET + (size) -.if (align > _STRUCT_ALIGN) - _STRUCT_ALIGN = align -.endif -.endm - - - -# macro RES_B name, size [, align] -.macro RES_B _name, _size, _align=1 -RES_int _name _size _align -.endm - -# macro RES_W name, size [, align] -.macro RES_W _name, _size, _align=2 -RES_int _name 2*(_size) _align -.endm - -# macro RES_D name, size [, align] -.macro RES_D _name, _size, _align=4 -RES_int _name 4*(_size) _align -.endm - -# macro RES_Q name, size [, align] -.macro RES_Q _name, _size, _align=8 -RES_int _name 8*(_size) _align -.endm - -# macro RES_DQ name, size [, align] -.macro RES_DQ _name, _size, _align=16 -RES_int _name 16*(_size) _align -.endm - -# macro RES_Y name, size [, align] -.macro RES_Y _name, _size, _align=32 -RES_int _name 32*(_size) _align -.endm - -# macro RES_Z name, size [, align] -.macro RES_Z _name, _size, _align=64 -RES_int _name 64*(_size) _align -.endm - - -#endif - -######################################################################## -#### Define constants -######################################################################## - -######################################################################## -#### Define SHA1 Out Of Order Data Structures -######################################################################## - -START_FIELDS # LANE_DATA -### name size align -FIELD _job_in_lane, 8, 8 # pointer to job object -END_FIELDS - -_LANE_DATA_size = _FIELD_OFFSET -_LANE_DATA_align = _STRUCT_ALIGN - -######################################################################## - -START_FIELDS # SHA1_ARGS_X8 -### name size align -FIELD _digest, 4*5*8, 16 # transposed digest -FIELD _data_ptr, 8*8, 8 # array of pointers to data -END_FIELDS - -_SHA1_ARGS_X4_size = _FIELD_OFFSET -_SHA1_ARGS_X4_align = _STRUCT_ALIGN -_SHA1_ARGS_X8_size = _FIELD_OFFSET -_SHA1_ARGS_X8_align = _STRUCT_ALIGN - -######################################################################## - -START_FIELDS # MB_MGR -### name size align -FIELD _args, _SHA1_ARGS_X4_size, _SHA1_ARGS_X4_align -FIELD _lens, 4*8, 8 -FIELD _unused_lanes, 8, 8 -FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align -END_FIELDS - -_MB_MGR_size = _FIELD_OFFSET -_MB_MGR_align = _STRUCT_ALIGN - -_args_digest = _args + _digest -_args_data_ptr = _args + _data_ptr - - -######################################################################## -#### Define constants -######################################################################## - -#define STS_UNKNOWN 0 -#define STS_BEING_PROCESSED 1 -#define STS_COMPLETED 2 - -######################################################################## -#### Define JOB_SHA1 structure -######################################################################## - -START_FIELDS # JOB_SHA1 - -### name size align -FIELD _buffer, 8, 8 # pointer to buffer -FIELD _len, 4, 4 # length in bytes -FIELD _result_digest, 5*4, 32 # Digest (output) -FIELD _status, 4, 4 -FIELD _user_data, 8, 8 -END_FIELDS - -_JOB_SHA1_size = _FIELD_OFFSET -_JOB_SHA1_align = _STRUCT_ALIGN diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S deleted file mode 100644 index 7cfba738f104..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Flush routine for SHA1 multibuffer - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * James Guilford <james.guilford@intel.com> - * Tim Chen <tim.c.chen@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2014 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include <linux/linkage.h> -#include <asm/frame.h> -#include "sha1_mb_mgr_datastruct.S" - - -.extern sha1_x8_avx2 - -# LINUX register definitions -#define arg1 %rdi -#define arg2 %rsi - -# Common definitions -#define state arg1 -#define job arg2 -#define len2 arg2 - -# idx must be a register not clobbered by sha1_x8_avx2 -#define idx %r8 -#define DWORD_idx %r8d - -#define unused_lanes %rbx -#define lane_data %rbx -#define tmp2 %rbx -#define tmp2_w %ebx - -#define job_rax %rax -#define tmp1 %rax -#define size_offset %rax -#define tmp %rax -#define start_offset %rax - -#define tmp3 %arg1 - -#define extra_blocks %arg2 -#define p %arg2 - -.macro LABEL prefix n -\prefix\n\(): -.endm - -.macro JNE_SKIP i -jne skip_\i -.endm - -.altmacro -.macro SET_OFFSET _offset -offset = \_offset -.endm -.noaltmacro - -# JOB* sha1_mb_mgr_flush_avx2(MB_MGR *state) -# arg 1 : rcx : state -ENTRY(sha1_mb_mgr_flush_avx2) - FRAME_BEGIN - push %rbx - - # If bit (32+3) is set, then all lanes are empty - mov _unused_lanes(state), unused_lanes - bt $32+3, unused_lanes - jc return_null - - # find a lane with a non-null job - xor idx, idx - offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne one(%rip), idx - offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne two(%rip), idx - offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne three(%rip), idx - offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne four(%rip), idx - offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne five(%rip), idx - offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne six(%rip), idx - offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne seven(%rip), idx - - # copy idx to empty lanes -copy_lane_data: - offset = (_args + _data_ptr) - mov offset(state,idx,8), tmp - - I = 0 -.rep 8 - offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) -.altmacro - JNE_SKIP %I - offset = (_args + _data_ptr + 8*I) - mov tmp, offset(state) - offset = (_lens + 4*I) - movl $0xFFFFFFFF, offset(state) -LABEL skip_ %I - I = (I+1) -.noaltmacro -.endr - - # Find min length - vmovdqu _lens+0*16(state), %xmm0 - vmovdqu _lens+1*16(state), %xmm1 - - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword - - vmovd %xmm2, DWORD_idx - mov idx, len2 - and $0xF, idx - shr $4, len2 - jz len_is_0 - - vpand clear_low_nibble(%rip), %xmm2, %xmm2 - vpshufd $0, %xmm2, %xmm2 - - vpsubd %xmm2, %xmm0, %xmm0 - vpsubd %xmm2, %xmm1, %xmm1 - - vmovdqu %xmm0, _lens+0*16(state) - vmovdqu %xmm1, _lens+1*16(state) - - # "state" and "args" are the same address, arg1 - # len is arg2 - call sha1_x8_avx2 - # state and idx are intact - - -len_is_0: - # process completed job "idx" - imul $_LANE_DATA_size, idx, lane_data - lea _ldata(state, lane_data), lane_data - - mov _job_in_lane(lane_data), job_rax - movq $0, _job_in_lane(lane_data) - movl $STS_COMPLETED, _status(job_rax) - mov _unused_lanes(state), unused_lanes - shl $4, unused_lanes - or idx, unused_lanes - mov unused_lanes, _unused_lanes(state) - - movl $0xFFFFFFFF, _lens(state, idx, 4) - - vmovd _args_digest(state , idx, 4) , %xmm0 - vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 - vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 - vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 - movl _args_digest+4*32(state, idx, 4), tmp2_w - - vmovdqu %xmm0, _result_digest(job_rax) - offset = (_result_digest + 1*16) - mov tmp2_w, offset(job_rax) - -return: - pop %rbx - FRAME_END - ret - -return_null: - xor job_rax, job_rax - jmp return -ENDPROC(sha1_mb_mgr_flush_avx2) - - -################################################################# - -.align 16 -ENTRY(sha1_mb_mgr_get_comp_job_avx2) - push %rbx - - ## if bit 32+3 is set, then all lanes are empty - mov _unused_lanes(state), unused_lanes - bt $(32+3), unused_lanes - jc .return_null - - # Find min length - vmovdqu _lens(state), %xmm0 - vmovdqu _lens+1*16(state), %xmm1 - - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword - - vmovd %xmm2, DWORD_idx - test $~0xF, idx - jnz .return_null - - # process completed job "idx" - imul $_LANE_DATA_size, idx, lane_data - lea _ldata(state, lane_data), lane_data - - mov _job_in_lane(lane_data), job_rax - movq $0, _job_in_lane(lane_data) - movl $STS_COMPLETED, _status(job_rax) - mov _unused_lanes(state), unused_lanes - shl $4, unused_lanes - or idx, unused_lanes - mov unused_lanes, _unused_lanes(state) - - movl $0xFFFFFFFF, _lens(state, idx, 4) - - vmovd _args_digest(state, idx, 4), %xmm0 - vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 - vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 - vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 - movl _args_digest+4*32(state, idx, 4), tmp2_w - - vmovdqu %xmm0, _result_digest(job_rax) - movl tmp2_w, _result_digest+1*16(job_rax) - - pop %rbx - - ret - -.return_null: - xor job_rax, job_rax - pop %rbx - ret -ENDPROC(sha1_mb_mgr_get_comp_job_avx2) - -.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 -.align 16 -clear_low_nibble: -.octa 0x000000000000000000000000FFFFFFF0 - -.section .rodata.cst8, "aM", @progbits, 8 -.align 8 -one: -.quad 1 -two: -.quad 2 -three: -.quad 3 -four: -.quad 4 -five: -.quad 5 -six: -.quad 6 -seven: -.quad 7 diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c deleted file mode 100644 index d2add0d35f43..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_init_avx2.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Initialization code for multi buffer SHA1 algorithm for AVX2 - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Tim Chen <tim.c.chen@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2014 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "sha1_mb_mgr.h" - -void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state) -{ - unsigned int j; - state->unused_lanes = 0xF76543210ULL; - for (j = 0; j < 8; j++) { - state->lens[j] = 0xFFFFFFFF; - state->ldata[j].job_in_lane = NULL; - } -} diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S deleted file mode 100644 index 7a93b1c0d69a..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Buffer submit code for multi buffer SHA1 algorithm - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * James Guilford <james.guilford@intel.com> - * Tim Chen <tim.c.chen@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2014 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/linkage.h> -#include <asm/frame.h> -#include "sha1_mb_mgr_datastruct.S" - - -.extern sha1_x8_avx - -# LINUX register definitions -arg1 = %rdi -arg2 = %rsi -size_offset = %rcx -tmp2 = %rcx -extra_blocks = %rdx - -# Common definitions -#define state arg1 -#define job %rsi -#define len2 arg2 -#define p2 arg2 - -# idx must be a register not clobberred by sha1_x8_avx2 -idx = %r8 -DWORD_idx = %r8d -last_len = %r8 - -p = %r11 -start_offset = %r11 - -unused_lanes = %rbx -BYTE_unused_lanes = %bl - -job_rax = %rax -len = %rax -DWORD_len = %eax - -lane = %r12 -tmp3 = %r12 - -tmp = %r9 -DWORD_tmp = %r9d - -lane_data = %r10 - -# JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job) -# arg 1 : rcx : state -# arg 2 : rdx : job -ENTRY(sha1_mb_mgr_submit_avx2) - FRAME_BEGIN - push %rbx - push %r12 - - mov _unused_lanes(state), unused_lanes - mov unused_lanes, lane - and $0xF, lane - shr $4, unused_lanes - imul $_LANE_DATA_size, lane, lane_data - movl $STS_BEING_PROCESSED, _status(job) - lea _ldata(state, lane_data), lane_data - mov unused_lanes, _unused_lanes(state) - movl _len(job), DWORD_len - - mov job, _job_in_lane(lane_data) - shl $4, len - or lane, len - - movl DWORD_len, _lens(state , lane, 4) - - # Load digest words from result_digest - vmovdqu _result_digest(job), %xmm0 - mov _result_digest+1*16(job), DWORD_tmp - vmovd %xmm0, _args_digest(state, lane, 4) - vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4) - vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4) - vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4) - movl DWORD_tmp, _args_digest+4*32(state , lane, 4) - - mov _buffer(job), p - mov p, _args_data_ptr(state, lane, 8) - - cmp $0xF, unused_lanes - jne return_null - -start_loop: - # Find min length - vmovdqa _lens(state), %xmm0 - vmovdqa _lens+1*16(state), %xmm1 - - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min value in low dword - - vmovd %xmm2, DWORD_idx - mov idx, len2 - and $0xF, idx - shr $4, len2 - jz len_is_0 - - vpand clear_low_nibble(%rip), %xmm2, %xmm2 - vpshufd $0, %xmm2, %xmm2 - - vpsubd %xmm2, %xmm0, %xmm0 - vpsubd %xmm2, %xmm1, %xmm1 - - vmovdqa %xmm0, _lens + 0*16(state) - vmovdqa %xmm1, _lens + 1*16(state) - - - # "state" and "args" are the same address, arg1 - # len is arg2 - call sha1_x8_avx2 - - # state and idx are intact - -len_is_0: - # process completed job "idx" - imul $_LANE_DATA_size, idx, lane_data - lea _ldata(state, lane_data), lane_data - - mov _job_in_lane(lane_data), job_rax - mov _unused_lanes(state), unused_lanes - movq $0, _job_in_lane(lane_data) - movl $STS_COMPLETED, _status(job_rax) - shl $4, unused_lanes - or idx, unused_lanes - mov unused_lanes, _unused_lanes(state) - - movl $0xFFFFFFFF, _lens(state, idx, 4) - - vmovd _args_digest(state, idx, 4), %xmm0 - vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0 - vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0 - vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0 - movl _args_digest+4*32(state, idx, 4), DWORD_tmp - - vmovdqu %xmm0, _result_digest(job_rax) - movl DWORD_tmp, _result_digest+1*16(job_rax) - -return: - pop %r12 - pop %rbx - FRAME_END - ret - -return_null: - xor job_rax, job_rax - jmp return - -ENDPROC(sha1_mb_mgr_submit_avx2) - -.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 -.align 16 -clear_low_nibble: - .octa 0x000000000000000000000000FFFFFFF0 diff --git a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S deleted file mode 100644 index 20f77aa633de..000000000000 --- a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S +++ /dev/null @@ -1,492 +0,0 @@ -/* - * Multi-buffer SHA1 algorithm hash compute routine - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2014 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * James Guilford <james.guilford@intel.com> - * Tim Chen <tim.c.chen@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2014 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/linkage.h> -#include "sha1_mb_mgr_datastruct.S" - -## code to compute oct SHA1 using SSE-256 -## outer calling routine takes care of save and restore of XMM registers - -## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15# ymm0-15 -## -## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15 -## Linux preserves: rdi rbp r8 -## -## clobbers ymm0-15 - - -# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1 -# "transpose" data in {r0...r7} using temps {t0...t1} -# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7} -# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} -# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} -# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} -# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} -# r4 = {e7 e6 e5 e4 e3 e2 e1 e0} -# r5 = {f7 f6 f5 f4 f3 f2 f1 f0} -# r6 = {g7 g6 g5 g4 g3 g2 g1 g0} -# r7 = {h7 h6 h5 h4 h3 h2 h1 h0} -# -# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7} -# r0 = {h0 g0 f0 e0 d0 c0 b0 a0} -# r1 = {h1 g1 f1 e1 d1 c1 b1 a1} -# r2 = {h2 g2 f2 e2 d2 c2 b2 a2} -# r3 = {h3 g3 f3 e3 d3 c3 b3 a3} -# r4 = {h4 g4 f4 e4 d4 c4 b4 a4} -# r5 = {h5 g5 f5 e5 d5 c5 b5 a5} -# r6 = {h6 g6 f6 e6 d6 c6 b6 a6} -# r7 = {h7 g7 f7 e7 d7 c7 b7 a7} -# - -.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1 - # process top half (r0..r3) {a...d} - vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} - vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} - vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} - vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} - vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1} - vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2} - vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3} - vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0} - - # use r2 in place of t0 - # process bottom half (r4..r7) {e...h} - vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0} - vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2} - vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0} - vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2} - vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1} - vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2} - vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3} - vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0} - - vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6 - vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2 - vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5 - vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1 - vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7 - vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3 - vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4 - vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0 - -.endm -## -## Magic functions defined in FIPS 180-1 -## -# macro MAGIC_F0 F,B,C,D,T ## F = (D ^ (B & (C ^ D))) -.macro MAGIC_F0 regF regB regC regD regT - vpxor \regD, \regC, \regF - vpand \regB, \regF, \regF - vpxor \regD, \regF, \regF -.endm - -# macro MAGIC_F1 F,B,C,D,T ## F = (B ^ C ^ D) -.macro MAGIC_F1 regF regB regC regD regT - vpxor \regC, \regD, \regF - vpxor \regB, \regF, \regF -.endm - -# macro MAGIC_F2 F,B,C,D,T ## F = ((B & C) | (B & D) | (C & D)) -.macro MAGIC_F2 regF regB regC regD regT - vpor \regC, \regB, \regF - vpand \regC, \regB, \regT - vpand \regD, \regF, \regF - vpor \regT, \regF, \regF -.endm - -# macro MAGIC_F3 F,B,C,D,T ## F = (B ^ C ^ D) -.macro MAGIC_F3 regF regB regC regD regT - MAGIC_F1 \regF,\regB,\regC,\regD,\regT -.endm - -# PROLD reg, imm, tmp -.macro PROLD reg imm tmp - vpsrld $(32-\imm), \reg, \tmp - vpslld $\imm, \reg, \reg - vpor \tmp, \reg, \reg -.endm - -.macro PROLD_nd reg imm tmp src - vpsrld $(32-\imm), \src, \tmp - vpslld $\imm, \src, \reg - vpor \tmp, \reg, \reg -.endm - -.macro SHA1_STEP_00_15 regA regB regC regD regE regT regF memW immCNT MAGIC - vpaddd \immCNT, \regE, \regE - vpaddd \memW*32(%rsp), \regE, \regE - PROLD_nd \regT, 5, \regF, \regA - vpaddd \regT, \regE, \regE - \MAGIC \regF, \regB, \regC, \regD, \regT - PROLD \regB, 30, \regT - vpaddd \regF, \regE, \regE -.endm - -.macro SHA1_STEP_16_79 regA regB regC regD regE regT regF memW immCNT MAGIC - vpaddd \immCNT, \regE, \regE - offset = ((\memW - 14) & 15) * 32 - vmovdqu offset(%rsp), W14 - vpxor W14, W16, W16 - offset = ((\memW - 8) & 15) * 32 - vpxor offset(%rsp), W16, W16 - offset = ((\memW - 3) & 15) * 32 - vpxor offset(%rsp), W16, W16 - vpsrld $(32-1), W16, \regF - vpslld $1, W16, W16 - vpor W16, \regF, \regF - - ROTATE_W - - offset = ((\memW - 0) & 15) * 32 - vmovdqu \regF, offset(%rsp) - vpaddd \regF, \regE, \regE - PROLD_nd \regT, 5, \regF, \regA - vpaddd \regT, \regE, \regE - \MAGIC \regF,\regB,\regC,\regD,\regT ## FUN = MAGIC_Fi(B,C,D) - PROLD \regB,30, \regT - vpaddd \regF, \regE, \regE -.endm - -######################################################################## -######################################################################## -######################################################################## - -## FRAMESZ plus pushes must be an odd multiple of 8 -YMM_SAVE = (15-15)*32 -FRAMESZ = 32*16 + YMM_SAVE -_YMM = FRAMESZ - YMM_SAVE - -#define VMOVPS vmovups - -IDX = %rax -inp0 = %r9 -inp1 = %r10 -inp2 = %r11 -inp3 = %r12 -inp4 = %r13 -inp5 = %r14 -inp6 = %r15 -inp7 = %rcx -arg1 = %rdi -arg2 = %rsi -RSP_SAVE = %rdx - -# ymm0 A -# ymm1 B -# ymm2 C -# ymm3 D -# ymm4 E -# ymm5 F AA -# ymm6 T0 BB -# ymm7 T1 CC -# ymm8 T2 DD -# ymm9 T3 EE -# ymm10 T4 TMP -# ymm11 T5 FUN -# ymm12 T6 K -# ymm13 T7 W14 -# ymm14 T8 W15 -# ymm15 T9 W16 - - -A = %ymm0 -B = %ymm1 -C = %ymm2 -D = %ymm3 -E = %ymm4 -F = %ymm5 -T0 = %ymm6 -T1 = %ymm7 -T2 = %ymm8 -T3 = %ymm9 -T4 = %ymm10 -T5 = %ymm11 -T6 = %ymm12 -T7 = %ymm13 -T8 = %ymm14 -T9 = %ymm15 - -AA = %ymm5 -BB = %ymm6 -CC = %ymm7 -DD = %ymm8 -EE = %ymm9 -TMP = %ymm10 -FUN = %ymm11 -K = %ymm12 -W14 = %ymm13 -W15 = %ymm14 -W16 = %ymm15 - -.macro ROTATE_ARGS - TMP_ = E - E = D - D = C - C = B - B = A - A = TMP_ -.endm - -.macro ROTATE_W -TMP_ = W16 -W16 = W15 -W15 = W14 -W14 = TMP_ -.endm - -# 8 streams x 5 32bit words per digest x 4 bytes per word -#define DIGEST_SIZE (8*5*4) - -.align 32 - -# void sha1_x8_avx2(void **input_data, UINT128 *digest, UINT32 size) -# arg 1 : pointer to array[4] of pointer to input data -# arg 2 : size (in blocks) ;; assumed to be >= 1 -# -ENTRY(sha1_x8_avx2) - - # save callee-saved clobbered registers to comply with C function ABI - push %r12 - push %r13 - push %r14 - push %r15 - - #save rsp - mov %rsp, RSP_SAVE - sub $FRAMESZ, %rsp - - #align rsp to 32 Bytes - and $~0x1F, %rsp - - ## Initialize digests - vmovdqu 0*32(arg1), A - vmovdqu 1*32(arg1), B - vmovdqu 2*32(arg1), C - vmovdqu 3*32(arg1), D - vmovdqu 4*32(arg1), E - - ## transpose input onto stack - mov _data_ptr+0*8(arg1),inp0 - mov _data_ptr+1*8(arg1),inp1 - mov _data_ptr+2*8(arg1),inp2 - mov _data_ptr+3*8(arg1),inp3 - mov _data_ptr+4*8(arg1),inp4 - mov _data_ptr+5*8(arg1),inp5 - mov _data_ptr+6*8(arg1),inp6 - mov _data_ptr+7*8(arg1),inp7 - - xor IDX, IDX -lloop: - vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), F - I=0 -.rep 2 - VMOVPS (inp0, IDX), T0 - VMOVPS (inp1, IDX), T1 - VMOVPS (inp2, IDX), T2 - VMOVPS (inp3, IDX), T3 - VMOVPS (inp4, IDX), T4 - VMOVPS (inp5, IDX), T5 - VMOVPS (inp6, IDX), T6 - VMOVPS (inp7, IDX), T7 - - TRANSPOSE8 T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 - vpshufb F, T0, T0 - vmovdqu T0, (I*8)*32(%rsp) - vpshufb F, T1, T1 - vmovdqu T1, (I*8+1)*32(%rsp) - vpshufb F, T2, T2 - vmovdqu T2, (I*8+2)*32(%rsp) - vpshufb F, T3, T3 - vmovdqu T3, (I*8+3)*32(%rsp) - vpshufb F, T4, T4 - vmovdqu T4, (I*8+4)*32(%rsp) - vpshufb F, T5, T5 - vmovdqu T5, (I*8+5)*32(%rsp) - vpshufb F, T6, T6 - vmovdqu T6, (I*8+6)*32(%rsp) - vpshufb F, T7, T7 - vmovdqu T7, (I*8+7)*32(%rsp) - add $32, IDX - I = (I+1) -.endr - # save old digests - vmovdqu A,AA - vmovdqu B,BB - vmovdqu C,CC - vmovdqu D,DD - vmovdqu E,EE - -## -## perform 0-79 steps -## - vmovdqu K00_19(%rip), K -## do rounds 0...15 - I = 0 -.rep 16 - SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0 - ROTATE_ARGS - I = (I+1) -.endr - -## do rounds 16...19 - vmovdqu ((16 - 16) & 15) * 32 (%rsp), W16 - vmovdqu ((16 - 15) & 15) * 32 (%rsp), W15 -.rep 4 - SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0 - ROTATE_ARGS - I = (I+1) -.endr - -## do rounds 20...39 - vmovdqu K20_39(%rip), K -.rep 20 - SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1 - ROTATE_ARGS - I = (I+1) -.endr - -## do rounds 40...59 - vmovdqu K40_59(%rip), K -.rep 20 - SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2 - ROTATE_ARGS - I = (I+1) -.endr - -## do rounds 60...79 - vmovdqu K60_79(%rip), K -.rep 20 - SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3 - ROTATE_ARGS - I = (I+1) -.endr - - vpaddd AA,A,A - vpaddd BB,B,B - vpaddd CC,C,C - vpaddd DD,D,D - vpaddd EE,E,E - - sub $1, arg2 - jne lloop - - # write out digests - vmovdqu A, 0*32(arg1) - vmovdqu B, 1*32(arg1) - vmovdqu C, 2*32(arg1) - vmovdqu D, 3*32(arg1) - vmovdqu E, 4*32(arg1) - - # update input pointers - add IDX, inp0 - add IDX, inp1 - add IDX, inp2 - add IDX, inp3 - add IDX, inp4 - add IDX, inp5 - add IDX, inp6 - add IDX, inp7 - mov inp0, _data_ptr (arg1) - mov inp1, _data_ptr + 1*8(arg1) - mov inp2, _data_ptr + 2*8(arg1) - mov inp3, _data_ptr + 3*8(arg1) - mov inp4, _data_ptr + 4*8(arg1) - mov inp5, _data_ptr + 5*8(arg1) - mov inp6, _data_ptr + 6*8(arg1) - mov inp7, _data_ptr + 7*8(arg1) - - ################ - ## Postamble - - mov RSP_SAVE, %rsp - - # restore callee-saved clobbered registers - pop %r15 - pop %r14 - pop %r13 - pop %r12 - - ret -ENDPROC(sha1_x8_avx2) - - -.section .rodata.cst32.K00_19, "aM", @progbits, 32 -.align 32 -K00_19: -.octa 0x5A8279995A8279995A8279995A827999 -.octa 0x5A8279995A8279995A8279995A827999 - -.section .rodata.cst32.K20_39, "aM", @progbits, 32 -.align 32 -K20_39: -.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 -.octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 - -.section .rodata.cst32.K40_59, "aM", @progbits, 32 -.align 32 -K40_59: -.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC -.octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC - -.section .rodata.cst32.K60_79, "aM", @progbits, 32 -.align 32 -K60_79: -.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 -.octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 - -.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 -.align 32 -PSHUFFLE_BYTE_FLIP_MASK: -.octa 0x0c0d0e0f08090a0b0405060700010203 -.octa 0x0c0d0e0f08090a0b0405060700010203 diff --git a/arch/x86/crypto/sha256-mb/Makefile b/arch/x86/crypto/sha256-mb/Makefile deleted file mode 100644 index 53ad6e7db747..000000000000 --- a/arch/x86/crypto/sha256-mb/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Arch-specific CryptoAPI modules. -# - -OBJECT_FILES_NON_STANDARD := y - -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ - $(comma)4)$(comma)%ymm2,yes,no) -ifeq ($(avx2_supported),yes) - obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb.o - sha256-mb-y := sha256_mb.o sha256_mb_mgr_flush_avx2.o \ - sha256_mb_mgr_init_avx2.o sha256_mb_mgr_submit_avx2.o sha256_x8_avx2.o -endif diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c deleted file mode 100644 index 97c5fc43e115..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_mb.c +++ /dev/null @@ -1,1013 +0,0 @@ -/* - * Multi buffer SHA256 algorithm Glue Code - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/cryptohash.h> -#include <linux/types.h> -#include <linux/list.h> -#include <crypto/scatterwalk.h> -#include <crypto/sha.h> -#include <crypto/mcryptd.h> -#include <crypto/crypto_wq.h> -#include <asm/byteorder.h> -#include <linux/hardirq.h> -#include <asm/fpu/api.h> -#include "sha256_mb_ctx.h" - -#define FLUSH_INTERVAL 1000 /* in usec */ - -static struct mcryptd_alg_state sha256_mb_alg_state; - -struct sha256_mb_ctx { - struct mcryptd_ahash *mcryptd_tfm; -}; - -static inline struct mcryptd_hash_request_ctx - *cast_hash_to_mcryptd_ctx(struct sha256_hash_ctx *hash_ctx) -{ - struct ahash_request *areq; - - areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); - return container_of(areq, struct mcryptd_hash_request_ctx, areq); -} - -static inline struct ahash_request - *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) -{ - return container_of((void *) ctx, struct ahash_request, __ctx); -} - -static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, - struct ahash_request *areq) -{ - rctx->flag = HASH_UPDATE; -} - -static asmlinkage void (*sha256_job_mgr_init)(struct sha256_mb_mgr *state); -static asmlinkage struct job_sha256* (*sha256_job_mgr_submit) - (struct sha256_mb_mgr *state, struct job_sha256 *job); -static asmlinkage struct job_sha256* (*sha256_job_mgr_flush) - (struct sha256_mb_mgr *state); -static asmlinkage struct job_sha256* (*sha256_job_mgr_get_comp_job) - (struct sha256_mb_mgr *state); - -inline uint32_t sha256_pad(uint8_t padblock[SHA256_BLOCK_SIZE * 2], - uint64_t total_len) -{ - uint32_t i = total_len & (SHA256_BLOCK_SIZE - 1); - - memset(&padblock[i], 0, SHA256_BLOCK_SIZE); - padblock[i] = 0x80; - - i += ((SHA256_BLOCK_SIZE - 1) & - (0 - (total_len + SHA256_PADLENGTHFIELD_SIZE + 1))) - + 1 + SHA256_PADLENGTHFIELD_SIZE; - -#if SHA256_PADLENGTHFIELD_SIZE == 16 - *((uint64_t *) &padblock[i - 16]) = 0; -#endif - - *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); - - /* Number of extra blocks to hash */ - return i >> SHA256_LOG2_BLOCK_SIZE; -} - -static struct sha256_hash_ctx - *sha256_ctx_mgr_resubmit(struct sha256_ctx_mgr *mgr, - struct sha256_hash_ctx *ctx) -{ - while (ctx) { - if (ctx->status & HASH_CTX_STS_COMPLETE) { - /* Clear PROCESSING bit */ - ctx->status = HASH_CTX_STS_COMPLETE; - return ctx; - } - - /* - * If the extra blocks are empty, begin hashing what remains - * in the user's buffer. - */ - if (ctx->partial_block_buffer_length == 0 && - ctx->incoming_buffer_length) { - - const void *buffer = ctx->incoming_buffer; - uint32_t len = ctx->incoming_buffer_length; - uint32_t copy_len; - - /* - * Only entire blocks can be hashed. - * Copy remainder to extra blocks buffer. - */ - copy_len = len & (SHA256_BLOCK_SIZE-1); - - if (copy_len) { - len -= copy_len; - memcpy(ctx->partial_block_buffer, - ((const char *) buffer + len), - copy_len); - ctx->partial_block_buffer_length = copy_len; - } - - ctx->incoming_buffer_length = 0; - - /* len should be a multiple of the block size now */ - assert((len % SHA256_BLOCK_SIZE) == 0); - - /* Set len to the number of blocks to be hashed */ - len >>= SHA256_LOG2_BLOCK_SIZE; - - if (len) { - - ctx->job.buffer = (uint8_t *) buffer; - ctx->job.len = len; - ctx = (struct sha256_hash_ctx *) - sha256_job_mgr_submit(&mgr->mgr, &ctx->job); - continue; - } - } - - /* - * If the extra blocks are not empty, then we are - * either on the last block(s) or we need more - * user input before continuing. - */ - if (ctx->status & HASH_CTX_STS_LAST) { - - uint8_t *buf = ctx->partial_block_buffer; - uint32_t n_extra_blocks = - sha256_pad(buf, ctx->total_length); - - ctx->status = (HASH_CTX_STS_PROCESSING | - HASH_CTX_STS_COMPLETE); - ctx->job.buffer = buf; - ctx->job.len = (uint32_t) n_extra_blocks; - ctx = (struct sha256_hash_ctx *) - sha256_job_mgr_submit(&mgr->mgr, &ctx->job); - continue; - } - - ctx->status = HASH_CTX_STS_IDLE; - return ctx; - } - - return NULL; -} - -static struct sha256_hash_ctx - *sha256_ctx_mgr_get_comp_ctx(struct sha256_ctx_mgr *mgr) -{ - /* - * If get_comp_job returns NULL, there are no jobs complete. - * If get_comp_job returns a job, verify that it is safe to return to - * the user. If it is not ready, resubmit the job to finish processing. - * If sha256_ctx_mgr_resubmit returned a job, it is ready to be - * returned. Otherwise, all jobs currently being managed by the - * hash_ctx_mgr still need processing. - */ - struct sha256_hash_ctx *ctx; - - ctx = (struct sha256_hash_ctx *) sha256_job_mgr_get_comp_job(&mgr->mgr); - return sha256_ctx_mgr_resubmit(mgr, ctx); -} - -static void sha256_ctx_mgr_init(struct sha256_ctx_mgr *mgr) -{ - sha256_job_mgr_init(&mgr->mgr); -} - -static struct sha256_hash_ctx *sha256_ctx_mgr_submit(struct sha256_ctx_mgr *mgr, - struct sha256_hash_ctx *ctx, - const void *buffer, - uint32_t len, - int flags) -{ - if (flags & ~(HASH_UPDATE | HASH_LAST)) { - /* User should not pass anything other than UPDATE or LAST */ - ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; - return ctx; - } - - if (ctx->status & HASH_CTX_STS_PROCESSING) { - /* Cannot submit to a currently processing job. */ - ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; - return ctx; - } - - if (ctx->status & HASH_CTX_STS_COMPLETE) { - /* Cannot update a finished job. */ - ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; - return ctx; - } - - /* If we made it here, there was no error during this call to submit */ - ctx->error = HASH_CTX_ERROR_NONE; - - /* Store buffer ptr info from user */ - ctx->incoming_buffer = buffer; - ctx->incoming_buffer_length = len; - - /* - * Store the user's request flags and mark this ctx as currently - * being processed. - */ - ctx->status = (flags & HASH_LAST) ? - (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : - HASH_CTX_STS_PROCESSING; - - /* Advance byte counter */ - ctx->total_length += len; - - /* - * If there is anything currently buffered in the extra blocks, - * append to it until it contains a whole block. - * Or if the user's buffer contains less than a whole block, - * append as much as possible to the extra block. - */ - if (ctx->partial_block_buffer_length || len < SHA256_BLOCK_SIZE) { - /* - * Compute how many bytes to copy from user buffer into - * extra block - */ - uint32_t copy_len = SHA256_BLOCK_SIZE - - ctx->partial_block_buffer_length; - if (len < copy_len) - copy_len = len; - - if (copy_len) { - /* Copy and update relevant pointers and counters */ - memcpy( - &ctx->partial_block_buffer[ctx->partial_block_buffer_length], - buffer, copy_len); - - ctx->partial_block_buffer_length += copy_len; - ctx->incoming_buffer = (const void *) - ((const char *)buffer + copy_len); - ctx->incoming_buffer_length = len - copy_len; - } - - /* The extra block should never contain more than 1 block */ - assert(ctx->partial_block_buffer_length <= SHA256_BLOCK_SIZE); - - /* - * If the extra block buffer contains exactly 1 block, - * it can be hashed. - */ - if (ctx->partial_block_buffer_length >= SHA256_BLOCK_SIZE) { - ctx->partial_block_buffer_length = 0; - - ctx->job.buffer = ctx->partial_block_buffer; - ctx->job.len = 1; - ctx = (struct sha256_hash_ctx *) - sha256_job_mgr_submit(&mgr->mgr, &ctx->job); - } - } - - return sha256_ctx_mgr_resubmit(mgr, ctx); -} - -static struct sha256_hash_ctx *sha256_ctx_mgr_flush(struct sha256_ctx_mgr *mgr) -{ - struct sha256_hash_ctx *ctx; - - while (1) { - ctx = (struct sha256_hash_ctx *) - sha256_job_mgr_flush(&mgr->mgr); - - /* If flush returned 0, there are no more jobs in flight. */ - if (!ctx) - return NULL; - - /* - * If flush returned a job, resubmit the job to finish - * processing. - */ - ctx = sha256_ctx_mgr_resubmit(mgr, ctx); - - /* - * If sha256_ctx_mgr_resubmit returned a job, it is ready to - * be returned. Otherwise, all jobs currently being managed by - * the sha256_ctx_mgr still need processing. Loop. - */ - if (ctx) - return ctx; - } -} - -static int sha256_mb_init(struct ahash_request *areq) -{ - struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); - - hash_ctx_init(sctx); - sctx->job.result_digest[0] = SHA256_H0; - sctx->job.result_digest[1] = SHA256_H1; - sctx->job.result_digest[2] = SHA256_H2; - sctx->job.result_digest[3] = SHA256_H3; - sctx->job.result_digest[4] = SHA256_H4; - sctx->job.result_digest[5] = SHA256_H5; - sctx->job.result_digest[6] = SHA256_H6; - sctx->job.result_digest[7] = SHA256_H7; - sctx->total_length = 0; - sctx->partial_block_buffer_length = 0; - sctx->status = HASH_CTX_STS_IDLE; - - return 0; -} - -static int sha256_mb_set_results(struct mcryptd_hash_request_ctx *rctx) -{ - int i; - struct sha256_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); - __be32 *dst = (__be32 *) rctx->out; - - for (i = 0; i < 8; ++i) - dst[i] = cpu_to_be32(sctx->job.result_digest[i]); - - return 0; -} - -static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, - struct mcryptd_alg_cstate *cstate, bool flush) -{ - int flag = HASH_UPDATE; - int nbytes, err = 0; - struct mcryptd_hash_request_ctx *rctx = *ret_rctx; - struct sha256_hash_ctx *sha_ctx; - - /* more work ? */ - while (!(rctx->flag & HASH_DONE)) { - nbytes = crypto_ahash_walk_done(&rctx->walk, 0); - if (nbytes < 0) { - err = nbytes; - goto out; - } - /* check if the walk is done */ - if (crypto_ahash_walk_last(&rctx->walk)) { - rctx->flag |= HASH_DONE; - if (rctx->flag & HASH_FINAL) - flag |= HASH_LAST; - - } - sha_ctx = (struct sha256_hash_ctx *) - ahash_request_ctx(&rctx->areq); - kernel_fpu_begin(); - sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, - rctx->walk.data, nbytes, flag); - if (!sha_ctx) { - if (flush) - sha_ctx = sha256_ctx_mgr_flush(cstate->mgr); - } - kernel_fpu_end(); - if (sha_ctx) - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - else { - rctx = NULL; - goto out; - } - } - - /* copy the results */ - if (rctx->flag & HASH_FINAL) - sha256_mb_set_results(rctx); - -out: - *ret_rctx = rctx; - return err; -} - -static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, - struct mcryptd_alg_cstate *cstate, - int err) -{ - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); - struct sha256_hash_ctx *sha_ctx; - struct mcryptd_hash_request_ctx *req_ctx; - int ret; - - /* remove from work list */ - spin_lock(&cstate->work_lock); - list_del(&rctx->waiter); - spin_unlock(&cstate->work_lock); - - if (irqs_disabled()) - rctx->complete(&req->base, err); - else { - local_bh_disable(); - rctx->complete(&req->base, err); - local_bh_enable(); - } - - /* check to see if there are other jobs that are done */ - sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr); - while (sha_ctx) { - req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&req_ctx, cstate, false); - if (req_ctx) { - spin_lock(&cstate->work_lock); - list_del(&req_ctx->waiter); - spin_unlock(&cstate->work_lock); - - req = cast_mcryptd_ctx_to_req(req_ctx); - if (irqs_disabled()) - req_ctx->complete(&req->base, ret); - else { - local_bh_disable(); - req_ctx->complete(&req->base, ret); - local_bh_enable(); - } - } - sha_ctx = sha256_ctx_mgr_get_comp_ctx(cstate->mgr); - } - - return 0; -} - -static void sha256_mb_add_list(struct mcryptd_hash_request_ctx *rctx, - struct mcryptd_alg_cstate *cstate) -{ - unsigned long next_flush; - unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); - - /* initialize tag */ - rctx->tag.arrival = jiffies; /* tag the arrival time */ - rctx->tag.seq_num = cstate->next_seq_num++; - next_flush = rctx->tag.arrival + delay; - rctx->tag.expire = next_flush; - - spin_lock(&cstate->work_lock); - list_add_tail(&rctx->waiter, &cstate->work_list); - spin_unlock(&cstate->work_lock); - - mcryptd_arm_flusher(cstate, delay); -} - -static int sha256_mb_update(struct ahash_request *areq) -{ - struct mcryptd_hash_request_ctx *rctx = - container_of(areq, struct mcryptd_hash_request_ctx, areq); - struct mcryptd_alg_cstate *cstate = - this_cpu_ptr(sha256_mb_alg_state.alg_cstate); - - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); - struct sha256_hash_ctx *sha_ctx; - int ret = 0, nbytes; - - /* sanity check */ - if (rctx->tag.cpu != smp_processor_id()) { - pr_err("mcryptd error: cpu clash\n"); - goto done; - } - - /* need to init context */ - req_ctx_init(rctx, areq); - - nbytes = crypto_ahash_walk_first(req, &rctx->walk); - - if (nbytes < 0) { - ret = nbytes; - goto done; - } - - if (crypto_ahash_walk_last(&rctx->walk)) - rctx->flag |= HASH_DONE; - - /* submit */ - sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); - sha256_mb_add_list(rctx, cstate); - kernel_fpu_begin(); - sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, - nbytes, HASH_UPDATE); - kernel_fpu_end(); - - /* check if anything is returned */ - if (!sha_ctx) - return -EINPROGRESS; - - if (sha_ctx->error) { - ret = sha_ctx->error; - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - goto done; - } - - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&rctx, cstate, false); - - if (!rctx) - return -EINPROGRESS; -done: - sha_complete_job(rctx, cstate, ret); - return ret; -} - -static int sha256_mb_finup(struct ahash_request *areq) -{ - struct mcryptd_hash_request_ctx *rctx = - container_of(areq, struct mcryptd_hash_request_ctx, areq); - struct mcryptd_alg_cstate *cstate = - this_cpu_ptr(sha256_mb_alg_state.alg_cstate); - - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); - struct sha256_hash_ctx *sha_ctx; - int ret = 0, flag = HASH_UPDATE, nbytes; - - /* sanity check */ - if (rctx->tag.cpu != smp_processor_id()) { - pr_err("mcryptd error: cpu clash\n"); - goto done; - } - - /* need to init context */ - req_ctx_init(rctx, areq); - - nbytes = crypto_ahash_walk_first(req, &rctx->walk); - - if (nbytes < 0) { - ret = nbytes; - goto done; - } - - if (crypto_ahash_walk_last(&rctx->walk)) { - rctx->flag |= HASH_DONE; - flag = HASH_LAST; - } - - /* submit */ - rctx->flag |= HASH_FINAL; - sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); - sha256_mb_add_list(rctx, cstate); - - kernel_fpu_begin(); - sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, - nbytes, flag); - kernel_fpu_end(); - - /* check if anything is returned */ - if (!sha_ctx) - return -EINPROGRESS; - - if (sha_ctx->error) { - ret = sha_ctx->error; - goto done; - } - - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&rctx, cstate, false); - if (!rctx) - return -EINPROGRESS; -done: - sha_complete_job(rctx, cstate, ret); - return ret; -} - -static int sha256_mb_final(struct ahash_request *areq) -{ - struct mcryptd_hash_request_ctx *rctx = - container_of(areq, struct mcryptd_hash_request_ctx, - areq); - struct mcryptd_alg_cstate *cstate = - this_cpu_ptr(sha256_mb_alg_state.alg_cstate); - - struct sha256_hash_ctx *sha_ctx; - int ret = 0; - u8 data; - - /* sanity check */ - if (rctx->tag.cpu != smp_processor_id()) { - pr_err("mcryptd error: cpu clash\n"); - goto done; - } - - /* need to init context */ - req_ctx_init(rctx, areq); - - rctx->flag |= HASH_DONE | HASH_FINAL; - - sha_ctx = (struct sha256_hash_ctx *) ahash_request_ctx(areq); - /* flag HASH_FINAL and 0 data size */ - sha256_mb_add_list(rctx, cstate); - kernel_fpu_begin(); - sha_ctx = sha256_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, - HASH_LAST); - kernel_fpu_end(); - - /* check if anything is returned */ - if (!sha_ctx) - return -EINPROGRESS; - - if (sha_ctx->error) { - ret = sha_ctx->error; - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - goto done; - } - - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&rctx, cstate, false); - if (!rctx) - return -EINPROGRESS; -done: - sha_complete_job(rctx, cstate, ret); - return ret; -} - -static int sha256_mb_export(struct ahash_request *areq, void *out) -{ - struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); - - memcpy(out, sctx, sizeof(*sctx)); - - return 0; -} - -static int sha256_mb_import(struct ahash_request *areq, const void *in) -{ - struct sha256_hash_ctx *sctx = ahash_request_ctx(areq); - - memcpy(sctx, in, sizeof(*sctx)); - - return 0; -} - -static int sha256_mb_async_init_tfm(struct crypto_tfm *tfm) -{ - struct mcryptd_ahash *mcryptd_tfm; - struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); - struct mcryptd_hash_ctx *mctx; - - mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha256-mb", - CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(mcryptd_tfm)) - return PTR_ERR(mcryptd_tfm); - mctx = crypto_ahash_ctx(&mcryptd_tfm->base); - mctx->alg_state = &sha256_mb_alg_state; - ctx->mcryptd_tfm = mcryptd_tfm; - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct ahash_request) + - crypto_ahash_reqsize(&mcryptd_tfm->base)); - - return 0; -} - -static void sha256_mb_async_exit_tfm(struct crypto_tfm *tfm) -{ - struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); - - mcryptd_free_ahash(ctx->mcryptd_tfm); -} - -static int sha256_mb_areq_init_tfm(struct crypto_tfm *tfm) -{ - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct ahash_request) + - sizeof(struct sha256_hash_ctx)); - - return 0; -} - -static void sha256_mb_areq_exit_tfm(struct crypto_tfm *tfm) -{ - struct sha256_mb_ctx *ctx = crypto_tfm_ctx(tfm); - - mcryptd_free_ahash(ctx->mcryptd_tfm); -} - -static struct ahash_alg sha256_mb_areq_alg = { - .init = sha256_mb_init, - .update = sha256_mb_update, - .final = sha256_mb_final, - .finup = sha256_mb_finup, - .export = sha256_mb_export, - .import = sha256_mb_import, - .halg = { - .digestsize = SHA256_DIGEST_SIZE, - .statesize = sizeof(struct sha256_hash_ctx), - .base = { - .cra_name = "__sha256-mb", - .cra_driver_name = "__intel_sha256-mb", - .cra_priority = 100, - /* - * use ASYNC flag as some buffers in multi-buffer - * algo may not have completed before hashing thread - * sleep - */ - .cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT - (sha256_mb_areq_alg.halg.base.cra_list), - .cra_init = sha256_mb_areq_init_tfm, - .cra_exit = sha256_mb_areq_exit_tfm, - .cra_ctxsize = sizeof(struct sha256_hash_ctx), - } - } -}; - -static int sha256_mb_async_init(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_init(mcryptd_req); -} - -static int sha256_mb_async_update(struct ahash_request *req) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_update(mcryptd_req); -} - -static int sha256_mb_async_finup(struct ahash_request *req) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_finup(mcryptd_req); -} - -static int sha256_mb_async_final(struct ahash_request *req) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_final(mcryptd_req); -} - -static int sha256_mb_async_digest(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_digest(mcryptd_req); -} - -static int sha256_mb_async_export(struct ahash_request *req, void *out) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_export(mcryptd_req, out); -} - -static int sha256_mb_async_import(struct ahash_request *req, const void *in) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha256_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); - struct mcryptd_hash_request_ctx *rctx; - struct ahash_request *areq; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - rctx = ahash_request_ctx(mcryptd_req); - areq = &rctx->areq; - - ahash_request_set_tfm(areq, child); - ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, - rctx->complete, req); - - return crypto_ahash_import(mcryptd_req, in); -} - -static struct ahash_alg sha256_mb_async_alg = { - .init = sha256_mb_async_init, - .update = sha256_mb_async_update, - .final = sha256_mb_async_final, - .finup = sha256_mb_async_finup, - .export = sha256_mb_async_export, - .import = sha256_mb_async_import, - .digest = sha256_mb_async_digest, - .halg = { - .digestsize = SHA256_DIGEST_SIZE, - .statesize = sizeof(struct sha256_hash_ctx), - .base = { - .cra_name = "sha256", - .cra_driver_name = "sha256_mb", - /* - * Low priority, since with few concurrent hash requests - * this is extremely slow due to the flush delay. Users - * whose workloads would benefit from this can request - * it explicitly by driver name, or can increase its - * priority at runtime using NETLINK_CRYPTO. - */ - .cra_priority = 50, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT - (sha256_mb_async_alg.halg.base.cra_list), - .cra_init = sha256_mb_async_init_tfm, - .cra_exit = sha256_mb_async_exit_tfm, - .cra_ctxsize = sizeof(struct sha256_mb_ctx), - .cra_alignmask = 0, - }, - }, -}; - -static unsigned long sha256_mb_flusher(struct mcryptd_alg_cstate *cstate) -{ - struct mcryptd_hash_request_ctx *rctx; - unsigned long cur_time; - unsigned long next_flush = 0; - struct sha256_hash_ctx *sha_ctx; - - - cur_time = jiffies; - - while (!list_empty(&cstate->work_list)) { - rctx = list_entry(cstate->work_list.next, - struct mcryptd_hash_request_ctx, waiter); - if (time_before(cur_time, rctx->tag.expire)) - break; - kernel_fpu_begin(); - sha_ctx = (struct sha256_hash_ctx *) - sha256_ctx_mgr_flush(cstate->mgr); - kernel_fpu_end(); - if (!sha_ctx) { - pr_err("sha256_mb error: nothing got" - " flushed for non-empty list\n"); - break; - } - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - sha_finish_walk(&rctx, cstate, true); - sha_complete_job(rctx, cstate, 0); - } - - if (!list_empty(&cstate->work_list)) { - rctx = list_entry(cstate->work_list.next, - struct mcryptd_hash_request_ctx, waiter); - /* get the hash context and then flush time */ - next_flush = rctx->tag.expire; - mcryptd_arm_flusher(cstate, get_delay(next_flush)); - } - return next_flush; -} - -static int __init sha256_mb_mod_init(void) -{ - - int cpu; - int err; - struct mcryptd_alg_cstate *cpu_state; - - /* check for dependent cpu features */ - if (!boot_cpu_has(X86_FEATURE_AVX2) || - !boot_cpu_has(X86_FEATURE_BMI2)) - return -ENODEV; - - /* initialize multibuffer structures */ - sha256_mb_alg_state.alg_cstate = alloc_percpu - (struct mcryptd_alg_cstate); - - sha256_job_mgr_init = sha256_mb_mgr_init_avx2; - sha256_job_mgr_submit = sha256_mb_mgr_submit_avx2; - sha256_job_mgr_flush = sha256_mb_mgr_flush_avx2; - sha256_job_mgr_get_comp_job = sha256_mb_mgr_get_comp_job_avx2; - - if (!sha256_mb_alg_state.alg_cstate) - return -ENOMEM; - for_each_possible_cpu(cpu) { - cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); - cpu_state->next_flush = 0; - cpu_state->next_seq_num = 0; - cpu_state->flusher_engaged = false; - INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); - cpu_state->cpu = cpu; - cpu_state->alg_state = &sha256_mb_alg_state; - cpu_state->mgr = kzalloc(sizeof(struct sha256_ctx_mgr), - GFP_KERNEL); - if (!cpu_state->mgr) - goto err2; - sha256_ctx_mgr_init(cpu_state->mgr); - INIT_LIST_HEAD(&cpu_state->work_list); - spin_lock_init(&cpu_state->work_lock); - } - sha256_mb_alg_state.flusher = &sha256_mb_flusher; - - err = crypto_register_ahash(&sha256_mb_areq_alg); - if (err) - goto err2; - err = crypto_register_ahash(&sha256_mb_async_alg); - if (err) - goto err1; - - - return 0; -err1: - crypto_unregister_ahash(&sha256_mb_areq_alg); -err2: - for_each_possible_cpu(cpu) { - cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); - kfree(cpu_state->mgr); - } - free_percpu(sha256_mb_alg_state.alg_cstate); - return -ENODEV; -} - -static void __exit sha256_mb_mod_fini(void) -{ - int cpu; - struct mcryptd_alg_cstate *cpu_state; - - crypto_unregister_ahash(&sha256_mb_async_alg); - crypto_unregister_ahash(&sha256_mb_areq_alg); - for_each_possible_cpu(cpu) { - cpu_state = per_cpu_ptr(sha256_mb_alg_state.alg_cstate, cpu); - kfree(cpu_state->mgr); - } - free_percpu(sha256_mb_alg_state.alg_cstate); -} - -module_init(sha256_mb_mod_init); -module_exit(sha256_mb_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, multi buffer accelerated"); - -MODULE_ALIAS_CRYPTO("sha256"); diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h b/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h deleted file mode 100644 index 7c432543dc7f..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_mb_ctx.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Header file for multi buffer SHA256 context - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _SHA_MB_CTX_INTERNAL_H -#define _SHA_MB_CTX_INTERNAL_H - -#include "sha256_mb_mgr.h" - -#define HASH_UPDATE 0x00 -#define HASH_LAST 0x01 -#define HASH_DONE 0x02 -#define HASH_FINAL 0x04 - -#define HASH_CTX_STS_IDLE 0x00 -#define HASH_CTX_STS_PROCESSING 0x01 -#define HASH_CTX_STS_LAST 0x02 -#define HASH_CTX_STS_COMPLETE 0x04 - -enum hash_ctx_error { - HASH_CTX_ERROR_NONE = 0, - HASH_CTX_ERROR_INVALID_FLAGS = -1, - HASH_CTX_ERROR_ALREADY_PROCESSING = -2, - HASH_CTX_ERROR_ALREADY_COMPLETED = -3, - -#ifdef HASH_CTX_DEBUG - HASH_CTX_ERROR_DEBUG_DIGEST_MISMATCH = -4, -#endif -}; - - -#define hash_ctx_user_data(ctx) ((ctx)->user_data) -#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) -#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) -#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) -#define hash_ctx_status(ctx) ((ctx)->status) -#define hash_ctx_error(ctx) ((ctx)->error) -#define hash_ctx_init(ctx) \ - do { \ - (ctx)->error = HASH_CTX_ERROR_NONE; \ - (ctx)->status = HASH_CTX_STS_COMPLETE; \ - } while (0) - - -/* Hash Constants and Typedefs */ -#define SHA256_DIGEST_LENGTH 8 -#define SHA256_LOG2_BLOCK_SIZE 6 - -#define SHA256_PADLENGTHFIELD_SIZE 8 - -#ifdef SHA_MB_DEBUG -#define assert(expr) \ -do { \ - if (unlikely(!(expr))) { \ - printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ - #expr, __FILE__, __func__, __LINE__); \ - } \ -} while (0) -#else -#define assert(expr) do {} while (0) -#endif - -struct sha256_ctx_mgr { - struct sha256_mb_mgr mgr; -}; - -/* typedef struct sha256_ctx_mgr sha256_ctx_mgr; */ - -struct sha256_hash_ctx { - /* Must be at struct offset 0 */ - struct job_sha256 job; - /* status flag */ - int status; - /* error flag */ - int error; - - uint64_t total_length; - const void *incoming_buffer; - uint32_t incoming_buffer_length; - uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2]; - uint32_t partial_block_buffer_length; - void *user_data; -}; - -#endif diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h b/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h deleted file mode 100644 index b01ae408c56d..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Header file for multi buffer SHA256 algorithm manager - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef __SHA_MB_MGR_H -#define __SHA_MB_MGR_H - -#include <linux/types.h> - -#define NUM_SHA256_DIGEST_WORDS 8 - -enum job_sts { STS_UNKNOWN = 0, - STS_BEING_PROCESSED = 1, - STS_COMPLETED = 2, - STS_INTERNAL_ERROR = 3, - STS_ERROR = 4 -}; - -struct job_sha256 { - u8 *buffer; - u32 len; - u32 result_digest[NUM_SHA256_DIGEST_WORDS] __aligned(32); - enum job_sts status; - void *user_data; -}; - -/* SHA256 out-of-order scheduler */ - -/* typedef uint32_t sha8_digest_array[8][8]; */ - -struct sha256_args_x8 { - uint32_t digest[8][8]; - uint8_t *data_ptr[8]; -}; - -struct sha256_lane_data { - struct job_sha256 *job_in_lane; -}; - -struct sha256_mb_mgr { - struct sha256_args_x8 args; - - uint32_t lens[8]; - - /* each byte is index (0...7) of unused lanes */ - uint64_t unused_lanes; - /* byte 4 is set to FF as a flag */ - struct sha256_lane_data ldata[8]; -}; - - -#define SHA256_MB_MGR_NUM_LANES_AVX2 8 - -void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state); -struct job_sha256 *sha256_mb_mgr_submit_avx2(struct sha256_mb_mgr *state, - struct job_sha256 *job); -struct job_sha256 *sha256_mb_mgr_flush_avx2(struct sha256_mb_mgr *state); -struct job_sha256 *sha256_mb_mgr_get_comp_job_avx2(struct sha256_mb_mgr *state); - -#endif diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S deleted file mode 100644 index 5c377bac21d0..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_datastruct.S +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Header file for multi buffer SHA256 algorithm data structure - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -# Macros for defining data structures - -# Usage example - -#START_FIELDS # JOB_AES -### name size align -#FIELD _plaintext, 8, 8 # pointer to plaintext -#FIELD _ciphertext, 8, 8 # pointer to ciphertext -#FIELD _IV, 16, 8 # IV -#FIELD _keys, 8, 8 # pointer to keys -#FIELD _len, 4, 4 # length in bytes -#FIELD _status, 4, 4 # status enumeration -#FIELD _user_data, 8, 8 # pointer to user data -#UNION _union, size1, align1, \ -# size2, align2, \ -# size3, align3, \ -# ... -#END_FIELDS -#%assign _JOB_AES_size _FIELD_OFFSET -#%assign _JOB_AES_align _STRUCT_ALIGN - -######################################################################### - -# Alternate "struc-like" syntax: -# STRUCT job_aes2 -# RES_Q .plaintext, 1 -# RES_Q .ciphertext, 1 -# RES_DQ .IV, 1 -# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN -# RES_U .union, size1, align1, \ -# size2, align2, \ -# ... -# ENDSTRUCT -# # Following only needed if nesting -# %assign job_aes2_size _FIELD_OFFSET -# %assign job_aes2_align _STRUCT_ALIGN -# -# RES_* macros take a name, a count and an optional alignment. -# The count in in terms of the base size of the macro, and the -# default alignment is the base size. -# The macros are: -# Macro Base size -# RES_B 1 -# RES_W 2 -# RES_D 4 -# RES_Q 8 -# RES_DQ 16 -# RES_Y 32 -# RES_Z 64 -# -# RES_U defines a union. It's arguments are a name and two or more -# pairs of "size, alignment" -# -# The two assigns are only needed if this structure is being nested -# within another. Even if the assigns are not done, one can still use -# STRUCT_NAME_size as the size of the structure. -# -# Note that for nesting, you still need to assign to STRUCT_NAME_size. -# -# The differences between this and using "struc" directly are that each -# type is implicitly aligned to its natural length (although this can be -# over-ridden with an explicit third parameter), and that the structure -# is padded at the end to its overall alignment. -# - -######################################################################### - -#ifndef _DATASTRUCT_ASM_ -#define _DATASTRUCT_ASM_ - -#define SZ8 8*SHA256_DIGEST_WORD_SIZE -#define ROUNDS 64*SZ8 -#define PTR_SZ 8 -#define SHA256_DIGEST_WORD_SIZE 4 -#define MAX_SHA256_LANES 8 -#define SHA256_DIGEST_WORDS 8 -#define SHA256_DIGEST_ROW_SIZE (MAX_SHA256_LANES * SHA256_DIGEST_WORD_SIZE) -#define SHA256_DIGEST_SIZE (SHA256_DIGEST_ROW_SIZE * SHA256_DIGEST_WORDS) -#define SHA256_BLK_SZ 64 - -# START_FIELDS -.macro START_FIELDS - _FIELD_OFFSET = 0 - _STRUCT_ALIGN = 0 -.endm - -# FIELD name size align -.macro FIELD name size align - _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) - \name = _FIELD_OFFSET - _FIELD_OFFSET = _FIELD_OFFSET + (\size) -.if (\align > _STRUCT_ALIGN) - _STRUCT_ALIGN = \align -.endif -.endm - -# END_FIELDS -.macro END_FIELDS - _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) -.endm - -######################################################################## - -.macro STRUCT p1 -START_FIELDS -.struc \p1 -.endm - -.macro ENDSTRUCT - tmp = _FIELD_OFFSET - END_FIELDS - tmp = (_FIELD_OFFSET - %%tmp) -.if (tmp > 0) - .lcomm tmp -.endif -.endstruc -.endm - -## RES_int name size align -.macro RES_int p1 p2 p3 - name = \p1 - size = \p2 - align = .\p3 - - _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) -.align align -.lcomm name size - _FIELD_OFFSET = _FIELD_OFFSET + (size) -.if (align > _STRUCT_ALIGN) - _STRUCT_ALIGN = align -.endif -.endm - -# macro RES_B name, size [, align] -.macro RES_B _name, _size, _align=1 -RES_int _name _size _align -.endm - -# macro RES_W name, size [, align] -.macro RES_W _name, _size, _align=2 -RES_int _name 2*(_size) _align -.endm - -# macro RES_D name, size [, align] -.macro RES_D _name, _size, _align=4 -RES_int _name 4*(_size) _align -.endm - -# macro RES_Q name, size [, align] -.macro RES_Q _name, _size, _align=8 -RES_int _name 8*(_size) _align -.endm - -# macro RES_DQ name, size [, align] -.macro RES_DQ _name, _size, _align=16 -RES_int _name 16*(_size) _align -.endm - -# macro RES_Y name, size [, align] -.macro RES_Y _name, _size, _align=32 -RES_int _name 32*(_size) _align -.endm - -# macro RES_Z name, size [, align] -.macro RES_Z _name, _size, _align=64 -RES_int _name 64*(_size) _align -.endm - -#endif - - -######################################################################## -#### Define SHA256 Out Of Order Data Structures -######################################################################## - -START_FIELDS # LANE_DATA -### name size align -FIELD _job_in_lane, 8, 8 # pointer to job object -END_FIELDS - - _LANE_DATA_size = _FIELD_OFFSET - _LANE_DATA_align = _STRUCT_ALIGN - -######################################################################## - -START_FIELDS # SHA256_ARGS_X4 -### name size align -FIELD _digest, 4*8*8, 4 # transposed digest -FIELD _data_ptr, 8*8, 8 # array of pointers to data -END_FIELDS - - _SHA256_ARGS_X4_size = _FIELD_OFFSET - _SHA256_ARGS_X4_align = _STRUCT_ALIGN - _SHA256_ARGS_X8_size = _FIELD_OFFSET - _SHA256_ARGS_X8_align = _STRUCT_ALIGN - -####################################################################### - -START_FIELDS # MB_MGR -### name size align -FIELD _args, _SHA256_ARGS_X4_size, _SHA256_ARGS_X4_align -FIELD _lens, 4*8, 8 -FIELD _unused_lanes, 8, 8 -FIELD _ldata, _LANE_DATA_size*8, _LANE_DATA_align -END_FIELDS - - _MB_MGR_size = _FIELD_OFFSET - _MB_MGR_align = _STRUCT_ALIGN - -_args_digest = _args + _digest -_args_data_ptr = _args + _data_ptr - -####################################################################### - -START_FIELDS #STACK_FRAME -### name size align -FIELD _data, 16*SZ8, 1 # transposed digest -FIELD _digest, 8*SZ8, 1 # array of pointers to data -FIELD _ytmp, 4*SZ8, 1 -FIELD _rsp, 8, 1 -END_FIELDS - - _STACK_FRAME_size = _FIELD_OFFSET - _STACK_FRAME_align = _STRUCT_ALIGN - -####################################################################### - -######################################################################## -#### Define constants -######################################################################## - -#define STS_UNKNOWN 0 -#define STS_BEING_PROCESSED 1 -#define STS_COMPLETED 2 - -######################################################################## -#### Define JOB_SHA256 structure -######################################################################## - -START_FIELDS # JOB_SHA256 - -### name size align -FIELD _buffer, 8, 8 # pointer to buffer -FIELD _len, 8, 8 # length in bytes -FIELD _result_digest, 8*4, 32 # Digest (output) -FIELD _status, 4, 4 -FIELD _user_data, 8, 8 -END_FIELDS - - _JOB_SHA256_size = _FIELD_OFFSET - _JOB_SHA256_align = _STRUCT_ALIGN diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S deleted file mode 100644 index d2364c55bbde..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Flush routine for SHA256 multibuffer - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#include <linux/linkage.h> -#include <asm/frame.h> -#include "sha256_mb_mgr_datastruct.S" - -.extern sha256_x8_avx2 - -#LINUX register definitions -#define arg1 %rdi -#define arg2 %rsi - -# Common register definitions -#define state arg1 -#define job arg2 -#define len2 arg2 - -# idx must be a register not clobberred by sha1_mult -#define idx %r8 -#define DWORD_idx %r8d - -#define unused_lanes %rbx -#define lane_data %rbx -#define tmp2 %rbx -#define tmp2_w %ebx - -#define job_rax %rax -#define tmp1 %rax -#define size_offset %rax -#define tmp %rax -#define start_offset %rax - -#define tmp3 %arg1 - -#define extra_blocks %arg2 -#define p %arg2 - -.macro LABEL prefix n -\prefix\n\(): -.endm - -.macro JNE_SKIP i -jne skip_\i -.endm - -.altmacro -.macro SET_OFFSET _offset -offset = \_offset -.endm -.noaltmacro - -# JOB_SHA256* sha256_mb_mgr_flush_avx2(MB_MGR *state) -# arg 1 : rcx : state -ENTRY(sha256_mb_mgr_flush_avx2) - FRAME_BEGIN - push %rbx - - # If bit (32+3) is set, then all lanes are empty - mov _unused_lanes(state), unused_lanes - bt $32+3, unused_lanes - jc return_null - - # find a lane with a non-null job - xor idx, idx - offset = (_ldata + 1 * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne one(%rip), idx - offset = (_ldata + 2 * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne two(%rip), idx - offset = (_ldata + 3 * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne three(%rip), idx - offset = (_ldata + 4 * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne four(%rip), idx - offset = (_ldata + 5 * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne five(%rip), idx - offset = (_ldata + 6 * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne six(%rip), idx - offset = (_ldata + 7 * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne seven(%rip), idx - - # copy idx to empty lanes -copy_lane_data: - offset = (_args + _data_ptr) - mov offset(state,idx,8), tmp - - I = 0 -.rep 8 - offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) -.altmacro - JNE_SKIP %I - offset = (_args + _data_ptr + 8*I) - mov tmp, offset(state) - offset = (_lens + 4*I) - movl $0xFFFFFFFF, offset(state) -LABEL skip_ %I - I = (I+1) -.noaltmacro -.endr - - # Find min length - vmovdqu _lens+0*16(state), %xmm0 - vmovdqu _lens+1*16(state), %xmm1 - - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword - - vmovd %xmm2, DWORD_idx - mov idx, len2 - and $0xF, idx - shr $4, len2 - jz len_is_0 - - vpand clear_low_nibble(%rip), %xmm2, %xmm2 - vpshufd $0, %xmm2, %xmm2 - - vpsubd %xmm2, %xmm0, %xmm0 - vpsubd %xmm2, %xmm1, %xmm1 - - vmovdqu %xmm0, _lens+0*16(state) - vmovdqu %xmm1, _lens+1*16(state) - - # "state" and "args" are the same address, arg1 - # len is arg2 - call sha256_x8_avx2 - # state and idx are intact - -len_is_0: - # process completed job "idx" - imul $_LANE_DATA_size, idx, lane_data - lea _ldata(state, lane_data), lane_data - - mov _job_in_lane(lane_data), job_rax - movq $0, _job_in_lane(lane_data) - movl $STS_COMPLETED, _status(job_rax) - mov _unused_lanes(state), unused_lanes - shl $4, unused_lanes - or idx, unused_lanes - - mov unused_lanes, _unused_lanes(state) - movl $0xFFFFFFFF, _lens(state,idx,4) - - vmovd _args_digest(state , idx, 4) , %xmm0 - vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 - vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 - vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 - vmovd _args_digest+4*32(state, idx, 4), %xmm1 - vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 - vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 - vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 - - vmovdqu %xmm0, _result_digest(job_rax) - offset = (_result_digest + 1*16) - vmovdqu %xmm1, offset(job_rax) - -return: - pop %rbx - FRAME_END - ret - -return_null: - xor job_rax, job_rax - jmp return -ENDPROC(sha256_mb_mgr_flush_avx2) - -############################################################################## - -.align 16 -ENTRY(sha256_mb_mgr_get_comp_job_avx2) - push %rbx - - ## if bit 32+3 is set, then all lanes are empty - mov _unused_lanes(state), unused_lanes - bt $(32+3), unused_lanes - jc .return_null - - # Find min length - vmovdqu _lens(state), %xmm0 - vmovdqu _lens+1*16(state), %xmm1 - - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword - - vmovd %xmm2, DWORD_idx - test $~0xF, idx - jnz .return_null - - # process completed job "idx" - imul $_LANE_DATA_size, idx, lane_data - lea _ldata(state, lane_data), lane_data - - mov _job_in_lane(lane_data), job_rax - movq $0, _job_in_lane(lane_data) - movl $STS_COMPLETED, _status(job_rax) - mov _unused_lanes(state), unused_lanes - shl $4, unused_lanes - or idx, unused_lanes - mov unused_lanes, _unused_lanes(state) - - movl $0xFFFFFFFF, _lens(state, idx, 4) - - vmovd _args_digest(state, idx, 4), %xmm0 - vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 - vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 - vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 - vmovd _args_digest+4*32(state, idx, 4), %xmm1 - vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 - vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 - vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 - - vmovdqu %xmm0, _result_digest(job_rax) - offset = (_result_digest + 1*16) - vmovdqu %xmm1, offset(job_rax) - - pop %rbx - - ret - -.return_null: - xor job_rax, job_rax - pop %rbx - ret -ENDPROC(sha256_mb_mgr_get_comp_job_avx2) - -.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 -.align 16 -clear_low_nibble: -.octa 0x000000000000000000000000FFFFFFF0 - -.section .rodata.cst8, "aM", @progbits, 8 -.align 8 -one: -.quad 1 -two: -.quad 2 -three: -.quad 3 -four: -.quad 4 -five: -.quad 5 -six: -.quad 6 -seven: -.quad 7 diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c deleted file mode 100644 index b0c498371e67..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_init_avx2.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Initialization code for multi buffer SHA256 algorithm for AVX2 - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "sha256_mb_mgr.h" - -void sha256_mb_mgr_init_avx2(struct sha256_mb_mgr *state) -{ - unsigned int j; - - state->unused_lanes = 0xF76543210ULL; - for (j = 0; j < 8; j++) { - state->lens[j] = 0xFFFFFFFF; - state->ldata[j].job_in_lane = NULL; - } -} diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S deleted file mode 100644 index b36ae7454084..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Buffer submit code for multi buffer SHA256 algorithm - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/linkage.h> -#include <asm/frame.h> -#include "sha256_mb_mgr_datastruct.S" - -.extern sha256_x8_avx2 - -# LINUX register definitions -arg1 = %rdi -arg2 = %rsi -size_offset = %rcx -tmp2 = %rcx -extra_blocks = %rdx - -# Common definitions -#define state arg1 -#define job %rsi -#define len2 arg2 -#define p2 arg2 - -# idx must be a register not clobberred by sha1_x8_avx2 -idx = %r8 -DWORD_idx = %r8d -last_len = %r8 - -p = %r11 -start_offset = %r11 - -unused_lanes = %rbx -BYTE_unused_lanes = %bl - -job_rax = %rax -len = %rax -DWORD_len = %eax - -lane = %r12 -tmp3 = %r12 - -tmp = %r9 -DWORD_tmp = %r9d - -lane_data = %r10 - -# JOB* sha256_mb_mgr_submit_avx2(MB_MGR *state, JOB_SHA256 *job) -# arg 1 : rcx : state -# arg 2 : rdx : job -ENTRY(sha256_mb_mgr_submit_avx2) - FRAME_BEGIN - push %rbx - push %r12 - - mov _unused_lanes(state), unused_lanes - mov unused_lanes, lane - and $0xF, lane - shr $4, unused_lanes - imul $_LANE_DATA_size, lane, lane_data - movl $STS_BEING_PROCESSED, _status(job) - lea _ldata(state, lane_data), lane_data - mov unused_lanes, _unused_lanes(state) - movl _len(job), DWORD_len - - mov job, _job_in_lane(lane_data) - shl $4, len - or lane, len - - movl DWORD_len, _lens(state , lane, 4) - - # Load digest words from result_digest - vmovdqu _result_digest(job), %xmm0 - vmovdqu _result_digest+1*16(job), %xmm1 - vmovd %xmm0, _args_digest(state, lane, 4) - vpextrd $1, %xmm0, _args_digest+1*32(state , lane, 4) - vpextrd $2, %xmm0, _args_digest+2*32(state , lane, 4) - vpextrd $3, %xmm0, _args_digest+3*32(state , lane, 4) - vmovd %xmm1, _args_digest+4*32(state , lane, 4) - - vpextrd $1, %xmm1, _args_digest+5*32(state , lane, 4) - vpextrd $2, %xmm1, _args_digest+6*32(state , lane, 4) - vpextrd $3, %xmm1, _args_digest+7*32(state , lane, 4) - - mov _buffer(job), p - mov p, _args_data_ptr(state, lane, 8) - - cmp $0xF, unused_lanes - jne return_null - -start_loop: - # Find min length - vmovdqa _lens(state), %xmm0 - vmovdqa _lens+1*16(state), %xmm1 - - vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A} - vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C} - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has {x,x,E,F} - vpalignr $4, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,x,E} - vpminud %xmm3, %xmm2, %xmm2 # xmm2 has min val in low dword - - vmovd %xmm2, DWORD_idx - mov idx, len2 - and $0xF, idx - shr $4, len2 - jz len_is_0 - - vpand clear_low_nibble(%rip), %xmm2, %xmm2 - vpshufd $0, %xmm2, %xmm2 - - vpsubd %xmm2, %xmm0, %xmm0 - vpsubd %xmm2, %xmm1, %xmm1 - - vmovdqa %xmm0, _lens + 0*16(state) - vmovdqa %xmm1, _lens + 1*16(state) - - # "state" and "args" are the same address, arg1 - # len is arg2 - call sha256_x8_avx2 - - # state and idx are intact - -len_is_0: - # process completed job "idx" - imul $_LANE_DATA_size, idx, lane_data - lea _ldata(state, lane_data), lane_data - - mov _job_in_lane(lane_data), job_rax - mov _unused_lanes(state), unused_lanes - movq $0, _job_in_lane(lane_data) - movl $STS_COMPLETED, _status(job_rax) - shl $4, unused_lanes - or idx, unused_lanes - mov unused_lanes, _unused_lanes(state) - - movl $0xFFFFFFFF, _lens(state,idx,4) - - vmovd _args_digest(state, idx, 4), %xmm0 - vpinsrd $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0 - vpinsrd $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0 - vpinsrd $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0 - vmovd _args_digest+4*32(state, idx, 4), %xmm1 - - vpinsrd $1, _args_digest+5*32(state , idx, 4), %xmm1, %xmm1 - vpinsrd $2, _args_digest+6*32(state , idx, 4), %xmm1, %xmm1 - vpinsrd $3, _args_digest+7*32(state , idx, 4), %xmm1, %xmm1 - - vmovdqu %xmm0, _result_digest(job_rax) - vmovdqu %xmm1, _result_digest+1*16(job_rax) - -return: - pop %r12 - pop %rbx - FRAME_END - ret - -return_null: - xor job_rax, job_rax - jmp return - -ENDPROC(sha256_mb_mgr_submit_avx2) - -.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 -.align 16 -clear_low_nibble: - .octa 0x000000000000000000000000FFFFFFF0 diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S deleted file mode 100644 index 1687c80c5995..000000000000 --- a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S +++ /dev/null @@ -1,598 +0,0 @@ -/* - * Multi-buffer SHA256 algorithm hash compute routine - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/linkage.h> -#include "sha256_mb_mgr_datastruct.S" - -## code to compute oct SHA256 using SSE-256 -## outer calling routine takes care of save and restore of XMM registers -## Logic designed/laid out by JDG - -## Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; %ymm0-15 -## Linux clobbers: rax rbx rcx rdx rsi r9 r10 r11 r12 r13 r14 r15 -## Linux preserves: rdi rbp r8 -## -## clobbers %ymm0-15 - -arg1 = %rdi -arg2 = %rsi -reg3 = %rcx -reg4 = %rdx - -# Common definitions -STATE = arg1 -INP_SIZE = arg2 - -IDX = %rax -ROUND = %rbx -TBL = reg3 - -inp0 = %r9 -inp1 = %r10 -inp2 = %r11 -inp3 = %r12 -inp4 = %r13 -inp5 = %r14 -inp6 = %r15 -inp7 = reg4 - -a = %ymm0 -b = %ymm1 -c = %ymm2 -d = %ymm3 -e = %ymm4 -f = %ymm5 -g = %ymm6 -h = %ymm7 - -T1 = %ymm8 - -a0 = %ymm12 -a1 = %ymm13 -a2 = %ymm14 -TMP = %ymm15 -TMP0 = %ymm6 -TMP1 = %ymm7 - -TT0 = %ymm8 -TT1 = %ymm9 -TT2 = %ymm10 -TT3 = %ymm11 -TT4 = %ymm12 -TT5 = %ymm13 -TT6 = %ymm14 -TT7 = %ymm15 - -# Define stack usage - -# Assume stack aligned to 32 bytes before call -# Therefore FRAMESZ mod 32 must be 32-8 = 24 - -#define FRAMESZ 0x388 - -#define VMOVPS vmovups - -# TRANSPOSE8 r0, r1, r2, r3, r4, r5, r6, r7, t0, t1 -# "transpose" data in {r0...r7} using temps {t0...t1} -# Input looks like: {r0 r1 r2 r3 r4 r5 r6 r7} -# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} -# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} -# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} -# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} -# r4 = {e7 e6 e5 e4 e3 e2 e1 e0} -# r5 = {f7 f6 f5 f4 f3 f2 f1 f0} -# r6 = {g7 g6 g5 g4 g3 g2 g1 g0} -# r7 = {h7 h6 h5 h4 h3 h2 h1 h0} -# -# Output looks like: {r0 r1 r2 r3 r4 r5 r6 r7} -# r0 = {h0 g0 f0 e0 d0 c0 b0 a0} -# r1 = {h1 g1 f1 e1 d1 c1 b1 a1} -# r2 = {h2 g2 f2 e2 d2 c2 b2 a2} -# r3 = {h3 g3 f3 e3 d3 c3 b3 a3} -# r4 = {h4 g4 f4 e4 d4 c4 b4 a4} -# r5 = {h5 g5 f5 e5 d5 c5 b5 a5} -# r6 = {h6 g6 f6 e6 d6 c6 b6 a6} -# r7 = {h7 g7 f7 e7 d7 c7 b7 a7} -# - -.macro TRANSPOSE8 r0 r1 r2 r3 r4 r5 r6 r7 t0 t1 - # process top half (r0..r3) {a...d} - vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} - vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} - vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} - vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} - vshufps $0xDD, \t1, \t0, \r3 # r3 = {d5 c5 b5 a5 d1 c1 b1 a1} - vshufps $0x88, \r2, \r0, \r1 # r1 = {d6 c6 b6 a6 d2 c2 b2 a2} - vshufps $0xDD, \r2, \r0, \r0 # r0 = {d7 c7 b7 a7 d3 c3 b3 a3} - vshufps $0x88, \t1, \t0, \t0 # t0 = {d4 c4 b4 a4 d0 c0 b0 a0} - - # use r2 in place of t0 - # process bottom half (r4..r7) {e...h} - vshufps $0x44, \r5, \r4, \r2 # r2 = {f5 f4 e5 e4 f1 f0 e1 e0} - vshufps $0xEE, \r5, \r4, \r4 # r4 = {f7 f6 e7 e6 f3 f2 e3 e2} - vshufps $0x44, \r7, \r6, \t1 # t1 = {h5 h4 g5 g4 h1 h0 g1 g0} - vshufps $0xEE, \r7, \r6, \r6 # r6 = {h7 h6 g7 g6 h3 h2 g3 g2} - vshufps $0xDD, \t1, \r2, \r7 # r7 = {h5 g5 f5 e5 h1 g1 f1 e1} - vshufps $0x88, \r6, \r4, \r5 # r5 = {h6 g6 f6 e6 h2 g2 f2 e2} - vshufps $0xDD, \r6, \r4, \r4 # r4 = {h7 g7 f7 e7 h3 g3 f3 e3} - vshufps $0x88, \t1, \r2, \t1 # t1 = {h4 g4 f4 e4 h0 g0 f0 e0} - - vperm2f128 $0x13, \r1, \r5, \r6 # h6...a6 - vperm2f128 $0x02, \r1, \r5, \r2 # h2...a2 - vperm2f128 $0x13, \r3, \r7, \r5 # h5...a5 - vperm2f128 $0x02, \r3, \r7, \r1 # h1...a1 - vperm2f128 $0x13, \r0, \r4, \r7 # h7...a7 - vperm2f128 $0x02, \r0, \r4, \r3 # h3...a3 - vperm2f128 $0x13, \t0, \t1, \r4 # h4...a4 - vperm2f128 $0x02, \t0, \t1, \r0 # h0...a0 - -.endm - -.macro ROTATE_ARGS -TMP_ = h -h = g -g = f -f = e -e = d -d = c -c = b -b = a -a = TMP_ -.endm - -.macro _PRORD reg imm tmp - vpslld $(32-\imm),\reg,\tmp - vpsrld $\imm,\reg, \reg - vpor \tmp,\reg, \reg -.endm - -# PRORD_nd reg, imm, tmp, src -.macro _PRORD_nd reg imm tmp src - vpslld $(32-\imm), \src, \tmp - vpsrld $\imm, \src, \reg - vpor \tmp, \reg, \reg -.endm - -# PRORD dst/src, amt -.macro PRORD reg imm - _PRORD \reg,\imm,TMP -.endm - -# PRORD_nd dst, src, amt -.macro PRORD_nd reg tmp imm - _PRORD_nd \reg, \imm, TMP, \tmp -.endm - -# arguments passed implicitly in preprocessor symbols i, a...h -.macro ROUND_00_15 _T1 i - PRORD_nd a0,e,5 # sig1: a0 = (e >> 5) - - vpxor g, f, a2 # ch: a2 = f^g - vpand e,a2, a2 # ch: a2 = (f^g)&e - vpxor g, a2, a2 # a2 = ch - - PRORD_nd a1,e,25 # sig1: a1 = (e >> 25) - - vmovdqu \_T1,(SZ8*(\i & 0xf))(%rsp) - vpaddd (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K - vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5) - PRORD a0, 6 # sig1: a0 = (e >> 6) ^ (e >> 11) - vpaddd a2, h, h # h = h + ch - PRORD_nd a2,a,11 # sig0: a2 = (a >> 11) - vpaddd \_T1,h, h # h = h + ch + W + K - vpxor a1, a0, a0 # a0 = sigma1 - PRORD_nd a1,a,22 # sig0: a1 = (a >> 22) - vpxor c, a, \_T1 # maj: T1 = a^c - add $SZ8, ROUND # ROUND++ - vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b - vpaddd a0, h, h - vpaddd h, d, d - vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11) - PRORD a2,2 # sig0: a2 = (a >> 2) ^ (a >> 13) - vpxor a1, a2, a2 # a2 = sig0 - vpand c, a, a1 # maj: a1 = a&c - vpor \_T1, a1, a1 # a1 = maj - vpaddd a1, h, h # h = h + ch + W + K + maj - vpaddd a2, h, h # h = h + ch + W + K + maj + sigma0 - ROTATE_ARGS -.endm - -# arguments passed implicitly in preprocessor symbols i, a...h -.macro ROUND_16_XX _T1 i - vmovdqu (SZ8*((\i-15)&0xf))(%rsp), \_T1 - vmovdqu (SZ8*((\i-2)&0xf))(%rsp), a1 - vmovdqu \_T1, a0 - PRORD \_T1,11 - vmovdqu a1, a2 - PRORD a1,2 - vpxor a0, \_T1, \_T1 - PRORD \_T1, 7 - vpxor a2, a1, a1 - PRORD a1, 17 - vpsrld $3, a0, a0 - vpxor a0, \_T1, \_T1 - vpsrld $10, a2, a2 - vpxor a2, a1, a1 - vpaddd (SZ8*((\i-16)&0xf))(%rsp), \_T1, \_T1 - vpaddd (SZ8*((\i-7)&0xf))(%rsp), a1, a1 - vpaddd a1, \_T1, \_T1 - - ROUND_00_15 \_T1,\i -.endm - -# SHA256_ARGS: -# UINT128 digest[8]; // transposed digests -# UINT8 *data_ptr[4]; - -# void sha256_x8_avx2(SHA256_ARGS *args, UINT64 bytes); -# arg 1 : STATE : pointer to array of pointers to input data -# arg 2 : INP_SIZE : size of input in blocks - # general registers preserved in outer calling routine - # outer calling routine saves all the XMM registers - # save rsp, allocate 32-byte aligned for local variables -ENTRY(sha256_x8_avx2) - - # save callee-saved clobbered registers to comply with C function ABI - push %r12 - push %r13 - push %r14 - push %r15 - - mov %rsp, IDX - sub $FRAMESZ, %rsp - and $~0x1F, %rsp - mov IDX, _rsp(%rsp) - - # Load the pre-transposed incoming digest. - vmovdqu 0*SHA256_DIGEST_ROW_SIZE(STATE),a - vmovdqu 1*SHA256_DIGEST_ROW_SIZE(STATE),b - vmovdqu 2*SHA256_DIGEST_ROW_SIZE(STATE),c - vmovdqu 3*SHA256_DIGEST_ROW_SIZE(STATE),d - vmovdqu 4*SHA256_DIGEST_ROW_SIZE(STATE),e - vmovdqu 5*SHA256_DIGEST_ROW_SIZE(STATE),f - vmovdqu 6*SHA256_DIGEST_ROW_SIZE(STATE),g - vmovdqu 7*SHA256_DIGEST_ROW_SIZE(STATE),h - - lea K256_8(%rip),TBL - - # load the address of each of the 4 message lanes - # getting ready to transpose input onto stack - mov _args_data_ptr+0*PTR_SZ(STATE),inp0 - mov _args_data_ptr+1*PTR_SZ(STATE),inp1 - mov _args_data_ptr+2*PTR_SZ(STATE),inp2 - mov _args_data_ptr+3*PTR_SZ(STATE),inp3 - mov _args_data_ptr+4*PTR_SZ(STATE),inp4 - mov _args_data_ptr+5*PTR_SZ(STATE),inp5 - mov _args_data_ptr+6*PTR_SZ(STATE),inp6 - mov _args_data_ptr+7*PTR_SZ(STATE),inp7 - - xor IDX, IDX -lloop: - xor ROUND, ROUND - - # save old digest - vmovdqu a, _digest(%rsp) - vmovdqu b, _digest+1*SZ8(%rsp) - vmovdqu c, _digest+2*SZ8(%rsp) - vmovdqu d, _digest+3*SZ8(%rsp) - vmovdqu e, _digest+4*SZ8(%rsp) - vmovdqu f, _digest+5*SZ8(%rsp) - vmovdqu g, _digest+6*SZ8(%rsp) - vmovdqu h, _digest+7*SZ8(%rsp) - i = 0 -.rep 2 - VMOVPS i*32(inp0, IDX), TT0 - VMOVPS i*32(inp1, IDX), TT1 - VMOVPS i*32(inp2, IDX), TT2 - VMOVPS i*32(inp3, IDX), TT3 - VMOVPS i*32(inp4, IDX), TT4 - VMOVPS i*32(inp5, IDX), TT5 - VMOVPS i*32(inp6, IDX), TT6 - VMOVPS i*32(inp7, IDX), TT7 - vmovdqu g, _ytmp(%rsp) - vmovdqu h, _ytmp+1*SZ8(%rsp) - TRANSPOSE8 TT0, TT1, TT2, TT3, TT4, TT5, TT6, TT7, TMP0, TMP1 - vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP1 - vmovdqu _ytmp(%rsp), g - vpshufb TMP1, TT0, TT0 - vpshufb TMP1, TT1, TT1 - vpshufb TMP1, TT2, TT2 - vpshufb TMP1, TT3, TT3 - vpshufb TMP1, TT4, TT4 - vpshufb TMP1, TT5, TT5 - vpshufb TMP1, TT6, TT6 - vpshufb TMP1, TT7, TT7 - vmovdqu _ytmp+1*SZ8(%rsp), h - vmovdqu TT4, _ytmp(%rsp) - vmovdqu TT5, _ytmp+1*SZ8(%rsp) - vmovdqu TT6, _ytmp+2*SZ8(%rsp) - vmovdqu TT7, _ytmp+3*SZ8(%rsp) - ROUND_00_15 TT0,(i*8+0) - vmovdqu _ytmp(%rsp), TT0 - ROUND_00_15 TT1,(i*8+1) - vmovdqu _ytmp+1*SZ8(%rsp), TT1 - ROUND_00_15 TT2,(i*8+2) - vmovdqu _ytmp+2*SZ8(%rsp), TT2 - ROUND_00_15 TT3,(i*8+3) - vmovdqu _ytmp+3*SZ8(%rsp), TT3 - ROUND_00_15 TT0,(i*8+4) - ROUND_00_15 TT1,(i*8+5) - ROUND_00_15 TT2,(i*8+6) - ROUND_00_15 TT3,(i*8+7) - i = (i+1) -.endr - add $64, IDX - i = (i*8) - - jmp Lrounds_16_xx -.align 16 -Lrounds_16_xx: -.rep 16 - ROUND_16_XX T1, i - i = (i+1) -.endr - - cmp $ROUNDS,ROUND - jb Lrounds_16_xx - - # add old digest - vpaddd _digest+0*SZ8(%rsp), a, a - vpaddd _digest+1*SZ8(%rsp), b, b - vpaddd _digest+2*SZ8(%rsp), c, c - vpaddd _digest+3*SZ8(%rsp), d, d - vpaddd _digest+4*SZ8(%rsp), e, e - vpaddd _digest+5*SZ8(%rsp), f, f - vpaddd _digest+6*SZ8(%rsp), g, g - vpaddd _digest+7*SZ8(%rsp), h, h - - sub $1, INP_SIZE # unit is blocks - jne lloop - - # write back to memory (state object) the transposed digest - vmovdqu a, 0*SHA256_DIGEST_ROW_SIZE(STATE) - vmovdqu b, 1*SHA256_DIGEST_ROW_SIZE(STATE) - vmovdqu c, 2*SHA256_DIGEST_ROW_SIZE(STATE) - vmovdqu d, 3*SHA256_DIGEST_ROW_SIZE(STATE) - vmovdqu e, 4*SHA256_DIGEST_ROW_SIZE(STATE) - vmovdqu f, 5*SHA256_DIGEST_ROW_SIZE(STATE) - vmovdqu g, 6*SHA256_DIGEST_ROW_SIZE(STATE) - vmovdqu h, 7*SHA256_DIGEST_ROW_SIZE(STATE) - - # update input pointers - add IDX, inp0 - mov inp0, _args_data_ptr+0*8(STATE) - add IDX, inp1 - mov inp1, _args_data_ptr+1*8(STATE) - add IDX, inp2 - mov inp2, _args_data_ptr+2*8(STATE) - add IDX, inp3 - mov inp3, _args_data_ptr+3*8(STATE) - add IDX, inp4 - mov inp4, _args_data_ptr+4*8(STATE) - add IDX, inp5 - mov inp5, _args_data_ptr+5*8(STATE) - add IDX, inp6 - mov inp6, _args_data_ptr+6*8(STATE) - add IDX, inp7 - mov inp7, _args_data_ptr+7*8(STATE) - - # Postamble - mov _rsp(%rsp), %rsp - - # restore callee-saved clobbered registers - pop %r15 - pop %r14 - pop %r13 - pop %r12 - - ret -ENDPROC(sha256_x8_avx2) - -.section .rodata.K256_8, "a", @progbits -.align 64 -K256_8: - .octa 0x428a2f98428a2f98428a2f98428a2f98 - .octa 0x428a2f98428a2f98428a2f98428a2f98 - .octa 0x71374491713744917137449171374491 - .octa 0x71374491713744917137449171374491 - .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf - .octa 0xb5c0fbcfb5c0fbcfb5c0fbcfb5c0fbcf - .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5 - .octa 0xe9b5dba5e9b5dba5e9b5dba5e9b5dba5 - .octa 0x3956c25b3956c25b3956c25b3956c25b - .octa 0x3956c25b3956c25b3956c25b3956c25b - .octa 0x59f111f159f111f159f111f159f111f1 - .octa 0x59f111f159f111f159f111f159f111f1 - .octa 0x923f82a4923f82a4923f82a4923f82a4 - .octa 0x923f82a4923f82a4923f82a4923f82a4 - .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5 - .octa 0xab1c5ed5ab1c5ed5ab1c5ed5ab1c5ed5 - .octa 0xd807aa98d807aa98d807aa98d807aa98 - .octa 0xd807aa98d807aa98d807aa98d807aa98 - .octa 0x12835b0112835b0112835b0112835b01 - .octa 0x12835b0112835b0112835b0112835b01 - .octa 0x243185be243185be243185be243185be - .octa 0x243185be243185be243185be243185be - .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3 - .octa 0x550c7dc3550c7dc3550c7dc3550c7dc3 - .octa 0x72be5d7472be5d7472be5d7472be5d74 - .octa 0x72be5d7472be5d7472be5d7472be5d74 - .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe - .octa 0x80deb1fe80deb1fe80deb1fe80deb1fe - .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7 - .octa 0x9bdc06a79bdc06a79bdc06a79bdc06a7 - .octa 0xc19bf174c19bf174c19bf174c19bf174 - .octa 0xc19bf174c19bf174c19bf174c19bf174 - .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1 - .octa 0xe49b69c1e49b69c1e49b69c1e49b69c1 - .octa 0xefbe4786efbe4786efbe4786efbe4786 - .octa 0xefbe4786efbe4786efbe4786efbe4786 - .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6 - .octa 0x0fc19dc60fc19dc60fc19dc60fc19dc6 - .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc - .octa 0x240ca1cc240ca1cc240ca1cc240ca1cc - .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f - .octa 0x2de92c6f2de92c6f2de92c6f2de92c6f - .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa - .octa 0x4a7484aa4a7484aa4a7484aa4a7484aa - .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc - .octa 0x5cb0a9dc5cb0a9dc5cb0a9dc5cb0a9dc - .octa 0x76f988da76f988da76f988da76f988da - .octa 0x76f988da76f988da76f988da76f988da - .octa 0x983e5152983e5152983e5152983e5152 - .octa 0x983e5152983e5152983e5152983e5152 - .octa 0xa831c66da831c66da831c66da831c66d - .octa 0xa831c66da831c66da831c66da831c66d - .octa 0xb00327c8b00327c8b00327c8b00327c8 - .octa 0xb00327c8b00327c8b00327c8b00327c8 - .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7 - .octa 0xbf597fc7bf597fc7bf597fc7bf597fc7 - .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3 - .octa 0xc6e00bf3c6e00bf3c6e00bf3c6e00bf3 - .octa 0xd5a79147d5a79147d5a79147d5a79147 - .octa 0xd5a79147d5a79147d5a79147d5a79147 - .octa 0x06ca635106ca635106ca635106ca6351 - .octa 0x06ca635106ca635106ca635106ca6351 - .octa 0x14292967142929671429296714292967 - .octa 0x14292967142929671429296714292967 - .octa 0x27b70a8527b70a8527b70a8527b70a85 - .octa 0x27b70a8527b70a8527b70a8527b70a85 - .octa 0x2e1b21382e1b21382e1b21382e1b2138 - .octa 0x2e1b21382e1b21382e1b21382e1b2138 - .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc - .octa 0x4d2c6dfc4d2c6dfc4d2c6dfc4d2c6dfc - .octa 0x53380d1353380d1353380d1353380d13 - .octa 0x53380d1353380d1353380d1353380d13 - .octa 0x650a7354650a7354650a7354650a7354 - .octa 0x650a7354650a7354650a7354650a7354 - .octa 0x766a0abb766a0abb766a0abb766a0abb - .octa 0x766a0abb766a0abb766a0abb766a0abb - .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e - .octa 0x81c2c92e81c2c92e81c2c92e81c2c92e - .octa 0x92722c8592722c8592722c8592722c85 - .octa 0x92722c8592722c8592722c8592722c85 - .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1 - .octa 0xa2bfe8a1a2bfe8a1a2bfe8a1a2bfe8a1 - .octa 0xa81a664ba81a664ba81a664ba81a664b - .octa 0xa81a664ba81a664ba81a664ba81a664b - .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70 - .octa 0xc24b8b70c24b8b70c24b8b70c24b8b70 - .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3 - .octa 0xc76c51a3c76c51a3c76c51a3c76c51a3 - .octa 0xd192e819d192e819d192e819d192e819 - .octa 0xd192e819d192e819d192e819d192e819 - .octa 0xd6990624d6990624d6990624d6990624 - .octa 0xd6990624d6990624d6990624d6990624 - .octa 0xf40e3585f40e3585f40e3585f40e3585 - .octa 0xf40e3585f40e3585f40e3585f40e3585 - .octa 0x106aa070106aa070106aa070106aa070 - .octa 0x106aa070106aa070106aa070106aa070 - .octa 0x19a4c11619a4c11619a4c11619a4c116 - .octa 0x19a4c11619a4c11619a4c11619a4c116 - .octa 0x1e376c081e376c081e376c081e376c08 - .octa 0x1e376c081e376c081e376c081e376c08 - .octa 0x2748774c2748774c2748774c2748774c - .octa 0x2748774c2748774c2748774c2748774c - .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5 - .octa 0x34b0bcb534b0bcb534b0bcb534b0bcb5 - .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3 - .octa 0x391c0cb3391c0cb3391c0cb3391c0cb3 - .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a - .octa 0x4ed8aa4a4ed8aa4a4ed8aa4a4ed8aa4a - .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f - .octa 0x5b9cca4f5b9cca4f5b9cca4f5b9cca4f - .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3 - .octa 0x682e6ff3682e6ff3682e6ff3682e6ff3 - .octa 0x748f82ee748f82ee748f82ee748f82ee - .octa 0x748f82ee748f82ee748f82ee748f82ee - .octa 0x78a5636f78a5636f78a5636f78a5636f - .octa 0x78a5636f78a5636f78a5636f78a5636f - .octa 0x84c8781484c8781484c8781484c87814 - .octa 0x84c8781484c8781484c8781484c87814 - .octa 0x8cc702088cc702088cc702088cc70208 - .octa 0x8cc702088cc702088cc702088cc70208 - .octa 0x90befffa90befffa90befffa90befffa - .octa 0x90befffa90befffa90befffa90befffa - .octa 0xa4506ceba4506ceba4506ceba4506ceb - .octa 0xa4506ceba4506ceba4506ceba4506ceb - .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 - .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 - .octa 0xc67178f2c67178f2c67178f2c67178f2 - .octa 0xc67178f2c67178f2c67178f2c67178f2 - -.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 -.align 32 -PSHUFFLE_BYTE_FLIP_MASK: -.octa 0x0c0d0e0f08090a0b0405060700010203 -.octa 0x0c0d0e0f08090a0b0405060700010203 - -.section .rodata.cst256.K256, "aM", @progbits, 256 -.align 64 -.global K256 -K256: - .int 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 - .int 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 - .int 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 - .int 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 - .int 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc - .int 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da - .int 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 - .int 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 - .int 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 - .int 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 - .int 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 - .int 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 - .int 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 - .int 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 - .int 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 - .int 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 diff --git a/arch/x86/crypto/sha512-mb/Makefile b/arch/x86/crypto/sha512-mb/Makefile deleted file mode 100644 index 90f1ef69152e..000000000000 --- a/arch/x86/crypto/sha512-mb/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Arch-specific CryptoAPI modules. -# - -avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ - $(comma)4)$(comma)%ymm2,yes,no) -ifeq ($(avx2_supported),yes) - obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb.o - sha512-mb-y := sha512_mb.o sha512_mb_mgr_flush_avx2.o \ - sha512_mb_mgr_init_avx2.o sha512_mb_mgr_submit_avx2.o sha512_x4_avx2.o -endif diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c deleted file mode 100644 index 26b85678012d..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb.c +++ /dev/null @@ -1,1047 +0,0 @@ -/* - * Multi buffer SHA512 algorithm Glue Code - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <crypto/internal/hash.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/cryptohash.h> -#include <linux/types.h> -#include <linux/list.h> -#include <crypto/scatterwalk.h> -#include <crypto/sha.h> -#include <crypto/mcryptd.h> -#include <crypto/crypto_wq.h> -#include <asm/byteorder.h> -#include <linux/hardirq.h> -#include <asm/fpu/api.h> -#include "sha512_mb_ctx.h" - -#define FLUSH_INTERVAL 1000 /* in usec */ - -static struct mcryptd_alg_state sha512_mb_alg_state; - -struct sha512_mb_ctx { - struct mcryptd_ahash *mcryptd_tfm; -}; - -static inline struct mcryptd_hash_request_ctx - *cast_hash_to_mcryptd_ctx(struct sha512_hash_ctx *hash_ctx) -{ - struct ahash_request *areq; - - areq = container_of((void *) hash_ctx, struct ahash_request, __ctx); - return container_of(areq, struct mcryptd_hash_request_ctx, areq); -} - -static inline struct ahash_request - *cast_mcryptd_ctx_to_req(struct mcryptd_hash_request_ctx *ctx) -{ - return container_of((void *) ctx, struct ahash_request, __ctx); -} - -static void req_ctx_init(struct mcryptd_hash_request_ctx *rctx, - struct ahash_request *areq) -{ - rctx->flag = HASH_UPDATE; -} - -static asmlinkage void (*sha512_job_mgr_init)(struct sha512_mb_mgr *state); -static asmlinkage struct job_sha512* (*sha512_job_mgr_submit) - (struct sha512_mb_mgr *state, - struct job_sha512 *job); -static asmlinkage struct job_sha512* (*sha512_job_mgr_flush) - (struct sha512_mb_mgr *state); -static asmlinkage struct job_sha512* (*sha512_job_mgr_get_comp_job) - (struct sha512_mb_mgr *state); - -inline uint32_t sha512_pad(uint8_t padblock[SHA512_BLOCK_SIZE * 2], - uint64_t total_len) -{ - uint32_t i = total_len & (SHA512_BLOCK_SIZE - 1); - - memset(&padblock[i], 0, SHA512_BLOCK_SIZE); - padblock[i] = 0x80; - - i += ((SHA512_BLOCK_SIZE - 1) & - (0 - (total_len + SHA512_PADLENGTHFIELD_SIZE + 1))) - + 1 + SHA512_PADLENGTHFIELD_SIZE; - -#if SHA512_PADLENGTHFIELD_SIZE == 16 - *((uint64_t *) &padblock[i - 16]) = 0; -#endif - - *((uint64_t *) &padblock[i - 8]) = cpu_to_be64(total_len << 3); - - /* Number of extra blocks to hash */ - return i >> SHA512_LOG2_BLOCK_SIZE; -} - -static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit - (struct sha512_ctx_mgr *mgr, struct sha512_hash_ctx *ctx) -{ - while (ctx) { - if (ctx->status & HASH_CTX_STS_COMPLETE) { - /* Clear PROCESSING bit */ - ctx->status = HASH_CTX_STS_COMPLETE; - return ctx; - } - - /* - * If the extra blocks are empty, begin hashing what remains - * in the user's buffer. - */ - if (ctx->partial_block_buffer_length == 0 && - ctx->incoming_buffer_length) { - - const void *buffer = ctx->incoming_buffer; - uint32_t len = ctx->incoming_buffer_length; - uint32_t copy_len; - - /* - * Only entire blocks can be hashed. - * Copy remainder to extra blocks buffer. - */ - copy_len = len & (SHA512_BLOCK_SIZE-1); - - if (copy_len) { - len -= copy_len; - memcpy(ctx->partial_block_buffer, - ((const char *) buffer + len), - copy_len); - ctx->partial_block_buffer_length = copy_len; - } - - ctx->incoming_buffer_length = 0; - - /* len should be a multiple of the block size now */ - assert((len % SHA512_BLOCK_SIZE) == 0); - - /* Set len to the number of blocks to be hashed */ - len >>= SHA512_LOG2_BLOCK_SIZE; - - if (len) { - - ctx->job.buffer = (uint8_t *) buffer; - ctx->job.len = len; - ctx = (struct sha512_hash_ctx *) - sha512_job_mgr_submit(&mgr->mgr, - &ctx->job); - continue; - } - } - - /* - * If the extra blocks are not empty, then we are - * either on the last block(s) or we need more - * user input before continuing. - */ - if (ctx->status & HASH_CTX_STS_LAST) { - - uint8_t *buf = ctx->partial_block_buffer; - uint32_t n_extra_blocks = - sha512_pad(buf, ctx->total_length); - - ctx->status = (HASH_CTX_STS_PROCESSING | - HASH_CTX_STS_COMPLETE); - ctx->job.buffer = buf; - ctx->job.len = (uint32_t) n_extra_blocks; - ctx = (struct sha512_hash_ctx *) - sha512_job_mgr_submit(&mgr->mgr, &ctx->job); - continue; - } - - if (ctx) - ctx->status = HASH_CTX_STS_IDLE; - return ctx; - } - - return NULL; -} - -static struct sha512_hash_ctx - *sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate) -{ - /* - * If get_comp_job returns NULL, there are no jobs complete. - * If get_comp_job returns a job, verify that it is safe to return to - * the user. - * If it is not ready, resubmit the job to finish processing. - * If sha512_ctx_mgr_resubmit returned a job, it is ready to be - * returned. - * Otherwise, all jobs currently being managed by the hash_ctx_mgr - * still need processing. - */ - struct sha512_ctx_mgr *mgr; - struct sha512_hash_ctx *ctx; - unsigned long flags; - - mgr = cstate->mgr; - spin_lock_irqsave(&cstate->work_lock, flags); - ctx = (struct sha512_hash_ctx *) - sha512_job_mgr_get_comp_job(&mgr->mgr); - ctx = sha512_ctx_mgr_resubmit(mgr, ctx); - spin_unlock_irqrestore(&cstate->work_lock, flags); - return ctx; -} - -static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr) -{ - sha512_job_mgr_init(&mgr->mgr); -} - -static struct sha512_hash_ctx - *sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate, - struct sha512_hash_ctx *ctx, - const void *buffer, - uint32_t len, - int flags) -{ - struct sha512_ctx_mgr *mgr; - unsigned long irqflags; - - mgr = cstate->mgr; - spin_lock_irqsave(&cstate->work_lock, irqflags); - if (flags & ~(HASH_UPDATE | HASH_LAST)) { - /* User should not pass anything other than UPDATE or LAST */ - ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; - goto unlock; - } - - if (ctx->status & HASH_CTX_STS_PROCESSING) { - /* Cannot submit to a currently processing job. */ - ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; - goto unlock; - } - - if (ctx->status & HASH_CTX_STS_COMPLETE) { - /* Cannot update a finished job. */ - ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; - goto unlock; - } - - /* - * If we made it here, there were no errors during this call to - * submit - */ - ctx->error = HASH_CTX_ERROR_NONE; - - /* Store buffer ptr info from user */ - ctx->incoming_buffer = buffer; - ctx->incoming_buffer_length = len; - - /* - * Store the user's request flags and mark this ctx as currently being - * processed. - */ - ctx->status = (flags & HASH_LAST) ? - (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : - HASH_CTX_STS_PROCESSING; - - /* Advance byte counter */ - ctx->total_length += len; - - /* - * If there is anything currently buffered in the extra blocks, - * append to it until it contains a whole block. - * Or if the user's buffer contains less than a whole block, - * append as much as possible to the extra block. - */ - if (ctx->partial_block_buffer_length || len < SHA512_BLOCK_SIZE) { - /* Compute how many bytes to copy from user buffer into extra - * block - */ - uint32_t copy_len = SHA512_BLOCK_SIZE - - ctx->partial_block_buffer_length; - if (len < copy_len) - copy_len = len; - - if (copy_len) { - /* Copy and update relevant pointers and counters */ - memcpy - (&ctx->partial_block_buffer[ctx->partial_block_buffer_length], - buffer, copy_len); - - ctx->partial_block_buffer_length += copy_len; - ctx->incoming_buffer = (const void *) - ((const char *)buffer + copy_len); - ctx->incoming_buffer_length = len - copy_len; - } - - /* The extra block should never contain more than 1 block - * here - */ - assert(ctx->partial_block_buffer_length <= SHA512_BLOCK_SIZE); - - /* If the extra block buffer contains exactly 1 block, it can - * be hashed. - */ - if (ctx->partial_block_buffer_length >= SHA512_BLOCK_SIZE) { - ctx->partial_block_buffer_length = 0; - - ctx->job.buffer = ctx->partial_block_buffer; - ctx->job.len = 1; - ctx = (struct sha512_hash_ctx *) - sha512_job_mgr_submit(&mgr->mgr, &ctx->job); - } - } - - ctx = sha512_ctx_mgr_resubmit(mgr, ctx); -unlock: - spin_unlock_irqrestore(&cstate->work_lock, irqflags); - return ctx; -} - -static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate) -{ - struct sha512_ctx_mgr *mgr; - struct sha512_hash_ctx *ctx; - unsigned long flags; - - mgr = cstate->mgr; - spin_lock_irqsave(&cstate->work_lock, flags); - while (1) { - ctx = (struct sha512_hash_ctx *) - sha512_job_mgr_flush(&mgr->mgr); - - /* If flush returned 0, there are no more jobs in flight. */ - if (!ctx) - break; - - /* - * If flush returned a job, resubmit the job to finish - * processing. - */ - ctx = sha512_ctx_mgr_resubmit(mgr, ctx); - - /* - * If sha512_ctx_mgr_resubmit returned a job, it is ready to - * be returned. Otherwise, all jobs currently being managed by - * the sha512_ctx_mgr still need processing. Loop. - */ - if (ctx) - break; - } - spin_unlock_irqrestore(&cstate->work_lock, flags); - return ctx; -} - -static int sha512_mb_init(struct ahash_request *areq) -{ - struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); - - hash_ctx_init(sctx); - sctx->job.result_digest[0] = SHA512_H0; - sctx->job.result_digest[1] = SHA512_H1; - sctx->job.result_digest[2] = SHA512_H2; - sctx->job.result_digest[3] = SHA512_H3; - sctx->job.result_digest[4] = SHA512_H4; - sctx->job.result_digest[5] = SHA512_H5; - sctx->job.result_digest[6] = SHA512_H6; - sctx->job.result_digest[7] = SHA512_H7; - sctx->total_length = 0; - sctx->partial_block_buffer_length = 0; - sctx->status = HASH_CTX_STS_IDLE; - - return 0; -} - -static int sha512_mb_set_results(struct mcryptd_hash_request_ctx *rctx) -{ - int i; - struct sha512_hash_ctx *sctx = ahash_request_ctx(&rctx->areq); - __be64 *dst = (__be64 *) rctx->out; - - for (i = 0; i < 8; ++i) - dst[i] = cpu_to_be64(sctx->job.result_digest[i]); - - return 0; -} - -static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, - struct mcryptd_alg_cstate *cstate, bool flush) -{ - int flag = HASH_UPDATE; - int nbytes, err = 0; - struct mcryptd_hash_request_ctx *rctx = *ret_rctx; - struct sha512_hash_ctx *sha_ctx; - - /* more work ? */ - while (!(rctx->flag & HASH_DONE)) { - nbytes = crypto_ahash_walk_done(&rctx->walk, 0); - if (nbytes < 0) { - err = nbytes; - goto out; - } - /* check if the walk is done */ - if (crypto_ahash_walk_last(&rctx->walk)) { - rctx->flag |= HASH_DONE; - if (rctx->flag & HASH_FINAL) - flag |= HASH_LAST; - - } - sha_ctx = (struct sha512_hash_ctx *) - ahash_request_ctx(&rctx->areq); - kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, - rctx->walk.data, nbytes, flag); - if (!sha_ctx) { - if (flush) - sha_ctx = sha512_ctx_mgr_flush(cstate); - } - kernel_fpu_end(); - if (sha_ctx) - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - else { - rctx = NULL; - goto out; - } - } - - /* copy the results */ - if (rctx->flag & HASH_FINAL) - sha512_mb_set_results(rctx); - -out: - *ret_rctx = rctx; - return err; -} - -static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, - struct mcryptd_alg_cstate *cstate, - int err) -{ - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); - struct sha512_hash_ctx *sha_ctx; - struct mcryptd_hash_request_ctx *req_ctx; - int ret; - unsigned long flags; - - /* remove from work list */ - spin_lock_irqsave(&cstate->work_lock, flags); - list_del(&rctx->waiter); - spin_unlock_irqrestore(&cstate->work_lock, flags); - - if (irqs_disabled()) - rctx->complete(&req->base, err); - else { - local_bh_disable(); - rctx->complete(&req->base, err); - local_bh_enable(); - } - - /* check to see if there are other jobs that are done */ - sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate); - while (sha_ctx) { - req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&req_ctx, cstate, false); - if (req_ctx) { - spin_lock_irqsave(&cstate->work_lock, flags); - list_del(&req_ctx->waiter); - spin_unlock_irqrestore(&cstate->work_lock, flags); - - req = cast_mcryptd_ctx_to_req(req_ctx); - if (irqs_disabled()) - req_ctx->complete(&req->base, ret); - else { - local_bh_disable(); - req_ctx->complete(&req->base, ret); - local_bh_enable(); - } - } - sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate); - } - - return 0; -} - -static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx, - struct mcryptd_alg_cstate *cstate) -{ - unsigned long next_flush; - unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); - unsigned long flags; - - /* initialize tag */ - rctx->tag.arrival = jiffies; /* tag the arrival time */ - rctx->tag.seq_num = cstate->next_seq_num++; - next_flush = rctx->tag.arrival + delay; - rctx->tag.expire = next_flush; - - spin_lock_irqsave(&cstate->work_lock, flags); - list_add_tail(&rctx->waiter, &cstate->work_list); - spin_unlock_irqrestore(&cstate->work_lock, flags); - - mcryptd_arm_flusher(cstate, delay); -} - -static int sha512_mb_update(struct ahash_request *areq) -{ - struct mcryptd_hash_request_ctx *rctx = - container_of(areq, struct mcryptd_hash_request_ctx, - areq); - struct mcryptd_alg_cstate *cstate = - this_cpu_ptr(sha512_mb_alg_state.alg_cstate); - - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); - struct sha512_hash_ctx *sha_ctx; - int ret = 0, nbytes; - - - /* sanity check */ - if (rctx->tag.cpu != smp_processor_id()) { - pr_err("mcryptd error: cpu clash\n"); - goto done; - } - - /* need to init context */ - req_ctx_init(rctx, areq); - - nbytes = crypto_ahash_walk_first(req, &rctx->walk); - - if (nbytes < 0) { - ret = nbytes; - goto done; - } - - if (crypto_ahash_walk_last(&rctx->walk)) - rctx->flag |= HASH_DONE; - - /* submit */ - sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); - sha512_mb_add_list(rctx, cstate); - kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, - nbytes, HASH_UPDATE); - kernel_fpu_end(); - - /* check if anything is returned */ - if (!sha_ctx) - return -EINPROGRESS; - - if (sha_ctx->error) { - ret = sha_ctx->error; - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - goto done; - } - - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&rctx, cstate, false); - - if (!rctx) - return -EINPROGRESS; -done: - sha_complete_job(rctx, cstate, ret); - return ret; -} - -static int sha512_mb_finup(struct ahash_request *areq) -{ - struct mcryptd_hash_request_ctx *rctx = - container_of(areq, struct mcryptd_hash_request_ctx, - areq); - struct mcryptd_alg_cstate *cstate = - this_cpu_ptr(sha512_mb_alg_state.alg_cstate); - - struct ahash_request *req = cast_mcryptd_ctx_to_req(rctx); - struct sha512_hash_ctx *sha_ctx; - int ret = 0, flag = HASH_UPDATE, nbytes; - - /* sanity check */ - if (rctx->tag.cpu != smp_processor_id()) { - pr_err("mcryptd error: cpu clash\n"); - goto done; - } - - /* need to init context */ - req_ctx_init(rctx, areq); - - nbytes = crypto_ahash_walk_first(req, &rctx->walk); - - if (nbytes < 0) { - ret = nbytes; - goto done; - } - - if (crypto_ahash_walk_last(&rctx->walk)) { - rctx->flag |= HASH_DONE; - flag = HASH_LAST; - } - - /* submit */ - rctx->flag |= HASH_FINAL; - sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); - sha512_mb_add_list(rctx, cstate); - - kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, - nbytes, flag); - kernel_fpu_end(); - - /* check if anything is returned */ - if (!sha_ctx) - return -EINPROGRESS; - - if (sha_ctx->error) { - ret = sha_ctx->error; - goto done; - } - - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&rctx, cstate, false); - if (!rctx) - return -EINPROGRESS; -done: - sha_complete_job(rctx, cstate, ret); - return ret; -} - -static int sha512_mb_final(struct ahash_request *areq) -{ - struct mcryptd_hash_request_ctx *rctx = - container_of(areq, struct mcryptd_hash_request_ctx, - areq); - struct mcryptd_alg_cstate *cstate = - this_cpu_ptr(sha512_mb_alg_state.alg_cstate); - - struct sha512_hash_ctx *sha_ctx; - int ret = 0; - u8 data; - - /* sanity check */ - if (rctx->tag.cpu != smp_processor_id()) { - pr_err("mcryptd error: cpu clash\n"); - goto done; - } - - /* need to init context */ - req_ctx_init(rctx, areq); - - rctx->flag |= HASH_DONE | HASH_FINAL; - - sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); - /* flag HASH_FINAL and 0 data size */ - sha512_mb_add_list(rctx, cstate); - kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST); - kernel_fpu_end(); - - /* check if anything is returned */ - if (!sha_ctx) - return -EINPROGRESS; - - if (sha_ctx->error) { - ret = sha_ctx->error; - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - goto done; - } - - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - ret = sha_finish_walk(&rctx, cstate, false); - if (!rctx) - return -EINPROGRESS; -done: - sha_complete_job(rctx, cstate, ret); - return ret; -} - -static int sha512_mb_export(struct ahash_request *areq, void *out) -{ - struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); - - memcpy(out, sctx, sizeof(*sctx)); - - return 0; -} - -static int sha512_mb_import(struct ahash_request *areq, const void *in) -{ - struct sha512_hash_ctx *sctx = ahash_request_ctx(areq); - - memcpy(sctx, in, sizeof(*sctx)); - - return 0; -} - -static int sha512_mb_async_init_tfm(struct crypto_tfm *tfm) -{ - struct mcryptd_ahash *mcryptd_tfm; - struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); - struct mcryptd_hash_ctx *mctx; - - mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha512-mb", - CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(mcryptd_tfm)) - return PTR_ERR(mcryptd_tfm); - mctx = crypto_ahash_ctx(&mcryptd_tfm->base); - mctx->alg_state = &sha512_mb_alg_state; - ctx->mcryptd_tfm = mcryptd_tfm; - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct ahash_request) + - crypto_ahash_reqsize(&mcryptd_tfm->base)); - - return 0; -} - -static void sha512_mb_async_exit_tfm(struct crypto_tfm *tfm) -{ - struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); - - mcryptd_free_ahash(ctx->mcryptd_tfm); -} - -static int sha512_mb_areq_init_tfm(struct crypto_tfm *tfm) -{ - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct ahash_request) + - sizeof(struct sha512_hash_ctx)); - - return 0; -} - -static void sha512_mb_areq_exit_tfm(struct crypto_tfm *tfm) -{ - struct sha512_mb_ctx *ctx = crypto_tfm_ctx(tfm); - - mcryptd_free_ahash(ctx->mcryptd_tfm); -} - -static struct ahash_alg sha512_mb_areq_alg = { - .init = sha512_mb_init, - .update = sha512_mb_update, - .final = sha512_mb_final, - .finup = sha512_mb_finup, - .export = sha512_mb_export, - .import = sha512_mb_import, - .halg = { - .digestsize = SHA512_DIGEST_SIZE, - .statesize = sizeof(struct sha512_hash_ctx), - .base = { - .cra_name = "__sha512-mb", - .cra_driver_name = "__intel_sha512-mb", - .cra_priority = 100, - /* - * use ASYNC flag as some buffers in multi-buffer - * algo may not have completed before hashing thread - * sleep - */ - .cra_flags = CRYPTO_ALG_ASYNC | - CRYPTO_ALG_INTERNAL, - .cra_blocksize = SHA512_BLOCK_SIZE, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT - (sha512_mb_areq_alg.halg.base.cra_list), - .cra_init = sha512_mb_areq_init_tfm, - .cra_exit = sha512_mb_areq_exit_tfm, - .cra_ctxsize = sizeof(struct sha512_hash_ctx), - } - } -}; - -static int sha512_mb_async_init(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_init(mcryptd_req); -} - -static int sha512_mb_async_update(struct ahash_request *req) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_update(mcryptd_req); -} - -static int sha512_mb_async_finup(struct ahash_request *req) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_finup(mcryptd_req); -} - -static int sha512_mb_async_final(struct ahash_request *req) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_final(mcryptd_req); -} - -static int sha512_mb_async_digest(struct ahash_request *req) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_digest(mcryptd_req); -} - -static int sha512_mb_async_export(struct ahash_request *req, void *out) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - return crypto_ahash_export(mcryptd_req, out); -} - -static int sha512_mb_async_import(struct ahash_request *req, const void *in) -{ - struct ahash_request *mcryptd_req = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct sha512_mb_ctx *ctx = crypto_ahash_ctx(tfm); - struct mcryptd_ahash *mcryptd_tfm = ctx->mcryptd_tfm; - struct crypto_ahash *child = mcryptd_ahash_child(mcryptd_tfm); - struct mcryptd_hash_request_ctx *rctx; - struct ahash_request *areq; - - memcpy(mcryptd_req, req, sizeof(*req)); - ahash_request_set_tfm(mcryptd_req, &mcryptd_tfm->base); - rctx = ahash_request_ctx(mcryptd_req); - - areq = &rctx->areq; - - ahash_request_set_tfm(areq, child); - ahash_request_set_callback(areq, CRYPTO_TFM_REQ_MAY_SLEEP, - rctx->complete, req); - - return crypto_ahash_import(mcryptd_req, in); -} - -static struct ahash_alg sha512_mb_async_alg = { - .init = sha512_mb_async_init, - .update = sha512_mb_async_update, - .final = sha512_mb_async_final, - .finup = sha512_mb_async_finup, - .digest = sha512_mb_async_digest, - .export = sha512_mb_async_export, - .import = sha512_mb_async_import, - .halg = { - .digestsize = SHA512_DIGEST_SIZE, - .statesize = sizeof(struct sha512_hash_ctx), - .base = { - .cra_name = "sha512", - .cra_driver_name = "sha512_mb", - /* - * Low priority, since with few concurrent hash requests - * this is extremely slow due to the flush delay. Users - * whose workloads would benefit from this can request - * it explicitly by driver name, or can increase its - * priority at runtime using NETLINK_CRYPTO. - */ - .cra_priority = 50, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = SHA512_BLOCK_SIZE, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT - (sha512_mb_async_alg.halg.base.cra_list), - .cra_init = sha512_mb_async_init_tfm, - .cra_exit = sha512_mb_async_exit_tfm, - .cra_ctxsize = sizeof(struct sha512_mb_ctx), - .cra_alignmask = 0, - }, - }, -}; - -static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate) -{ - struct mcryptd_hash_request_ctx *rctx; - unsigned long cur_time; - unsigned long next_flush = 0; - struct sha512_hash_ctx *sha_ctx; - - - cur_time = jiffies; - - while (!list_empty(&cstate->work_list)) { - rctx = list_entry(cstate->work_list.next, - struct mcryptd_hash_request_ctx, waiter); - if time_before(cur_time, rctx->tag.expire) - break; - kernel_fpu_begin(); - sha_ctx = (struct sha512_hash_ctx *) - sha512_ctx_mgr_flush(cstate); - kernel_fpu_end(); - if (!sha_ctx) { - pr_err("sha512_mb error: nothing got flushed for" - " non-empty list\n"); - break; - } - rctx = cast_hash_to_mcryptd_ctx(sha_ctx); - sha_finish_walk(&rctx, cstate, true); - sha_complete_job(rctx, cstate, 0); - } - - if (!list_empty(&cstate->work_list)) { - rctx = list_entry(cstate->work_list.next, - struct mcryptd_hash_request_ctx, waiter); - /* get the hash context and then flush time */ - next_flush = rctx->tag.expire; - mcryptd_arm_flusher(cstate, get_delay(next_flush)); - } - return next_flush; -} - -static int __init sha512_mb_mod_init(void) -{ - - int cpu; - int err; - struct mcryptd_alg_cstate *cpu_state; - - /* check for dependent cpu features */ - if (!boot_cpu_has(X86_FEATURE_AVX2) || - !boot_cpu_has(X86_FEATURE_BMI2)) - return -ENODEV; - - /* initialize multibuffer structures */ - sha512_mb_alg_state.alg_cstate = - alloc_percpu(struct mcryptd_alg_cstate); - - sha512_job_mgr_init = sha512_mb_mgr_init_avx2; - sha512_job_mgr_submit = sha512_mb_mgr_submit_avx2; - sha512_job_mgr_flush = sha512_mb_mgr_flush_avx2; - sha512_job_mgr_get_comp_job = sha512_mb_mgr_get_comp_job_avx2; - - if (!sha512_mb_alg_state.alg_cstate) - return -ENOMEM; - for_each_possible_cpu(cpu) { - cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); - cpu_state->next_flush = 0; - cpu_state->next_seq_num = 0; - cpu_state->flusher_engaged = false; - INIT_DELAYED_WORK(&cpu_state->flush, mcryptd_flusher); - cpu_state->cpu = cpu; - cpu_state->alg_state = &sha512_mb_alg_state; - cpu_state->mgr = kzalloc(sizeof(struct sha512_ctx_mgr), - GFP_KERNEL); - if (!cpu_state->mgr) - goto err2; - sha512_ctx_mgr_init(cpu_state->mgr); - INIT_LIST_HEAD(&cpu_state->work_list); - spin_lock_init(&cpu_state->work_lock); - } - sha512_mb_alg_state.flusher = &sha512_mb_flusher; - - err = crypto_register_ahash(&sha512_mb_areq_alg); - if (err) - goto err2; - err = crypto_register_ahash(&sha512_mb_async_alg); - if (err) - goto err1; - - - return 0; -err1: - crypto_unregister_ahash(&sha512_mb_areq_alg); -err2: - for_each_possible_cpu(cpu) { - cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); - kfree(cpu_state->mgr); - } - free_percpu(sha512_mb_alg_state.alg_cstate); - return -ENODEV; -} - -static void __exit sha512_mb_mod_fini(void) -{ - int cpu; - struct mcryptd_alg_cstate *cpu_state; - - crypto_unregister_ahash(&sha512_mb_async_alg); - crypto_unregister_ahash(&sha512_mb_areq_alg); - for_each_possible_cpu(cpu) { - cpu_state = per_cpu_ptr(sha512_mb_alg_state.alg_cstate, cpu); - kfree(cpu_state->mgr); - } - free_percpu(sha512_mb_alg_state.alg_cstate); -} - -module_init(sha512_mb_mod_init); -module_exit(sha512_mb_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, multi buffer accelerated"); - -MODULE_ALIAS("sha512"); diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h b/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h deleted file mode 100644 index e5c465bd821e..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_ctx.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Header file for multi buffer SHA512 context - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _SHA_MB_CTX_INTERNAL_H -#define _SHA_MB_CTX_INTERNAL_H - -#include "sha512_mb_mgr.h" - -#define HASH_UPDATE 0x00 -#define HASH_LAST 0x01 -#define HASH_DONE 0x02 -#define HASH_FINAL 0x04 - -#define HASH_CTX_STS_IDLE 0x00 -#define HASH_CTX_STS_PROCESSING 0x01 -#define HASH_CTX_STS_LAST 0x02 -#define HASH_CTX_STS_COMPLETE 0x04 - -enum hash_ctx_error { - HASH_CTX_ERROR_NONE = 0, - HASH_CTX_ERROR_INVALID_FLAGS = -1, - HASH_CTX_ERROR_ALREADY_PROCESSING = -2, - HASH_CTX_ERROR_ALREADY_COMPLETED = -3, -}; - -#define hash_ctx_user_data(ctx) ((ctx)->user_data) -#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) -#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) -#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) -#define hash_ctx_status(ctx) ((ctx)->status) -#define hash_ctx_error(ctx) ((ctx)->error) -#define hash_ctx_init(ctx) \ - do { \ - (ctx)->error = HASH_CTX_ERROR_NONE; \ - (ctx)->status = HASH_CTX_STS_COMPLETE; \ - } while (0) - -/* Hash Constants and Typedefs */ -#define SHA512_DIGEST_LENGTH 8 -#define SHA512_LOG2_BLOCK_SIZE 7 - -#define SHA512_PADLENGTHFIELD_SIZE 16 - -#ifdef SHA_MB_DEBUG -#define assert(expr) \ -do { \ - if (unlikely(!(expr))) { \ - printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \ - #expr, __FILE__, __func__, __LINE__); \ - } \ -} while (0) -#else -#define assert(expr) do {} while (0) -#endif - -struct sha512_ctx_mgr { - struct sha512_mb_mgr mgr; -}; - -/* typedef struct sha512_ctx_mgr sha512_ctx_mgr; */ - -struct sha512_hash_ctx { - /* Must be at struct offset 0 */ - struct job_sha512 job; - /* status flag */ - int status; - /* error flag */ - int error; - - uint64_t total_length; - const void *incoming_buffer; - uint32_t incoming_buffer_length; - uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2]; - uint32_t partial_block_buffer_length; - void *user_data; -}; - -#endif diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h b/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h deleted file mode 100644 index 178f17eef382..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Header file for multi buffer SHA512 algorithm manager - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __SHA_MB_MGR_H -#define __SHA_MB_MGR_H - -#include <linux/types.h> - -#define NUM_SHA512_DIGEST_WORDS 8 - -enum job_sts {STS_UNKNOWN = 0, - STS_BEING_PROCESSED = 1, - STS_COMPLETED = 2, - STS_INTERNAL_ERROR = 3, - STS_ERROR = 4 -}; - -struct job_sha512 { - u8 *buffer; - u64 len; - u64 result_digest[NUM_SHA512_DIGEST_WORDS] __aligned(32); - enum job_sts status; - void *user_data; -}; - -struct sha512_args_x4 { - uint64_t digest[8][4]; - uint8_t *data_ptr[4]; -}; - -struct sha512_lane_data { - struct job_sha512 *job_in_lane; -}; - -struct sha512_mb_mgr { - struct sha512_args_x4 args; - - uint64_t lens[4]; - - /* each byte is index (0...7) of unused lanes */ - uint64_t unused_lanes; - /* byte 4 is set to FF as a flag */ - struct sha512_lane_data ldata[4]; -}; - -#define SHA512_MB_MGR_NUM_LANES_AVX2 4 - -void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state); -struct job_sha512 *sha512_mb_mgr_submit_avx2(struct sha512_mb_mgr *state, - struct job_sha512 *job); -struct job_sha512 *sha512_mb_mgr_flush_avx2(struct sha512_mb_mgr *state); -struct job_sha512 *sha512_mb_mgr_get_comp_job_avx2(struct sha512_mb_mgr *state); - -#endif diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S deleted file mode 100644 index cf2636d4c9ba..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_datastruct.S +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Header file for multi buffer SHA256 algorithm data structure - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -# Macros for defining data structures - -# Usage example - -#START_FIELDS # JOB_AES -### name size align -#FIELD _plaintext, 8, 8 # pointer to plaintext -#FIELD _ciphertext, 8, 8 # pointer to ciphertext -#FIELD _IV, 16, 8 # IV -#FIELD _keys, 8, 8 # pointer to keys -#FIELD _len, 4, 4 # length in bytes -#FIELD _status, 4, 4 # status enumeration -#FIELD _user_data, 8, 8 # pointer to user data -#UNION _union, size1, align1, \ -# size2, align2, \ -# size3, align3, \ -# ... -#END_FIELDS -#%assign _JOB_AES_size _FIELD_OFFSET -#%assign _JOB_AES_align _STRUCT_ALIGN - -######################################################################### - -# Alternate "struc-like" syntax: -# STRUCT job_aes2 -# RES_Q .plaintext, 1 -# RES_Q .ciphertext, 1 -# RES_DQ .IV, 1 -# RES_B .nested, _JOB_AES_SIZE, _JOB_AES_ALIGN -# RES_U .union, size1, align1, \ -# size2, align2, \ -# ... -# ENDSTRUCT -# # Following only needed if nesting -# %assign job_aes2_size _FIELD_OFFSET -# %assign job_aes2_align _STRUCT_ALIGN -# -# RES_* macros take a name, a count and an optional alignment. -# The count in in terms of the base size of the macro, and the -# default alignment is the base size. -# The macros are: -# Macro Base size -# RES_B 1 -# RES_W 2 -# RES_D 4 -# RES_Q 8 -# RES_DQ 16 -# RES_Y 32 -# RES_Z 64 -# -# RES_U defines a union. It's arguments are a name and two or more -# pairs of "size, alignment" -# -# The two assigns are only needed if this structure is being nested -# within another. Even if the assigns are not done, one can still use -# STRUCT_NAME_size as the size of the structure. -# -# Note that for nesting, you still need to assign to STRUCT_NAME_size. -# -# The differences between this and using "struc" directly are that each -# type is implicitly aligned to its natural length (although this can be -# over-ridden with an explicit third parameter), and that the structure -# is padded at the end to its overall alignment. -# - -######################################################################### - -#ifndef _DATASTRUCT_ASM_ -#define _DATASTRUCT_ASM_ - -#define PTR_SZ 8 -#define SHA512_DIGEST_WORD_SIZE 8 -#define SHA512_MB_MGR_NUM_LANES_AVX2 4 -#define NUM_SHA512_DIGEST_WORDS 8 -#define SZ4 4*SHA512_DIGEST_WORD_SIZE -#define ROUNDS 80*SZ4 -#define SHA512_DIGEST_ROW_SIZE (SHA512_MB_MGR_NUM_LANES_AVX2 * 8) - -# START_FIELDS -.macro START_FIELDS - _FIELD_OFFSET = 0 - _STRUCT_ALIGN = 0 -.endm - -# FIELD name size align -.macro FIELD name size align - _FIELD_OFFSET = (_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) - \name = _FIELD_OFFSET - _FIELD_OFFSET = _FIELD_OFFSET + (\size) -.if (\align > _STRUCT_ALIGN) - _STRUCT_ALIGN = \align -.endif -.endm - -# END_FIELDS -.macro END_FIELDS - _FIELD_OFFSET = (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) -.endm - -.macro STRUCT p1 -START_FIELDS -.struc \p1 -.endm - -.macro ENDSTRUCT - tmp = _FIELD_OFFSET - END_FIELDS - tmp = (_FIELD_OFFSET - ##tmp) -.if (tmp > 0) - .lcomm tmp -.endm - -## RES_int name size align -.macro RES_int p1 p2 p3 - name = \p1 - size = \p2 - align = .\p3 - - _FIELD_OFFSET = (_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)) -.align align -.lcomm name size - _FIELD_OFFSET = _FIELD_OFFSET + (size) -.if (align > _STRUCT_ALIGN) - _STRUCT_ALIGN = align -.endif -.endm - -# macro RES_B name, size [, align] -.macro RES_B _name, _size, _align=1 -RES_int _name _size _align -.endm - -# macro RES_W name, size [, align] -.macro RES_W _name, _size, _align=2 -RES_int _name 2*(_size) _align -.endm - -# macro RES_D name, size [, align] -.macro RES_D _name, _size, _align=4 -RES_int _name 4*(_size) _align -.endm - -# macro RES_Q name, size [, align] -.macro RES_Q _name, _size, _align=8 -RES_int _name 8*(_size) _align -.endm - -# macro RES_DQ name, size [, align] -.macro RES_DQ _name, _size, _align=16 -RES_int _name 16*(_size) _align -.endm - -# macro RES_Y name, size [, align] -.macro RES_Y _name, _size, _align=32 -RES_int _name 32*(_size) _align -.endm - -# macro RES_Z name, size [, align] -.macro RES_Z _name, _size, _align=64 -RES_int _name 64*(_size) _align -.endm - -#endif - -################################################################### -### Define SHA512 Out Of Order Data Structures -################################################################### - -START_FIELDS # LANE_DATA -### name size align -FIELD _job_in_lane, 8, 8 # pointer to job object -END_FIELDS - - _LANE_DATA_size = _FIELD_OFFSET - _LANE_DATA_align = _STRUCT_ALIGN - -#################################################################### - -START_FIELDS # SHA512_ARGS_X4 -### name size align -FIELD _digest, 8*8*4, 4 # transposed digest -FIELD _data_ptr, 8*4, 8 # array of pointers to data -END_FIELDS - - _SHA512_ARGS_X4_size = _FIELD_OFFSET - _SHA512_ARGS_X4_align = _STRUCT_ALIGN - -##################################################################### - -START_FIELDS # MB_MGR -### name size align -FIELD _args, _SHA512_ARGS_X4_size, _SHA512_ARGS_X4_align -FIELD _lens, 8*4, 8 -FIELD _unused_lanes, 8, 8 -FIELD _ldata, _LANE_DATA_size*4, _LANE_DATA_align -END_FIELDS - - _MB_MGR_size = _FIELD_OFFSET - _MB_MGR_align = _STRUCT_ALIGN - -_args_digest = _args + _digest -_args_data_ptr = _args + _data_ptr - -####################################################################### - -####################################################################### -#### Define constants -####################################################################### - -#define STS_UNKNOWN 0 -#define STS_BEING_PROCESSED 1 -#define STS_COMPLETED 2 - -####################################################################### -#### Define JOB_SHA512 structure -####################################################################### - -START_FIELDS # JOB_SHA512 -### name size align -FIELD _buffer, 8, 8 # pointer to buffer -FIELD _len, 8, 8 # length in bytes -FIELD _result_digest, 8*8, 32 # Digest (output) -FIELD _status, 4, 4 -FIELD _user_data, 8, 8 -END_FIELDS - - _JOB_SHA512_size = _FIELD_OFFSET - _JOB_SHA512_align = _STRUCT_ALIGN diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S deleted file mode 100644 index 7c629caebc05..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S +++ /dev/null @@ -1,297 +0,0 @@ -/* - * Flush routine for SHA512 multibuffer - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/linkage.h> -#include <asm/frame.h> -#include "sha512_mb_mgr_datastruct.S" - -.extern sha512_x4_avx2 - -# LINUX register definitions -#define arg1 %rdi -#define arg2 %rsi - -# idx needs to be other than arg1, arg2, rbx, r12 -#define idx %rdx - -# Common definitions -#define state arg1 -#define job arg2 -#define len2 arg2 - -#define unused_lanes %rbx -#define lane_data %rbx -#define tmp2 %rbx - -#define job_rax %rax -#define tmp1 %rax -#define size_offset %rax -#define tmp %rax -#define start_offset %rax - -#define tmp3 arg1 - -#define extra_blocks arg2 -#define p arg2 - -#define tmp4 %r8 -#define lens0 %r8 - -#define lens1 %r9 -#define lens2 %r10 -#define lens3 %r11 - -.macro LABEL prefix n -\prefix\n\(): -.endm - -.macro JNE_SKIP i -jne skip_\i -.endm - -.altmacro -.macro SET_OFFSET _offset -offset = \_offset -.endm -.noaltmacro - -# JOB* sha512_mb_mgr_flush_avx2(MB_MGR *state) -# arg 1 : rcx : state -ENTRY(sha512_mb_mgr_flush_avx2) - FRAME_BEGIN - push %rbx - - # If bit (32+3) is set, then all lanes are empty - mov _unused_lanes(state), unused_lanes - bt $32+7, unused_lanes - jc return_null - - # find a lane with a non-null job - xor idx, idx - offset = (_ldata + 1*_LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne one(%rip), idx - offset = (_ldata + 2*_LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne two(%rip), idx - offset = (_ldata + 3*_LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) - cmovne three(%rip), idx - - # copy idx to empty lanes -copy_lane_data: - offset = (_args + _data_ptr) - mov offset(state,idx,8), tmp - - I = 0 -.rep 4 - offset = (_ldata + I * _LANE_DATA_size + _job_in_lane) - cmpq $0, offset(state) -.altmacro - JNE_SKIP %I - offset = (_args + _data_ptr + 8*I) - mov tmp, offset(state) - offset = (_lens + 8*I +4) - movl $0xFFFFFFFF, offset(state) -LABEL skip_ %I - I = (I+1) -.noaltmacro -.endr - - # Find min length - mov _lens + 0*8(state),lens0 - mov lens0,idx - mov _lens + 1*8(state),lens1 - cmp idx,lens1 - cmovb lens1,idx - mov _lens + 2*8(state),lens2 - cmp idx,lens2 - cmovb lens2,idx - mov _lens + 3*8(state),lens3 - cmp idx,lens3 - cmovb lens3,idx - mov idx,len2 - and $0xF,idx - and $~0xFF,len2 - jz len_is_0 - - sub len2, lens0 - sub len2, lens1 - sub len2, lens2 - sub len2, lens3 - shr $32,len2 - mov lens0, _lens + 0*8(state) - mov lens1, _lens + 1*8(state) - mov lens2, _lens + 2*8(state) - mov lens3, _lens + 3*8(state) - - # "state" and "args" are the same address, arg1 - # len is arg2 - call sha512_x4_avx2 - # state and idx are intact - -len_is_0: - # process completed job "idx" - imul $_LANE_DATA_size, idx, lane_data - lea _ldata(state, lane_data), lane_data - - mov _job_in_lane(lane_data), job_rax - movq $0, _job_in_lane(lane_data) - movl $STS_COMPLETED, _status(job_rax) - mov _unused_lanes(state), unused_lanes - shl $8, unused_lanes - or idx, unused_lanes - mov unused_lanes, _unused_lanes(state) - - movl $0xFFFFFFFF, _lens+4(state, idx, 8) - - vmovq _args_digest+0*32(state, idx, 8), %xmm0 - vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0 - vmovq _args_digest+2*32(state, idx, 8), %xmm1 - vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1 - vmovq _args_digest+4*32(state, idx, 8), %xmm2 - vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2 - vmovq _args_digest+6*32(state, idx, 8), %xmm3 - vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3 - - vmovdqu %xmm0, _result_digest(job_rax) - vmovdqu %xmm1, _result_digest+1*16(job_rax) - vmovdqu %xmm2, _result_digest+2*16(job_rax) - vmovdqu %xmm3, _result_digest+3*16(job_rax) - -return: - pop %rbx - FRAME_END - ret - -return_null: - xor job_rax, job_rax - jmp return -ENDPROC(sha512_mb_mgr_flush_avx2) -.align 16 - -ENTRY(sha512_mb_mgr_get_comp_job_avx2) - push %rbx - - mov _unused_lanes(state), unused_lanes - bt $(32+7), unused_lanes - jc .return_null - - # Find min length - mov _lens(state),lens0 - mov lens0,idx - mov _lens+1*8(state),lens1 - cmp idx,lens1 - cmovb lens1,idx - mov _lens+2*8(state),lens2 - cmp idx,lens2 - cmovb lens2,idx - mov _lens+3*8(state),lens3 - cmp idx,lens3 - cmovb lens3,idx - test $~0xF,idx - jnz .return_null - and $0xF,idx - - #process completed job "idx" - imul $_LANE_DATA_size, idx, lane_data - lea _ldata(state, lane_data), lane_data - - mov _job_in_lane(lane_data), job_rax - movq $0, _job_in_lane(lane_data) - movl $STS_COMPLETED, _status(job_rax) - mov _unused_lanes(state), unused_lanes - shl $8, unused_lanes - or idx, unused_lanes - mov unused_lanes, _unused_lanes(state) - - movl $0xFFFFFFFF, _lens+4(state, idx, 8) - - vmovq _args_digest(state, idx, 8), %xmm0 - vpinsrq $1, _args_digest+1*32(state, idx, 8), %xmm0, %xmm0 - vmovq _args_digest+2*32(state, idx, 8), %xmm1 - vpinsrq $1, _args_digest+3*32(state, idx, 8), %xmm1, %xmm1 - vmovq _args_digest+4*32(state, idx, 8), %xmm2 - vpinsrq $1, _args_digest+5*32(state, idx, 8), %xmm2, %xmm2 - vmovq _args_digest+6*32(state, idx, 8), %xmm3 - vpinsrq $1, _args_digest+7*32(state, idx, 8), %xmm3, %xmm3 - - vmovdqu %xmm0, _result_digest+0*16(job_rax) - vmovdqu %xmm1, _result_digest+1*16(job_rax) - vmovdqu %xmm2, _result_digest+2*16(job_rax) - vmovdqu %xmm3, _result_digest+3*16(job_rax) - - pop %rbx - - ret - -.return_null: - xor job_rax, job_rax - pop %rbx - ret -ENDPROC(sha512_mb_mgr_get_comp_job_avx2) - -.section .rodata.cst8.one, "aM", @progbits, 8 -.align 8 -one: -.quad 1 - -.section .rodata.cst8.two, "aM", @progbits, 8 -.align 8 -two: -.quad 2 - -.section .rodata.cst8.three, "aM", @progbits, 8 -.align 8 -three: -.quad 3 diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c deleted file mode 100644 index d08805032f01..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Initialization code for multi buffer SHA256 algorithm for AVX2 - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "sha512_mb_mgr.h" - -void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state) -{ - unsigned int j; - - /* initially all lanes are unused */ - state->lens[0] = 0xFFFFFFFF00000000; - state->lens[1] = 0xFFFFFFFF00000001; - state->lens[2] = 0xFFFFFFFF00000002; - state->lens[3] = 0xFFFFFFFF00000003; - - state->unused_lanes = 0xFF03020100; - for (j = 0; j < 4; j++) - state->ldata[j].job_in_lane = NULL; -} diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S deleted file mode 100644 index 4ba709ba78e5..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Buffer submit code for multi buffer SHA512 algorithm - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include <linux/linkage.h> -#include <asm/frame.h> -#include "sha512_mb_mgr_datastruct.S" - -.extern sha512_x4_avx2 - -#define arg1 %rdi -#define arg2 %rsi - -#define idx %rdx -#define last_len %rdx - -#define size_offset %rcx -#define tmp2 %rcx - -# Common definitions -#define state arg1 -#define job arg2 -#define len2 arg2 -#define p2 arg2 - -#define p %r11 -#define start_offset %r11 - -#define unused_lanes %rbx - -#define job_rax %rax -#define len %rax - -#define lane %r12 -#define tmp3 %r12 -#define lens3 %r12 - -#define extra_blocks %r8 -#define lens0 %r8 - -#define tmp %r9 -#define lens1 %r9 - -#define lane_data %r10 -#define lens2 %r10 - -#define DWORD_len %eax - -# JOB* sha512_mb_mgr_submit_avx2(MB_MGR *state, JOB *job) -# arg 1 : rcx : state -# arg 2 : rdx : job -ENTRY(sha512_mb_mgr_submit_avx2) - FRAME_BEGIN - push %rbx - push %r12 - - mov _unused_lanes(state), unused_lanes - movzb %bl,lane - shr $8, unused_lanes - imul $_LANE_DATA_size, lane,lane_data - movl $STS_BEING_PROCESSED, _status(job) - lea _ldata(state, lane_data), lane_data - mov unused_lanes, _unused_lanes(state) - movl _len(job), DWORD_len - - mov job, _job_in_lane(lane_data) - movl DWORD_len,_lens+4(state , lane, 8) - - # Load digest words from result_digest - vmovdqu _result_digest+0*16(job), %xmm0 - vmovdqu _result_digest+1*16(job), %xmm1 - vmovdqu _result_digest+2*16(job), %xmm2 - vmovdqu _result_digest+3*16(job), %xmm3 - - vmovq %xmm0, _args_digest(state, lane, 8) - vpextrq $1, %xmm0, _args_digest+1*32(state , lane, 8) - vmovq %xmm1, _args_digest+2*32(state , lane, 8) - vpextrq $1, %xmm1, _args_digest+3*32(state , lane, 8) - vmovq %xmm2, _args_digest+4*32(state , lane, 8) - vpextrq $1, %xmm2, _args_digest+5*32(state , lane, 8) - vmovq %xmm3, _args_digest+6*32(state , lane, 8) - vpextrq $1, %xmm3, _args_digest+7*32(state , lane, 8) - - mov _buffer(job), p - mov p, _args_data_ptr(state, lane, 8) - - cmp $0xFF, unused_lanes - jne return_null - -start_loop: - - # Find min length - mov _lens+0*8(state),lens0 - mov lens0,idx - mov _lens+1*8(state),lens1 - cmp idx,lens1 - cmovb lens1, idx - mov _lens+2*8(state),lens2 - cmp idx,lens2 - cmovb lens2,idx - mov _lens+3*8(state),lens3 - cmp idx,lens3 - cmovb lens3,idx - mov idx,len2 - and $0xF,idx - and $~0xFF,len2 - jz len_is_0 - - sub len2,lens0 - sub len2,lens1 - sub len2,lens2 - sub len2,lens3 - shr $32,len2 - mov lens0, _lens + 0*8(state) - mov lens1, _lens + 1*8(state) - mov lens2, _lens + 2*8(state) - mov lens3, _lens + 3*8(state) - - # "state" and "args" are the same address, arg1 - # len is arg2 - call sha512_x4_avx2 - # state and idx are intact - -len_is_0: - - # process completed job "idx" - imul $_LANE_DATA_size, idx, lane_data - lea _ldata(state, lane_data), lane_data - - mov _job_in_lane(lane_data), job_rax - mov _unused_lanes(state), unused_lanes - movq $0, _job_in_lane(lane_data) - movl $STS_COMPLETED, _status(job_rax) - shl $8, unused_lanes - or idx, unused_lanes - mov unused_lanes, _unused_lanes(state) - - movl $0xFFFFFFFF,_lens+4(state,idx,8) - vmovq _args_digest+0*32(state , idx, 8), %xmm0 - vpinsrq $1, _args_digest+1*32(state , idx, 8), %xmm0, %xmm0 - vmovq _args_digest+2*32(state , idx, 8), %xmm1 - vpinsrq $1, _args_digest+3*32(state , idx, 8), %xmm1, %xmm1 - vmovq _args_digest+4*32(state , idx, 8), %xmm2 - vpinsrq $1, _args_digest+5*32(state , idx, 8), %xmm2, %xmm2 - vmovq _args_digest+6*32(state , idx, 8), %xmm3 - vpinsrq $1, _args_digest+7*32(state , idx, 8), %xmm3, %xmm3 - - vmovdqu %xmm0, _result_digest + 0*16(job_rax) - vmovdqu %xmm1, _result_digest + 1*16(job_rax) - vmovdqu %xmm2, _result_digest + 2*16(job_rax) - vmovdqu %xmm3, _result_digest + 3*16(job_rax) - -return: - pop %r12 - pop %rbx - FRAME_END - ret - -return_null: - xor job_rax, job_rax - jmp return -ENDPROC(sha512_mb_mgr_submit_avx2) - -/* UNUSED? -.section .rodata.cst16, "aM", @progbits, 16 -.align 16 -H0: .int 0x6a09e667 -H1: .int 0xbb67ae85 -H2: .int 0x3c6ef372 -H3: .int 0xa54ff53a -H4: .int 0x510e527f -H5: .int 0x9b05688c -H6: .int 0x1f83d9ab -H7: .int 0x5be0cd19 -*/ diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S deleted file mode 100644 index e22e907643a6..000000000000 --- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S +++ /dev/null @@ -1,531 +0,0 @@ -/* - * Multi-buffer SHA512 algorithm hash compute routine - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2016 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * Contact Information: - * Megha Dey <megha.dey@linux.intel.com> - * - * BSD LICENSE - * - * Copyright(c) 2016 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -# code to compute quad SHA512 using AVX2 -# use YMMs to tackle the larger digest size -# outer calling routine takes care of save and restore of XMM registers -# Logic designed/laid out by JDG - -# Function clobbers: rax, rcx, rdx, rbx, rsi, rdi, r9-r15; ymm0-15 -# Stack must be aligned to 32 bytes before call -# Linux clobbers: rax rbx rcx rsi r8 r9 r10 r11 r12 -# Linux preserves: rcx rdx rdi rbp r13 r14 r15 -# clobbers ymm0-15 - -#include <linux/linkage.h> -#include "sha512_mb_mgr_datastruct.S" - -arg1 = %rdi -arg2 = %rsi - -# Common definitions -STATE = arg1 -INP_SIZE = arg2 - -IDX = %rax -ROUND = %rbx -TBL = %r8 - -inp0 = %r9 -inp1 = %r10 -inp2 = %r11 -inp3 = %r12 - -a = %ymm0 -b = %ymm1 -c = %ymm2 -d = %ymm3 -e = %ymm4 -f = %ymm5 -g = %ymm6 -h = %ymm7 - -a0 = %ymm8 -a1 = %ymm9 -a2 = %ymm10 - -TT0 = %ymm14 -TT1 = %ymm13 -TT2 = %ymm12 -TT3 = %ymm11 -TT4 = %ymm10 -TT5 = %ymm9 - -T1 = %ymm14 -TMP = %ymm15 - -# Define stack usage -STACK_SPACE1 = SZ4*16 + NUM_SHA512_DIGEST_WORDS*SZ4 + 24 - -#define VMOVPD vmovupd -_digest = SZ4*16 - -# transpose r0, r1, r2, r3, t0, t1 -# "transpose" data in {r0..r3} using temps {t0..t3} -# Input looks like: {r0 r1 r2 r3} -# r0 = {a7 a6 a5 a4 a3 a2 a1 a0} -# r1 = {b7 b6 b5 b4 b3 b2 b1 b0} -# r2 = {c7 c6 c5 c4 c3 c2 c1 c0} -# r3 = {d7 d6 d5 d4 d3 d2 d1 d0} -# -# output looks like: {t0 r1 r0 r3} -# t0 = {d1 d0 c1 c0 b1 b0 a1 a0} -# r1 = {d3 d2 c3 c2 b3 b2 a3 a2} -# r0 = {d5 d4 c5 c4 b5 b4 a5 a4} -# r3 = {d7 d6 c7 c6 b7 b6 a7 a6} - -.macro TRANSPOSE r0 r1 r2 r3 t0 t1 - vshufps $0x44, \r1, \r0, \t0 # t0 = {b5 b4 a5 a4 b1 b0 a1 a0} - vshufps $0xEE, \r1, \r0, \r0 # r0 = {b7 b6 a7 a6 b3 b2 a3 a2} - vshufps $0x44, \r3, \r2, \t1 # t1 = {d5 d4 c5 c4 d1 d0 c1 c0} - vshufps $0xEE, \r3, \r2, \r2 # r2 = {d7 d6 c7 c6 d3 d2 c3 c2} - - vperm2f128 $0x20, \r2, \r0, \r1 # h6...a6 - vperm2f128 $0x31, \r2, \r0, \r3 # h2...a2 - vperm2f128 $0x31, \t1, \t0, \r0 # h5...a5 - vperm2f128 $0x20, \t1, \t0, \t0 # h1...a1 -.endm - -.macro ROTATE_ARGS -TMP_ = h -h = g -g = f -f = e -e = d -d = c -c = b -b = a -a = TMP_ -.endm - -# PRORQ reg, imm, tmp -# packed-rotate-right-double -# does a rotate by doing two shifts and an or -.macro _PRORQ reg imm tmp - vpsllq $(64-\imm),\reg,\tmp - vpsrlq $\imm,\reg, \reg - vpor \tmp,\reg, \reg -.endm - -# non-destructive -# PRORQ_nd reg, imm, tmp, src -.macro _PRORQ_nd reg imm tmp src - vpsllq $(64-\imm), \src, \tmp - vpsrlq $\imm, \src, \reg - vpor \tmp, \reg, \reg -.endm - -# PRORQ dst/src, amt -.macro PRORQ reg imm - _PRORQ \reg, \imm, TMP -.endm - -# PRORQ_nd dst, src, amt -.macro PRORQ_nd reg tmp imm - _PRORQ_nd \reg, \imm, TMP, \tmp -.endm - -#; arguments passed implicitly in preprocessor symbols i, a...h -.macro ROUND_00_15 _T1 i - PRORQ_nd a0, e, (18-14) # sig1: a0 = (e >> 4) - - vpxor g, f, a2 # ch: a2 = f^g - vpand e,a2, a2 # ch: a2 = (f^g)&e - vpxor g, a2, a2 # a2 = ch - - PRORQ_nd a1,e,41 # sig1: a1 = (e >> 25) - - offset = SZ4*(\i & 0xf) - vmovdqu \_T1,offset(%rsp) - vpaddq (TBL,ROUND,1), \_T1, \_T1 # T1 = W + K - vpxor e,a0, a0 # sig1: a0 = e ^ (e >> 5) - PRORQ a0, 14 # sig1: a0 = (e >> 6) ^ (e >> 11) - vpaddq a2, h, h # h = h + ch - PRORQ_nd a2,a,6 # sig0: a2 = (a >> 11) - vpaddq \_T1,h, h # h = h + ch + W + K - vpxor a1, a0, a0 # a0 = sigma1 - vmovdqu a,\_T1 - PRORQ_nd a1,a,39 # sig0: a1 = (a >> 22) - vpxor c, \_T1, \_T1 # maj: T1 = a^c - add $SZ4, ROUND # ROUND++ - vpand b, \_T1, \_T1 # maj: T1 = (a^c)&b - vpaddq a0, h, h - vpaddq h, d, d - vpxor a, a2, a2 # sig0: a2 = a ^ (a >> 11) - PRORQ a2,28 # sig0: a2 = (a >> 2) ^ (a >> 13) - vpxor a1, a2, a2 # a2 = sig0 - vpand c, a, a1 # maj: a1 = a&c - vpor \_T1, a1, a1 # a1 = maj - vpaddq a1, h, h # h = h + ch + W + K + maj - vpaddq a2, h, h # h = h + ch + W + K + maj + sigma0 - ROTATE_ARGS -.endm - - -#; arguments passed implicitly in preprocessor symbols i, a...h -.macro ROUND_16_XX _T1 i - vmovdqu SZ4*((\i-15)&0xf)(%rsp), \_T1 - vmovdqu SZ4*((\i-2)&0xf)(%rsp), a1 - vmovdqu \_T1, a0 - PRORQ \_T1,7 - vmovdqu a1, a2 - PRORQ a1,42 - vpxor a0, \_T1, \_T1 - PRORQ \_T1, 1 - vpxor a2, a1, a1 - PRORQ a1, 19 - vpsrlq $7, a0, a0 - vpxor a0, \_T1, \_T1 - vpsrlq $6, a2, a2 - vpxor a2, a1, a1 - vpaddq SZ4*((\i-16)&0xf)(%rsp), \_T1, \_T1 - vpaddq SZ4*((\i-7)&0xf)(%rsp), a1, a1 - vpaddq a1, \_T1, \_T1 - - ROUND_00_15 \_T1,\i -.endm - - -# void sha512_x4_avx2(void *STATE, const int INP_SIZE) -# arg 1 : STATE : pointer to input data -# arg 2 : INP_SIZE : size of data in blocks (assumed >= 1) -ENTRY(sha512_x4_avx2) - # general registers preserved in outer calling routine - # outer calling routine saves all the XMM registers - # save callee-saved clobbered registers to comply with C function ABI - push %r12 - push %r13 - push %r14 - push %r15 - - sub $STACK_SPACE1, %rsp - - # Load the pre-transposed incoming digest. - vmovdqu 0*SHA512_DIGEST_ROW_SIZE(STATE),a - vmovdqu 1*SHA512_DIGEST_ROW_SIZE(STATE),b - vmovdqu 2*SHA512_DIGEST_ROW_SIZE(STATE),c - vmovdqu 3*SHA512_DIGEST_ROW_SIZE(STATE),d - vmovdqu 4*SHA512_DIGEST_ROW_SIZE(STATE),e - vmovdqu 5*SHA512_DIGEST_ROW_SIZE(STATE),f - vmovdqu 6*SHA512_DIGEST_ROW_SIZE(STATE),g - vmovdqu 7*SHA512_DIGEST_ROW_SIZE(STATE),h - - lea K512_4(%rip),TBL - - # load the address of each of the 4 message lanes - # getting ready to transpose input onto stack - mov _data_ptr+0*PTR_SZ(STATE),inp0 - mov _data_ptr+1*PTR_SZ(STATE),inp1 - mov _data_ptr+2*PTR_SZ(STATE),inp2 - mov _data_ptr+3*PTR_SZ(STATE),inp3 - - xor IDX, IDX -lloop: - xor ROUND, ROUND - - # save old digest - vmovdqu a, _digest(%rsp) - vmovdqu b, _digest+1*SZ4(%rsp) - vmovdqu c, _digest+2*SZ4(%rsp) - vmovdqu d, _digest+3*SZ4(%rsp) - vmovdqu e, _digest+4*SZ4(%rsp) - vmovdqu f, _digest+5*SZ4(%rsp) - vmovdqu g, _digest+6*SZ4(%rsp) - vmovdqu h, _digest+7*SZ4(%rsp) - i = 0 -.rep 4 - vmovdqu PSHUFFLE_BYTE_FLIP_MASK(%rip), TMP - VMOVPD i*32(inp0, IDX), TT2 - VMOVPD i*32(inp1, IDX), TT1 - VMOVPD i*32(inp2, IDX), TT4 - VMOVPD i*32(inp3, IDX), TT3 - TRANSPOSE TT2, TT1, TT4, TT3, TT0, TT5 - vpshufb TMP, TT0, TT0 - vpshufb TMP, TT1, TT1 - vpshufb TMP, TT2, TT2 - vpshufb TMP, TT3, TT3 - ROUND_00_15 TT0,(i*4+0) - ROUND_00_15 TT1,(i*4+1) - ROUND_00_15 TT2,(i*4+2) - ROUND_00_15 TT3,(i*4+3) - i = (i+1) -.endr - add $128, IDX - - i = (i*4) - - jmp Lrounds_16_xx -.align 16 -Lrounds_16_xx: -.rep 16 - ROUND_16_XX T1, i - i = (i+1) -.endr - cmp $0xa00,ROUND - jb Lrounds_16_xx - - # add old digest - vpaddq _digest(%rsp), a, a - vpaddq _digest+1*SZ4(%rsp), b, b - vpaddq _digest+2*SZ4(%rsp), c, c - vpaddq _digest+3*SZ4(%rsp), d, d - vpaddq _digest+4*SZ4(%rsp), e, e - vpaddq _digest+5*SZ4(%rsp), f, f - vpaddq _digest+6*SZ4(%rsp), g, g - vpaddq _digest+7*SZ4(%rsp), h, h - - sub $1, INP_SIZE # unit is blocks - jne lloop - - # write back to memory (state object) the transposed digest - vmovdqu a, 0*SHA512_DIGEST_ROW_SIZE(STATE) - vmovdqu b, 1*SHA512_DIGEST_ROW_SIZE(STATE) - vmovdqu c, 2*SHA512_DIGEST_ROW_SIZE(STATE) - vmovdqu d, 3*SHA512_DIGEST_ROW_SIZE(STATE) - vmovdqu e, 4*SHA512_DIGEST_ROW_SIZE(STATE) - vmovdqu f, 5*SHA512_DIGEST_ROW_SIZE(STATE) - vmovdqu g, 6*SHA512_DIGEST_ROW_SIZE(STATE) - vmovdqu h, 7*SHA512_DIGEST_ROW_SIZE(STATE) - - # update input data pointers - add IDX, inp0 - mov inp0, _data_ptr+0*PTR_SZ(STATE) - add IDX, inp1 - mov inp1, _data_ptr+1*PTR_SZ(STATE) - add IDX, inp2 - mov inp2, _data_ptr+2*PTR_SZ(STATE) - add IDX, inp3 - mov inp3, _data_ptr+3*PTR_SZ(STATE) - - #;;;;;;;;;;;;;;; - #; Postamble - add $STACK_SPACE1, %rsp - # restore callee-saved clobbered registers - - pop %r15 - pop %r14 - pop %r13 - pop %r12 - - # outer calling routine restores XMM and other GP registers - ret -ENDPROC(sha512_x4_avx2) - -.section .rodata.K512_4, "a", @progbits -.align 64 -K512_4: - .octa 0x428a2f98d728ae22428a2f98d728ae22,\ - 0x428a2f98d728ae22428a2f98d728ae22 - .octa 0x7137449123ef65cd7137449123ef65cd,\ - 0x7137449123ef65cd7137449123ef65cd - .octa 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f,\ - 0xb5c0fbcfec4d3b2fb5c0fbcfec4d3b2f - .octa 0xe9b5dba58189dbbce9b5dba58189dbbc,\ - 0xe9b5dba58189dbbce9b5dba58189dbbc - .octa 0x3956c25bf348b5383956c25bf348b538,\ - 0x3956c25bf348b5383956c25bf348b538 - .octa 0x59f111f1b605d01959f111f1b605d019,\ - 0x59f111f1b605d01959f111f1b605d019 - .octa 0x923f82a4af194f9b923f82a4af194f9b,\ - 0x923f82a4af194f9b923f82a4af194f9b - .octa 0xab1c5ed5da6d8118ab1c5ed5da6d8118,\ - 0xab1c5ed5da6d8118ab1c5ed5da6d8118 - .octa 0xd807aa98a3030242d807aa98a3030242,\ - 0xd807aa98a3030242d807aa98a3030242 - .octa 0x12835b0145706fbe12835b0145706fbe,\ - 0x12835b0145706fbe12835b0145706fbe - .octa 0x243185be4ee4b28c243185be4ee4b28c,\ - 0x243185be4ee4b28c243185be4ee4b28c - .octa 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2,\ - 0x550c7dc3d5ffb4e2550c7dc3d5ffb4e2 - .octa 0x72be5d74f27b896f72be5d74f27b896f,\ - 0x72be5d74f27b896f72be5d74f27b896f - .octa 0x80deb1fe3b1696b180deb1fe3b1696b1,\ - 0x80deb1fe3b1696b180deb1fe3b1696b1 - .octa 0x9bdc06a725c712359bdc06a725c71235,\ - 0x9bdc06a725c712359bdc06a725c71235 - .octa 0xc19bf174cf692694c19bf174cf692694,\ - 0xc19bf174cf692694c19bf174cf692694 - .octa 0xe49b69c19ef14ad2e49b69c19ef14ad2,\ - 0xe49b69c19ef14ad2e49b69c19ef14ad2 - .octa 0xefbe4786384f25e3efbe4786384f25e3,\ - 0xefbe4786384f25e3efbe4786384f25e3 - .octa 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5,\ - 0x0fc19dc68b8cd5b50fc19dc68b8cd5b5 - .octa 0x240ca1cc77ac9c65240ca1cc77ac9c65,\ - 0x240ca1cc77ac9c65240ca1cc77ac9c65 - .octa 0x2de92c6f592b02752de92c6f592b0275,\ - 0x2de92c6f592b02752de92c6f592b0275 - .octa 0x4a7484aa6ea6e4834a7484aa6ea6e483,\ - 0x4a7484aa6ea6e4834a7484aa6ea6e483 - .octa 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4,\ - 0x5cb0a9dcbd41fbd45cb0a9dcbd41fbd4 - .octa 0x76f988da831153b576f988da831153b5,\ - 0x76f988da831153b576f988da831153b5 - .octa 0x983e5152ee66dfab983e5152ee66dfab,\ - 0x983e5152ee66dfab983e5152ee66dfab - .octa 0xa831c66d2db43210a831c66d2db43210,\ - 0xa831c66d2db43210a831c66d2db43210 - .octa 0xb00327c898fb213fb00327c898fb213f,\ - 0xb00327c898fb213fb00327c898fb213f - .octa 0xbf597fc7beef0ee4bf597fc7beef0ee4,\ - 0xbf597fc7beef0ee4bf597fc7beef0ee4 - .octa 0xc6e00bf33da88fc2c6e00bf33da88fc2,\ - 0xc6e00bf33da88fc2c6e00bf33da88fc2 - .octa 0xd5a79147930aa725d5a79147930aa725,\ - 0xd5a79147930aa725d5a79147930aa725 - .octa 0x06ca6351e003826f06ca6351e003826f,\ - 0x06ca6351e003826f06ca6351e003826f - .octa 0x142929670a0e6e70142929670a0e6e70,\ - 0x142929670a0e6e70142929670a0e6e70 - .octa 0x27b70a8546d22ffc27b70a8546d22ffc,\ - 0x27b70a8546d22ffc27b70a8546d22ffc - .octa 0x2e1b21385c26c9262e1b21385c26c926,\ - 0x2e1b21385c26c9262e1b21385c26c926 - .octa 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed,\ - 0x4d2c6dfc5ac42aed4d2c6dfc5ac42aed - .octa 0x53380d139d95b3df53380d139d95b3df,\ - 0x53380d139d95b3df53380d139d95b3df - .octa 0x650a73548baf63de650a73548baf63de,\ - 0x650a73548baf63de650a73548baf63de - .octa 0x766a0abb3c77b2a8766a0abb3c77b2a8,\ - 0x766a0abb3c77b2a8766a0abb3c77b2a8 - .octa 0x81c2c92e47edaee681c2c92e47edaee6,\ - 0x81c2c92e47edaee681c2c92e47edaee6 - .octa 0x92722c851482353b92722c851482353b,\ - 0x92722c851482353b92722c851482353b - .octa 0xa2bfe8a14cf10364a2bfe8a14cf10364,\ - 0xa2bfe8a14cf10364a2bfe8a14cf10364 - .octa 0xa81a664bbc423001a81a664bbc423001,\ - 0xa81a664bbc423001a81a664bbc423001 - .octa 0xc24b8b70d0f89791c24b8b70d0f89791,\ - 0xc24b8b70d0f89791c24b8b70d0f89791 - .octa 0xc76c51a30654be30c76c51a30654be30,\ - 0xc76c51a30654be30c76c51a30654be30 - .octa 0xd192e819d6ef5218d192e819d6ef5218,\ - 0xd192e819d6ef5218d192e819d6ef5218 - .octa 0xd69906245565a910d69906245565a910,\ - 0xd69906245565a910d69906245565a910 - .octa 0xf40e35855771202af40e35855771202a,\ - 0xf40e35855771202af40e35855771202a - .octa 0x106aa07032bbd1b8106aa07032bbd1b8,\ - 0x106aa07032bbd1b8106aa07032bbd1b8 - .octa 0x19a4c116b8d2d0c819a4c116b8d2d0c8,\ - 0x19a4c116b8d2d0c819a4c116b8d2d0c8 - .octa 0x1e376c085141ab531e376c085141ab53,\ - 0x1e376c085141ab531e376c085141ab53 - .octa 0x2748774cdf8eeb992748774cdf8eeb99,\ - 0x2748774cdf8eeb992748774cdf8eeb99 - .octa 0x34b0bcb5e19b48a834b0bcb5e19b48a8,\ - 0x34b0bcb5e19b48a834b0bcb5e19b48a8 - .octa 0x391c0cb3c5c95a63391c0cb3c5c95a63,\ - 0x391c0cb3c5c95a63391c0cb3c5c95a63 - .octa 0x4ed8aa4ae3418acb4ed8aa4ae3418acb,\ - 0x4ed8aa4ae3418acb4ed8aa4ae3418acb - .octa 0x5b9cca4f7763e3735b9cca4f7763e373,\ - 0x5b9cca4f7763e3735b9cca4f7763e373 - .octa 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3,\ - 0x682e6ff3d6b2b8a3682e6ff3d6b2b8a3 - .octa 0x748f82ee5defb2fc748f82ee5defb2fc,\ - 0x748f82ee5defb2fc748f82ee5defb2fc - .octa 0x78a5636f43172f6078a5636f43172f60,\ - 0x78a5636f43172f6078a5636f43172f60 - .octa 0x84c87814a1f0ab7284c87814a1f0ab72,\ - 0x84c87814a1f0ab7284c87814a1f0ab72 - .octa 0x8cc702081a6439ec8cc702081a6439ec,\ - 0x8cc702081a6439ec8cc702081a6439ec - .octa 0x90befffa23631e2890befffa23631e28,\ - 0x90befffa23631e2890befffa23631e28 - .octa 0xa4506cebde82bde9a4506cebde82bde9,\ - 0xa4506cebde82bde9a4506cebde82bde9 - .octa 0xbef9a3f7b2c67915bef9a3f7b2c67915,\ - 0xbef9a3f7b2c67915bef9a3f7b2c67915 - .octa 0xc67178f2e372532bc67178f2e372532b,\ - 0xc67178f2e372532bc67178f2e372532b - .octa 0xca273eceea26619cca273eceea26619c,\ - 0xca273eceea26619cca273eceea26619c - .octa 0xd186b8c721c0c207d186b8c721c0c207,\ - 0xd186b8c721c0c207d186b8c721c0c207 - .octa 0xeada7dd6cde0eb1eeada7dd6cde0eb1e,\ - 0xeada7dd6cde0eb1eeada7dd6cde0eb1e - .octa 0xf57d4f7fee6ed178f57d4f7fee6ed178,\ - 0xf57d4f7fee6ed178f57d4f7fee6ed178 - .octa 0x06f067aa72176fba06f067aa72176fba,\ - 0x06f067aa72176fba06f067aa72176fba - .octa 0x0a637dc5a2c898a60a637dc5a2c898a6,\ - 0x0a637dc5a2c898a60a637dc5a2c898a6 - .octa 0x113f9804bef90dae113f9804bef90dae,\ - 0x113f9804bef90dae113f9804bef90dae - .octa 0x1b710b35131c471b1b710b35131c471b,\ - 0x1b710b35131c471b1b710b35131c471b - .octa 0x28db77f523047d8428db77f523047d84,\ - 0x28db77f523047d8428db77f523047d84 - .octa 0x32caab7b40c7249332caab7b40c72493,\ - 0x32caab7b40c7249332caab7b40c72493 - .octa 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc,\ - 0x3c9ebe0a15c9bebc3c9ebe0a15c9bebc - .octa 0x431d67c49c100d4c431d67c49c100d4c,\ - 0x431d67c49c100d4c431d67c49c100d4c - .octa 0x4cc5d4becb3e42b64cc5d4becb3e42b6,\ - 0x4cc5d4becb3e42b64cc5d4becb3e42b6 - .octa 0x597f299cfc657e2a597f299cfc657e2a,\ - 0x597f299cfc657e2a597f299cfc657e2a - .octa 0x5fcb6fab3ad6faec5fcb6fab3ad6faec,\ - 0x5fcb6fab3ad6faec5fcb6fab3ad6faec - .octa 0x6c44198c4a4758176c44198c4a475817,\ - 0x6c44198c4a4758176c44198c4a475817 - -.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 -.align 32 -PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607 - .octa 0x18191a1b1c1d1e1f1011121314151617 diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 3f9d43f26f63..7eb878561910 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -39,7 +39,7 @@ void __init init_vdso_image(const struct vdso_image *image) struct linux_binprm; -static int vdso_fault(const struct vm_special_mapping *sm, +static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { const struct vdso_image *image = vma->vm_mm->context.vdso_image; @@ -84,12 +84,11 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static int vvar_fault(const struct vm_special_mapping *sm, +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { const struct vdso_image *image = vma->vm_mm->context.vdso_image; long sym_offset; - int ret = -EFAULT; if (!image) return VM_FAULT_SIGBUS; @@ -108,29 +107,24 @@ static int vvar_fault(const struct vm_special_mapping *sm, return VM_FAULT_SIGBUS; if (sym_offset == image->sym_vvar_page) { - ret = vm_insert_pfn(vma, vmf->address, - __pa_symbol(&__vvar_page) >> PAGE_SHIFT); + return vmf_insert_pfn(vma, vmf->address, + __pa_symbol(&__vvar_page) >> PAGE_SHIFT); } else if (sym_offset == image->sym_pvclock_page) { struct pvclock_vsyscall_time_info *pvti = pvclock_get_pvti_cpu0_va(); if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) { - ret = vm_insert_pfn_prot( - vma, - vmf->address, - __pa(pvti) >> PAGE_SHIFT, - pgprot_decrypted(vma->vm_page_prot)); + return vmf_insert_pfn_prot(vma, vmf->address, + __pa(pvti) >> PAGE_SHIFT, + pgprot_decrypted(vma->vm_page_prot)); } } else if (sym_offset == image->sym_hvclock_page) { struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page(); if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK)) - ret = vm_insert_pfn(vma, vmf->address, - vmalloc_to_pfn(tsc_pg)); + return vmf_insert_pfn(vma, vmf->address, + vmalloc_to_pfn(tsc_pg)); } - if (ret == 0 || ret == -EBUSY) - return VM_FAULT_NOPAGE; - return VM_FAULT_SIGBUS; } diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 82ed001e8909..85fd85d52ffd 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -100,20 +100,13 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) */ if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) { - siginfo_t info; struct thread_struct *thread = ¤t->thread; thread->error_code = 6; /* user fault, no page, write */ thread->cr2 = ptr; thread->trap_nr = X86_TRAP_PF; - clear_siginfo(&info); - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void __user *)ptr; - - force_sig_info(SIGSEGV, &info, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)ptr, current); return false; } else { return true; diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index fb97cf7c4137..fab4df16a3c4 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -12,38 +12,23 @@ #include <asm/user32.h> #include <asm/unistd.h> +#include <asm-generic/compat.h> + #define COMPAT_USER_HZ 100 #define COMPAT_UTS_MACHINE "i686\0\0" -typedef u32 compat_size_t; -typedef s32 compat_ssize_t; -typedef s32 compat_clock_t; -typedef s32 compat_pid_t; typedef u16 __compat_uid_t; typedef u16 __compat_gid_t; typedef u32 __compat_uid32_t; typedef u32 __compat_gid32_t; typedef u16 compat_mode_t; -typedef u32 compat_ino_t; typedef u16 compat_dev_t; -typedef s32 compat_off_t; -typedef s64 compat_loff_t; typedef u16 compat_nlink_t; typedef u16 compat_ipc_pid_t; -typedef s32 compat_daddr_t; typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; -typedef s32 compat_timer_t; -typedef s32 compat_key_t; - -typedef s32 compat_int_t; -typedef s32 compat_long_t; typedef s64 __attribute__((aligned(4))) compat_s64; -typedef u32 compat_uint_t; -typedef u32 compat_ulong_t; -typedef u32 compat_u32; typedef u64 __attribute__((aligned(4))) compat_u64; -typedef u32 compat_uptr_t; struct compat_stat { compat_dev_t st_dev; @@ -240,6 +225,6 @@ static inline bool in_compat_syscall(void) struct compat_siginfo; int __copy_siginfo_to_user32(struct compat_siginfo __user *to, - const siginfo_t *from, bool x32_ABI); + const kernel_siginfo_t *from, bool x32_ABI); #endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index 5ed826da5e07..7469d321f072 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -13,75 +13,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, return 0; } -/* - * If the arch doesn't supply something else, assume that hugepage - * size aligned regions are ok without further preparation. - */ -static inline int prepare_hugepage_range(struct file *file, - unsigned long addr, unsigned long len) -{ - struct hstate *h = hstate_file(file); - if (len & ~huge_page_mask(h)) - return -EINVAL; - if (addr & ~huge_page_mask(h)) - return -EINVAL; - return 0; -} - -static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, - unsigned long addr, unsigned long end, - unsigned long floor, - unsigned long ceiling) -{ - free_pgd_range(tlb, addr, end, floor, ceiling); -} - -static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) -{ - set_pte_at(mm, addr, ptep, pte); -} - -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - return ptep_get_and_clear(mm, addr, ptep); -} - -static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - ptep_clear_flush(vma, addr, ptep); -} - -static inline int huge_pte_none(pte_t pte) -{ - return pte_none(pte); -} - -static inline pte_t huge_pte_wrprotect(pte_t pte) -{ - return pte_wrprotect(pte); -} - -static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - ptep_set_wrprotect(mm, addr, ptep); -} - -static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep, - pte_t pte, int dirty) -{ - return ptep_set_access_flags(vma, addr, ptep, pte, dirty); -} - -static inline pte_t huge_ptep_get(pte_t *ptep) -{ - return *ptep; -} - static inline void arch_clear_hugepage_flags(struct page *page) { } diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 5f26962eff42..67ed72f31cc2 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -45,6 +45,8 @@ struct vcpu_data { #ifdef CONFIG_IRQ_REMAP +extern raw_spinlock_t irq_2_ir_lock; + extern bool irq_remapping_cap(enum irq_remap_cap cap); extern void set_irq_remapping_broken(void); extern int irq_remapping_prepare(void); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 09b2e3e2cf1b..55e51ff7e421 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -102,7 +102,15 @@ #define UNMAPPED_GVA (~(gpa_t)0) /* KVM Hugepage definitions for x86 */ -#define KVM_NR_PAGE_SIZES 3 +enum { + PT_PAGE_TABLE_LEVEL = 1, + PT_DIRECTORY_LEVEL = 2, + PT_PDPE_LEVEL = 3, + /* set max level to the biggest one */ + PT_MAX_HUGEPAGE_LEVEL = PT_PDPE_LEVEL, +}; +#define KVM_NR_PAGE_SIZES (PT_MAX_HUGEPAGE_LEVEL - \ + PT_PAGE_TABLE_LEVEL + 1) #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) @@ -177,6 +185,7 @@ enum { #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) +#define DR6_BT (1 << 15) #define DR6_RTM (1 << 16) #define DR6_FIXED_1 0xfffe0ff0 #define DR6_INIT 0xffff0ff0 @@ -247,7 +256,7 @@ struct kvm_mmu_memory_cache { * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp. */ union kvm_mmu_page_role { - unsigned word; + u32 word; struct { unsigned level:4; unsigned cr4_pae:1; @@ -273,6 +282,34 @@ union kvm_mmu_page_role { }; }; +union kvm_mmu_extended_role { +/* + * This structure complements kvm_mmu_page_role caching everything needed for + * MMU configuration. If nothing in both these structures changed, MMU + * re-configuration can be skipped. @valid bit is set on first usage so we don't + * treat all-zero structure as valid data. + */ + u32 word; + struct { + unsigned int valid:1; + unsigned int execonly:1; + unsigned int cr0_pg:1; + unsigned int cr4_pse:1; + unsigned int cr4_pke:1; + unsigned int cr4_smap:1; + unsigned int cr4_smep:1; + unsigned int cr4_la57:1; + }; +}; + +union kvm_mmu_role { + u64 as_u64; + struct { + union kvm_mmu_page_role base; + union kvm_mmu_extended_role ext; + }; +}; + struct kvm_rmap_head { unsigned long val; }; @@ -280,18 +317,18 @@ struct kvm_rmap_head { struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; + bool unsync; /* * The following two entries are used to key the shadow page in the * hash table. */ - gfn_t gfn; union kvm_mmu_page_role role; + gfn_t gfn; u64 *spt; /* hold the gfn of each spte inside spt */ gfn_t *gfns; - bool unsync; int root_count; /* Currently serving as active root */ unsigned int unsync_children; struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ @@ -360,7 +397,7 @@ struct kvm_mmu { void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *pte); hpa_t root_hpa; - union kvm_mmu_page_role base_role; + union kvm_mmu_role mmu_role; u8 root_level; u8 shadow_root_level; u8 ept_ad; @@ -490,7 +527,7 @@ struct kvm_vcpu_hv { struct kvm_hyperv_exit exit; struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); - cpumask_t tlb_lush; + cpumask_t tlb_flush; }; struct kvm_vcpu_arch { @@ -534,7 +571,13 @@ struct kvm_vcpu_arch { * the paging mode of the l1 guest. This context is always used to * handle faults. */ - struct kvm_mmu mmu; + struct kvm_mmu *mmu; + + /* Non-nested MMU for L1 */ + struct kvm_mmu root_mmu; + + /* L1 MMU when running nested */ + struct kvm_mmu guest_mmu; /* * Paging state of an L2 guest (used for nested npt) @@ -585,6 +628,8 @@ struct kvm_vcpu_arch { bool has_error_code; u8 nr; u32 error_code; + unsigned long payload; + bool has_payload; u8 nested_apf; } exception; @@ -781,6 +826,9 @@ struct kvm_hv { u64 hv_reenlightenment_control; u64 hv_tsc_emulation_control; u64 hv_tsc_emulation_status; + + /* How many vCPUs have VP index != vCPU index */ + atomic_t num_mismatched_vp_indexes; }; enum kvm_irqchip_mode { @@ -871,6 +919,7 @@ struct kvm_arch { bool x2apic_broadcast_quirk_disabled; bool guest_can_read_msr_platform_info; + bool exception_payload_enabled; }; struct kvm_vm_stat { @@ -1133,6 +1182,9 @@ struct kvm_x86_ops { int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*get_msr_feature)(struct kvm_msr_entry *entry); + + int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu, + uint16_t *vmcs_version); }; struct kvm_arch_async_pf { @@ -1170,7 +1222,6 @@ void kvm_mmu_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); -void kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_init_vm(struct kvm *kvm); void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, @@ -1324,7 +1375,8 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); -void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free); +void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + ulong roots_to_free); gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index 61eb4b63c5ec..d0b1434fb0b6 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -57,8 +57,14 @@ #define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) #define MPX_BNDSTA_ERROR_CODE 0x3 +struct mpx_fault_info { + void __user *addr; + void __user *lower; + void __user *upper; +}; + #ifdef CONFIG_X86_INTEL_MPX -siginfo_t *mpx_generate_siginfo(struct pt_regs *regs); +int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs); int mpx_handle_bd_fault(void); static inline int kernel_managing_mpx_tables(struct mm_struct *mm) { @@ -78,9 +84,9 @@ void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len, unsigned long flags); #else -static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) +static inline int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) { - return NULL; + return -EINVAL; } static inline int mpx_handle_bd_fault(void) { diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index f236bcd5485d..143c99499531 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -293,7 +293,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, #define arch_has_block_step() (boot_cpu_data.x86 >= 6) #endif -#define ARCH_HAS_USER_SINGLE_STEP_INFO +#define ARCH_HAS_USER_SINGLE_STEP_REPORT /* * When hitting ptrace_stop(), we cannot return using SYSRET because diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h index 7bd92db09e8d..54133017267c 100644 --- a/arch/x86/include/asm/trace/mpx.h +++ b/arch/x86/include/asm/trace/mpx.h @@ -11,12 +11,12 @@ TRACE_EVENT(mpx_bounds_register_exception, - TP_PROTO(void *addr_referenced, + TP_PROTO(void __user *addr_referenced, const struct mpx_bndreg *bndreg), TP_ARGS(addr_referenced, bndreg), TP_STRUCT__entry( - __field(void *, addr_referenced) + __field(void __user *, addr_referenced) __field(u64, lower_bound) __field(u64, upper_bound) ), diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 51c4eee00732..dc4ed8bc2382 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -24,6 +24,7 @@ # include <asm/unistd_64.h> # include <asm/unistd_64_x32.h> # define __ARCH_WANT_COMPAT_SYS_TIME +# define __ARCH_WANT_SYS_UTIME32 # define __ARCH_WANT_COMPAT_SYS_PREADV64 # define __ARCH_WANT_COMPAT_SYS_PWRITEV64 # define __ARCH_WANT_COMPAT_SYS_PREADV64V2 @@ -31,13 +32,13 @@ # endif +# define __ARCH_WANT_NEW_STAT # define __ARCH_WANT_OLD_READDIR # define __ARCH_WANT_OLD_STAT # define __ARCH_WANT_SYS_ALARM # define __ARCH_WANT_SYS_FADVISE64 # define __ARCH_WANT_SYS_GETHOSTNAME # define __ARCH_WANT_SYS_GETPGRP -# define __ARCH_WANT_SYS_LLSEEK # define __ARCH_WANT_SYS_NICE # define __ARCH_WANT_SYS_OLDUMOUNT # define __ARCH_WANT_SYS_OLD_GETRLIMIT diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index e05e0d309244..1fc7a0d1e877 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -40,7 +40,7 @@ static inline int cpu_has_vmx(void) */ static inline void cpu_vmxoff(void) { - asm volatile (ASM_VMX_VMXOFF : : : "cc"); + asm volatile ("vmxoff"); cr4_clear_bits(X86_CR4_VMXE); } diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 9527ba5d62da..ade0f153947d 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -503,19 +503,6 @@ enum vmcs_field { #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul - -#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" -#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" -#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" -#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" -#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" -#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" -#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" -#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" -#define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" -#define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" - struct vmx_msr_entry { u32 index; u32 reserved; diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index fd23d5778ea1..dabfcf7c3941 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -288,6 +288,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 #define KVM_VCPUEVENT_VALID_SMM 0x00000008 +#define KVM_VCPUEVENT_VALID_PAYLOAD 0x00000010 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -299,7 +300,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 has_error_code; - __u8 pad; + __u8 pending; __u32 error_code; } exception; struct { @@ -322,7 +323,9 @@ struct kvm_vcpu_events { __u8 smm_inside_nmi; __u8 latched_init; } smi; - __u32 reserved[9]; + __u8 reserved[27]; + __u8 exception_has_payload; + __u64 exception_payload; }; /* for KVM_GET/SET_DEBUGREGS */ @@ -381,6 +384,7 @@ struct kvm_sync_regs { #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 +#define KVM_STATE_NESTED_EVMCS 0x00000004 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 diff --git a/arch/x86/include/uapi/asm/siginfo.h b/arch/x86/include/uapi/asm/siginfo.h index b3d157957177..6642d8be40c4 100644 --- a/arch/x86/include/uapi/asm/siginfo.h +++ b/arch/x86/include/uapi/asm/siginfo.h @@ -7,8 +7,6 @@ typedef long long __kernel_si_clock_t __attribute__((aligned(4))); # define __ARCH_SI_CLOCK_T __kernel_si_clock_t # define __ARCH_SI_ATTRIBUTES __attribute__((aligned(8))) -# else /* x86-64 */ -# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) # endif #endif diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index f39f3a06c26f..7299dcbf8e85 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -140,7 +140,7 @@ static void __init dtb_cpu_setup(void) int ret; version = GET_APIC_VERSION(apic_read(APIC_LVR)); - for_each_node_by_type(dn, "cpu") { + for_each_of_cpu_node(dn) { ret = of_property_read_u32(dn, "reg", &apic_id); if (ret < 0) { pr_warn("%pOF: missing local APIC ID\n", dn); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index c88c23c658c1..d1f25c831447 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1248,7 +1248,6 @@ void __init e820__memblock_setup(void) { int i; u64 end; - u64 addr = 0; /* * The bootstrap memblock region count maximum is 128 entries @@ -1265,21 +1264,13 @@ void __init e820__memblock_setup(void) struct e820_entry *entry = &e820_table->entries[i]; end = entry->addr + entry->size; - if (addr < entry->addr) - memblock_reserve(addr, entry->addr - addr); - addr = end; if (end != (resource_size_t)end) continue; - /* - * all !E820_TYPE_RAM ranges (including gap ranges) are put - * into memblock.reserved to make sure that struct pages in - * such regions are not left uninitialized after bootup. - */ if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN) - memblock_reserve(entry->addr, entry->size); - else - memblock_add(entry->addr, entry->size); + continue; + + memblock_add(entry->addr, entry->size); } /* Throw away partial pages: */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index d8f49c7384a3..ffae9b9740fd 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1359,33 +1359,18 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) #endif } -static void fill_sigtrap_info(struct task_struct *tsk, - struct pt_regs *regs, - int error_code, int si_code, - struct siginfo *info) +void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, + int error_code, int si_code) { tsk->thread.trap_nr = X86_TRAP_DB; tsk->thread.error_code = error_code; - info->si_signo = SIGTRAP; - info->si_code = si_code; - info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL; -} - -void user_single_step_siginfo(struct task_struct *tsk, - struct pt_regs *regs, - struct siginfo *info) -{ - fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info); + /* Send us the fake SIGTRAP */ + force_sig_fault(SIGTRAP, si_code, + user_mode(regs) ? (void __user *)regs->ip : NULL, tsk); } -void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, - int error_code, int si_code) +void user_single_step_report(struct pt_regs *regs) { - struct siginfo info; - - clear_siginfo(&info); - fill_sigtrap_info(tsk, regs, error_code, si_code, &info); - /* Send us the fake SIGTRAP */ - force_sig_info(SIGTRAP, &info, tsk); + send_sigtrap(current, regs, 0, TRAP_BRKPT); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5bd0a997d81e..8f6dcd88202e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -189,7 +189,7 @@ int fixup_bug(struct pt_regs *regs, int trapnr) } static nokprobe_inline int -do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, +do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str, struct pt_regs *regs, long error_code) { if (v8086_mode(regs)) { @@ -202,10 +202,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, error_code, trapnr)) return 0; } - return -1; - } - - if (!user_mode(regs)) { + } else if (!user_mode(regs)) { if (fixup_exception(regs, trapnr, error_code, 0)) return 0; @@ -214,49 +211,6 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, die(str, regs, error_code); } - return -1; -} - -static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr, - siginfo_t *info) -{ - unsigned long siaddr; - int sicode; - - switch (trapnr) { - default: - return SEND_SIG_PRIV; - - case X86_TRAP_DE: - sicode = FPE_INTDIV; - siaddr = uprobe_get_trap_addr(regs); - break; - case X86_TRAP_UD: - sicode = ILL_ILLOPN; - siaddr = uprobe_get_trap_addr(regs); - break; - case X86_TRAP_AC: - sicode = BUS_ADRALN; - siaddr = 0; - break; - } - - info->si_signo = signr; - info->si_errno = 0; - info->si_code = sicode; - info->si_addr = (void __user *)siaddr; - return info; -} - -static void -do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, - long error_code, siginfo_t *info) -{ - struct task_struct *tsk = current; - - - if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code)) - return; /* * We want error_code and trap_nr set for userspace faults and * kernelspace faults which result in die(), but not @@ -269,24 +223,45 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; + return -1; +} + +static void show_signal(struct task_struct *tsk, int signr, + const char *type, const char *desc, + struct pt_regs *regs, long error_code) +{ if (show_unhandled_signals && unhandled_signal(tsk, signr) && printk_ratelimit()) { - pr_info("%s[%d] trap %s ip:%lx sp:%lx error:%lx", - tsk->comm, tsk->pid, str, + pr_info("%s[%d] %s%s ip:%lx sp:%lx error:%lx", + tsk->comm, task_pid_nr(tsk), type, desc, regs->ip, regs->sp, error_code); print_vma_addr(KERN_CONT " in ", regs->ip); pr_cont("\n"); } +} + +static void +do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, + long error_code, int sicode, void __user *addr) +{ + struct task_struct *tsk = current; + + + if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code)) + return; + + show_signal(tsk, signr, "trap ", str, regs, error_code); - force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk); + if (!sicode) + force_sig(signr, tsk); + else + force_sig_fault(signr, sicode, addr, tsk); } NOKPROBE_SYMBOL(do_trap); static void do_error_trap(struct pt_regs *regs, long error_code, char *str, - unsigned long trapnr, int signr) + unsigned long trapnr, int signr, int sicode, void __user *addr) { - siginfo_t info; - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); /* @@ -299,26 +274,26 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { cond_local_irq_enable(regs); - clear_siginfo(&info); - do_trap(trapnr, signr, str, regs, error_code, - fill_trap_info(regs, signr, trapnr, &info)); + do_trap(trapnr, signr, str, regs, error_code, sicode, addr); } } -#define DO_ERROR(trapnr, signr, str, name) \ -dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ -{ \ - do_error_trap(regs, error_code, str, trapnr, signr); \ +#define IP ((void __user *)uprobe_get_trap_addr(regs)) +#define DO_ERROR(trapnr, signr, sicode, addr, str, name) \ +dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ +{ \ + do_error_trap(regs, error_code, str, trapnr, signr, sicode, addr); \ } -DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error) -DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow) -DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) -DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) -DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) -DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) -DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) -DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) +DO_ERROR(X86_TRAP_DE, SIGFPE, FPE_INTDIV, IP, "divide error", divide_error) +DO_ERROR(X86_TRAP_OF, SIGSEGV, 0, NULL, "overflow", overflow) +DO_ERROR(X86_TRAP_UD, SIGILL, ILL_ILLOPN, IP, "invalid opcode", invalid_op) +DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, 0, NULL, "coprocessor segment overrun", coprocessor_segment_overrun) +DO_ERROR(X86_TRAP_TS, SIGSEGV, 0, NULL, "invalid TSS", invalid_TSS) +DO_ERROR(X86_TRAP_NP, SIGBUS, 0, NULL, "segment not present", segment_not_present) +DO_ERROR(X86_TRAP_SS, SIGBUS, 0, NULL, "stack segment", stack_segment) +DO_ERROR(X86_TRAP_AC, SIGBUS, BUS_ADRALN, NULL, "alignment check", alignment_check) +#undef IP #ifdef CONFIG_VMAP_STACK __visible void __noreturn handle_stack_overflow(const char *message, @@ -459,7 +434,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) { const struct mpx_bndcsr *bndcsr; - siginfo_t *info; RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); if (notify_die(DIE_TRAP, "bounds", regs, error_code, @@ -497,8 +471,11 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) goto exit_trap; break; /* Success, it was handled */ case 1: /* Bound violation. */ - info = mpx_generate_siginfo(regs); - if (IS_ERR(info)) { + { + struct task_struct *tsk = current; + struct mpx_fault_info mpx; + + if (mpx_fault_info(&mpx, regs)) { /* * We failed to decode the MPX instruction. Act as if * the exception was not caused by MPX. @@ -507,14 +484,20 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) } /* * Success, we decoded the instruction and retrieved - * an 'info' containing the address being accessed + * an 'mpx' containing the address being accessed * which caused the exception. This information * allows and application to possibly handle the * #BR exception itself. */ - do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, info); - kfree(info); + if (!do_trap_no_signal(tsk, X86_TRAP_BR, "bounds", regs, + error_code)) + break; + + show_signal(tsk, SIGSEGV, "trap ", "bounds", regs, error_code); + + force_sig_bnderr(mpx.addr, mpx.lower, mpx.upper); break; + } case 0: /* No exception caused by Intel MPX operations. */ goto exit_trap; default: @@ -531,12 +514,13 @@ exit_trap: * up here if the kernel has MPX turned off at compile * time.. */ - do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL); + do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL); } dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) { + const char *desc = "general protection fault"; struct task_struct *tsk; RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); @@ -570,25 +554,18 @@ do_general_protection(struct pt_regs *regs, long error_code) kprobe_fault_handler(regs, X86_TRAP_GP)) return; - if (notify_die(DIE_GPF, "general protection fault", regs, error_code, + if (notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) - die("general protection fault", regs, error_code); + die(desc, regs, error_code); return; } tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && - printk_ratelimit()) { - pr_info("%s[%d] general protection ip:%lx sp:%lx error:%lx", - tsk->comm, task_pid_nr(tsk), - regs->ip, regs->sp, error_code); - print_vma_addr(KERN_CONT " in ", regs->ip); - pr_cont("\n"); - } + show_signal(tsk, SIGSEGV, "", desc, regs, error_code); - force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); + force_sig(SIGSEGV, tsk); } NOKPROBE_SYMBOL(do_general_protection); @@ -631,7 +608,7 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) goto exit; cond_local_irq_enable(regs); - do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); + do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, 0, NULL); cond_local_irq_disable(regs); exit: @@ -845,7 +822,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) { struct task_struct *task = current; struct fpu *fpu = &task->thread.fpu; - siginfo_t info; + int si_code; char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" : "simd exception"; @@ -871,18 +848,14 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) task->thread.trap_nr = trapnr; task->thread.error_code = error_code; - clear_siginfo(&info); - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_addr = (void __user *)uprobe_get_trap_addr(regs); - - info.si_code = fpu__exception_code(fpu, trapnr); + si_code = fpu__exception_code(fpu, trapnr); /* Retry when we get spurious exceptions: */ - if (!info.si_code) + if (!si_code) return; - force_sig_info(SIGFPE, &info, task); + force_sig_fault(SIGFPE, si_code, + (void __user *)uprobe_get_trap_addr(regs), task); } dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) @@ -942,20 +915,13 @@ NOKPROBE_SYMBOL(do_device_not_available); #ifdef CONFIG_X86_32 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) { - siginfo_t info; - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); local_irq_enable(); - clear_siginfo(&info); - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_BADSTK; - info.si_addr = NULL; if (notify_die(DIE_TRAP, "iret exception", regs, error_code, X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) { do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code, - &info); + ILL_BADSTK, (void __user *)NULL); } } #endif diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index ff20b35e98dd..f8f3cfda01ae 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -271,19 +271,13 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst, */ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs) { - siginfo_t info; struct task_struct *tsk = current; tsk->thread.cr2 = (unsigned long)addr; tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE; tsk->thread.trap_nr = X86_TRAP_PF; - clear_siginfo(&info); - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = addr; - force_sig_info(SIGSEGV, &info, tsk); + force_sig_fault(SIGSEGV, SEGV_MAPERR, addr, tsk); if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV))) return; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index deb576b23b7c..843feb94a950 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -1086,7 +1086,7 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs pr_err("return address clobbered: pid=%d, %%sp=%#lx, %%ip=%#lx\n", current->pid, regs->sp, regs->ip); - force_sig_info(SIGSEGV, SEND_SIG_FORCED, current); + force_sig(SIGSEGV, current); } return -1; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 01d209ab5481..4e80080f277a 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -36,6 +36,8 @@ #include "trace.h" +#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64) + static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint) { return atomic64_read(&synic->sint[sint]); @@ -132,8 +134,10 @@ static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx) struct kvm_vcpu *vcpu = NULL; int i; - if (vpidx < KVM_MAX_VCPUS) - vcpu = kvm_get_vcpu(kvm, vpidx); + if (vpidx >= KVM_MAX_VCPUS) + return NULL; + + vcpu = kvm_get_vcpu(kvm, vpidx); if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx) return vcpu; kvm_for_each_vcpu(i, vcpu, kvm) @@ -689,6 +693,24 @@ void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu) stimer_cleanup(&hv_vcpu->stimer[i]); } +bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) + return false; + return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED; +} +EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled); + +bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu, + struct hv_vp_assist_page *assist_page) +{ + if (!kvm_hv_assist_page_enabled(vcpu)) + return false; + return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, + assist_page, sizeof(*assist_page)); +} +EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page); + static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer) { struct hv_message *msg = &stimer->msg; @@ -1040,21 +1062,41 @@ static u64 current_task_runtime_100ns(void) static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) { - struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; + struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; switch (msr) { - case HV_X64_MSR_VP_INDEX: - if (!host) + case HV_X64_MSR_VP_INDEX: { + struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + int vcpu_idx = kvm_vcpu_get_idx(vcpu); + u32 new_vp_index = (u32)data; + + if (!host || new_vp_index >= KVM_MAX_VCPUS) return 1; - hv->vp_index = (u32)data; + + if (new_vp_index == hv_vcpu->vp_index) + return 0; + + /* + * The VP index is initialized to vcpu_index by + * kvm_hv_vcpu_postcreate so they initially match. Now the + * VP index is changing, adjust num_mismatched_vp_indexes if + * it now matches or no longer matches vcpu_idx. + */ + if (hv_vcpu->vp_index == vcpu_idx) + atomic_inc(&hv->num_mismatched_vp_indexes); + else if (new_vp_index == vcpu_idx) + atomic_dec(&hv->num_mismatched_vp_indexes); + + hv_vcpu->vp_index = new_vp_index; break; + } case HV_X64_MSR_VP_ASSIST_PAGE: { u64 gfn; unsigned long addr; if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) { - hv->hv_vapic = data; - if (kvm_lapic_enable_pv_eoi(vcpu, 0)) + hv_vcpu->hv_vapic = data; + if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0)) return 1; break; } @@ -1062,12 +1104,19 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); if (kvm_is_error_hva(addr)) return 1; - if (__clear_user((void __user *)addr, PAGE_SIZE)) + + /* + * Clear apic_assist portion of f(struct hv_vp_assist_page + * only, there can be valuable data in the rest which needs + * to be preserved e.g. on migration. + */ + if (__clear_user((void __user *)addr, sizeof(u32))) return 1; - hv->hv_vapic = data; + hv_vcpu->hv_vapic = data; kvm_vcpu_mark_page_dirty(vcpu, gfn); if (kvm_lapic_enable_pv_eoi(vcpu, - gfn_to_gpa(gfn) | KVM_MSR_ENABLED)) + gfn_to_gpa(gfn) | KVM_MSR_ENABLED, + sizeof(struct hv_vp_assist_page))) return 1; break; } @@ -1080,7 +1129,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) case HV_X64_MSR_VP_RUNTIME: if (!host) return 1; - hv->runtime_offset = data - current_task_runtime_100ns(); + hv_vcpu->runtime_offset = data - current_task_runtime_100ns(); break; case HV_X64_MSR_SCONTROL: case HV_X64_MSR_SVERSION: @@ -1172,11 +1221,11 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) { u64 data = 0; - struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; + struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; switch (msr) { case HV_X64_MSR_VP_INDEX: - data = hv->vp_index; + data = hv_vcpu->vp_index; break; case HV_X64_MSR_EOI: return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); @@ -1185,10 +1234,10 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, case HV_X64_MSR_TPR: return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); case HV_X64_MSR_VP_ASSIST_PAGE: - data = hv->hv_vapic; + data = hv_vcpu->hv_vapic; break; case HV_X64_MSR_VP_RUNTIME: - data = current_task_runtime_100ns() + hv->runtime_offset; + data = current_task_runtime_100ns() + hv_vcpu->runtime_offset; break; case HV_X64_MSR_SCONTROL: case HV_X64_MSR_SVERSION: @@ -1255,32 +1304,47 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) return kvm_hv_get_msr(vcpu, msr, pdata, host); } -static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no) +static __always_inline unsigned long *sparse_set_to_vcpu_mask( + struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask, + u64 *vp_bitmap, unsigned long *vcpu_bitmap) { - int i = 0, j; + struct kvm_hv *hv = &kvm->arch.hyperv; + struct kvm_vcpu *vcpu; + int i, bank, sbank = 0; - if (!(valid_bank_mask & BIT_ULL(bank_no))) - return -1; + memset(vp_bitmap, 0, + KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap)); + for_each_set_bit(bank, (unsigned long *)&valid_bank_mask, + KVM_HV_MAX_SPARSE_VCPU_SET_BITS) + vp_bitmap[bank] = sparse_banks[sbank++]; - for (j = 0; j < bank_no; j++) - if (valid_bank_mask & BIT_ULL(j)) - i++; + if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) { + /* for all vcpus vp_index == vcpu_idx */ + return (unsigned long *)vp_bitmap; + } - return i; + bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS); + kvm_for_each_vcpu(i, vcpu, kvm) { + if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index, + (unsigned long *)vp_bitmap)) + __set_bit(i, vcpu_bitmap); + } + return vcpu_bitmap; } static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, u16 rep_cnt, bool ex) { struct kvm *kvm = current_vcpu->kvm; - struct kvm_vcpu_hv *hv_current = ¤t_vcpu->arch.hyperv; + struct kvm_vcpu_hv *hv_vcpu = ¤t_vcpu->arch.hyperv; struct hv_tlb_flush_ex flush_ex; struct hv_tlb_flush flush; - struct kvm_vcpu *vcpu; - unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0}; - unsigned long valid_bank_mask = 0; + u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; + DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS); + unsigned long *vcpu_mask; + u64 valid_bank_mask; u64 sparse_banks[64]; - int sparse_banks_len, i; + int sparse_banks_len; bool all_cpus; if (!ex) { @@ -1290,6 +1354,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, trace_kvm_hv_flush_tlb(flush.processor_mask, flush.address_space, flush.flags); + valid_bank_mask = BIT_ULL(0); sparse_banks[0] = flush.processor_mask; all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS; } else { @@ -1306,7 +1371,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, all_cpus = flush_ex.hv_vp_set.format != HV_GENERIC_SET_SPARSE_4K; - sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) * + sparse_banks_len = + bitmap_weight((unsigned long *)&valid_bank_mask, 64) * sizeof(sparse_banks[0]); if (!sparse_banks_len && !all_cpus) @@ -1321,48 +1387,19 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, return HV_STATUS_INVALID_HYPERCALL_INPUT; } - cpumask_clear(&hv_current->tlb_lush); - - kvm_for_each_vcpu(i, vcpu, kvm) { - struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; - int bank = hv->vp_index / 64, sbank = 0; - - if (!all_cpus) { - /* Banks >64 can't be represented */ - if (bank >= 64) - continue; - - /* Non-ex hypercalls can only address first 64 vCPUs */ - if (!ex && bank) - continue; - - if (ex) { - /* - * Check is the bank of this vCPU is in sparse - * set and get the sparse bank number. - */ - sbank = get_sparse_bank_no(valid_bank_mask, - bank); - - if (sbank < 0) - continue; - } - - if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64))) - continue; - } + cpumask_clear(&hv_vcpu->tlb_flush); - /* - * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we - * can't analyze it here, flush TLB regardless of the specified - * address space. - */ - __set_bit(i, vcpu_bitmap); - } + vcpu_mask = all_cpus ? NULL : + sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, + vp_bitmap, vcpu_bitmap); + /* + * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't + * analyze it here, flush TLB regardless of the specified address space. + */ kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP, - vcpu_bitmap, &hv_current->tlb_lush); + vcpu_mask, &hv_vcpu->tlb_flush); ret_success: /* We always do full TLB flush, set rep_done = rep_cnt. */ @@ -1370,6 +1407,99 @@ ret_success: ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET); } +static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector, + unsigned long *vcpu_bitmap) +{ + struct kvm_lapic_irq irq = { + .delivery_mode = APIC_DM_FIXED, + .vector = vector + }; + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu_bitmap && !test_bit(i, vcpu_bitmap)) + continue; + + /* We fail only when APIC is disabled */ + kvm_apic_set_irq(vcpu, &irq, NULL); + } +} + +static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa, + bool ex, bool fast) +{ + struct kvm *kvm = current_vcpu->kvm; + struct hv_send_ipi_ex send_ipi_ex; + struct hv_send_ipi send_ipi; + u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; + DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS); + unsigned long *vcpu_mask; + unsigned long valid_bank_mask; + u64 sparse_banks[64]; + int sparse_banks_len; + u32 vector; + bool all_cpus; + + if (!ex) { + if (!fast) { + if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi, + sizeof(send_ipi)))) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + sparse_banks[0] = send_ipi.cpu_mask; + vector = send_ipi.vector; + } else { + /* 'reserved' part of hv_send_ipi should be 0 */ + if (unlikely(ingpa >> 32 != 0)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + sparse_banks[0] = outgpa; + vector = (u32)ingpa; + } + all_cpus = false; + valid_bank_mask = BIT_ULL(0); + + trace_kvm_hv_send_ipi(vector, sparse_banks[0]); + } else { + if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex, + sizeof(send_ipi_ex)))) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + + trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector, + send_ipi_ex.vp_set.format, + send_ipi_ex.vp_set.valid_bank_mask); + + vector = send_ipi_ex.vector; + valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask; + sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) * + sizeof(sparse_banks[0]); + + all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; + + if (!sparse_banks_len) + goto ret_success; + + if (!all_cpus && + kvm_read_guest(kvm, + ingpa + offsetof(struct hv_send_ipi_ex, + vp_set.bank_contents), + sparse_banks, + sparse_banks_len)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } + + if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + + vcpu_mask = all_cpus ? NULL : + sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, + vp_bitmap, vcpu_bitmap); + + kvm_send_ipi_to_many(kvm, vector, vcpu_mask); + +ret_success: + return HV_STATUS_SUCCESS; +} + bool kvm_hv_hypercall_enabled(struct kvm *kvm) { return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE; @@ -1539,6 +1669,20 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) } ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true); break; + case HVCALL_SEND_IPI: + if (unlikely(rep)) { + ret = HV_STATUS_INVALID_HYPERCALL_INPUT; + break; + } + ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast); + break; + case HVCALL_SEND_IPI_EX: + if (unlikely(fast || rep)) { + ret = HV_STATUS_INVALID_HYPERCALL_INPUT; + break; + } + ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false); + break; default: ret = HV_STATUS_INVALID_HYPERCALL_CODE; break; diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index d6aa969e20f1..0e66c12ed2c3 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -62,6 +62,10 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu); +bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu); +bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu, + struct hv_vp_assist_page *assist_page); + static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu, int timer_index) { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index fbb0e6df121b..3cd227ff807f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -70,6 +70,11 @@ #define APIC_BROADCAST 0xFF #define X2APIC_BROADCAST 0xFFFFFFFFul +static bool lapic_timer_advance_adjust_done = false; +#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100 +/* step-by-step approximation to mitigate fluctuation */ +#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8 + static inline int apic_test_vector(int vec, void *bitmap) { return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec)); @@ -955,14 +960,14 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, map = rcu_dereference(kvm->arch.apic_map); ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap); - if (ret) + if (ret) { + *r = 0; for_each_set_bit(i, &bitmap, 16) { if (!dst[i]) continue; - if (*r < 0) - *r = 0; *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map); } + } rcu_read_unlock(); return ret; @@ -1472,7 +1477,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) void wait_lapic_expire(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - u64 guest_tsc, tsc_deadline; + u64 guest_tsc, tsc_deadline, ns; if (!lapic_in_kernel(vcpu)) return; @@ -1492,6 +1497,24 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) if (guest_tsc < tsc_deadline) __delay(min(tsc_deadline - guest_tsc, nsec_to_cycles(vcpu, lapic_timer_advance_ns))); + + if (!lapic_timer_advance_adjust_done) { + /* too early */ + if (guest_tsc < tsc_deadline) { + ns = (tsc_deadline - guest_tsc) * 1000000ULL; + do_div(ns, vcpu->arch.virtual_tsc_khz); + lapic_timer_advance_ns -= min((unsigned int)ns, + lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); + } else { + /* too late */ + ns = (guest_tsc - tsc_deadline) * 1000000ULL; + do_div(ns, vcpu->arch.virtual_tsc_khz); + lapic_timer_advance_ns += min((unsigned int)ns, + lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP); + } + if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE) + lapic_timer_advance_adjust_done = true; + } } static void start_sw_tscdeadline(struct kvm_lapic *apic) @@ -2621,17 +2644,25 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) return 0; } -int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) +int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len) { u64 addr = data & ~KVM_MSR_ENABLED; + struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data; + unsigned long new_len; + if (!IS_ALIGNED(addr, 4)) return 1; vcpu->arch.pv_eoi.msr_val = data; if (!pv_eoi_enabled(vcpu)) return 0; - return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, - addr, sizeof(u8)); + + if (addr == ghc->gpa && len <= ghc->len) + new_len = ghc->len; + else + new_len = len; + + return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len); } void kvm_apic_accept_events(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index ed0ed39abd36..ff6ef9c3d760 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -120,7 +120,7 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; } -int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data); +int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len); void kvm_lapic_init(void); void kvm_lapic_exit(void); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 51b953ad9d4e..cf5f572f2305 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -932,7 +932,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, while (cache->nobjs < ARRAY_SIZE(cache->objects)) { obj = kmem_cache_zalloc(base_cache, GFP_KERNEL); if (!obj) - return -ENOMEM; + return cache->nobjs >= min ? 0 : -ENOMEM; cache->objects[cache->nobjs++] = obj; } return 0; @@ -960,7 +960,7 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, while (cache->nobjs < ARRAY_SIZE(cache->objects)) { page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT); if (!page) - return -ENOMEM; + return cache->nobjs >= min ? 0 : -ENOMEM; cache->objects[cache->nobjs++] = page; } return 0; @@ -1265,24 +1265,24 @@ pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, mmu_free_pte_list_desc(desc); } -static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) +static void __pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; struct pte_list_desc *prev_desc; int i; if (!rmap_head->val) { - printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte); + pr_err("%s: %p 0->BUG\n", __func__, spte); BUG(); } else if (!(rmap_head->val & 1)) { - rmap_printk("pte_list_remove: %p 1->0\n", spte); + rmap_printk("%s: %p 1->0\n", __func__, spte); if ((u64 *)rmap_head->val != spte) { - printk(KERN_ERR "pte_list_remove: %p 1->BUG\n", spte); + pr_err("%s: %p 1->BUG\n", __func__, spte); BUG(); } rmap_head->val = 0; } else { - rmap_printk("pte_list_remove: %p many->many\n", spte); + rmap_printk("%s: %p many->many\n", __func__, spte); desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); prev_desc = NULL; while (desc) { @@ -1296,11 +1296,17 @@ static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) prev_desc = desc; desc = desc->more; } - pr_err("pte_list_remove: %p many->many\n", spte); + pr_err("%s: %p many->many\n", __func__, spte); BUG(); } } +static void pte_list_remove(struct kvm_rmap_head *rmap_head, u64 *sptep) +{ + mmu_spte_clear_track_bits(sptep); + __pte_list_remove(sptep, rmap_head); +} + static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, struct kvm_memory_slot *slot) { @@ -1349,7 +1355,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) sp = page_header(__pa(spte)); gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); rmap_head = gfn_to_rmap(kvm, gfn, sp); - pte_list_remove(spte, rmap_head); + __pte_list_remove(spte, rmap_head); } /* @@ -1685,7 +1691,7 @@ static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head) while ((sptep = rmap_get_first(rmap_head, &iter))) { rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep); - drop_spte(kvm, sptep); + pte_list_remove(rmap_head, sptep); flush = true; } @@ -1721,7 +1727,7 @@ restart: need_flush = 1; if (pte_write(*ptep)) { - drop_spte(kvm, sptep); + pte_list_remove(rmap_head, sptep); goto restart; } else { new_spte = *sptep & ~PT64_BASE_ADDR_MASK; @@ -1988,7 +1994,7 @@ static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu, static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, u64 *parent_pte) { - pte_list_remove(parent_pte, &sp->parent_ptes); + __pte_list_remove(parent_pte, &sp->parent_ptes); } static void drop_parent_pte(struct kvm_mmu_page *sp, @@ -2181,7 +2187,7 @@ static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list) { if (sp->role.cr4_pae != !!is_pae(vcpu) - || vcpu->arch.mmu.sync_page(vcpu, sp) == 0) { + || vcpu->arch.mmu->sync_page(vcpu, sp) == 0) { kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); return false; } @@ -2375,14 +2381,14 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, int collisions = 0; LIST_HEAD(invalid_list); - role = vcpu->arch.mmu.base_role; + role = vcpu->arch.mmu->mmu_role.base; role.level = level; role.direct = direct; if (role.direct) role.cr4_pae = 0; role.access = access; - if (!vcpu->arch.mmu.direct_map - && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { + if (!vcpu->arch.mmu->direct_map + && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) { quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; role.quadrant = quadrant; @@ -2457,11 +2463,11 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato { iterator->addr = addr; iterator->shadow_addr = root; - iterator->level = vcpu->arch.mmu.shadow_root_level; + iterator->level = vcpu->arch.mmu->shadow_root_level; if (iterator->level == PT64_ROOT_4LEVEL && - vcpu->arch.mmu.root_level < PT64_ROOT_4LEVEL && - !vcpu->arch.mmu.direct_map) + vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL && + !vcpu->arch.mmu->direct_map) --iterator->level; if (iterator->level == PT32E_ROOT_LEVEL) { @@ -2469,10 +2475,10 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato * prev_root is currently only used for 64-bit hosts. So only * the active root_hpa is valid here. */ - BUG_ON(root != vcpu->arch.mmu.root_hpa); + BUG_ON(root != vcpu->arch.mmu->root_hpa); iterator->shadow_addr - = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; + = vcpu->arch.mmu->pae_root[(addr >> 30) & 3]; iterator->shadow_addr &= PT64_BASE_ADDR_MASK; --iterator->level; if (!iterator->shadow_addr) @@ -2483,7 +2489,7 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, struct kvm_vcpu *vcpu, u64 addr) { - shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu.root_hpa, + shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu->root_hpa, addr); } @@ -3095,7 +3101,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, int emulate = 0; gfn_t pseudo_gfn; - if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) return 0; for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { @@ -3125,16 +3131,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) { - siginfo_t info; - - clear_siginfo(&info); - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_MCEERR_AR; - info.si_addr = (void __user *)address; - info.si_addr_lsb = PAGE_SHIFT; - - send_sig_info(SIGBUS, &info, tsk); + send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, PAGE_SHIFT, tsk); } static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) @@ -3310,7 +3307,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, u64 spte = 0ull; uint retry_count = 0; - if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) return false; if (!page_fault_can_be_fast(error_code)) @@ -3480,11 +3477,11 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, } /* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */ -void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free) +void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + ulong roots_to_free) { int i; LIST_HEAD(invalid_list); - struct kvm_mmu *mmu = &vcpu->arch.mmu; bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT; BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG); @@ -3544,20 +3541,20 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) struct kvm_mmu_page *sp; unsigned i; - if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) { + if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) { spin_lock(&vcpu->kvm->mmu_lock); if(make_mmu_pages_available(vcpu) < 0) { spin_unlock(&vcpu->kvm->mmu_lock); return -ENOSPC; } sp = kvm_mmu_get_page(vcpu, 0, 0, - vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL); + vcpu->arch.mmu->shadow_root_level, 1, ACC_ALL); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); - vcpu->arch.mmu.root_hpa = __pa(sp->spt); - } else if (vcpu->arch.mmu.shadow_root_level == PT32E_ROOT_LEVEL) { + vcpu->arch.mmu->root_hpa = __pa(sp->spt); + } else if (vcpu->arch.mmu->shadow_root_level == PT32E_ROOT_LEVEL) { for (i = 0; i < 4; ++i) { - hpa_t root = vcpu->arch.mmu.pae_root[i]; + hpa_t root = vcpu->arch.mmu->pae_root[i]; MMU_WARN_ON(VALID_PAGE(root)); spin_lock(&vcpu->kvm->mmu_lock); @@ -3570,9 +3567,9 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); - vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; + vcpu->arch.mmu->pae_root[i] = root | PT_PRESENT_MASK; } - vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); + vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); } else BUG(); @@ -3586,7 +3583,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) gfn_t root_gfn; int i; - root_gfn = vcpu->arch.mmu.get_cr3(vcpu) >> PAGE_SHIFT; + root_gfn = vcpu->arch.mmu->get_cr3(vcpu) >> PAGE_SHIFT; if (mmu_check_root(vcpu, root_gfn)) return 1; @@ -3595,8 +3592,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * Do we shadow a long mode page table? If so we need to * write-protect the guests page table root. */ - if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { - hpa_t root = vcpu->arch.mmu.root_hpa; + if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) { + hpa_t root = vcpu->arch.mmu->root_hpa; MMU_WARN_ON(VALID_PAGE(root)); @@ -3606,11 +3603,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) return -ENOSPC; } sp = kvm_mmu_get_page(vcpu, root_gfn, 0, - vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL); + vcpu->arch.mmu->shadow_root_level, 0, ACC_ALL); root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); - vcpu->arch.mmu.root_hpa = root; + vcpu->arch.mmu->root_hpa = root; return 0; } @@ -3620,17 +3617,17 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * the shadow page table may be a PAE or a long mode page table. */ pm_mask = PT_PRESENT_MASK; - if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) + if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL) pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; for (i = 0; i < 4; ++i) { - hpa_t root = vcpu->arch.mmu.pae_root[i]; + hpa_t root = vcpu->arch.mmu->pae_root[i]; MMU_WARN_ON(VALID_PAGE(root)); - if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { - pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i); + if (vcpu->arch.mmu->root_level == PT32E_ROOT_LEVEL) { + pdptr = vcpu->arch.mmu->get_pdptr(vcpu, i); if (!(pdptr & PT_PRESENT_MASK)) { - vcpu->arch.mmu.pae_root[i] = 0; + vcpu->arch.mmu->pae_root[i] = 0; continue; } root_gfn = pdptr >> PAGE_SHIFT; @@ -3648,16 +3645,16 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); - vcpu->arch.mmu.pae_root[i] = root | pm_mask; + vcpu->arch.mmu->pae_root[i] = root | pm_mask; } - vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); + vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); /* * If we shadow a 32 bit page table with a long mode page * table we enter this path. */ - if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) { - if (vcpu->arch.mmu.lm_root == NULL) { + if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL) { + if (vcpu->arch.mmu->lm_root == NULL) { /* * The additional page necessary for this is only * allocated on demand. @@ -3669,12 +3666,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) if (lm_root == NULL) return 1; - lm_root[0] = __pa(vcpu->arch.mmu.pae_root) | pm_mask; + lm_root[0] = __pa(vcpu->arch.mmu->pae_root) | pm_mask; - vcpu->arch.mmu.lm_root = lm_root; + vcpu->arch.mmu->lm_root = lm_root; } - vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.lm_root); + vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root); } return 0; @@ -3682,7 +3679,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) static int mmu_alloc_roots(struct kvm_vcpu *vcpu) { - if (vcpu->arch.mmu.direct_map) + if (vcpu->arch.mmu->direct_map) return mmu_alloc_direct_roots(vcpu); else return mmu_alloc_shadow_roots(vcpu); @@ -3693,17 +3690,16 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) int i; struct kvm_mmu_page *sp; - if (vcpu->arch.mmu.direct_map) + if (vcpu->arch.mmu->direct_map) return; - if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) return; vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); - if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { - hpa_t root = vcpu->arch.mmu.root_hpa; - + if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) { + hpa_t root = vcpu->arch.mmu->root_hpa; sp = page_header(root); /* @@ -3734,7 +3730,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC); for (i = 0; i < 4; ++i) { - hpa_t root = vcpu->arch.mmu.pae_root[i]; + hpa_t root = vcpu->arch.mmu->pae_root[i]; if (root && VALID_PAGE(root)) { root &= PT64_BASE_ADDR_MASK; @@ -3808,7 +3804,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) int root, leaf; bool reserved = false; - if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) goto exit; walk_shadow_page_lockless_begin(vcpu); @@ -3825,7 +3821,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) if (!is_shadow_present_pte(spte)) break; - reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte, + reserved |= is_shadow_zero_bits_set(vcpu->arch.mmu, spte, iterator.level); } @@ -3904,7 +3900,7 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) struct kvm_shadow_walk_iterator iterator; u64 spte; - if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) return; walk_shadow_page_lockless_begin(vcpu); @@ -3931,7 +3927,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, if (r) return r; - MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); + MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); return nonpaging_map(vcpu, gva & PAGE_MASK, @@ -3944,8 +3940,8 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; arch.gfn = gfn; - arch.direct_map = vcpu->arch.mmu.direct_map; - arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu); + arch.direct_map = vcpu->arch.mmu->direct_map; + arch.cr3 = vcpu->arch.mmu->get_cr3(vcpu); return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch); } @@ -4051,7 +4047,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, int write = error_code & PFERR_WRITE_MASK; bool map_writable; - MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); + MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)); if (page_fault_handle_page_track(vcpu, error_code, gfn)) return RET_PF_EMULATE; @@ -4127,7 +4123,7 @@ static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3, { uint i; struct kvm_mmu_root_info root; - struct kvm_mmu *mmu = &vcpu->arch.mmu; + struct kvm_mmu *mmu = vcpu->arch.mmu; root.cr3 = mmu->get_cr3(vcpu); root.hpa = mmu->root_hpa; @@ -4150,7 +4146,7 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, union kvm_mmu_page_role new_role, bool skip_tlb_flush) { - struct kvm_mmu *mmu = &vcpu->arch.mmu; + struct kvm_mmu *mmu = vcpu->arch.mmu; /* * For now, limit the fast switch to 64-bit hosts+VMs in order to avoid @@ -4201,7 +4197,8 @@ static void __kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush) { if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush)) - kvm_mmu_free_roots(vcpu, KVM_MMU_ROOT_CURRENT); + kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, + KVM_MMU_ROOT_CURRENT); } void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush) @@ -4219,7 +4216,7 @@ static unsigned long get_cr3(struct kvm_vcpu *vcpu) static void inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { - vcpu->arch.mmu.inject_page_fault(vcpu, fault); + vcpu->arch.mmu->inject_page_fault(vcpu, fault); } static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, @@ -4423,7 +4420,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { - bool uses_nx = context->nx || context->base_role.smep_andnot_wp; + bool uses_nx = context->nx || + context->mmu_role.base.smep_andnot_wp; struct rsvd_bits_validate *shadow_zero_check; int i; @@ -4562,7 +4560,7 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, * SMAP:kernel-mode data accesses from user-mode * mappings should fault. A fault is considered * as a SMAP violation if all of the following - * conditions are ture: + * conditions are true: * - X86_CR4_SMAP is set in CR4 * - A user page is accessed * - The access is not a fetch @@ -4723,27 +4721,65 @@ static void paging32E_init_context(struct kvm_vcpu *vcpu, paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL); } -static union kvm_mmu_page_role -kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu) +static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu) +{ + union kvm_mmu_extended_role ext = {0}; + + ext.cr0_pg = !!is_paging(vcpu); + ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); + ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); + ext.cr4_pse = !!is_pse(vcpu); + ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE); + ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57); + + ext.valid = 1; + + return ext; +} + +static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu, + bool base_only) { - union kvm_mmu_page_role role = {0}; + union kvm_mmu_role role = {0}; - role.guest_mode = is_guest_mode(vcpu); - role.smm = is_smm(vcpu); - role.ad_disabled = (shadow_accessed_mask == 0); - role.level = kvm_x86_ops->get_tdp_level(vcpu); - role.direct = true; - role.access = ACC_ALL; + role.base.access = ACC_ALL; + role.base.nxe = !!is_nx(vcpu); + role.base.cr4_pae = !!is_pae(vcpu); + role.base.cr0_wp = is_write_protection(vcpu); + role.base.smm = is_smm(vcpu); + role.base.guest_mode = is_guest_mode(vcpu); + + if (base_only) + return role; + + role.ext = kvm_calc_mmu_role_ext(vcpu); + + return role; +} + +static union kvm_mmu_role +kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only) +{ + union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only); + + role.base.ad_disabled = (shadow_accessed_mask == 0); + role.base.level = kvm_x86_ops->get_tdp_level(vcpu); + role.base.direct = true; return role; } static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) { - struct kvm_mmu *context = &vcpu->arch.mmu; + struct kvm_mmu *context = vcpu->arch.mmu; + union kvm_mmu_role new_role = + kvm_calc_tdp_mmu_root_page_role(vcpu, false); + + new_role.base.word &= mmu_base_role_mask.word; + if (new_role.as_u64 == context->mmu_role.as_u64) + return; - context->base_role.word = mmu_base_role_mask.word & - kvm_calc_tdp_mmu_root_page_role(vcpu).word; + context->mmu_role.as_u64 = new_role.as_u64; context->page_fault = tdp_page_fault; context->sync_page = nonpaging_sync_page; context->invlpg = nonpaging_invlpg; @@ -4783,36 +4819,36 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) reset_tdp_shadow_zero_bits_mask(vcpu, context); } -static union kvm_mmu_page_role -kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu) -{ - union kvm_mmu_page_role role = {0}; - bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); - bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); - - role.nxe = is_nx(vcpu); - role.cr4_pae = !!is_pae(vcpu); - role.cr0_wp = is_write_protection(vcpu); - role.smep_andnot_wp = smep && !is_write_protection(vcpu); - role.smap_andnot_wp = smap && !is_write_protection(vcpu); - role.guest_mode = is_guest_mode(vcpu); - role.smm = is_smm(vcpu); - role.direct = !is_paging(vcpu); - role.access = ACC_ALL; +static union kvm_mmu_role +kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only) +{ + union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only); + + role.base.smep_andnot_wp = role.ext.cr4_smep && + !is_write_protection(vcpu); + role.base.smap_andnot_wp = role.ext.cr4_smap && + !is_write_protection(vcpu); + role.base.direct = !is_paging(vcpu); if (!is_long_mode(vcpu)) - role.level = PT32E_ROOT_LEVEL; + role.base.level = PT32E_ROOT_LEVEL; else if (is_la57_mode(vcpu)) - role.level = PT64_ROOT_5LEVEL; + role.base.level = PT64_ROOT_5LEVEL; else - role.level = PT64_ROOT_4LEVEL; + role.base.level = PT64_ROOT_4LEVEL; return role; } void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) { - struct kvm_mmu *context = &vcpu->arch.mmu; + struct kvm_mmu *context = vcpu->arch.mmu; + union kvm_mmu_role new_role = + kvm_calc_shadow_mmu_root_page_role(vcpu, false); + + new_role.base.word &= mmu_base_role_mask.word; + if (new_role.as_u64 == context->mmu_role.as_u64) + return; if (!is_paging(vcpu)) nonpaging_init_context(vcpu, context); @@ -4823,22 +4859,28 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) else paging32_init_context(vcpu, context); - context->base_role.word = mmu_base_role_mask.word & - kvm_calc_shadow_mmu_root_page_role(vcpu).word; + context->mmu_role.as_u64 = new_role.as_u64; reset_shadow_zero_bits_mask(vcpu, context); } EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); -static union kvm_mmu_page_role -kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty) +static union kvm_mmu_role +kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, + bool execonly) { - union kvm_mmu_page_role role = vcpu->arch.mmu.base_role; + union kvm_mmu_role role; + + /* Base role is inherited from root_mmu */ + role.base.word = vcpu->arch.root_mmu.mmu_role.base.word; + role.ext = kvm_calc_mmu_role_ext(vcpu); - role.level = PT64_ROOT_4LEVEL; - role.direct = false; - role.ad_disabled = !accessed_dirty; - role.guest_mode = true; - role.access = ACC_ALL; + role.base.level = PT64_ROOT_4LEVEL; + role.base.direct = false; + role.base.ad_disabled = !accessed_dirty; + role.base.guest_mode = true; + role.base.access = ACC_ALL; + + role.ext.execonly = execonly; return role; } @@ -4846,11 +4888,17 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty) void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, bool accessed_dirty, gpa_t new_eptp) { - struct kvm_mmu *context = &vcpu->arch.mmu; - union kvm_mmu_page_role root_page_role = - kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty); + struct kvm_mmu *context = vcpu->arch.mmu; + union kvm_mmu_role new_role = + kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty, + execonly); + + __kvm_mmu_new_cr3(vcpu, new_eptp, new_role.base, false); + + new_role.base.word &= mmu_base_role_mask.word; + if (new_role.as_u64 == context->mmu_role.as_u64) + return; - __kvm_mmu_new_cr3(vcpu, new_eptp, root_page_role, false); context->shadow_root_level = PT64_ROOT_4LEVEL; context->nx = true; @@ -4862,7 +4910,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, context->update_pte = ept_update_pte; context->root_level = PT64_ROOT_4LEVEL; context->direct_map = false; - context->base_role.word = root_page_role.word & mmu_base_role_mask.word; + context->mmu_role.as_u64 = new_role.as_u64; + update_permission_bitmask(vcpu, context, true); update_pkru_bitmask(vcpu, context, true); update_last_nonleaf_level(vcpu, context); @@ -4873,7 +4922,7 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu); static void init_kvm_softmmu(struct kvm_vcpu *vcpu) { - struct kvm_mmu *context = &vcpu->arch.mmu; + struct kvm_mmu *context = vcpu->arch.mmu; kvm_init_shadow_mmu(vcpu); context->set_cr3 = kvm_x86_ops->set_cr3; @@ -4884,14 +4933,20 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu) static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) { + union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false); struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; + new_role.base.word &= mmu_base_role_mask.word; + if (new_role.as_u64 == g_context->mmu_role.as_u64) + return; + + g_context->mmu_role.as_u64 = new_role.as_u64; g_context->get_cr3 = get_cr3; g_context->get_pdptr = kvm_pdptr_read; g_context->inject_page_fault = kvm_inject_page_fault; /* - * Note that arch.mmu.gva_to_gpa translates l2_gpa to l1_gpa using + * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using * L1's nested page tables (e.g. EPT12). The nested translation * of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using * L2's page tables as the first level of translation and L1's @@ -4930,10 +4985,10 @@ void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots) if (reset_roots) { uint i; - vcpu->arch.mmu.root_hpa = INVALID_PAGE; + vcpu->arch.mmu->root_hpa = INVALID_PAGE; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; + vcpu->arch.mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; } if (mmu_is_nested(vcpu)) @@ -4948,10 +5003,14 @@ EXPORT_SYMBOL_GPL(kvm_init_mmu); static union kvm_mmu_page_role kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu) { + union kvm_mmu_role role; + if (tdp_enabled) - return kvm_calc_tdp_mmu_root_page_role(vcpu); + role = kvm_calc_tdp_mmu_root_page_role(vcpu, true); else - return kvm_calc_shadow_mmu_root_page_role(vcpu); + role = kvm_calc_shadow_mmu_root_page_role(vcpu, true); + + return role.base; } void kvm_mmu_reset_context(struct kvm_vcpu *vcpu) @@ -4981,8 +5040,10 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load); void kvm_mmu_unload(struct kvm_vcpu *vcpu) { - kvm_mmu_free_roots(vcpu, KVM_MMU_ROOTS_ALL); - WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa)); + kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL); + WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root_hpa)); + kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); + WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root_hpa)); } EXPORT_SYMBOL_GPL(kvm_mmu_unload); @@ -4996,7 +5057,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, } ++vcpu->kvm->stat.mmu_pte_updated; - vcpu->arch.mmu.update_pte(vcpu, sp, spte, new); + vcpu->arch.mmu->update_pte(vcpu, sp, spte, new); } static bool need_remote_flush(u64 old, u64 new) @@ -5173,10 +5234,12 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, local_flush = true; while (npte--) { + u32 base_role = vcpu->arch.mmu->mmu_role.base.word; + entry = *spte; mmu_page_zap_pte(vcpu->kvm, sp, spte); if (gentry && - !((sp->role.word ^ vcpu->arch.mmu.base_role.word) + !((sp->role.word ^ base_role) & mmu_base_role_mask.word) && rmap_can_add(vcpu)) mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); if (need_remote_flush(entry, *spte)) @@ -5194,7 +5257,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) gpa_t gpa; int r; - if (vcpu->arch.mmu.direct_map) + if (vcpu->arch.mmu->direct_map) return 0; gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL); @@ -5230,10 +5293,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, { int r, emulation_type = 0; enum emulation_result er; - bool direct = vcpu->arch.mmu.direct_map; + bool direct = vcpu->arch.mmu->direct_map; /* With shadow page tables, fault_address contains a GVA or nGPA. */ - if (vcpu->arch.mmu.direct_map) { + if (vcpu->arch.mmu->direct_map) { vcpu->arch.gpa_available = true; vcpu->arch.gpa_val = cr2; } @@ -5246,8 +5309,9 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, } if (r == RET_PF_INVALID) { - r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code), - false); + r = vcpu->arch.mmu->page_fault(vcpu, cr2, + lower_32_bits(error_code), + false); WARN_ON(r == RET_PF_INVALID); } @@ -5263,7 +5327,7 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code, * paging in both guests. If true, we simply unprotect the page * and resume the guest. */ - if (vcpu->arch.mmu.direct_map && + if (vcpu->arch.mmu->direct_map && (error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) { kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2)); return 1; @@ -5311,7 +5375,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { - struct kvm_mmu *mmu = &vcpu->arch.mmu; + struct kvm_mmu *mmu = vcpu->arch.mmu; int i; /* INVLPG on a * non-canonical address is a NOP according to the SDM. */ @@ -5342,7 +5406,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) { - struct kvm_mmu *mmu = &vcpu->arch.mmu; + struct kvm_mmu *mmu = vcpu->arch.mmu; bool tlb_flush = false; uint i; @@ -5386,8 +5450,8 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp); static void free_mmu_pages(struct kvm_vcpu *vcpu) { - free_page((unsigned long)vcpu->arch.mmu.pae_root); - free_page((unsigned long)vcpu->arch.mmu.lm_root); + free_page((unsigned long)vcpu->arch.mmu->pae_root); + free_page((unsigned long)vcpu->arch.mmu->lm_root); } static int alloc_mmu_pages(struct kvm_vcpu *vcpu) @@ -5407,9 +5471,9 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu) if (!page) return -ENOMEM; - vcpu->arch.mmu.pae_root = page_address(page); + vcpu->arch.mmu->pae_root = page_address(page); for (i = 0; i < 4; ++i) - vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; + vcpu->arch.mmu->pae_root[i] = INVALID_PAGE; return 0; } @@ -5418,27 +5482,21 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) { uint i; - vcpu->arch.walk_mmu = &vcpu->arch.mmu; - vcpu->arch.mmu.root_hpa = INVALID_PAGE; - vcpu->arch.mmu.translate_gpa = translate_gpa; - vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; + vcpu->arch.mmu = &vcpu->arch.root_mmu; + vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; + vcpu->arch.root_mmu.root_hpa = INVALID_PAGE; + vcpu->arch.root_mmu.translate_gpa = translate_gpa; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - vcpu->arch.mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; - - return alloc_mmu_pages(vcpu); -} + vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; -void kvm_mmu_setup(struct kvm_vcpu *vcpu) -{ - MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa)); + vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE; + vcpu->arch.guest_mmu.translate_gpa = translate_gpa; + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; - /* - * kvm_mmu_setup() is called only on vCPU initialization. - * Therefore, no need to reset mmu roots as they are not yet - * initialized. - */ - kvm_init_mmu(vcpu, false); + vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa; + return alloc_mmu_pages(vcpu); } static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm, @@ -5621,7 +5679,7 @@ restart: if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && PageTransCompoundMap(pfn_to_page(pfn))) { - drop_spte(kvm, sptep); + pte_list_remove(rmap_head, sptep); need_tlb_flush = 1; goto restart; } @@ -5878,6 +5936,16 @@ int kvm_mmu_module_init(void) { int ret = -ENOMEM; + /* + * MMU roles use union aliasing which is, generally speaking, an + * undefined behavior. However, we supposedly know how compilers behave + * and the current status quo is unlikely to change. Guardians below are + * supposed to let us know if the assumption becomes false. + */ + BUILD_BUG_ON(sizeof(union kvm_mmu_page_role) != sizeof(u32)); + BUILD_BUG_ON(sizeof(union kvm_mmu_extended_role) != sizeof(u32)); + BUILD_BUG_ON(sizeof(union kvm_mmu_role) != sizeof(u64)); + kvm_mmu_reset_all_pte_masks(); pte_list_desc_cache = kmem_cache_create("pte_list_desc", @@ -5907,7 +5975,7 @@ out: } /* - * Caculate mmu pages needed for kvm. + * Calculate mmu pages needed for kvm. */ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 1fab69c0b2f3..c7b333147c4a 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -43,11 +43,6 @@ #define PT32_ROOT_LEVEL 2 #define PT32E_ROOT_LEVEL 3 -#define PT_PDPE_LEVEL 3 -#define PT_DIRECTORY_LEVEL 2 -#define PT_PAGE_TABLE_LEVEL 1 -#define PT_MAX_HUGEPAGE_LEVEL (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES - 1) - static inline u64 rsvd_bits(int s, int e) { if (e < s) @@ -80,7 +75,7 @@ static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) { - if (likely(vcpu->arch.mmu.root_hpa != INVALID_PAGE)) + if (likely(vcpu->arch.mmu->root_hpa != INVALID_PAGE)) return 0; return kvm_mmu_load(vcpu); @@ -102,9 +97,9 @@ static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu) static inline void kvm_mmu_load_cr3(struct kvm_vcpu *vcpu) { - if (VALID_PAGE(vcpu->arch.mmu.root_hpa)) - vcpu->arch.mmu.set_cr3(vcpu, vcpu->arch.mmu.root_hpa | - kvm_get_active_pcid(vcpu)); + if (VALID_PAGE(vcpu->arch.mmu->root_hpa)) + vcpu->arch.mmu->set_cr3(vcpu, vcpu->arch.mmu->root_hpa | + kvm_get_active_pcid(vcpu)); } /* diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 1272861e77b9..abac7e208853 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -59,19 +59,19 @@ static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn) int i; struct kvm_mmu_page *sp; - if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) return; - if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) { - hpa_t root = vcpu->arch.mmu.root_hpa; + if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) { + hpa_t root = vcpu->arch.mmu->root_hpa; sp = page_header(root); - __mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu.root_level); + __mmu_spte_walk(vcpu, sp, fn, vcpu->arch.mmu->root_level); return; } for (i = 0; i < 4; ++i) { - hpa_t root = vcpu->arch.mmu.pae_root[i]; + hpa_t root = vcpu->arch.mmu->pae_root[i]; if (root && VALID_PAGE(root)) { root &= PT64_BASE_ADDR_MASK; @@ -122,7 +122,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) hpa = pfn << PAGE_SHIFT; if ((*sptep & PT64_BASE_ADDR_MASK) != hpa) audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx " - "ent %llxn", vcpu->arch.mmu.root_level, pfn, + "ent %llxn", vcpu->arch.mmu->root_level, pfn, hpa, *sptep); } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 14ffd973df54..7cf2185b7eb5 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -158,14 +158,15 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, u64 gpte) { - if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) + if (is_rsvd_bits_set(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) goto no_present; if (!FNAME(is_present_gpte)(gpte)) goto no_present; /* if accessed bit is not supported prefetch non accessed gpte */ - if (PT_HAVE_ACCESSED_DIRTY(&vcpu->arch.mmu) && !(gpte & PT_GUEST_ACCESSED_MASK)) + if (PT_HAVE_ACCESSED_DIRTY(vcpu->arch.mmu) && + !(gpte & PT_GUEST_ACCESSED_MASK)) goto no_present; return false; @@ -480,7 +481,7 @@ error: static int FNAME(walk_addr)(struct guest_walker *walker, struct kvm_vcpu *vcpu, gva_t addr, u32 access) { - return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.mmu, addr, + return FNAME(walk_addr_generic)(walker, vcpu, vcpu->arch.mmu, addr, access); } @@ -509,7 +510,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, gfn = gpte_to_gfn(gpte); pte_access = sp->role.access & FNAME(gpte_access)(gpte); - FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte); + FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte); pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, no_dirty_log && (pte_access & ACC_WRITE_MASK)); if (is_error_pfn(pfn)) @@ -604,7 +605,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, direct_access = gw->pte_access; - top_level = vcpu->arch.mmu.root_level; + top_level = vcpu->arch.mmu->root_level; if (top_level == PT32E_ROOT_LEVEL) top_level = PT32_ROOT_LEVEL; /* @@ -616,7 +617,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (FNAME(gpte_changed)(vcpu, gw, top_level)) goto out_gpte_changed; - if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) goto out_gpte_changed; for (shadow_walk_init(&it, vcpu, addr); @@ -1004,7 +1005,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) gfn = gpte_to_gfn(gpte); pte_access = sp->role.access; pte_access &= FNAME(gpte_access)(gpte); - FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte); + FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte); if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access, &nr_present)) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 61ccfb13899e..0e21ccc46792 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -809,6 +809,8 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) nested_svm_check_exception(svm, nr, has_error_code, error_code)) return; + kvm_deliver_exception_payload(&svm->vcpu); + if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) { unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); @@ -2922,18 +2924,18 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) { WARN_ON(mmu_is_nested(vcpu)); kvm_init_shadow_mmu(vcpu); - vcpu->arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; - vcpu->arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; - vcpu->arch.mmu.get_pdptr = nested_svm_get_tdp_pdptr; - vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; - vcpu->arch.mmu.shadow_root_level = get_npt_level(vcpu); - reset_shadow_zero_bits_mask(vcpu, &vcpu->arch.mmu); + vcpu->arch.mmu->set_cr3 = nested_svm_set_tdp_cr3; + vcpu->arch.mmu->get_cr3 = nested_svm_get_tdp_cr3; + vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr; + vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit; + vcpu->arch.mmu->shadow_root_level = get_npt_level(vcpu); + reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; } static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) { - vcpu->arch.walk_mmu = &vcpu->arch.mmu; + vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; } static int nested_svm_check_permissions(struct vcpu_svm *svm) @@ -2969,16 +2971,13 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, svm->vmcb->control.exit_info_1 = error_code; /* - * FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception. - * The fix is to add the ancillary datum (CR2 or DR6) to structs - * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be - * written only when inject_pending_event runs (DR6 would written here - * too). This should be conditional on a new capability---if the - * capability is disabled, kvm_multiple_exception would write the - * ancillary information to CR2 or DR6, for backwards ABI-compatibility. + * EXITINFO2 is undefined for all exception intercepts other + * than #PF. */ if (svm->vcpu.arch.exception.nested_apf) svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; + else if (svm->vcpu.arch.exception.has_payload) + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload; else svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; @@ -5642,26 +5641,24 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r13, %c[r13](%[svm]) \n\t" "mov %%r14, %c[r14](%[svm]) \n\t" "mov %%r15, %c[r15](%[svm]) \n\t" -#endif /* * Clear host registers marked as clobbered to prevent * speculative use. */ - "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t" - "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t" - "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t" - "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t" - "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t" -#ifdef CONFIG_X86_64 - "xor %%r8, %%r8 \n\t" - "xor %%r9, %%r9 \n\t" - "xor %%r10, %%r10 \n\t" - "xor %%r11, %%r11 \n\t" - "xor %%r12, %%r12 \n\t" - "xor %%r13, %%r13 \n\t" - "xor %%r14, %%r14 \n\t" - "xor %%r15, %%r15 \n\t" + "xor %%r8d, %%r8d \n\t" + "xor %%r9d, %%r9d \n\t" + "xor %%r10d, %%r10d \n\t" + "xor %%r11d, %%r11d \n\t" + "xor %%r12d, %%r12d \n\t" + "xor %%r13d, %%r13d \n\t" + "xor %%r14d, %%r14d \n\t" + "xor %%r15d, %%r15d \n\t" #endif + "xor %%ebx, %%ebx \n\t" + "xor %%ecx, %%ecx \n\t" + "xor %%edx, %%edx \n\t" + "xor %%esi, %%esi \n\t" + "xor %%edi, %%edi \n\t" "pop %%" _ASM_BP : : [svm]"a"(svm), @@ -7040,6 +7037,13 @@ failed: return ret; } +static int nested_enable_evmcs(struct kvm_vcpu *vcpu, + uint16_t *vmcs_version) +{ + /* Intel-only feature */ + return -ENODEV; +} + static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -7169,6 +7173,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .mem_enc_op = svm_mem_enc_op, .mem_enc_reg_region = svm_register_enc_region, .mem_enc_unreg_region = svm_unregister_enc_region, + + .nested_enable_evmcs = nested_enable_evmcs, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 0f997683404f..0659465a745c 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1418,6 +1418,48 @@ TRACE_EVENT(kvm_hv_flush_tlb_ex, __entry->valid_bank_mask, __entry->format, __entry->address_space, __entry->flags) ); + +/* + * Tracepoints for kvm_hv_send_ipi. + */ +TRACE_EVENT(kvm_hv_send_ipi, + TP_PROTO(u32 vector, u64 processor_mask), + TP_ARGS(vector, processor_mask), + + TP_STRUCT__entry( + __field(u32, vector) + __field(u64, processor_mask) + ), + + TP_fast_assign( + __entry->vector = vector; + __entry->processor_mask = processor_mask; + ), + + TP_printk("vector %x processor_mask 0x%llx", + __entry->vector, __entry->processor_mask) +); + +TRACE_EVENT(kvm_hv_send_ipi_ex, + TP_PROTO(u32 vector, u64 format, u64 valid_bank_mask), + TP_ARGS(vector, format, valid_bank_mask), + + TP_STRUCT__entry( + __field(u32, vector) + __field(u64, format) + __field(u64, valid_bank_mask) + ), + + TP_fast_assign( + __entry->vector = vector; + __entry->format = format; + __entry->valid_bank_mask = valid_bank_mask; + ), + + TP_printk("vector %x format %llx valid_bank_mask 0x%llx", + __entry->vector, __entry->format, + __entry->valid_bank_mask) +); #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e665aa7167cf..4555077d69ce 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -20,6 +20,7 @@ #include "mmu.h" #include "cpuid.h" #include "lapic.h" +#include "hyperv.h" #include <linux/kvm_host.h> #include <linux/module.h> @@ -61,7 +62,7 @@ #define __ex(x) __kvm_handle_fault_on_reboot(x) #define __ex_clear(x, reg) \ - ____kvm_handle_fault_on_reboot(x, "xor " reg " , " reg) + ____kvm_handle_fault_on_reboot(x, "xor " reg ", " reg) MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -107,9 +108,12 @@ module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); * VMX and be a hypervisor for its own guests. If nested=0, guests may not * use VMX instructions. */ -static bool __read_mostly nested = 0; +static bool __read_mostly nested = 1; module_param(nested, bool, S_IRUGO); +static bool __read_mostly nested_early_check = 0; +module_param(nested_early_check, bool, S_IRUGO); + static u64 __read_mostly host_xss; static bool __read_mostly enable_pml = 1; @@ -131,7 +135,7 @@ static bool __read_mostly enable_preemption_timer = 1; module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); #endif -#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) +#define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE #define KVM_VM_CR0_ALWAYS_ON \ (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | \ @@ -187,6 +191,7 @@ static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX; module_param(ple_window_max, uint, 0444); extern const ulong vmx_return; +extern const ulong vmx_early_consistency_check_return; static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); @@ -827,14 +832,28 @@ struct nested_vmx { */ struct vmcs12 *cached_shadow_vmcs12; /* - * Indicates if the shadow vmcs must be updated with the - * data hold by vmcs12 + * Indicates if the shadow vmcs or enlightened vmcs must be updated + * with the data held by struct vmcs12. */ - bool sync_shadow_vmcs; + bool need_vmcs12_sync; bool dirty_vmcs12; + /* + * vmcs02 has been initialized, i.e. state that is constant for + * vmcs02 has been written to the backing VMCS. Initialization + * is delayed until L1 actually attempts to run a nested VM. + */ + bool vmcs02_initialized; + bool change_vmcs01_virtual_apic_mode; + /* + * Enlightened VMCS has been enabled. It does not mean that L1 has to + * use it. However, VMX features available to L1 will be limited based + * on what the enlightened VMCS supports. + */ + bool enlightened_vmcs_enabled; + /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; @@ -870,6 +889,10 @@ struct nested_vmx { /* in guest mode on SMM entry? */ bool guest_mode; } smm; + + gpa_t hv_evmcs_vmptr; + struct page *hv_evmcs_page; + struct hv_enlightened_vmcs *hv_evmcs; }; #define POSTED_INTR_ON 0 @@ -1381,6 +1404,49 @@ DEFINE_STATIC_KEY_FALSE(enable_evmcs); #define KVM_EVMCS_VERSION 1 +/* + * Enlightened VMCSv1 doesn't support these: + * + * POSTED_INTR_NV = 0x00000002, + * GUEST_INTR_STATUS = 0x00000810, + * APIC_ACCESS_ADDR = 0x00002014, + * POSTED_INTR_DESC_ADDR = 0x00002016, + * EOI_EXIT_BITMAP0 = 0x0000201c, + * EOI_EXIT_BITMAP1 = 0x0000201e, + * EOI_EXIT_BITMAP2 = 0x00002020, + * EOI_EXIT_BITMAP3 = 0x00002022, + * GUEST_PML_INDEX = 0x00000812, + * PML_ADDRESS = 0x0000200e, + * VM_FUNCTION_CONTROL = 0x00002018, + * EPTP_LIST_ADDRESS = 0x00002024, + * VMREAD_BITMAP = 0x00002026, + * VMWRITE_BITMAP = 0x00002028, + * + * TSC_MULTIPLIER = 0x00002032, + * PLE_GAP = 0x00004020, + * PLE_WINDOW = 0x00004022, + * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, + * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, + * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, + * + * Currently unsupported in KVM: + * GUEST_IA32_RTIT_CTL = 0x00002814, + */ +#define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \ + PIN_BASED_VMX_PREEMPTION_TIMER) +#define EVMCS1_UNSUPPORTED_2NDEXEC \ + (SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \ + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | \ + SECONDARY_EXEC_APIC_REGISTER_VIRT | \ + SECONDARY_EXEC_ENABLE_PML | \ + SECONDARY_EXEC_ENABLE_VMFUNC | \ + SECONDARY_EXEC_SHADOW_VMCS | \ + SECONDARY_EXEC_TSC_SCALING | \ + SECONDARY_EXEC_PAUSE_LOOP_EXITING) +#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) +#define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) +#define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING) + #if IS_ENABLED(CONFIG_HYPERV) static bool __read_mostly enlightened_vmcs = true; module_param(enlightened_vmcs, bool, 0444); @@ -1473,69 +1539,12 @@ static void evmcs_load(u64 phys_addr) static void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) { - /* - * Enlightened VMCSv1 doesn't support these: - * - * POSTED_INTR_NV = 0x00000002, - * GUEST_INTR_STATUS = 0x00000810, - * APIC_ACCESS_ADDR = 0x00002014, - * POSTED_INTR_DESC_ADDR = 0x00002016, - * EOI_EXIT_BITMAP0 = 0x0000201c, - * EOI_EXIT_BITMAP1 = 0x0000201e, - * EOI_EXIT_BITMAP2 = 0x00002020, - * EOI_EXIT_BITMAP3 = 0x00002022, - */ - vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; - vmcs_conf->cpu_based_2nd_exec_ctrl &= - ~SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; - vmcs_conf->cpu_based_2nd_exec_ctrl &= - ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - vmcs_conf->cpu_based_2nd_exec_ctrl &= - ~SECONDARY_EXEC_APIC_REGISTER_VIRT; - - /* - * GUEST_PML_INDEX = 0x00000812, - * PML_ADDRESS = 0x0000200e, - */ - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_PML; - - /* VM_FUNCTION_CONTROL = 0x00002018, */ - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_ENABLE_VMFUNC; - - /* - * EPTP_LIST_ADDRESS = 0x00002024, - * VMREAD_BITMAP = 0x00002026, - * VMWRITE_BITMAP = 0x00002028, - */ - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_SHADOW_VMCS; - - /* - * TSC_MULTIPLIER = 0x00002032, - */ - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_TSC_SCALING; + vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL; + vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC; - /* - * PLE_GAP = 0x00004020, - * PLE_WINDOW = 0x00004022, - */ - vmcs_conf->cpu_based_2nd_exec_ctrl &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; - - /* - * VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, - */ - vmcs_conf->pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; - - /* - * GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, - * HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, - */ - vmcs_conf->vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; - vmcs_conf->vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; + vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; - /* - * Currently unsupported in KVM: - * GUEST_IA32_RTIT_CTL = 0x00002814, - */ } /* check_ept_pointer() should be under protection of ept_pointer_lock. */ @@ -1560,26 +1569,27 @@ static void check_ept_pointer_match(struct kvm *kvm) static int vmx_hv_remote_flush_tlb(struct kvm *kvm) { - int ret; + struct kvm_vcpu *vcpu; + int ret = -ENOTSUPP, i; spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK) check_ept_pointer_match(kvm); - if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) { - ret = -ENOTSUPP; - goto out; - } - /* * FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE hypercall needs the address of the * base of EPT PML4 table, strip off EPT configuration information. */ - ret = hyperv_flush_guest_mapping( - to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK); + if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) { + kvm_for_each_vcpu(i, vcpu, kvm) + ret |= hyperv_flush_guest_mapping( + to_vmx(kvm_get_vcpu(kvm, i))->ept_pointer & PAGE_MASK); + } else { + ret = hyperv_flush_guest_mapping( + to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer & PAGE_MASK); + } -out: spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); return ret; } @@ -1595,6 +1605,35 @@ static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {} static inline void evmcs_touch_msr_bitmap(void) {} #endif /* IS_ENABLED(CONFIG_HYPERV) */ +static int nested_enable_evmcs(struct kvm_vcpu *vcpu, + uint16_t *vmcs_version) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + /* We don't support disabling the feature for simplicity. */ + if (vmx->nested.enlightened_vmcs_enabled) + return 0; + + vmx->nested.enlightened_vmcs_enabled = true; + + /* + * vmcs_version represents the range of supported Enlightened VMCS + * versions: lower 8 bits is the minimal version, higher 8 bits is the + * maximum supported version. KVM supports versions from 1 to + * KVM_EVMCS_VERSION. + */ + if (vmcs_version) + *vmcs_version = (KVM_EVMCS_VERSION << 8) | 1; + + vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL; + vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; + vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; + vmx->nested.msrs.secondary_ctls_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC; + vmx->nested.msrs.vmfunc_controls &= ~EVMCS1_UNSUPPORTED_VMFUNC; + + return 0; +} + static inline bool is_exception_n(u32 intr_info, u8 vector) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | @@ -1617,11 +1656,6 @@ static inline bool is_page_fault(u32 intr_info) return is_exception_n(intr_info, PF_VECTOR); } -static inline bool is_no_device(u32 intr_info) -{ - return is_exception_n(intr_info, NM_VECTOR); -} - static inline bool is_invalid_opcode(u32 intr_info) { return is_exception_n(intr_info, UD_VECTOR); @@ -1632,12 +1666,6 @@ static inline bool is_gp_fault(u32 intr_info) return is_exception_n(intr_info, GP_VECTOR); } -static inline bool is_external_interrupt(u32 intr_info) -{ - return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) - == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); -} - static inline bool is_machine_check(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | @@ -2063,9 +2091,6 @@ static inline bool is_nmi(u32 intr_info) static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, u32 exit_intr_info, unsigned long exit_qualification); -static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12, - u32 reason, unsigned long qualification); static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) { @@ -2077,7 +2102,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) return -1; } -static inline void __invvpid(int ext, u16 vpid, gva_t gva) +static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva) { struct { u64 vpid : 16; @@ -2086,22 +2111,20 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva) } operand = { vpid, 0, gva }; bool error; - asm volatile (__ex(ASM_VMX_INVVPID) CC_SET(na) - : CC_OUT(na) (error) : "a"(&operand), "c"(ext) - : "memory"); + asm volatile (__ex("invvpid %2, %1") CC_SET(na) + : CC_OUT(na) (error) : "r"(ext), "m"(operand)); BUG_ON(error); } -static inline void __invept(int ext, u64 eptp, gpa_t gpa) +static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) { struct { u64 eptp, gpa; } operand = {eptp, gpa}; bool error; - asm volatile (__ex(ASM_VMX_INVEPT) CC_SET(na) - : CC_OUT(na) (error) : "a" (&operand), "c" (ext) - : "memory"); + asm volatile (__ex("invept %2, %1") CC_SET(na) + : CC_OUT(na) (error) : "r"(ext), "m"(operand)); BUG_ON(error); } @@ -2120,9 +2143,8 @@ static void vmcs_clear(struct vmcs *vmcs) u64 phys_addr = __pa(vmcs); bool error; - asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) CC_SET(na) - : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr) - : "memory"); + asm volatile (__ex("vmclear %1") CC_SET(na) + : CC_OUT(na) (error) : "m"(phys_addr)); if (unlikely(error)) printk(KERN_ERR "kvm: vmclear fail: %p/%llx\n", vmcs, phys_addr); @@ -2145,9 +2167,8 @@ static void vmcs_load(struct vmcs *vmcs) if (static_branch_unlikely(&enable_evmcs)) return evmcs_load(phys_addr); - asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) CC_SET(na) - : CC_OUT(na) (error) : "a"(&phys_addr), "m"(phys_addr) - : "memory"); + asm volatile (__ex("vmptrld %1") CC_SET(na) + : CC_OUT(na) (error) : "m"(phys_addr)); if (unlikely(error)) printk(KERN_ERR "kvm: vmptrld %p/%llx failed\n", vmcs, phys_addr); @@ -2323,8 +2344,8 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field) { unsigned long value; - asm volatile (__ex_clear(ASM_VMX_VMREAD_RDX_RAX, "%0") - : "=a"(value) : "d"(field) : "cc"); + asm volatile (__ex_clear("vmread %1, %0", "%k0") + : "=r"(value) : "r"(field)); return value; } @@ -2375,8 +2396,8 @@ static __always_inline void __vmcs_writel(unsigned long field, unsigned long val { bool error; - asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) CC_SET(na) - : CC_OUT(na) (error) : "a"(value), "d"(field)); + asm volatile (__ex("vmwrite %2, %1") CC_SET(na) + : CC_OUT(na) (error) : "r"(field), "rm"(value)); if (unlikely(error)) vmwrite_error(field, value); } @@ -2707,7 +2728,8 @@ static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, u64 guest_val, u64 host_val) { vmcs_write64(guest_val_vmcs, guest_val); - vmcs_write64(host_val_vmcs, host_val); + if (host_val_vmcs != HOST_IA32_EFER) + vmcs_write64(host_val_vmcs, host_val); vm_entry_controls_setbit(vmx, entry); vm_exit_controls_setbit(vmx, exit); } @@ -2805,8 +2827,6 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) ignore_bits &= ~(u64)EFER_SCE; #endif - clear_atomic_switch_msr(vmx, MSR_EFER); - /* * On EPT, we can't emulate NX, so we must switch EFER atomically. * On CPUs that support "load IA32_EFER", always switch EFER @@ -2819,8 +2839,12 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) if (guest_efer != host_efer) add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer, false); + else + clear_atomic_switch_msr(vmx, MSR_EFER); return false; } else { + clear_atomic_switch_msr(vmx, MSR_EFER); + guest_efer &= ~ignore_bits; guest_efer |= host_efer & ignore_bits; @@ -3272,34 +3296,30 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); unsigned int nr = vcpu->arch.exception.nr; + bool has_payload = vcpu->arch.exception.has_payload; + unsigned long payload = vcpu->arch.exception.payload; if (nr == PF_VECTOR) { if (vcpu->arch.exception.nested_apf) { *exit_qual = vcpu->arch.apf.nested_apf_token; return 1; } - /* - * FIXME: we must not write CR2 when L1 intercepts an L2 #PF exception. - * The fix is to add the ancillary datum (CR2 or DR6) to structs - * kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 - * can be written only when inject_pending_event runs. This should be - * conditional on a new capability---if the capability is disabled, - * kvm_multiple_exception would write the ancillary information to - * CR2 or DR6, for backwards ABI-compatibility. - */ if (nested_vmx_is_page_fault_vmexit(vmcs12, vcpu->arch.exception.error_code)) { - *exit_qual = vcpu->arch.cr2; - return 1; - } - } else { - if (vmcs12->exception_bitmap & (1u << nr)) { - if (nr == DB_VECTOR) - *exit_qual = vcpu->arch.dr6; - else - *exit_qual = 0; + *exit_qual = has_payload ? payload : vcpu->arch.cr2; return 1; } + } else if (vmcs12->exception_bitmap & (1u << nr)) { + if (nr == DB_VECTOR) { + if (!has_payload) { + payload = vcpu->arch.dr6; + payload &= ~(DR6_FIXED_1 | DR6_BT); + payload ^= DR6_RTM; + } + *exit_qual = payload; + } else + *exit_qual = 0; + return 1; } return 0; @@ -3326,6 +3346,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) u32 error_code = vcpu->arch.exception.error_code; u32 intr_info = nr | INTR_INFO_VALID_MASK; + kvm_deliver_exception_payload(vcpu); + if (has_error_code) { vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code); intr_info |= INTR_INFO_DELIVER_CODE_MASK; @@ -4397,9 +4419,7 @@ static void kvm_cpu_vmxon(u64 addr) cr4_set_bits(X86_CR4_VMXE); intel_pt_handle_vmx(1); - asm volatile (ASM_VMX_VMXON_RAX - : : "a"(&addr), "m"(addr) - : "memory", "cc"); + asm volatile ("vmxon %0" : : "m"(addr)); } static int hardware_enable(void) @@ -4468,7 +4488,7 @@ static void vmclear_local_loaded_vmcss(void) */ static void kvm_cpu_vmxoff(void) { - asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); + asm volatile (__ex("vmxoff")); intel_pt_handle_vmx(0); cr4_clear_bits(X86_CR4_VMXE); @@ -5112,9 +5132,10 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid, bool invalidate_gpa) { if (enable_ept && (invalidate_gpa || !enable_vpid)) { - if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) return; - ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa)); + ept_sync_context(construct_eptp(vcpu, + vcpu->arch.mmu->root_hpa)); } else { vpid_sync_context(vpid); } @@ -5264,7 +5285,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long hw_cr0; - hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK); + hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF); if (enable_unrestricted_guest) hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; else { @@ -6339,6 +6360,9 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx) rdmsr(MSR_IA32_CR_PAT, low32, high32); vmcs_write64(HOST_IA32_PAT, low32 | ((u64) high32 << 32)); } + + if (cpu_has_load_ia32_efer) + vmcs_write64(HOST_IA32_EFER, host_efer); } static void set_cr4_guest_host_mask(struct vcpu_vmx *vmx) @@ -6666,7 +6690,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP); if (enable_pml) { - ASSERT(vmx->pml_pg); vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); } @@ -8067,35 +8090,39 @@ static int handle_monitor(struct kvm_vcpu *vcpu) /* * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(), - * set the success or error code of an emulated VMX instruction, as specified - * by Vol 2B, VMX Instruction Reference, "Conventions". + * set the success or error code of an emulated VMX instruction (as specified + * by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated + * instruction. */ -static void nested_vmx_succeed(struct kvm_vcpu *vcpu) +static int nested_vmx_succeed(struct kvm_vcpu *vcpu) { vmx_set_rflags(vcpu, vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)); + return kvm_skip_emulated_instruction(vcpu); } -static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu) +static int nested_vmx_failInvalid(struct kvm_vcpu *vcpu) { vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF)) | X86_EFLAGS_CF); + return kvm_skip_emulated_instruction(vcpu); } -static void nested_vmx_failValid(struct kvm_vcpu *vcpu, - u32 vm_instruction_error) +static int nested_vmx_failValid(struct kvm_vcpu *vcpu, + u32 vm_instruction_error) { - if (to_vmx(vcpu)->nested.current_vmptr == -1ull) { - /* - * failValid writes the error number to the current VMCS, which - * can't be done there isn't a current VMCS. - */ - nested_vmx_failInvalid(vcpu); - return; - } + struct vcpu_vmx *vmx = to_vmx(vcpu); + + /* + * failValid writes the error number to the current VMCS, which + * can't be done if there isn't a current VMCS. + */ + if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs) + return nested_vmx_failInvalid(vcpu); + vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu) & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_SF | X86_EFLAGS_OF)) @@ -8105,6 +8132,7 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu, * We don't need to force a shadow sync because * VM_INSTRUCTION_ERROR is not shadowed */ + return kvm_skip_emulated_instruction(vcpu); } static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator) @@ -8292,6 +8320,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) vmx->nested.vpid02 = allocate_vpid(); + vmx->nested.vmcs02_initialized = false; vmx->nested.vmxon = true; return 0; @@ -8345,10 +8374,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } - if (vmx->nested.vmxon) { - nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); - return kvm_skip_emulated_instruction(vcpu); - } + if (vmx->nested.vmxon) + return nested_vmx_failValid(vcpu, + VMXERR_VMXON_IN_VMX_ROOT_OPERATION); if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES) != VMXON_NEEDED_FEATURES) { @@ -8367,21 +8395,17 @@ static int handle_vmon(struct kvm_vcpu *vcpu) * Note - IA32_VMX_BASIC[48] will never be 1 for the nested case; * which replaces physical address width with 32 */ - if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } + if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) + return nested_vmx_failInvalid(vcpu); page = kvm_vcpu_gpa_to_page(vcpu, vmptr); - if (is_error_page(page)) { - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } + if (is_error_page(page)) + return nested_vmx_failInvalid(vcpu); + if (*(u32 *)kmap(page) != VMCS12_REVISION) { kunmap(page); kvm_release_page_clean(page); - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); + return nested_vmx_failInvalid(vcpu); } kunmap(page); kvm_release_page_clean(page); @@ -8391,8 +8415,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) if (ret) return ret; - nested_vmx_succeed(vcpu); - return kvm_skip_emulated_instruction(vcpu); + return nested_vmx_succeed(vcpu); } /* @@ -8423,8 +8446,24 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) vmcs_write64(VMCS_LINK_POINTER, -1ull); } -static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) +static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!vmx->nested.hv_evmcs) + return; + + kunmap(vmx->nested.hv_evmcs_page); + kvm_release_page_dirty(vmx->nested.hv_evmcs_page); + vmx->nested.hv_evmcs_vmptr = -1ull; + vmx->nested.hv_evmcs_page = NULL; + vmx->nested.hv_evmcs = NULL; +} + +static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + if (vmx->nested.current_vmptr == -1ull) return; @@ -8432,16 +8471,18 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) /* copy to memory all shadowed fields in case they were modified */ copy_shadow_to_vmcs12(vmx); - vmx->nested.sync_shadow_vmcs = false; + vmx->nested.need_vmcs12_sync = false; vmx_disable_shadow_vmcs(vmx); } vmx->nested.posted_intr_nv = -1; /* Flush VMCS12 to guest memory */ - kvm_vcpu_write_guest_page(&vmx->vcpu, + kvm_vcpu_write_guest_page(vcpu, vmx->nested.current_vmptr >> PAGE_SHIFT, vmx->nested.cached_vmcs12, 0, VMCS12_SIZE); + kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); + vmx->nested.current_vmptr = -1ull; } @@ -8449,8 +8490,10 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) * Free whatever needs to be freed from vmx->nested when L1 goes down, or * just stops using VMX. */ -static void free_nested(struct vcpu_vmx *vmx) +static void free_nested(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) return; @@ -8483,6 +8526,10 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.pi_desc = NULL; } + kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL); + + nested_release_evmcs(vcpu); + free_loaded_vmcs(&vmx->nested.vmcs02); } @@ -8491,9 +8538,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) { if (!nested_vmx_check_permission(vcpu)) return 1; - free_nested(to_vmx(vcpu)); - nested_vmx_succeed(vcpu); - return kvm_skip_emulated_instruction(vcpu); + free_nested(vcpu); + return nested_vmx_succeed(vcpu); } /* Emulate the VMCLEAR instruction */ @@ -8509,25 +8555,28 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; - if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { - nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS); - return kvm_skip_emulated_instruction(vcpu); - } + if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) + return nested_vmx_failValid(vcpu, + VMXERR_VMCLEAR_INVALID_ADDRESS); - if (vmptr == vmx->nested.vmxon_ptr) { - nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_VMXON_POINTER); - return kvm_skip_emulated_instruction(vcpu); - } + if (vmptr == vmx->nested.vmxon_ptr) + return nested_vmx_failValid(vcpu, + VMXERR_VMCLEAR_VMXON_POINTER); - if (vmptr == vmx->nested.current_vmptr) - nested_release_vmcs12(vmx); + if (vmx->nested.hv_evmcs_page) { + if (vmptr == vmx->nested.hv_evmcs_vmptr) + nested_release_evmcs(vcpu); + } else { + if (vmptr == vmx->nested.current_vmptr) + nested_release_vmcs12(vcpu); - kvm_vcpu_write_guest(vcpu, - vmptr + offsetof(struct vmcs12, launch_state), - &zero, sizeof(zero)); + kvm_vcpu_write_guest(vcpu, + vmptr + offsetof(struct vmcs12, + launch_state), + &zero, sizeof(zero)); + } - nested_vmx_succeed(vcpu); - return kvm_skip_emulated_instruction(vcpu); + return nested_vmx_succeed(vcpu); } static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch); @@ -8610,6 +8659,395 @@ static inline int vmcs12_write_any(struct vmcs12 *vmcs12, } +static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx) +{ + struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12; + struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs; + + vmcs12->hdr.revision_id = evmcs->revision_id; + + /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */ + vmcs12->tpr_threshold = evmcs->tpr_threshold; + vmcs12->guest_rip = evmcs->guest_rip; + + if (unlikely(!(evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) { + vmcs12->guest_rsp = evmcs->guest_rsp; + vmcs12->guest_rflags = evmcs->guest_rflags; + vmcs12->guest_interruptibility_info = + evmcs->guest_interruptibility_info; + } + + if (unlikely(!(evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) { + vmcs12->cpu_based_vm_exec_control = + evmcs->cpu_based_vm_exec_control; + } + + if (unlikely(!(evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) { + vmcs12->exception_bitmap = evmcs->exception_bitmap; + } + + if (unlikely(!(evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) { + vmcs12->vm_entry_controls = evmcs->vm_entry_controls; + } + + if (unlikely(!(evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) { + vmcs12->vm_entry_intr_info_field = + evmcs->vm_entry_intr_info_field; + vmcs12->vm_entry_exception_error_code = + evmcs->vm_entry_exception_error_code; + vmcs12->vm_entry_instruction_len = + evmcs->vm_entry_instruction_len; + } + + if (unlikely(!(evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) { + vmcs12->host_ia32_pat = evmcs->host_ia32_pat; + vmcs12->host_ia32_efer = evmcs->host_ia32_efer; + vmcs12->host_cr0 = evmcs->host_cr0; + vmcs12->host_cr3 = evmcs->host_cr3; + vmcs12->host_cr4 = evmcs->host_cr4; + vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp; + vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip; + vmcs12->host_rip = evmcs->host_rip; + vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs; + vmcs12->host_es_selector = evmcs->host_es_selector; + vmcs12->host_cs_selector = evmcs->host_cs_selector; + vmcs12->host_ss_selector = evmcs->host_ss_selector; + vmcs12->host_ds_selector = evmcs->host_ds_selector; + vmcs12->host_fs_selector = evmcs->host_fs_selector; + vmcs12->host_gs_selector = evmcs->host_gs_selector; + vmcs12->host_tr_selector = evmcs->host_tr_selector; + } + + if (unlikely(!(evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) { + vmcs12->pin_based_vm_exec_control = + evmcs->pin_based_vm_exec_control; + vmcs12->vm_exit_controls = evmcs->vm_exit_controls; + vmcs12->secondary_vm_exec_control = + evmcs->secondary_vm_exec_control; + } + + if (unlikely(!(evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) { + vmcs12->io_bitmap_a = evmcs->io_bitmap_a; + vmcs12->io_bitmap_b = evmcs->io_bitmap_b; + } + + if (unlikely(!(evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) { + vmcs12->msr_bitmap = evmcs->msr_bitmap; + } + + if (unlikely(!(evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) { + vmcs12->guest_es_base = evmcs->guest_es_base; + vmcs12->guest_cs_base = evmcs->guest_cs_base; + vmcs12->guest_ss_base = evmcs->guest_ss_base; + vmcs12->guest_ds_base = evmcs->guest_ds_base; + vmcs12->guest_fs_base = evmcs->guest_fs_base; + vmcs12->guest_gs_base = evmcs->guest_gs_base; + vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base; + vmcs12->guest_tr_base = evmcs->guest_tr_base; + vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base; + vmcs12->guest_idtr_base = evmcs->guest_idtr_base; + vmcs12->guest_es_limit = evmcs->guest_es_limit; + vmcs12->guest_cs_limit = evmcs->guest_cs_limit; + vmcs12->guest_ss_limit = evmcs->guest_ss_limit; + vmcs12->guest_ds_limit = evmcs->guest_ds_limit; + vmcs12->guest_fs_limit = evmcs->guest_fs_limit; + vmcs12->guest_gs_limit = evmcs->guest_gs_limit; + vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit; + vmcs12->guest_tr_limit = evmcs->guest_tr_limit; + vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit; + vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit; + vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes; + vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes; + vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes; + vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes; + vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes; + vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes; + vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes; + vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes; + vmcs12->guest_es_selector = evmcs->guest_es_selector; + vmcs12->guest_cs_selector = evmcs->guest_cs_selector; + vmcs12->guest_ss_selector = evmcs->guest_ss_selector; + vmcs12->guest_ds_selector = evmcs->guest_ds_selector; + vmcs12->guest_fs_selector = evmcs->guest_fs_selector; + vmcs12->guest_gs_selector = evmcs->guest_gs_selector; + vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector; + vmcs12->guest_tr_selector = evmcs->guest_tr_selector; + } + + if (unlikely(!(evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) { + vmcs12->tsc_offset = evmcs->tsc_offset; + vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr; + vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap; + } + + if (unlikely(!(evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) { + vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask; + vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask; + vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow; + vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow; + vmcs12->guest_cr0 = evmcs->guest_cr0; + vmcs12->guest_cr3 = evmcs->guest_cr3; + vmcs12->guest_cr4 = evmcs->guest_cr4; + vmcs12->guest_dr7 = evmcs->guest_dr7; + } + + if (unlikely(!(evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) { + vmcs12->host_fs_base = evmcs->host_fs_base; + vmcs12->host_gs_base = evmcs->host_gs_base; + vmcs12->host_tr_base = evmcs->host_tr_base; + vmcs12->host_gdtr_base = evmcs->host_gdtr_base; + vmcs12->host_idtr_base = evmcs->host_idtr_base; + vmcs12->host_rsp = evmcs->host_rsp; + } + + if (unlikely(!(evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) { + vmcs12->ept_pointer = evmcs->ept_pointer; + vmcs12->virtual_processor_id = evmcs->virtual_processor_id; + } + + if (unlikely(!(evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) { + vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer; + vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl; + vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat; + vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer; + vmcs12->guest_pdptr0 = evmcs->guest_pdptr0; + vmcs12->guest_pdptr1 = evmcs->guest_pdptr1; + vmcs12->guest_pdptr2 = evmcs->guest_pdptr2; + vmcs12->guest_pdptr3 = evmcs->guest_pdptr3; + vmcs12->guest_pending_dbg_exceptions = + evmcs->guest_pending_dbg_exceptions; + vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp; + vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip; + vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs; + vmcs12->guest_activity_state = evmcs->guest_activity_state; + vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs; + } + + /* + * Not used? + * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr; + * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr; + * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr; + * vmcs12->cr3_target_value0 = evmcs->cr3_target_value0; + * vmcs12->cr3_target_value1 = evmcs->cr3_target_value1; + * vmcs12->cr3_target_value2 = evmcs->cr3_target_value2; + * vmcs12->cr3_target_value3 = evmcs->cr3_target_value3; + * vmcs12->page_fault_error_code_mask = + * evmcs->page_fault_error_code_mask; + * vmcs12->page_fault_error_code_match = + * evmcs->page_fault_error_code_match; + * vmcs12->cr3_target_count = evmcs->cr3_target_count; + * vmcs12->vm_exit_msr_store_count = evmcs->vm_exit_msr_store_count; + * vmcs12->vm_exit_msr_load_count = evmcs->vm_exit_msr_load_count; + * vmcs12->vm_entry_msr_load_count = evmcs->vm_entry_msr_load_count; + */ + + /* + * Read only fields: + * vmcs12->guest_physical_address = evmcs->guest_physical_address; + * vmcs12->vm_instruction_error = evmcs->vm_instruction_error; + * vmcs12->vm_exit_reason = evmcs->vm_exit_reason; + * vmcs12->vm_exit_intr_info = evmcs->vm_exit_intr_info; + * vmcs12->vm_exit_intr_error_code = evmcs->vm_exit_intr_error_code; + * vmcs12->idt_vectoring_info_field = evmcs->idt_vectoring_info_field; + * vmcs12->idt_vectoring_error_code = evmcs->idt_vectoring_error_code; + * vmcs12->vm_exit_instruction_len = evmcs->vm_exit_instruction_len; + * vmcs12->vmx_instruction_info = evmcs->vmx_instruction_info; + * vmcs12->exit_qualification = evmcs->exit_qualification; + * vmcs12->guest_linear_address = evmcs->guest_linear_address; + * + * Not present in struct vmcs12: + * vmcs12->exit_io_instruction_ecx = evmcs->exit_io_instruction_ecx; + * vmcs12->exit_io_instruction_esi = evmcs->exit_io_instruction_esi; + * vmcs12->exit_io_instruction_edi = evmcs->exit_io_instruction_edi; + * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip; + */ + + return 0; +} + +static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx) +{ + struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12; + struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs; + + /* + * Should not be changed by KVM: + * + * evmcs->host_es_selector = vmcs12->host_es_selector; + * evmcs->host_cs_selector = vmcs12->host_cs_selector; + * evmcs->host_ss_selector = vmcs12->host_ss_selector; + * evmcs->host_ds_selector = vmcs12->host_ds_selector; + * evmcs->host_fs_selector = vmcs12->host_fs_selector; + * evmcs->host_gs_selector = vmcs12->host_gs_selector; + * evmcs->host_tr_selector = vmcs12->host_tr_selector; + * evmcs->host_ia32_pat = vmcs12->host_ia32_pat; + * evmcs->host_ia32_efer = vmcs12->host_ia32_efer; + * evmcs->host_cr0 = vmcs12->host_cr0; + * evmcs->host_cr3 = vmcs12->host_cr3; + * evmcs->host_cr4 = vmcs12->host_cr4; + * evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp; + * evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip; + * evmcs->host_rip = vmcs12->host_rip; + * evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs; + * evmcs->host_fs_base = vmcs12->host_fs_base; + * evmcs->host_gs_base = vmcs12->host_gs_base; + * evmcs->host_tr_base = vmcs12->host_tr_base; + * evmcs->host_gdtr_base = vmcs12->host_gdtr_base; + * evmcs->host_idtr_base = vmcs12->host_idtr_base; + * evmcs->host_rsp = vmcs12->host_rsp; + * sync_vmcs12() doesn't read these: + * evmcs->io_bitmap_a = vmcs12->io_bitmap_a; + * evmcs->io_bitmap_b = vmcs12->io_bitmap_b; + * evmcs->msr_bitmap = vmcs12->msr_bitmap; + * evmcs->ept_pointer = vmcs12->ept_pointer; + * evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap; + * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr; + * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr; + * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr; + * evmcs->cr3_target_value0 = vmcs12->cr3_target_value0; + * evmcs->cr3_target_value1 = vmcs12->cr3_target_value1; + * evmcs->cr3_target_value2 = vmcs12->cr3_target_value2; + * evmcs->cr3_target_value3 = vmcs12->cr3_target_value3; + * evmcs->tpr_threshold = vmcs12->tpr_threshold; + * evmcs->virtual_processor_id = vmcs12->virtual_processor_id; + * evmcs->exception_bitmap = vmcs12->exception_bitmap; + * evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer; + * evmcs->pin_based_vm_exec_control = vmcs12->pin_based_vm_exec_control; + * evmcs->vm_exit_controls = vmcs12->vm_exit_controls; + * evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control; + * evmcs->page_fault_error_code_mask = + * vmcs12->page_fault_error_code_mask; + * evmcs->page_fault_error_code_match = + * vmcs12->page_fault_error_code_match; + * evmcs->cr3_target_count = vmcs12->cr3_target_count; + * evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr; + * evmcs->tsc_offset = vmcs12->tsc_offset; + * evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl; + * evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask; + * evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask; + * evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow; + * evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow; + * evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count; + * evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count; + * evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count; + * + * Not present in struct vmcs12: + * evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx; + * evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi; + * evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi; + * evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip; + */ + + evmcs->guest_es_selector = vmcs12->guest_es_selector; + evmcs->guest_cs_selector = vmcs12->guest_cs_selector; + evmcs->guest_ss_selector = vmcs12->guest_ss_selector; + evmcs->guest_ds_selector = vmcs12->guest_ds_selector; + evmcs->guest_fs_selector = vmcs12->guest_fs_selector; + evmcs->guest_gs_selector = vmcs12->guest_gs_selector; + evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector; + evmcs->guest_tr_selector = vmcs12->guest_tr_selector; + + evmcs->guest_es_limit = vmcs12->guest_es_limit; + evmcs->guest_cs_limit = vmcs12->guest_cs_limit; + evmcs->guest_ss_limit = vmcs12->guest_ss_limit; + evmcs->guest_ds_limit = vmcs12->guest_ds_limit; + evmcs->guest_fs_limit = vmcs12->guest_fs_limit; + evmcs->guest_gs_limit = vmcs12->guest_gs_limit; + evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit; + evmcs->guest_tr_limit = vmcs12->guest_tr_limit; + evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit; + evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit; + + evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes; + evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes; + evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes; + evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes; + evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes; + evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes; + evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes; + evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes; + + evmcs->guest_es_base = vmcs12->guest_es_base; + evmcs->guest_cs_base = vmcs12->guest_cs_base; + evmcs->guest_ss_base = vmcs12->guest_ss_base; + evmcs->guest_ds_base = vmcs12->guest_ds_base; + evmcs->guest_fs_base = vmcs12->guest_fs_base; + evmcs->guest_gs_base = vmcs12->guest_gs_base; + evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base; + evmcs->guest_tr_base = vmcs12->guest_tr_base; + evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base; + evmcs->guest_idtr_base = vmcs12->guest_idtr_base; + + evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat; + evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer; + + evmcs->guest_pdptr0 = vmcs12->guest_pdptr0; + evmcs->guest_pdptr1 = vmcs12->guest_pdptr1; + evmcs->guest_pdptr2 = vmcs12->guest_pdptr2; + evmcs->guest_pdptr3 = vmcs12->guest_pdptr3; + + evmcs->guest_pending_dbg_exceptions = + vmcs12->guest_pending_dbg_exceptions; + evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp; + evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip; + + evmcs->guest_activity_state = vmcs12->guest_activity_state; + evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs; + + evmcs->guest_cr0 = vmcs12->guest_cr0; + evmcs->guest_cr3 = vmcs12->guest_cr3; + evmcs->guest_cr4 = vmcs12->guest_cr4; + evmcs->guest_dr7 = vmcs12->guest_dr7; + + evmcs->guest_physical_address = vmcs12->guest_physical_address; + + evmcs->vm_instruction_error = vmcs12->vm_instruction_error; + evmcs->vm_exit_reason = vmcs12->vm_exit_reason; + evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info; + evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code; + evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field; + evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code; + evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len; + evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info; + + evmcs->exit_qualification = vmcs12->exit_qualification; + + evmcs->guest_linear_address = vmcs12->guest_linear_address; + evmcs->guest_rsp = vmcs12->guest_rsp; + evmcs->guest_rflags = vmcs12->guest_rflags; + + evmcs->guest_interruptibility_info = + vmcs12->guest_interruptibility_info; + evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control; + evmcs->vm_entry_controls = vmcs12->vm_entry_controls; + evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field; + evmcs->vm_entry_exception_error_code = + vmcs12->vm_entry_exception_error_code; + evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len; + + evmcs->guest_rip = vmcs12->guest_rip; + + evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs; + + return 0; +} + /* * Copy the writable VMCS shadow fields back to the VMCS12, in case * they have been modified by the L1 guest. Note that the "read-only" @@ -8683,20 +9121,6 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) vmcs_load(vmx->loaded_vmcs->vmcs); } -/* - * VMX instructions which assume a current vmcs12 (i.e., that VMPTRLD was - * used before) all generate the same failure when it is missing. - */ -static int nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - if (vmx->nested.current_vmptr == -1ull) { - nested_vmx_failInvalid(vcpu); - return 0; - } - return 1; -} - static int handle_vmread(struct kvm_vcpu *vcpu) { unsigned long field; @@ -8709,8 +9133,8 @@ static int handle_vmread(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; - if (!nested_vmx_check_vmcs12(vcpu)) - return kvm_skip_emulated_instruction(vcpu); + if (to_vmx(vcpu)->nested.current_vmptr == -1ull) + return nested_vmx_failInvalid(vcpu); if (!is_guest_mode(vcpu)) vmcs12 = get_vmcs12(vcpu); @@ -8719,20 +9143,18 @@ static int handle_vmread(struct kvm_vcpu *vcpu) * When vmcs->vmcs_link_pointer is -1ull, any VMREAD * to shadowed-field sets the ALU flags for VMfailInvalid. */ - if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) { - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } + if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) + return nested_vmx_failInvalid(vcpu); vmcs12 = get_shadow_vmcs12(vcpu); } /* Decode instruction info and find the field to read */ field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); /* Read the field, zero-extended to a u64 field_value */ - if (vmcs12_read_any(vmcs12, field, &field_value) < 0) { - nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); - return kvm_skip_emulated_instruction(vcpu); - } + if (vmcs12_read_any(vmcs12, field, &field_value) < 0) + return nested_vmx_failValid(vcpu, + VMXERR_UNSUPPORTED_VMCS_COMPONENT); + /* * Now copy part of this value to register or memory, as requested. * Note that the number of bits actually copied is 32 or 64 depending @@ -8750,8 +9172,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) (is_long_mode(vcpu) ? 8 : 4), NULL); } - nested_vmx_succeed(vcpu); - return kvm_skip_emulated_instruction(vcpu); + return nested_vmx_succeed(vcpu); } @@ -8776,8 +9197,8 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; - if (!nested_vmx_check_vmcs12(vcpu)) - return kvm_skip_emulated_instruction(vcpu); + if (vmx->nested.current_vmptr == -1ull) + return nested_vmx_failInvalid(vcpu); if (vmx_instruction_info & (1u << 10)) field_value = kvm_register_readl(vcpu, @@ -8800,11 +9221,9 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) * VMCS," then the "read-only" fields are actually read/write. */ if (vmcs_field_readonly(field) && - !nested_cpu_has_vmwrite_any_field(vcpu)) { - nested_vmx_failValid(vcpu, + !nested_cpu_has_vmwrite_any_field(vcpu)) + return nested_vmx_failValid(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); - return kvm_skip_emulated_instruction(vcpu); - } if (!is_guest_mode(vcpu)) vmcs12 = get_vmcs12(vcpu); @@ -8813,18 +9232,14 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) * When vmcs->vmcs_link_pointer is -1ull, any VMWRITE * to shadowed-field sets the ALU flags for VMfailInvalid. */ - if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) { - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } + if (get_vmcs12(vcpu)->vmcs_link_pointer == -1ull) + return nested_vmx_failInvalid(vcpu); vmcs12 = get_shadow_vmcs12(vcpu); - } - if (vmcs12_write_any(vmcs12, field, field_value) < 0) { - nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); - return kvm_skip_emulated_instruction(vcpu); - } + if (vmcs12_write_any(vmcs12, field, field_value) < 0) + return nested_vmx_failValid(vcpu, + VMXERR_UNSUPPORTED_VMCS_COMPONENT); /* * Do not track vmcs12 dirty-state if in guest-mode @@ -8846,8 +9261,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) } } - nested_vmx_succeed(vcpu); - return kvm_skip_emulated_instruction(vcpu); + return nested_vmx_succeed(vcpu); } static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr) @@ -8858,7 +9272,7 @@ static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr) SECONDARY_EXEC_SHADOW_VMCS); vmcs_write64(VMCS_LINK_POINTER, __pa(vmx->vmcs01.shadow_vmcs)); - vmx->nested.sync_shadow_vmcs = true; + vmx->nested.need_vmcs12_sync = true; } vmx->nested.dirty_vmcs12 = true; } @@ -8875,36 +9289,37 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) if (nested_vmx_get_vmptr(vcpu, &vmptr)) return 1; - if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) { - nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS); - return kvm_skip_emulated_instruction(vcpu); - } + if (!PAGE_ALIGNED(vmptr) || (vmptr >> cpuid_maxphyaddr(vcpu))) + return nested_vmx_failValid(vcpu, + VMXERR_VMPTRLD_INVALID_ADDRESS); - if (vmptr == vmx->nested.vmxon_ptr) { - nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_VMXON_POINTER); - return kvm_skip_emulated_instruction(vcpu); - } + if (vmptr == vmx->nested.vmxon_ptr) + return nested_vmx_failValid(vcpu, + VMXERR_VMPTRLD_VMXON_POINTER); + + /* Forbid normal VMPTRLD if Enlightened version was used */ + if (vmx->nested.hv_evmcs) + return 1; if (vmx->nested.current_vmptr != vmptr) { struct vmcs12 *new_vmcs12; struct page *page; page = kvm_vcpu_gpa_to_page(vcpu, vmptr); - if (is_error_page(page)) { - nested_vmx_failInvalid(vcpu); - return kvm_skip_emulated_instruction(vcpu); - } + if (is_error_page(page)) + return nested_vmx_failInvalid(vcpu); + new_vmcs12 = kmap(page); if (new_vmcs12->hdr.revision_id != VMCS12_REVISION || (new_vmcs12->hdr.shadow_vmcs && !nested_cpu_has_vmx_shadow_vmcs(vcpu))) { kunmap(page); kvm_release_page_clean(page); - nested_vmx_failValid(vcpu, + return nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID); - return kvm_skip_emulated_instruction(vcpu); } - nested_release_vmcs12(vmx); + nested_release_vmcs12(vcpu); + /* * Load VMCS12 from guest memory since it is not already * cached. @@ -8916,8 +9331,71 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) set_current_vmptr(vmx, vmptr); } - nested_vmx_succeed(vcpu); - return kvm_skip_emulated_instruction(vcpu); + return nested_vmx_succeed(vcpu); +} + +/* + * This is an equivalent of the nested hypervisor executing the vmptrld + * instruction. + */ +static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, + bool from_launch) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct hv_vp_assist_page assist_page; + + if (likely(!vmx->nested.enlightened_vmcs_enabled)) + return 1; + + if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page))) + return 1; + + if (unlikely(!assist_page.enlighten_vmentry)) + return 1; + + if (unlikely(assist_page.current_nested_vmcs != + vmx->nested.hv_evmcs_vmptr)) { + + if (!vmx->nested.hv_evmcs) + vmx->nested.current_vmptr = -1ull; + + nested_release_evmcs(vcpu); + + vmx->nested.hv_evmcs_page = kvm_vcpu_gpa_to_page( + vcpu, assist_page.current_nested_vmcs); + + if (unlikely(is_error_page(vmx->nested.hv_evmcs_page))) + return 0; + + vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page); + + if (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION) { + nested_release_evmcs(vcpu); + return 0; + } + + vmx->nested.dirty_vmcs12 = true; + /* + * As we keep L2 state for one guest only 'hv_clean_fields' mask + * can't be used when we switch between them. Reset it here for + * simplicity. + */ + vmx->nested.hv_evmcs->hv_clean_fields &= + ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + vmx->nested.hv_evmcs_vmptr = assist_page.current_nested_vmcs; + + /* + * Unlike normal vmcs12, enlightened vmcs12 is not fully + * reloaded from guest's memory (read only fields, fields not + * present in struct hv_enlightened_vmcs, ...). Make sure there + * are no leftovers. + */ + if (from_launch) + memset(vmx->nested.cached_vmcs12, 0, + sizeof(*vmx->nested.cached_vmcs12)); + + } + return 1; } /* Emulate the VMPTRST instruction */ @@ -8932,6 +9410,9 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; + if (unlikely(to_vmx(vcpu)->nested.hv_evmcs)) + return 1; + if (get_vmx_mem_address(vcpu, exit_qual, instr_info, true, &gva)) return 1; /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ @@ -8940,8 +9421,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) kvm_inject_page_fault(vcpu, &e); return 1; } - nested_vmx_succeed(vcpu); - return kvm_skip_emulated_instruction(vcpu); + return nested_vmx_succeed(vcpu); } /* Emulate the INVEPT instruction */ @@ -8971,11 +9451,9 @@ static int handle_invept(struct kvm_vcpu *vcpu) types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; - if (type >= 32 || !(types & (1 << type))) { - nested_vmx_failValid(vcpu, + if (type >= 32 || !(types & (1 << type))) + return nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); - return kvm_skip_emulated_instruction(vcpu); - } /* According to the Intel VMX instruction reference, the memory * operand is read even if it isn't needed (e.g., for type==global) @@ -8997,14 +9475,20 @@ static int handle_invept(struct kvm_vcpu *vcpu) case VMX_EPT_EXTENT_CONTEXT: kvm_mmu_sync_roots(vcpu); kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - nested_vmx_succeed(vcpu); break; default: BUG_ON(1); break; } - return kvm_skip_emulated_instruction(vcpu); + return nested_vmx_succeed(vcpu); +} + +static u16 nested_get_vpid02(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid; } static int handle_invvpid(struct kvm_vcpu *vcpu) @@ -9018,6 +9502,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) u64 vpid; u64 gla; } operand; + u16 vpid02; if (!(vmx->nested.msrs.secondary_ctls_high & SECONDARY_EXEC_ENABLE_VPID) || @@ -9035,11 +9520,9 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) types = (vmx->nested.msrs.vpid_caps & VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8; - if (type >= 32 || !(types & (1 << type))) { - nested_vmx_failValid(vcpu, + if (type >= 32 || !(types & (1 << type))) + return nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); - return kvm_skip_emulated_instruction(vcpu); - } /* according to the intel vmx instruction reference, the memory * operand is read even if it isn't needed (e.g., for type==global) @@ -9051,47 +9534,39 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) kvm_inject_page_fault(vcpu, &e); return 1; } - if (operand.vpid >> 16) { - nested_vmx_failValid(vcpu, + if (operand.vpid >> 16) + return nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); - return kvm_skip_emulated_instruction(vcpu); - } + vpid02 = nested_get_vpid02(vcpu); switch (type) { case VMX_VPID_EXTENT_INDIVIDUAL_ADDR: if (!operand.vpid || - is_noncanonical_address(operand.gla, vcpu)) { - nested_vmx_failValid(vcpu, + is_noncanonical_address(operand.gla, vcpu)) + return nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); - return kvm_skip_emulated_instruction(vcpu); - } - if (cpu_has_vmx_invvpid_individual_addr() && - vmx->nested.vpid02) { + if (cpu_has_vmx_invvpid_individual_addr()) { __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, - vmx->nested.vpid02, operand.gla); + vpid02, operand.gla); } else - __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true); + __vmx_flush_tlb(vcpu, vpid02, false); break; case VMX_VPID_EXTENT_SINGLE_CONTEXT: case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL: - if (!operand.vpid) { - nested_vmx_failValid(vcpu, + if (!operand.vpid) + return nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); - return kvm_skip_emulated_instruction(vcpu); - } - __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true); + __vmx_flush_tlb(vcpu, vpid02, false); break; case VMX_VPID_EXTENT_ALL_CONTEXT: - __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true); + __vmx_flush_tlb(vcpu, vpid02, false); break; default: WARN_ON_ONCE(1); return kvm_skip_emulated_instruction(vcpu); } - nested_vmx_succeed(vcpu); - - return kvm_skip_emulated_instruction(vcpu); + return nested_vmx_succeed(vcpu); } static int handle_invpcid(struct kvm_vcpu *vcpu) @@ -9162,11 +9637,11 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) } for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - if (kvm_get_pcid(vcpu, vcpu->arch.mmu.prev_roots[i].cr3) + if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3) == operand.pcid) roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); - kvm_mmu_free_roots(vcpu, roots_to_free); + kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free); /* * If neither the current cr3 nor any of the prev_roots use the * given PCID, then nothing needs to be done here because a @@ -9293,7 +9768,7 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu, kvm_mmu_unload(vcpu); mmu->ept_ad = accessed_dirty; - mmu->base_role.ad_disabled = !accessed_dirty; + mmu->mmu_role.base.ad_disabled = !accessed_dirty; vmcs12->ept_pointer = address; /* * TODO: Check what's the correct approach in case @@ -9652,9 +10127,6 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) return false; else if (is_page_fault(intr_info)) return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept; - else if (is_no_device(intr_info) && - !(vmcs12->guest_cr0 & X86_CR0_TS)) - return false; else if (is_debug(intr_info) && vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) @@ -10676,9 +11148,25 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmcs_write32(PLE_WINDOW, vmx->ple_window); } - if (vmx->nested.sync_shadow_vmcs) { - copy_vmcs12_to_shadow(vmx); - vmx->nested.sync_shadow_vmcs = false; + if (vmx->nested.need_vmcs12_sync) { + /* + * hv_evmcs may end up being not mapped after migration (when + * L2 was running), map it here to make sure vmcs12 changes are + * properly reflected. + */ + if (vmx->nested.enlightened_vmcs_enabled && + !vmx->nested.hv_evmcs) + nested_vmx_handle_enlightened_vmptrld(vcpu, false); + + if (vmx->nested.hv_evmcs) { + copy_vmcs12_to_enlightened(vmx); + /* All fields are clean */ + vmx->nested.hv_evmcs->hv_clean_fields |= + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL; + } else { + copy_vmcs12_to_shadow(vmx); + } + vmx->nested.need_vmcs12_sync = false; } if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) @@ -10745,7 +11233,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "mov %%" _ASM_SP ", (%%" _ASM_SI ") \n\t" "jmp 1f \n\t" "2: \n\t" - __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" + __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t" "1: \n\t" /* Reload cr2 if changed */ "mov %c[cr2](%0), %%" _ASM_AX " \n\t" @@ -10777,9 +11265,9 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) /* Enter guest mode */ "jne 1f \n\t" - __ex(ASM_VMX_VMLAUNCH) "\n\t" + __ex("vmlaunch") "\n\t" "jmp 2f \n\t" - "1: " __ex(ASM_VMX_VMRESUME) "\n\t" + "1: " __ex("vmresume") "\n\t" "2: " /* Save guest registers, load host registers, keep flags */ "mov %0, %c[wordsize](%%" _ASM_SP ") \n\t" @@ -10801,6 +11289,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) "mov %%r13, %c[r13](%0) \n\t" "mov %%r14, %c[r14](%0) \n\t" "mov %%r15, %c[r15](%0) \n\t" + /* + * Clear host registers marked as clobbered to prevent + * speculative use. + */ "xor %%r8d, %%r8d \n\t" "xor %%r9d, %%r9d \n\t" "xor %%r10d, %%r10d \n\t" @@ -10958,6 +11450,10 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) vmx->loaded_vmcs = vmcs; vmx_vcpu_load(vcpu, cpu); put_cpu(); + + vm_entry_controls_reset_shadow(vmx); + vm_exit_controls_reset_shadow(vmx); + vmx_segment_cache_clear(vmx); } /* @@ -10966,12 +11462,10 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) */ static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - - vcpu_load(vcpu); - vmx_switch_vmcs(vcpu, &vmx->vmcs01); - free_nested(vmx); - vcpu_put(vcpu); + vcpu_load(vcpu); + vmx_switch_vmcs(vcpu, &to_vmx(vcpu)->vmcs01); + free_nested(vcpu); + vcpu_put(vcpu); } static void vmx_free_vcpu(struct kvm_vcpu *vcpu) @@ -11334,28 +11828,28 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) return get_vmcs12(vcpu)->ept_pointer; } -static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) +static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) { WARN_ON(mmu_is_nested(vcpu)); - if (!valid_ept_address(vcpu, nested_ept_get_cr3(vcpu))) - return 1; + vcpu->arch.mmu = &vcpu->arch.guest_mmu; kvm_init_shadow_ept_mmu(vcpu, to_vmx(vcpu)->nested.msrs.ept_caps & VMX_EPT_EXECUTE_ONLY_BIT, nested_ept_ad_enabled(vcpu), nested_ept_get_cr3(vcpu)); - vcpu->arch.mmu.set_cr3 = vmx_set_cr3; - vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; - vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; + vcpu->arch.mmu->set_cr3 = vmx_set_cr3; + vcpu->arch.mmu->get_cr3 = nested_ept_get_cr3; + vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault; + vcpu->arch.mmu->get_pdptr = kvm_pdptr_read; vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; - return 0; } static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) { - vcpu->arch.walk_mmu = &vcpu->arch.mmu; + vcpu->arch.mmu = &vcpu->arch.root_mmu; + vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; } static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, @@ -11716,7 +12210,7 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, !nested_exit_intr_ack_set(vcpu) || (vmcs12->posted_intr_nv & 0xff00) || (vmcs12->posted_intr_desc_addr & 0x3f) || - (!page_address_valid(vcpu, vmcs12->posted_intr_desc_addr)))) + (vmcs12->posted_intr_desc_addr >> cpuid_maxphyaddr(vcpu)))) return -EINVAL; /* tpr shadow is needed by all apicv features. */ @@ -11772,15 +12266,12 @@ static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu, static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - u64 address = vmcs12->pml_address; - int maxphyaddr = cpuid_maxphyaddr(vcpu); + if (!nested_cpu_has_pml(vmcs12)) + return 0; - if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) { - if (!nested_cpu_has_ept(vmcs12) || - !IS_ALIGNED(address, 4096) || - address >> maxphyaddr) - return -EINVAL; - } + if (!nested_cpu_has_ept(vmcs12) || + !page_address_valid(vcpu, vmcs12->pml_address)) + return -EINVAL; return 0; } @@ -11960,112 +12451,87 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne return 0; } -static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) +/* + * Returns if KVM is able to config CPU to tag TLB entries + * populated by L2 differently than TLB entries populated + * by L1. + * + * If L1 uses EPT, then TLB entries are tagged with different EPTP. + * + * If L1 uses VPID and we allocated a vpid02, TLB entries are tagged + * with different VPID (L1 entries are tagged with vmx->vpid + * while L2 entries are tagged with vmx->nested.vpid02). + */ +static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); - vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector); - vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector); - vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector); - vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector); - vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector); - vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector); - vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit); - vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit); - vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit); - vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit); - vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit); - vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit); - vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit); - vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit); - vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit); - vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes); - vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes); - vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes); - vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes); - vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes); - vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes); - vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes); - vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base); - vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base); - vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base); - vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base); - vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base); - vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base); - vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); - vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); - - vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); - vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, - vmcs12->guest_pending_dbg_exceptions); - vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); - vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip); + return nested_cpu_has_ept(vmcs12) || + (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02); +} - if (nested_cpu_has_xsaves(vmcs12)) - vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap); - vmcs_write64(VMCS_LINK_POINTER, -1ull); +static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) +{ + if (vmx->nested.nested_run_pending && + (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) + return vmcs12->guest_ia32_efer; + else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) + return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME); + else + return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME); +} - if (cpu_has_vmx_posted_intr()) - vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR); +static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx) +{ + /* + * If vmcs02 hasn't been initialized, set the constant vmcs02 state + * according to L0's settings (vmcs12 is irrelevant here). Host + * fields that come from L0 and are not constant, e.g. HOST_CR3, + * will be set as needed prior to VMLAUNCH/VMRESUME. + */ + if (vmx->nested.vmcs02_initialized) + return; + vmx->nested.vmcs02_initialized = true; /* - * Whether page-faults are trapped is determined by a combination of - * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. - * If enable_ept, L0 doesn't care about page faults and we should - * set all of these to L1's desires. However, if !enable_ept, L0 does - * care about (at least some) page faults, and because it is not easy - * (if at all possible?) to merge L0 and L1's desires, we simply ask - * to exit on each and every L2 page fault. This is done by setting - * MASK=MATCH=0 and (see below) EB.PF=1. - * Note that below we don't need special code to set EB.PF beyond the - * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept, - * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when - * !enable_ept, EB.PF is 1, so the "or" will always be 1. + * We don't care what the EPTP value is we just need to guarantee + * it's valid so we don't get a false positive when doing early + * consistency checks. */ - vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, - enable_ept ? vmcs12->page_fault_error_code_mask : 0); - vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, - enable_ept ? vmcs12->page_fault_error_code_match : 0); + if (enable_ept && nested_early_check) + vmcs_write64(EPT_POINTER, construct_eptp(&vmx->vcpu, 0)); /* All VMFUNCs are currently emulated through L0 vmexits. */ if (cpu_has_vmx_vmfunc()) vmcs_write64(VM_FUNCTION_CONTROL, 0); - if (cpu_has_vmx_apicv()) { - vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0); - vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1); - vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2); - vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3); - } + if (cpu_has_vmx_posted_intr()) + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR); - /* - * Set host-state according to L0's settings (vmcs12 is irrelevant here) - * Some constant fields are set here by vmx_set_constant_host_state(). - * Other fields are different per CPU, and will be set later when - * vmx_vcpu_load() is called, and when vmx_prepare_switch_to_guest() - * is called. - */ - vmx_set_constant_host_state(vmx); + if (cpu_has_vmx_msr_bitmap()) + vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); + + if (enable_pml) + vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); /* - * Set the MSR load/store lists to match L0's settings. + * Set the MSR load/store lists to match L0's settings. Only the + * addresses are constant (for vmcs02), the counts can change based + * on L2's behavior, e.g. switching to/from long mode. */ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0); - vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val)); - vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val)); - set_cr4_guest_host_mask(vmx); + vmx_set_constant_host_state(vmx); +} - if (kvm_mpx_supported()) { - if (vmx->nested.nested_run_pending && - (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) - vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); - else - vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs); - } +static void prepare_vmcs02_early_full(struct vcpu_vmx *vmx, + struct vmcs12 *vmcs12) +{ + prepare_vmcs02_constant_state(vmx); + + vmcs_write64(VMCS_LINK_POINTER, -1ull); if (enable_vpid) { if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) @@ -12073,78 +12539,30 @@ static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) else vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); } - - /* - * L1 may access the L2's PDPTR, so save them to construct vmcs12 - */ - if (enable_ept) { - vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); - vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); - vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); - vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); - } - - if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap)); } -/* - * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested - * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it - * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2 - * guest in a way that will both be appropriate to L1's requests, and our - * needs. In addition to modifying the active vmcs (which is vmcs02), this - * function also has additional necessary side-effects, like setting various - * vcpu->arch fields. - * Returns 0 on success, 1 on failure. Invalid state exit qualification code - * is assigned to entry_failure_code on failure. - */ -static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, - u32 *entry_failure_code) +static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) { - struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exec_control, vmcs12_exec_ctrl; + u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12); - if (vmx->nested.dirty_vmcs12) { - prepare_vmcs02_full(vcpu, vmcs12); - vmx->nested.dirty_vmcs12 = false; - } + if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) + prepare_vmcs02_early_full(vmx, vmcs12); /* - * First, the fields that are shadowed. This must be kept in sync - * with vmx_shadow_fields.h. + * HOST_RSP is normally set correctly in vmx_vcpu_run() just before + * entry, but only if the current (host) sp changed from the value + * we wrote last (vmx->host_rsp). This cache is no longer relevant + * if we switch vmcs, and rather than hold a separate cache per vmcs, + * here we just force the write to happen on entry. host_rsp will + * also be written unconditionally by nested_vmx_check_vmentry_hw() + * if we are doing early consistency checks via hardware. */ + vmx->host_rsp = 0; - vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); - vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit); - vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes); - vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base); - vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base); - - if (vmx->nested.nested_run_pending && - (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { - kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); - vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); - } else { - kvm_set_dr(vcpu, 7, vcpu->arch.dr7); - vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); - } - if (vmx->nested.nested_run_pending) { - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - vmcs12->vm_entry_intr_info_field); - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, - vmcs12->vm_entry_exception_error_code); - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, - vmcs12->vm_entry_instruction_len); - vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, - vmcs12->guest_interruptibility_info); - vmx->loaded_vmcs->nmi_known_unmasked = - !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI); - } else { - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); - } - vmx_set_rflags(vcpu, vmcs12->guest_rflags); - + /* + * PIN CONTROLS + */ exec_control = vmcs12->pin_based_vm_exec_control; /* Preemption timer setting is computed directly in vmx_vcpu_run. */ @@ -12159,13 +12577,43 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, } else { exec_control &= ~PIN_BASED_POSTED_INTR; } - vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); - vmx->nested.preemption_timer_expired = false; - if (nested_cpu_has_preemption_timer(vmcs12)) - vmx_start_preemption_timer(vcpu); + /* + * EXEC CONTROLS + */ + exec_control = vmx_exec_control(vmx); /* L0's desires */ + exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; + exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; + exec_control &= ~CPU_BASED_TPR_SHADOW; + exec_control |= vmcs12->cpu_based_vm_exec_control; + /* + * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if + * nested_get_vmcs12_pages can't fix it up, the illegal value + * will result in a VM entry failure. + */ + if (exec_control & CPU_BASED_TPR_SHADOW) { + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull); + vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); + } else { +#ifdef CONFIG_X86_64 + exec_control |= CPU_BASED_CR8_LOAD_EXITING | + CPU_BASED_CR8_STORE_EXITING; +#endif + } + + /* + * A vmexit (to either L1 hypervisor or L0 userspace) is always needed + * for I/O port accesses. + */ + exec_control &= ~CPU_BASED_USE_IO_BITMAPS; + exec_control |= CPU_BASED_UNCOND_IO_EXITING; + vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); + + /* + * SECONDARY EXEC CONTROLS + */ if (cpu_has_secondary_exec_ctrls()) { exec_control = vmx->secondary_exec_control; @@ -12206,43 +12654,214 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, } /* - * HOST_RSP is normally set correctly in vmx_vcpu_run() just before - * entry, but only if the current (host) sp changed from the value - * we wrote last (vmx->host_rsp). This cache is no longer relevant - * if we switch vmcs, and rather than hold a separate cache per vmcs, - * here we just force the write to happen on entry. + * ENTRY CONTROLS + * + * vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE + * are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate + * on the related bits (if supported by the CPU) in the hope that + * we can avoid VMWrites during vmx_set_efer(). + */ + exec_control = (vmcs12->vm_entry_controls | vmcs_config.vmentry_ctrl) & + ~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER; + if (cpu_has_load_ia32_efer) { + if (guest_efer & EFER_LMA) + exec_control |= VM_ENTRY_IA32E_MODE; + if (guest_efer != host_efer) + exec_control |= VM_ENTRY_LOAD_IA32_EFER; + } + vm_entry_controls_init(vmx, exec_control); + + /* + * EXIT CONTROLS + * + * L2->L1 exit controls are emulated - the hardware exit is to L0 so + * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER + * bits may be modified by vmx_set_efer() in prepare_vmcs02(). */ - vmx->host_rsp = 0; + exec_control = vmcs_config.vmexit_ctrl; + if (cpu_has_load_ia32_efer && guest_efer != host_efer) + exec_control |= VM_EXIT_LOAD_IA32_EFER; + vm_exit_controls_init(vmx, exec_control); - exec_control = vmx_exec_control(vmx); /* L0's desires */ - exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; - exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; - exec_control &= ~CPU_BASED_TPR_SHADOW; - exec_control |= vmcs12->cpu_based_vm_exec_control; + /* + * Conceptually we want to copy the PML address and index from + * vmcs01 here, and then back to vmcs01 on nested vmexit. But, + * since we always flush the log on each vmexit and never change + * the PML address (once set), this happens to be equivalent to + * simply resetting the index in vmcs02. + */ + if (enable_pml) + vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); /* - * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if - * nested_get_vmcs12_pages can't fix it up, the illegal value - * will result in a VM entry failure. + * Interrupt/Exception Fields */ - if (exec_control & CPU_BASED_TPR_SHADOW) { - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull); - vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); + if (vmx->nested.nested_run_pending) { + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + vmcs12->vm_entry_intr_info_field); + vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, + vmcs12->vm_entry_exception_error_code); + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, + vmcs12->vm_entry_instruction_len); + vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, + vmcs12->guest_interruptibility_info); + vmx->loaded_vmcs->nmi_known_unmasked = + !(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI); } else { -#ifdef CONFIG_X86_64 - exec_control |= CPU_BASED_CR8_LOAD_EXITING | - CPU_BASED_CR8_STORE_EXITING; -#endif + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); } +} + +static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) +{ + struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs; + + if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) { + vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); + vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); + vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector); + vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector); + vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector); + vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector); + vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector); + vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector); + vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit); + vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit); + vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit); + vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit); + vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit); + vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit); + vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit); + vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit); + vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit); + vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit); + vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes); + vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes); + vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes); + vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes); + vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes); + vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes); + vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base); + vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base); + vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base); + vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base); + vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base); + vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base); + vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base); + vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base); + vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); + vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); + } + + if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) { + vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, + vmcs12->guest_pending_dbg_exceptions); + vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp); + vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip); + + /* + * L1 may access the L2's PDPTR, so save them to construct + * vmcs12 + */ + if (enable_ept) { + vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0); + vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1); + vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2); + vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3); + } + } + + if (nested_cpu_has_xsaves(vmcs12)) + vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap); /* - * A vmexit (to either L1 hypervisor or L0 userspace) is always needed - * for I/O port accesses. + * Whether page-faults are trapped is determined by a combination of + * 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. + * If enable_ept, L0 doesn't care about page faults and we should + * set all of these to L1's desires. However, if !enable_ept, L0 does + * care about (at least some) page faults, and because it is not easy + * (if at all possible?) to merge L0 and L1's desires, we simply ask + * to exit on each and every L2 page fault. This is done by setting + * MASK=MATCH=0 and (see below) EB.PF=1. + * Note that below we don't need special code to set EB.PF beyond the + * "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept, + * vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when + * !enable_ept, EB.PF is 1, so the "or" will always be 1. */ - exec_control &= ~CPU_BASED_USE_IO_BITMAPS; - exec_control |= CPU_BASED_UNCOND_IO_EXITING; + vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, + enable_ept ? vmcs12->page_fault_error_code_mask : 0); + vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, + enable_ept ? vmcs12->page_fault_error_code_match : 0); - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control); + if (cpu_has_vmx_apicv()) { + vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0); + vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1); + vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2); + vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3); + } + + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); + + set_cr4_guest_host_mask(vmx); + + if (kvm_mpx_supported()) { + if (vmx->nested.nested_run_pending && + (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); + else + vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs); + } +} + +/* + * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested + * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it + * with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2 + * guest in a way that will both be appropriate to L1's requests, and our + * needs. In addition to modifying the active vmcs (which is vmcs02), this + * function also has additional necessary side-effects, like setting various + * vcpu->arch fields. + * Returns 0 on success, 1 on failure. Invalid state exit qualification code + * is assigned to entry_failure_code on failure. + */ +static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, + u32 *entry_failure_code) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs; + + if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) { + prepare_vmcs02_full(vmx, vmcs12); + vmx->nested.dirty_vmcs12 = false; + } + + /* + * First, the fields that are shadowed. This must be kept in sync + * with vmx_shadow_fields.h. + */ + if (!hv_evmcs || !(hv_evmcs->hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) { + vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes); + vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes); + } + + if (vmx->nested.nested_run_pending && + (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { + kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); + vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); + } else { + kvm_set_dr(vcpu, 7, vcpu->arch.dr7); + vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); + } + vmx_set_rflags(vcpu, vmcs12->guest_rflags); + + vmx->nested.preemption_timer_expired = false; + if (nested_cpu_has_preemption_timer(vmcs12)) + vmx_start_preemption_timer(vcpu); /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the * bitwise-or of what L1 wants to trap for L2, and what we want to @@ -12252,20 +12871,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask; vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); - /* L2->L1 exit controls are emulated - the hardware exit is to L0 so - * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER - * bits are further modified by vmx_set_efer() below. - */ - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); - - /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are - * emulated by vmx_set_efer(), below. - */ - vm_entry_controls_init(vmx, - (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & - ~VM_ENTRY_IA32E_MODE) | - (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); - if (vmx->nested.nested_run_pending && (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) { vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); @@ -12288,37 +12893,29 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * influence global bitmap(for vpid01 and vpid02 allocation) * even if spawn a lot of nested vCPUs. */ - if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) { + if (nested_cpu_has_vpid(vmcs12) && nested_has_guest_tlb_tag(vcpu)) { if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) { vmx->nested.last_vpid = vmcs12->virtual_processor_id; - __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true); + __vmx_flush_tlb(vcpu, nested_get_vpid02(vcpu), false); } } else { - vmx_flush_tlb(vcpu, true); + /* + * If L1 use EPT, then L0 needs to execute INVEPT on + * EPTP02 instead of EPTP01. Therefore, delay TLB + * flush until vmcs02->eptp is fully updated by + * KVM_REQ_LOAD_CR3. Note that this assumes + * KVM_REQ_TLB_FLUSH is evaluated after + * KVM_REQ_LOAD_CR3 in vcpu_enter_guest(). + */ + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } } - if (enable_pml) { - /* - * Conceptually we want to copy the PML address and index from - * vmcs01 here, and then back to vmcs01 on nested vmexit. But, - * since we always flush the log on each vmexit, this happens - * to be equivalent to simply resetting the fields in vmcs02. - */ - ASSERT(vmx->pml_pg); - vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg)); - vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1); - } - - if (nested_cpu_has_ept(vmcs12)) { - if (nested_ept_init_mmu_context(vcpu)) { - *entry_failure_code = ENTRY_FAIL_DEFAULT; - return 1; - } - } else if (nested_cpu_has2(vmcs12, - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + if (nested_cpu_has_ept(vmcs12)) + nested_ept_init_mmu_context(vcpu); + else if (nested_cpu_has2(vmcs12, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) vmx_flush_tlb(vcpu, true); - } /* * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those @@ -12334,14 +12931,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmx_set_cr4(vcpu, vmcs12->guest_cr4); vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12)); - if (vmx->nested.nested_run_pending && - (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) - vcpu->arch.efer = vmcs12->guest_ia32_efer; - else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) - vcpu->arch.efer |= (EFER_LMA | EFER_LME); - else - vcpu->arch.efer &= ~(EFER_LMA | EFER_LME); - /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */ + vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12); + /* Note: may modify VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */ vmx_set_efer(vcpu, vcpu->arch.efer); /* @@ -12383,6 +12974,7 @@ static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12) static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct vcpu_vmx *vmx = to_vmx(vcpu); + bool ia32e; if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) @@ -12457,6 +13049,21 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD; /* + * If the load IA32_EFER VM-exit control is 1, bits reserved in the + * IA32_EFER MSR must be 0 in the field for that register. In addition, + * the values of the LMA and LME bits in the field must each be that of + * the host address-space size VM-exit control. + */ + if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { + ia32e = (vmcs12->vm_exit_controls & + VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; + if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || + ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || + ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) + return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD; + } + + /* * From the Intel SDM, volume 3: * Fields relevant to VM-entry event injection must be set properly. * These fields are the VM-entry interruption-information field, the @@ -12512,6 +13119,10 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) } } + if (nested_cpu_has_ept(vmcs12) && + !valid_ept_address(vcpu, vmcs12->ept_pointer)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + return 0; } @@ -12577,21 +13188,6 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, return 1; } - /* - * If the load IA32_EFER VM-exit control is 1, bits reserved in the - * IA32_EFER MSR must be 0 in the field for that register. In addition, - * the values of the LMA and LME bits in the field must each be that of - * the host address-space size VM-exit control. - */ - if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) { - ia32e = (vmcs12->vm_exit_controls & - VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; - if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || - ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || - ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) - return 1; - } - if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) && (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) || (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))) @@ -12600,26 +13196,139 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, return 0; } +static int __noclone nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + unsigned long cr3, cr4; + + if (!nested_early_check) + return 0; + + if (vmx->msr_autoload.host.nr) + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0); + if (vmx->msr_autoload.guest.nr) + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0); + + preempt_disable(); + + vmx_prepare_switch_to_guest(vcpu); + + /* + * Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS, + * which is reserved to '1' by hardware. GUEST_RFLAGS is guaranteed to + * be written (by preparve_vmcs02()) before the "real" VMEnter, i.e. + * there is no need to preserve other bits or save/restore the field. + */ + vmcs_writel(GUEST_RFLAGS, 0); + + vmcs_writel(HOST_RIP, vmx_early_consistency_check_return); + + cr3 = __get_current_cr3_fast(); + if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) { + vmcs_writel(HOST_CR3, cr3); + vmx->loaded_vmcs->host_state.cr3 = cr3; + } + + cr4 = cr4_read_shadow(); + if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) { + vmcs_writel(HOST_CR4, cr4); + vmx->loaded_vmcs->host_state.cr4 = cr4; + } + + vmx->__launched = vmx->loaded_vmcs->launched; + + asm( + /* Set HOST_RSP */ + __ex("vmwrite %%" _ASM_SP ", %%" _ASM_DX) "\n\t" + "mov %%" _ASM_SP ", %c[host_rsp](%0)\n\t" + + /* Check if vmlaunch of vmresume is needed */ + "cmpl $0, %c[launched](%0)\n\t" + "je 1f\n\t" + __ex("vmresume") "\n\t" + "jmp 2f\n\t" + "1: " __ex("vmlaunch") "\n\t" + "jmp 2f\n\t" + "2: " + + /* Set vmx->fail accordingly */ + "setbe %c[fail](%0)\n\t" + + ".pushsection .rodata\n\t" + ".global vmx_early_consistency_check_return\n\t" + "vmx_early_consistency_check_return: " _ASM_PTR " 2b\n\t" + ".popsection" + : + : "c"(vmx), "d"((unsigned long)HOST_RSP), + [launched]"i"(offsetof(struct vcpu_vmx, __launched)), + [fail]"i"(offsetof(struct vcpu_vmx, fail)), + [host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)) + : "rax", "cc", "memory" + ); + + vmcs_writel(HOST_RIP, vmx_return); + + preempt_enable(); + + if (vmx->msr_autoload.host.nr) + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); + if (vmx->msr_autoload.guest.nr) + vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); + + if (vmx->fail) { + WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != + VMXERR_ENTRY_INVALID_CONTROL_FIELD); + vmx->fail = 0; + return 1; + } + + /* + * VMExit clears RFLAGS.IF and DR7, even on a consistency check. + */ + local_irq_enable(); + if (hw_breakpoint_active()) + set_debugreg(__this_cpu_read(cpu_dr7), 7); + + /* + * A non-failing VMEntry means we somehow entered guest mode with + * an illegal RIP, and that's just the tip of the iceberg. There + * is no telling what memory has been modified or what state has + * been exposed to unknown code. Hitting this all but guarantees + * a (very critical) hardware issue. + */ + WARN_ON(!(vmcs_read32(VM_EXIT_REASON) & + VMX_EXIT_REASONS_FAILED_VMENTRY)); + + return 0; +} +STACK_FRAME_NON_STANDARD(nested_vmx_check_vmentry_hw); + +static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12); + /* - * If exit_qual is NULL, this is being called from state restore (either RSM + * If from_vmentry is false, this is being called from state restore (either RSM * or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume. ++ * ++ * Returns: ++ * 0 - success, i.e. proceed with actual VMEnter ++ * 1 - consistency check VMExit ++ * -1 - consistency check VMFail */ -static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) +static int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, + bool from_vmentry) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - bool from_vmentry = !!exit_qual; - u32 dummy_exit_qual; bool evaluate_pending_interrupts; - int r = 0; + u32 exit_reason = EXIT_REASON_INVALID_STATE; + u32 exit_qual; evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING); if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); - enter_guest_mode(vcpu); - if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); if (kvm_mpx_supported() && @@ -12627,24 +13336,35 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); - vmx_segment_cache_clear(vmx); + prepare_vmcs02_early(vmx, vmcs12); + + if (from_vmentry) { + nested_get_vmcs12_pages(vcpu); + + if (nested_vmx_check_vmentry_hw(vcpu)) { + vmx_switch_vmcs(vcpu, &vmx->vmcs01); + return -1; + } + + if (check_vmentry_postreqs(vcpu, vmcs12, &exit_qual)) + goto vmentry_fail_vmexit; + } + + enter_guest_mode(vcpu); if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) vcpu->arch.tsc_offset += vmcs12->tsc_offset; - r = EXIT_REASON_INVALID_STATE; - if (prepare_vmcs02(vcpu, vmcs12, from_vmentry ? exit_qual : &dummy_exit_qual)) - goto fail; + if (prepare_vmcs02(vcpu, vmcs12, &exit_qual)) + goto vmentry_fail_vmexit_guest_mode; if (from_vmentry) { - nested_get_vmcs12_pages(vcpu); - - r = EXIT_REASON_MSR_LOAD_FAIL; - *exit_qual = nested_vmx_load_msr(vcpu, - vmcs12->vm_entry_msr_load_addr, - vmcs12->vm_entry_msr_load_count); - if (*exit_qual) - goto fail; + exit_reason = EXIT_REASON_MSR_LOAD_FAIL; + exit_qual = nested_vmx_load_msr(vcpu, + vmcs12->vm_entry_msr_load_addr, + vmcs12->vm_entry_msr_load_count); + if (exit_qual) + goto vmentry_fail_vmexit_guest_mode; } else { /* * The MMU is not initialized to point at the right entities yet and @@ -12681,12 +13401,28 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual) */ return 0; -fail: + /* + * A failed consistency check that leads to a VMExit during L1's + * VMEnter to L2 is a variation of a normal VMexit, as explained in + * 26.7 "VM-entry failures during or after loading guest state". + */ +vmentry_fail_vmexit_guest_mode: if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) vcpu->arch.tsc_offset -= vmcs12->tsc_offset; leave_guest_mode(vcpu); + +vmentry_fail_vmexit: vmx_switch_vmcs(vcpu, &vmx->vmcs01); - return r; + + if (!from_vmentry) + return 1; + + load_vmcs12_host_state(vcpu, vmcs12); + vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY; + vmcs12->exit_qualification = exit_qual; + if (enable_shadow_vmcs || vmx->nested.hv_evmcs) + vmx->nested.need_vmcs12_sync = true; + return 1; } /* @@ -12698,14 +13434,16 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) struct vmcs12 *vmcs12; struct vcpu_vmx *vmx = to_vmx(vcpu); u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu); - u32 exit_qual; int ret; if (!nested_vmx_check_permission(vcpu)) return 1; - if (!nested_vmx_check_vmcs12(vcpu)) - goto out; + if (!nested_vmx_handle_enlightened_vmptrld(vcpu, true)) + return 1; + + if (!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull) + return nested_vmx_failInvalid(vcpu); vmcs12 = get_vmcs12(vcpu); @@ -12715,13 +13453,16 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * rather than RFLAGS.ZF, and no error number is stored to the * VM-instruction error field. */ - if (vmcs12->hdr.shadow_vmcs) { - nested_vmx_failInvalid(vcpu); - goto out; - } + if (vmcs12->hdr.shadow_vmcs) + return nested_vmx_failInvalid(vcpu); - if (enable_shadow_vmcs) + if (vmx->nested.hv_evmcs) { + copy_enlightened_to_vmcs12(vmx); + /* Enlightened VMCS doesn't have launch state */ + vmcs12->launch_state = !launch; + } else if (enable_shadow_vmcs) { copy_shadow_to_vmcs12(vmx); + } /* * The nested entry process starts with enforcing various prerequisites @@ -12733,59 +13474,37 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * for misconfigurations which will anyway be caught by the processor * when using the merged vmcs02. */ - if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS) { - nested_vmx_failValid(vcpu, - VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS); - goto out; - } + if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS) + return nested_vmx_failValid(vcpu, + VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS); - if (vmcs12->launch_state == launch) { - nested_vmx_failValid(vcpu, + if (vmcs12->launch_state == launch) + return nested_vmx_failValid(vcpu, launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS : VMXERR_VMRESUME_NONLAUNCHED_VMCS); - goto out; - } ret = check_vmentry_prereqs(vcpu, vmcs12); - if (ret) { - nested_vmx_failValid(vcpu, ret); - goto out; - } - - /* - * After this point, the trap flag no longer triggers a singlestep trap - * on the vm entry instructions; don't call kvm_skip_emulated_instruction. - * This is not 100% correct; for performance reasons, we delegate most - * of the checks on host state to the processor. If those fail, - * the singlestep trap is missed. - */ - skip_emulated_instruction(vcpu); - - ret = check_vmentry_postreqs(vcpu, vmcs12, &exit_qual); - if (ret) { - nested_vmx_entry_failure(vcpu, vmcs12, - EXIT_REASON_INVALID_STATE, exit_qual); - return 1; - } + if (ret) + return nested_vmx_failValid(vcpu, ret); /* * We're finally done with prerequisite checking, and can start with * the nested entry. */ - vmx->nested.nested_run_pending = 1; - ret = enter_vmx_non_root_mode(vcpu, &exit_qual); - if (ret) { - nested_vmx_entry_failure(vcpu, vmcs12, ret, exit_qual); - vmx->nested.nested_run_pending = 0; + ret = nested_vmx_enter_non_root_mode(vcpu, true); + vmx->nested.nested_run_pending = !ret; + if (ret > 0) return 1; - } + else if (ret) + return nested_vmx_failValid(vcpu, + VMXERR_ENTRY_INVALID_CONTROL_FIELD); /* Hide L1D cache contents from the nested guest. */ vmx->vcpu.arch.l1tf_flush_l1d = true; /* - * Must happen outside of enter_vmx_non_root_mode() as it will + * Must happen outside of nested_vmx_enter_non_root_mode() as it will * also be used as part of restoring nVMX state for * snapshot restore (migration). * @@ -12806,9 +13525,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) return kvm_vcpu_halt(vcpu); } return 1; - -out: - return kvm_skip_emulated_instruction(vcpu); } /* @@ -13122,24 +13838,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, kvm_clear_interrupt_queue(vcpu); } -static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) -{ - u32 entry_failure_code; - - nested_ept_uninit_mmu_context(vcpu); - - /* - * Only PDPTE load can fail as the value of cr3 was checked on entry and - * couldn't have changed. - */ - if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) - nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); - - if (!enable_ept) - vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; -} - /* * A part of what we need to when the nested L2 guest exits and we want to * run its L1 parent, is to reset L1's guest state to the host state specified @@ -13153,6 +13851,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct kvm_segment seg; + u32 entry_failure_code; if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->host_ia32_efer; @@ -13165,6 +13864,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->host_rsp); kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->host_rip); vmx_set_rflags(vcpu, X86_EFLAGS_FIXED); + vmx_set_interrupt_shadow(vcpu, 0); + /* * Note that calling vmx_set_cr0 is important, even if cr0 hasn't * actually changed, because vmx_set_cr0 refers to efer set above. @@ -13179,23 +13880,35 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); vmx_set_cr4(vcpu, vmcs12->host_cr4); - load_vmcs12_mmu_host_state(vcpu, vmcs12); + nested_ept_uninit_mmu_context(vcpu); /* - * If vmcs01 don't use VPID, CPU flushes TLB on every + * Only PDPTE load can fail as the value of cr3 was checked on entry and + * couldn't have changed. + */ + if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) + nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); + + if (!enable_ept) + vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; + + /* + * If vmcs01 doesn't use VPID, CPU flushes TLB on every * VMEntry/VMExit. Thus, no need to flush TLB. * - * If vmcs12 uses VPID, TLB entries populated by L2 are - * tagged with vmx->nested.vpid02 while L1 entries are tagged - * with vmx->vpid. Thus, no need to flush TLB. + * If vmcs12 doesn't use VPID, L1 expects TLB to be + * flushed on every VMEntry/VMExit. + * + * Otherwise, we can preserve TLB entries as long as we are + * able to tag L1 TLB entries differently than L2 TLB entries. * - * Therefore, flush TLB only in case vmcs01 uses VPID and - * vmcs12 don't use VPID as in this case L1 & L2 TLB entries - * are both tagged with vmx->vpid. + * If vmcs12 uses EPT, we need to execute this flush on EPTP01 + * and therefore we request the TLB flush to happen only after VMCS EPTP + * has been set by KVM_REQ_LOAD_CR3. */ if (enable_vpid && - !(nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02)) { - vmx_flush_tlb(vcpu, true); + (!nested_cpu_has_vpid(vmcs12) || !nested_has_guest_tlb_tag(vcpu))) { + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs); @@ -13275,6 +13988,140 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL); } +static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx) +{ + struct shared_msr_entry *efer_msr; + unsigned int i; + + if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER) + return vmcs_read64(GUEST_IA32_EFER); + + if (cpu_has_load_ia32_efer) + return host_efer; + + for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) { + if (vmx->msr_autoload.guest.val[i].index == MSR_EFER) + return vmx->msr_autoload.guest.val[i].value; + } + + efer_msr = find_msr_entry(vmx, MSR_EFER); + if (efer_msr) + return efer_msr->data; + + return host_efer; +} + +static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmx_msr_entry g, h; + struct msr_data msr; + gpa_t gpa; + u32 i, j; + + vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT); + + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) { + /* + * L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set + * as vmcs01.GUEST_DR7 contains a userspace defined value + * and vcpu->arch.dr7 is not squirreled away before the + * nested VMENTER (not worth adding a variable in nested_vmx). + */ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + kvm_set_dr(vcpu, 7, DR7_FIXED_1); + else + WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7))); + } + + /* + * Note that calling vmx_set_{efer,cr0,cr4} is important as they + * handle a variety of side effects to KVM's software model. + */ + vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx)); + + vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; + vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW)); + + vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); + vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW)); + + nested_ept_uninit_mmu_context(vcpu); + vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); + __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + + /* + * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs + * from vmcs01 (if necessary). The PDPTRs are not loaded on + * VMFail, like everything else we just need to ensure our + * software model is up-to-date. + */ + ept_save_pdptrs(vcpu); + + kvm_mmu_reset_context(vcpu); + + if (cpu_has_vmx_msr_bitmap()) + vmx_update_msr_bitmap(vcpu); + + /* + * This nasty bit of open coding is a compromise between blindly + * loading L1's MSRs using the exit load lists (incorrect emulation + * of VMFail), leaving the nested VM's MSRs in the software model + * (incorrect behavior) and snapshotting the modified MSRs (too + * expensive since the lists are unbound by hardware). For each + * MSR that was (prematurely) loaded from the nested VMEntry load + * list, reload it from the exit load list if it exists and differs + * from the guest value. The intent is to stuff host state as + * silently as possible, not to fully process the exit load list. + */ + msr.host_initiated = false; + for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) { + gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g)); + if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) { + pr_debug_ratelimited( + "%s read MSR index failed (%u, 0x%08llx)\n", + __func__, i, gpa); + goto vmabort; + } + + for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) { + gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h)); + if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) { + pr_debug_ratelimited( + "%s read MSR failed (%u, 0x%08llx)\n", + __func__, j, gpa); + goto vmabort; + } + if (h.index != g.index) + continue; + if (h.value == g.value) + break; + + if (nested_vmx_load_msr_check(vcpu, &h)) { + pr_debug_ratelimited( + "%s check failed (%u, 0x%x, 0x%x)\n", + __func__, j, h.index, h.reserved); + goto vmabort; + } + + msr.index = h.index; + msr.data = h.value; + if (kvm_set_msr(vcpu, &msr)) { + pr_debug_ratelimited( + "%s WRMSR failed (%u, 0x%x, 0x%llx)\n", + __func__, j, h.index, h.value); + goto vmabort; + } + } + } + + return; + +vmabort: + nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL); +} + /* * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1 * and modify vmcs12 to make it see what it would expect to see there if @@ -13290,14 +14137,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, /* trying to cancel vmlaunch/vmresume is a bug */ WARN_ON_ONCE(vmx->nested.nested_run_pending); - /* - * The only expected VM-instruction error is "VM entry with - * invalid control field(s)." Anything else indicates a - * problem with L0. - */ - WARN_ON_ONCE(vmx->fail && (vmcs_read32(VM_INSTRUCTION_ERROR) != - VMXERR_ENTRY_INVALID_CONTROL_FIELD)); - leave_guest_mode(vcpu); if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) @@ -13324,12 +14163,19 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr, vmcs12->vm_exit_msr_store_count)) nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL); + } else { + /* + * The only expected VM-instruction error is "VM entry with + * invalid control field(s)." Anything else indicates a + * problem with L0. And we should never get here with a + * VMFail of any type if early consistency checks are enabled. + */ + WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) != + VMXERR_ENTRY_INVALID_CONTROL_FIELD); + WARN_ON_ONCE(nested_early_check); } vmx_switch_vmcs(vcpu, &vmx->vmcs01); - vm_entry_controls_reset_shadow(vmx); - vm_exit_controls_reset_shadow(vmx); - vmx_segment_cache_clear(vmx); /* Update any VMCS fields that might have changed while L2 ran */ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); @@ -13373,8 +14219,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, */ kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - if (enable_shadow_vmcs && exit_reason != -1) - vmx->nested.sync_shadow_vmcs = true; + if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs)) + vmx->nested.need_vmcs12_sync = true; /* in case we halted in L2 */ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -13409,24 +14255,24 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, return; } - + /* * After an early L2 VM-entry failure, we're now back * in L1 which thinks it just finished a VMLAUNCH or * VMRESUME instruction, so we need to set the failure * flag and the VM-instruction error field of the VMCS - * accordingly. + * accordingly, and skip the emulated instruction. */ - nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); - - load_vmcs12_mmu_host_state(vcpu, vmcs12); + (void)nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); /* - * The emulated instruction was already skipped in - * nested_vmx_run, but the updated RIP was never - * written back to the vmcs01. + * Restore L1's host state to KVM's software model. We're here + * because a consistency check was caught by hardware, which + * means some amount of guest state has been propagated to KVM's + * model and needs to be unwound to the host's state. */ - skip_emulated_instruction(vcpu); + nested_vmx_restore_host_state(vcpu); + vmx->fail = 0; } @@ -13439,26 +14285,7 @@ static void vmx_leave_nested(struct kvm_vcpu *vcpu) to_vmx(vcpu)->nested.nested_run_pending = 0; nested_vmx_vmexit(vcpu, -1, 0, 0); } - free_nested(to_vmx(vcpu)); -} - -/* - * L1's failure to enter L2 is a subset of a normal exit, as explained in - * 23.7 "VM-entry failures during or after loading guest state" (this also - * lists the acceptable exit-reason and exit-qualification parameters). - * It should only be called before L2 actually succeeded to run, and when - * vmcs01 is current (it doesn't leave_guest_mode() or switch vmcss). - */ -static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12, - u32 reason, unsigned long qualification) -{ - load_vmcs12_host_state(vcpu, vmcs12); - vmcs12->vm_exit_reason = reason | VMX_EXIT_REASONS_FAILED_VMENTRY; - vmcs12->exit_qualification = qualification; - nested_vmx_succeed(vcpu); - if (enable_shadow_vmcs) - to_vmx(vcpu)->nested.sync_shadow_vmcs = true; + free_nested(vcpu); } static int vmx_check_intercept(struct kvm_vcpu *vcpu, @@ -13884,7 +14711,7 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, u64 smbase) if (vmx->nested.smm.guest_mode) { vcpu->arch.hflags &= ~HF_SMM_MASK; - ret = enter_vmx_non_root_mode(vcpu, NULL); + ret = nested_vmx_enter_non_root_mode(vcpu, false); vcpu->arch.hflags |= HF_SMM_MASK; if (ret) return ret; @@ -13899,6 +14726,20 @@ static int enable_smi_window(struct kvm_vcpu *vcpu) return 0; } +static inline int vmx_has_valid_vmcs12(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + /* + * In case we do two consecutive get/set_nested_state()s while L2 was + * running hv_evmcs may end up not being mapped (we map it from + * nested_vmx_run()/vmx_vcpu_run()). Check is_guest_mode() as we always + * have vmcs12 if it is true. + */ + return is_guest_mode(vcpu) || vmx->nested.current_vmptr != -1ull || + vmx->nested.hv_evmcs; +} + static int vmx_get_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, u32 user_data_size) @@ -13918,12 +14759,16 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, vmx = to_vmx(vcpu); vmcs12 = get_vmcs12(vcpu); + + if (nested_vmx_allowed(vcpu) && vmx->nested.enlightened_vmcs_enabled) + kvm_state.flags |= KVM_STATE_NESTED_EVMCS; + if (nested_vmx_allowed(vcpu) && (vmx->nested.vmxon || vmx->nested.smm.vmxon)) { kvm_state.vmx.vmxon_pa = vmx->nested.vmxon_ptr; kvm_state.vmx.vmcs_pa = vmx->nested.current_vmptr; - if (vmx->nested.current_vmptr != -1ull) { + if (vmx_has_valid_vmcs12(vcpu)) { kvm_state.size += VMCS12_SIZE; if (is_guest_mode(vcpu) && @@ -13952,20 +14797,24 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state))) return -EFAULT; - if (vmx->nested.current_vmptr == -1ull) + if (!vmx_has_valid_vmcs12(vcpu)) goto out; /* * When running L2, the authoritative vmcs12 state is in the * vmcs02. When running L1, the authoritative vmcs12 state is - * in the shadow vmcs linked to vmcs01, unless - * sync_shadow_vmcs is set, in which case, the authoritative + * in the shadow or enlightened vmcs linked to vmcs01, unless + * need_vmcs12_sync is set, in which case, the authoritative * vmcs12 state is in the vmcs12 already. */ - if (is_guest_mode(vcpu)) + if (is_guest_mode(vcpu)) { sync_vmcs12(vcpu, vmcs12); - else if (enable_shadow_vmcs && !vmx->nested.sync_shadow_vmcs) - copy_shadow_to_vmcs12(vmx); + } else if (!vmx->nested.need_vmcs12_sync) { + if (vmx->nested.hv_evmcs) + copy_enlightened_to_vmcs12(vmx); + else if (enable_shadow_vmcs) + copy_shadow_to_vmcs12(vmx); + } if (copy_to_user(user_kvm_nested_state->data, vmcs12, sizeof(*vmcs12))) return -EFAULT; @@ -13993,6 +14842,9 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (kvm_state->format != 0) return -EINVAL; + if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) + nested_enable_evmcs(vcpu, NULL); + if (!nested_vmx_allowed(vcpu)) return kvm_state->vmx.vmxon_pa == -1ull ? 0 : -EINVAL; @@ -14010,13 +14862,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (!page_address_valid(vcpu, kvm_state->vmx.vmxon_pa)) return -EINVAL; - if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12)) - return -EINVAL; - - if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa || - !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa)) - return -EINVAL; - if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) return -EINVAL; @@ -14046,7 +14891,25 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, if (ret) return ret; - set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa); + /* Empty 'VMXON' state is permitted */ + if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12)) + return 0; + + if (kvm_state->vmx.vmcs_pa != -1ull) { + if (kvm_state->vmx.vmcs_pa == kvm_state->vmx.vmxon_pa || + !page_address_valid(vcpu, kvm_state->vmx.vmcs_pa)) + return -EINVAL; + + set_current_vmptr(vmx, kvm_state->vmx.vmcs_pa); + } else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) { + /* + * Sync eVMCS upon entry as we may not have + * HV_X64_MSR_VP_ASSIST_PAGE set up yet. + */ + vmx->nested.need_vmcs12_sync = true; + } else { + return -EINVAL; + } if (kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) { vmx->nested.smm.vmxon = true; @@ -14090,7 +14953,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, return -EINVAL; vmx->nested.dirty_vmcs12 = true; - ret = enter_vmx_non_root_mode(vcpu, NULL); + ret = nested_vmx_enter_non_root_mode(vcpu, false); if (ret) return -EINVAL; @@ -14242,6 +15105,8 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .pre_enter_smm = vmx_pre_enter_smm, .pre_leave_smm = vmx_pre_leave_smm, .enable_smi_window = enable_smi_window, + + .nested_enable_evmcs = nested_enable_evmcs, }; static void vmx_cleanup_l1d_flush(void) diff --git a/arch/x86/kvm/vmx_shadow_fields.h b/arch/x86/kvm/vmx_shadow_fields.h index cd0c75f6d037..132432f375c2 100644 --- a/arch/x86/kvm/vmx_shadow_fields.h +++ b/arch/x86/kvm/vmx_shadow_fields.h @@ -28,7 +28,6 @@ */ /* 16-bits */ -SHADOW_FIELD_RW(GUEST_CS_SELECTOR) SHADOW_FIELD_RW(GUEST_INTR_STATUS) SHADOW_FIELD_RW(GUEST_PML_INDEX) SHADOW_FIELD_RW(HOST_FS_SELECTOR) @@ -47,8 +46,8 @@ SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE) SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD) SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN) SHADOW_FIELD_RW(TPR_THRESHOLD) -SHADOW_FIELD_RW(GUEST_CS_LIMIT) SHADOW_FIELD_RW(GUEST_CS_AR_BYTES) +SHADOW_FIELD_RW(GUEST_SS_AR_BYTES) SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO) SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE) @@ -61,8 +60,6 @@ SHADOW_FIELD_RW(GUEST_CR0) SHADOW_FIELD_RW(GUEST_CR3) SHADOW_FIELD_RW(GUEST_CR4) SHADOW_FIELD_RW(GUEST_RFLAGS) -SHADOW_FIELD_RW(GUEST_CS_BASE) -SHADOW_FIELD_RW(GUEST_ES_BASE) SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK) SHADOW_FIELD_RW(CR0_READ_SHADOW) SHADOW_FIELD_RW(CR4_READ_SHADOW) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ca717737347e..66d66d77caee 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -136,7 +136,7 @@ static u32 __read_mostly tsc_tolerance_ppm = 250; module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR); /* lapic timer advance (tscdeadline mode only) in nanoseconds */ -unsigned int __read_mostly lapic_timer_advance_ns = 0; +unsigned int __read_mostly lapic_timer_advance_ns = 1000; module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR); EXPORT_SYMBOL_GPL(lapic_timer_advance_ns); @@ -400,9 +400,51 @@ static int exception_type(int vector) return EXCPT_FAULT; } +void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu) +{ + unsigned nr = vcpu->arch.exception.nr; + bool has_payload = vcpu->arch.exception.has_payload; + unsigned long payload = vcpu->arch.exception.payload; + + if (!has_payload) + return; + + switch (nr) { + case DB_VECTOR: + /* + * "Certain debug exceptions may clear bit 0-3. The + * remaining contents of the DR6 register are never + * cleared by the processor". + */ + vcpu->arch.dr6 &= ~DR_TRAP_BITS; + /* + * DR6.RTM is set by all #DB exceptions that don't clear it. + */ + vcpu->arch.dr6 |= DR6_RTM; + vcpu->arch.dr6 |= payload; + /* + * Bit 16 should be set in the payload whenever the #DB + * exception should clear DR6.RTM. This makes the payload + * compatible with the pending debug exceptions under VMX. + * Though not currently documented in the SDM, this also + * makes the payload compatible with the exit qualification + * for #DB exceptions under VMX. + */ + vcpu->arch.dr6 ^= payload & DR6_RTM; + break; + case PF_VECTOR: + vcpu->arch.cr2 = payload; + break; + } + + vcpu->arch.exception.has_payload = false; + vcpu->arch.exception.payload = 0; +} +EXPORT_SYMBOL_GPL(kvm_deliver_exception_payload); + static void kvm_multiple_exception(struct kvm_vcpu *vcpu, unsigned nr, bool has_error, u32 error_code, - bool reinject) + bool has_payload, unsigned long payload, bool reinject) { u32 prev_nr; int class1, class2; @@ -424,6 +466,14 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, */ WARN_ON_ONCE(vcpu->arch.exception.pending); vcpu->arch.exception.injected = true; + if (WARN_ON_ONCE(has_payload)) { + /* + * A reinjected event has already + * delivered its payload. + */ + has_payload = false; + payload = 0; + } } else { vcpu->arch.exception.pending = true; vcpu->arch.exception.injected = false; @@ -431,6 +481,22 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, vcpu->arch.exception.has_error_code = has_error; vcpu->arch.exception.nr = nr; vcpu->arch.exception.error_code = error_code; + vcpu->arch.exception.has_payload = has_payload; + vcpu->arch.exception.payload = payload; + /* + * In guest mode, payload delivery should be deferred, + * so that the L1 hypervisor can intercept #PF before + * CR2 is modified (or intercept #DB before DR6 is + * modified under nVMX). However, for ABI + * compatibility with KVM_GET_VCPU_EVENTS and + * KVM_SET_VCPU_EVENTS, we can't delay payload + * delivery unless userspace has enabled this + * functionality via the per-VM capability, + * KVM_CAP_EXCEPTION_PAYLOAD. + */ + if (!vcpu->kvm->arch.exception_payload_enabled || + !is_guest_mode(vcpu)) + kvm_deliver_exception_payload(vcpu); return; } @@ -455,6 +521,8 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, vcpu->arch.exception.has_error_code = true; vcpu->arch.exception.nr = DF_VECTOR; vcpu->arch.exception.error_code = 0; + vcpu->arch.exception.has_payload = false; + vcpu->arch.exception.payload = 0; } else /* replace previous exception with a new one in a hope that instruction re-execution will regenerate lost @@ -464,16 +532,29 @@ static void kvm_multiple_exception(struct kvm_vcpu *vcpu, void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr) { - kvm_multiple_exception(vcpu, nr, false, 0, false); + kvm_multiple_exception(vcpu, nr, false, 0, false, 0, false); } EXPORT_SYMBOL_GPL(kvm_queue_exception); void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr) { - kvm_multiple_exception(vcpu, nr, false, 0, true); + kvm_multiple_exception(vcpu, nr, false, 0, false, 0, true); } EXPORT_SYMBOL_GPL(kvm_requeue_exception); +static void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, + unsigned long payload) +{ + kvm_multiple_exception(vcpu, nr, false, 0, true, payload, false); +} + +static void kvm_queue_exception_e_p(struct kvm_vcpu *vcpu, unsigned nr, + u32 error_code, unsigned long payload) +{ + kvm_multiple_exception(vcpu, nr, true, error_code, + true, payload, false); +} + int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err) { if (err) @@ -490,11 +571,13 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) ++vcpu->stat.pf_guest; vcpu->arch.exception.nested_apf = is_guest_mode(vcpu) && fault->async_page_fault; - if (vcpu->arch.exception.nested_apf) + if (vcpu->arch.exception.nested_apf) { vcpu->arch.apf.nested_apf_token = fault->address; - else - vcpu->arch.cr2 = fault->address; - kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code); + kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code); + } else { + kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code, + fault->address); + } } EXPORT_SYMBOL_GPL(kvm_inject_page_fault); @@ -503,7 +586,7 @@ static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fau if (mmu_is_nested(vcpu) && !fault->nested_page_fault) vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault); else - vcpu->arch.mmu.inject_page_fault(vcpu, fault); + vcpu->arch.mmu->inject_page_fault(vcpu, fault); return fault->nested_page_fault; } @@ -517,13 +600,13 @@ EXPORT_SYMBOL_GPL(kvm_inject_nmi); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) { - kvm_multiple_exception(vcpu, nr, true, error_code, false); + kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, false); } EXPORT_SYMBOL_GPL(kvm_queue_exception_e); void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) { - kvm_multiple_exception(vcpu, nr, true, error_code, true); + kvm_multiple_exception(vcpu, nr, true, error_code, false, 0, true); } EXPORT_SYMBOL_GPL(kvm_requeue_exception_e); @@ -602,7 +685,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { if ((pdpte[i] & PT_PRESENT_MASK) && (pdpte[i] & - vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) { + vcpu->arch.mmu->guest_rsvd_check.rsvd_bits_mask[0][2])) { ret = 0; goto out; } @@ -2477,7 +2560,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_KVM_PV_EOI_EN: - if (kvm_lapic_enable_pv_eoi(vcpu, data)) + if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8))) return 1; break; @@ -2912,6 +2995,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_VP_INDEX: case KVM_CAP_HYPERV_EVENTFD: case KVM_CAP_HYPERV_TLBFLUSH: + case KVM_CAP_HYPERV_SEND_IPI: + case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: @@ -2930,6 +3015,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_GET_MSR_FEATURES: case KVM_CAP_MSR_PLATFORM_INFO: + case KVM_CAP_EXCEPTION_PAYLOAD: r = 1; break; case KVM_CAP_SYNC_REGS: @@ -3362,19 +3448,33 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { process_nmi(vcpu); + /* - * FIXME: pass injected and pending separately. This is only - * needed for nested virtualization, whose state cannot be - * migrated yet. For now we can combine them. + * The API doesn't provide the instruction length for software + * exceptions, so don't report them. As long as the guest RIP + * isn't advanced, we should expect to encounter the exception + * again. */ - events->exception.injected = - (vcpu->arch.exception.pending || - vcpu->arch.exception.injected) && - !kvm_exception_is_soft(vcpu->arch.exception.nr); + if (kvm_exception_is_soft(vcpu->arch.exception.nr)) { + events->exception.injected = 0; + events->exception.pending = 0; + } else { + events->exception.injected = vcpu->arch.exception.injected; + events->exception.pending = vcpu->arch.exception.pending; + /* + * For ABI compatibility, deliberately conflate + * pending and injected exceptions when + * KVM_CAP_EXCEPTION_PAYLOAD isn't enabled. + */ + if (!vcpu->kvm->arch.exception_payload_enabled) + events->exception.injected |= + vcpu->arch.exception.pending; + } events->exception.nr = vcpu->arch.exception.nr; events->exception.has_error_code = vcpu->arch.exception.has_error_code; - events->exception.pad = 0; events->exception.error_code = vcpu->arch.exception.error_code; + events->exception_has_payload = vcpu->arch.exception.has_payload; + events->exception_payload = vcpu->arch.exception.payload; events->interrupt.injected = vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft; @@ -3398,6 +3498,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SHADOW | KVM_VCPUEVENT_VALID_SMM); + if (vcpu->kvm->arch.exception_payload_enabled) + events->flags |= KVM_VCPUEVENT_VALID_PAYLOAD; + memset(&events->reserved, 0, sizeof(events->reserved)); } @@ -3409,12 +3512,24 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR | KVM_VCPUEVENT_VALID_SHADOW - | KVM_VCPUEVENT_VALID_SMM)) + | KVM_VCPUEVENT_VALID_SMM + | KVM_VCPUEVENT_VALID_PAYLOAD)) return -EINVAL; - if (events->exception.injected && - (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR || - is_guest_mode(vcpu))) + if (events->flags & KVM_VCPUEVENT_VALID_PAYLOAD) { + if (!vcpu->kvm->arch.exception_payload_enabled) + return -EINVAL; + if (events->exception.pending) + events->exception.injected = 0; + else + events->exception_has_payload = 0; + } else { + events->exception.pending = 0; + events->exception_has_payload = 0; + } + + if ((events->exception.injected || events->exception.pending) && + (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR)) return -EINVAL; /* INITs are latched while in SMM */ @@ -3424,11 +3539,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, return -EINVAL; process_nmi(vcpu); - vcpu->arch.exception.injected = false; - vcpu->arch.exception.pending = events->exception.injected; + vcpu->arch.exception.injected = events->exception.injected; + vcpu->arch.exception.pending = events->exception.pending; vcpu->arch.exception.nr = events->exception.nr; vcpu->arch.exception.has_error_code = events->exception.has_error_code; vcpu->arch.exception.error_code = events->exception.error_code; + vcpu->arch.exception.has_payload = events->exception_has_payload; + vcpu->arch.exception.payload = events->exception_payload; vcpu->arch.interrupt.injected = events->interrupt.injected; vcpu->arch.interrupt.nr = events->interrupt.nr; @@ -3694,6 +3811,10 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, struct kvm_enable_cap *cap) { + int r; + uint16_t vmcs_version; + void __user *user_ptr; + if (cap->flags) return -EINVAL; @@ -3706,6 +3827,16 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return -EINVAL; return kvm_hv_activate_synic(vcpu, cap->cap == KVM_CAP_HYPERV_SYNIC2); + case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: + r = kvm_x86_ops->nested_enable_evmcs(vcpu, &vmcs_version); + if (!r) { + user_ptr = (void __user *)(uintptr_t)cap->args[0]; + if (copy_to_user(user_ptr, &vmcs_version, + sizeof(vmcs_version))) + r = -EFAULT; + } + return r; + default: return -EINVAL; } @@ -4047,11 +4178,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; if (kvm_state.flags & - ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE)) + ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE + | KVM_STATE_NESTED_EVMCS)) break; /* nested_run_pending implies guest_mode. */ - if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING) + if ((kvm_state.flags & KVM_STATE_NESTED_RUN_PENDING) + && !(kvm_state.flags & KVM_STATE_NESTED_GUEST_MODE)) break; r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state); @@ -4363,6 +4496,10 @@ split_irqchip_unlock: kvm->arch.guest_can_read_msr_platform_info = cap->args[0]; r = 0; break; + case KVM_CAP_EXCEPTION_PAYLOAD: + kvm->arch.exception_payload_enabled = cap->args[0]; + r = 0; + break; default: r = -EINVAL; break; @@ -4803,7 +4940,7 @@ gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, /* NPT walks are always user-walks */ access |= PFERR_USER_MASK; - t_gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gpa, access, exception); + t_gpa = vcpu->arch.mmu->gva_to_gpa(vcpu, gpa, access, exception); return t_gpa; } @@ -5889,7 +6026,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, if (WARN_ON_ONCE(is_guest_mode(vcpu))) return false; - if (!vcpu->arch.mmu.direct_map) { + if (!vcpu->arch.mmu->direct_map) { /* * Write permission should be allowed since only * write access need to be emulated. @@ -5922,7 +6059,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2, kvm_release_pfn_clean(pfn); /* The instructions are well-emulated on direct mmu. */ - if (vcpu->arch.mmu.direct_map) { + if (vcpu->arch.mmu->direct_map) { unsigned int indirect_shadow_pages; spin_lock(&vcpu->kvm->mmu_lock); @@ -5989,7 +6126,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, vcpu->arch.last_retry_eip = ctxt->eip; vcpu->arch.last_retry_addr = cr2; - if (!vcpu->arch.mmu.direct_map) + if (!vcpu->arch.mmu->direct_map) gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); @@ -6049,14 +6186,7 @@ static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r) kvm_run->exit_reason = KVM_EXIT_DEBUG; *r = EMULATE_USER_EXIT; } else { - /* - * "Certain debug exceptions may clear bit 0-3. The - * remaining contents of the DR6 register are never - * cleared by the processor". - */ - vcpu->arch.dr6 &= ~15; - vcpu->arch.dr6 |= DR6_BS | DR6_RTM; - kvm_queue_exception(vcpu, DB_VECTOR); + kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS); } } @@ -6995,10 +7125,22 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | X86_EFLAGS_RF); - if (vcpu->arch.exception.nr == DB_VECTOR && - (vcpu->arch.dr7 & DR7_GD)) { - vcpu->arch.dr7 &= ~DR7_GD; - kvm_update_dr7(vcpu); + if (vcpu->arch.exception.nr == DB_VECTOR) { + /* + * This code assumes that nSVM doesn't use + * check_nested_events(). If it does, the + * DR6/DR7 changes should happen before L1 + * gets a #VMEXIT for an intercepted #DB in + * L2. (Under VMX, on the other hand, the + * DR6/DR7 changes should not happen in the + * event of a VM-exit to L1 for an intercepted + * #DB in L2.) + */ + kvm_deliver_exception_payload(vcpu); + if (vcpu->arch.dr7 & DR7_GD) { + vcpu->arch.dr7 &= ~DR7_GD; + kvm_update_dr7(vcpu); + } } kvm_x86_ops->queue_exception(vcpu); @@ -8478,7 +8620,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) kvm_vcpu_mtrr_init(vcpu); vcpu_load(vcpu); kvm_vcpu_reset(vcpu, false); - kvm_mmu_setup(vcpu); + kvm_init_mmu(vcpu, false); vcpu_put(vcpu); return 0; } @@ -9327,7 +9469,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) { int r; - if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || + if ((vcpu->arch.mmu->direct_map != work->arch.direct_map) || work->wakeup_all) return; @@ -9335,11 +9477,11 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) if (unlikely(r)) return; - if (!vcpu->arch.mmu.direct_map && - work->arch.cr3 != vcpu->arch.mmu.get_cr3(vcpu)) + if (!vcpu->arch.mmu->direct_map && + work->arch.cr3 != vcpu->arch.mmu->get_cr3(vcpu)) return; - vcpu->arch.mmu.page_fault(vcpu, work->gva, 0, true); + vcpu->arch.mmu->page_fault(vcpu, work->gva, 0, true); } static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) @@ -9463,6 +9605,8 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, vcpu->arch.exception.nr = 0; vcpu->arch.exception.has_error_code = false; vcpu->arch.exception.error_code = 0; + vcpu->arch.exception.has_payload = false; + vcpu->arch.exception.payload = 0; } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { fault.vector = PF_VECTOR; fault.error_code_valid = true; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 67b9568613f3..224cd0a47568 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -266,6 +266,8 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, int handle_ud(struct kvm_vcpu *vcpu); +void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu); + void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu); u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 2b1519bc5381..b24eb4eb9984 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -157,79 +157,6 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) return prefetch; } -/* - * A protection key fault means that the PKRU value did not allow - * access to some PTE. Userspace can figure out what PKRU was - * from the XSAVE state, and this function fills out a field in - * siginfo so userspace can discover which protection key was set - * on the PTE. - * - * If we get here, we know that the hardware signaled a X86_PF_PK - * fault and that there was a VMA once we got in the fault - * handler. It does *not* guarantee that the VMA we find here - * was the one that we faulted on. - * - * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); - * 2. T1 : set PKRU to deny access to pkey=4, touches page - * 3. T1 : faults... - * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); - * 5. T1 : enters fault handler, takes mmap_sem, etc... - * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really - * faulted on a pte with its pkey=4. - */ -static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info, - u32 *pkey) -{ - /* This is effectively an #ifdef */ - if (!boot_cpu_has(X86_FEATURE_OSPKE)) - return; - - /* Fault not from Protection Keys: nothing to do */ - if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV)) - return; - /* - * force_sig_info_fault() is called from a number of - * contexts, some of which have a VMA and some of which - * do not. The X86_PF_PK handing happens after we have a - * valid VMA, so we should never reach this without a - * valid VMA. - */ - if (!pkey) { - WARN_ONCE(1, "PKU fault with no VMA passed in"); - info->si_pkey = 0; - return; - } - /* - * si_pkey should be thought of as a strong hint, but not - * absolutely guranteed to be 100% accurate because of - * the race explained above. - */ - info->si_pkey = *pkey; -} - -static void -force_sig_info_fault(int si_signo, int si_code, unsigned long address, - struct task_struct *tsk, u32 *pkey, int fault) -{ - unsigned lsb = 0; - siginfo_t info; - - clear_siginfo(&info); - info.si_signo = si_signo; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void __user *)address; - if (fault & VM_FAULT_HWPOISON_LARGE) - lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); - if (fault & VM_FAULT_HWPOISON) - lsb = PAGE_SHIFT; - info.si_addr_lsb = lsb; - - fill_sig_info_pkey(si_signo, si_code, &info, pkey); - - force_sig_info(si_signo, &info, tsk); -} - DEFINE_SPINLOCK(pgd_lock); LIST_HEAD(pgd_list); @@ -734,8 +661,8 @@ no_context(struct pt_regs *regs, unsigned long error_code, tsk->thread.cr2 = address; /* XXX: hwpoison faults will set the wrong code. */ - force_sig_info_fault(signal, si_code, address, - tsk, NULL, 0); + force_sig_fault(signal, si_code, (void __user *)address, + tsk); } /* @@ -862,7 +789,7 @@ static bool is_vsyscall_vaddr(unsigned long vaddr) static void __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, - unsigned long address, u32 *pkey, int si_code) + unsigned long address, u32 pkey, int si_code) { struct task_struct *tsk = current; @@ -898,7 +825,10 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_PF; - force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0); + if (si_code == SEGV_PKUERR) + force_sig_pkuerr((void __user *)address, pkey); + + force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk); return; } @@ -911,35 +841,29 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, static noinline void bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, - unsigned long address, u32 *pkey) + unsigned long address) { - __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR); + __bad_area_nosemaphore(regs, error_code, address, 0, SEGV_MAPERR); } static void __bad_area(struct pt_regs *regs, unsigned long error_code, - unsigned long address, struct vm_area_struct *vma, int si_code) + unsigned long address, u32 pkey, int si_code) { struct mm_struct *mm = current->mm; - u32 pkey; - - if (vma) - pkey = vma_pkey(vma); - /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ up_read(&mm->mmap_sem); - __bad_area_nosemaphore(regs, error_code, address, - (vma) ? &pkey : NULL, si_code); + __bad_area_nosemaphore(regs, error_code, address, pkey, si_code); } static noinline void bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address) { - __bad_area(regs, error_code, address, NULL, SEGV_MAPERR); + __bad_area(regs, error_code, address, 0, SEGV_MAPERR); } static inline bool bad_area_access_from_pkeys(unsigned long error_code, @@ -968,18 +892,40 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code, * But, doing it this way allows compiler optimizations * if pkeys are compiled out. */ - if (bad_area_access_from_pkeys(error_code, vma)) - __bad_area(regs, error_code, address, vma, SEGV_PKUERR); - else - __bad_area(regs, error_code, address, vma, SEGV_ACCERR); + if (bad_area_access_from_pkeys(error_code, vma)) { + /* + * A protection key fault means that the PKRU value did not allow + * access to some PTE. Userspace can figure out what PKRU was + * from the XSAVE state. This function captures the pkey from + * the vma and passes it to userspace so userspace can discover + * which protection key was set on the PTE. + * + * If we get here, we know that the hardware signaled a X86_PF_PK + * fault and that there was a VMA once we got in the fault + * handler. It does *not* guarantee that the VMA we find here + * was the one that we faulted on. + * + * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); + * 2. T1 : set PKRU to deny access to pkey=4, touches page + * 3. T1 : faults... + * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); + * 5. T1 : enters fault handler, takes mmap_sem, etc... + * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really + * faulted on a pte with its pkey=4. + */ + u32 pkey = vma_pkey(vma); + + __bad_area(regs, error_code, address, pkey, SEGV_PKUERR); + } else { + __bad_area(regs, error_code, address, 0, SEGV_ACCERR); + } } static void do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, - u32 *pkey, unsigned int fault) + unsigned int fault) { struct task_struct *tsk = current; - int code = BUS_ADRERR; /* Kernel mode? Handle exceptions or die: */ if (!(error_code & X86_PF_USER)) { @@ -997,18 +943,25 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, #ifdef CONFIG_MEMORY_FAILURE if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { - printk(KERN_ERR + unsigned lsb = 0; + + pr_err( "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", tsk->comm, tsk->pid, address); - code = BUS_MCEERR_AR; + if (fault & VM_FAULT_HWPOISON_LARGE) + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); + if (fault & VM_FAULT_HWPOISON) + lsb = PAGE_SHIFT; + force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, tsk); + return; } #endif - force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk); } static noinline void mm_fault_error(struct pt_regs *regs, unsigned long error_code, - unsigned long address, u32 *pkey, vm_fault_t fault) + unsigned long address, vm_fault_t fault) { if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) { no_context(regs, error_code, address, 0, 0); @@ -1032,9 +985,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, } else { if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| VM_FAULT_HWPOISON_LARGE)) - do_sigbus(regs, error_code, address, pkey, fault); + do_sigbus(regs, error_code, address, fault); else if (fault & VM_FAULT_SIGSEGV) - bad_area_nosemaphore(regs, error_code, address, pkey); + bad_area_nosemaphore(regs, error_code, address); else BUG(); } @@ -1267,7 +1220,7 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code, * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock: */ - bad_area_nosemaphore(regs, hw_error_code, address, NULL); + bad_area_nosemaphore(regs, hw_error_code, address); } NOKPROBE_SYMBOL(do_kern_addr_fault); @@ -1283,7 +1236,6 @@ void do_user_addr_fault(struct pt_regs *regs, struct mm_struct *mm; vm_fault_t fault, major = 0; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; - u32 pkey; tsk = current; mm = tsk->mm; @@ -1304,7 +1256,7 @@ void do_user_addr_fault(struct pt_regs *regs, * pages in the user address space. */ if (unlikely(smap_violation(hw_error_code, regs))) { - bad_area_nosemaphore(regs, hw_error_code, address, NULL); + bad_area_nosemaphore(regs, hw_error_code, address); return; } @@ -1313,7 +1265,7 @@ void do_user_addr_fault(struct pt_regs *regs, * in a region with pagefaults disabled then we must not take the fault */ if (unlikely(faulthandler_disabled() || !mm)) { - bad_area_nosemaphore(regs, hw_error_code, address, NULL); + bad_area_nosemaphore(regs, hw_error_code, address); return; } @@ -1403,7 +1355,7 @@ void do_user_addr_fault(struct pt_regs *regs, * Fault from code in kernel from * which we do not expect faults. */ - bad_area_nosemaphore(regs, sw_error_code, address, NULL); + bad_area_nosemaphore(regs, sw_error_code, address); return; } retry: @@ -1467,10 +1419,7 @@ good_area: * (potentially after handling any pending signal during the return to * userland). The return to userland is identified whenever * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags. - * Thus we have to be careful about not touching vma after handling the - * fault, so we read the pkey beforehand. */ - pkey = vma_pkey(vma); fault = handle_mm_fault(vma, address, flags); major |= fault & VM_FAULT_MAJOR; @@ -1499,7 +1448,7 @@ good_area: up_read(&mm->mmap_sem); if (unlikely(fault & VM_FAULT_ERROR)) { - mm_fault_error(regs, sw_error_code, address, &pkey, fault); + mm_fault_error(regs, sw_error_code, address, fault); return; } diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index e500949bae24..2385538e8065 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -118,14 +118,11 @@ bad_opcode: * anything it wants in to the instructions. We can not * trust anything about it. They might not be valid * instructions or might encode invalid registers, etc... - * - * The caller is expected to kfree() the returned siginfo_t. */ -siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) +int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) { const struct mpx_bndreg_state *bndregs; const struct mpx_bndreg *bndreg; - siginfo_t *info = NULL; struct insn insn; uint8_t bndregno; int err; @@ -153,11 +150,6 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) /* now go select the individual register in the set of 4 */ bndreg = &bndregs->bndreg[bndregno]; - info = kzalloc(sizeof(*info), GFP_KERNEL); - if (!info) { - err = -ENOMEM; - goto err_out; - } /* * The registers are always 64-bit, but the upper 32 * bits are ignored in 32-bit mode. Also, note that the @@ -168,27 +160,23 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) * complains when casting from integers to different-size * pointers. */ - info->si_lower = (void __user *)(unsigned long)bndreg->lower_bound; - info->si_upper = (void __user *)(unsigned long)~bndreg->upper_bound; - info->si_addr_lsb = 0; - info->si_signo = SIGSEGV; - info->si_errno = 0; - info->si_code = SEGV_BNDERR; - info->si_addr = insn_get_addr_ref(&insn, regs); + info->lower = (void __user *)(unsigned long)bndreg->lower_bound; + info->upper = (void __user *)(unsigned long)~bndreg->upper_bound; + info->addr = insn_get_addr_ref(&insn, regs); + /* * We were not able to extract an address from the instruction, * probably because there was something invalid in it. */ - if (info->si_addr == (void __user *)-1) { + if (info->addr == (void __user *)-1) { err = -EINVAL; goto err_out; } - trace_mpx_bounds_register_exception(info->si_addr, bndreg); - return info; + trace_mpx_bounds_register_exception(info->addr, bndreg); + return 0; err_out: /* info might be NULL, but kfree() handles that */ - kfree(info); - return ERR_PTR(err); + return err; } static __user void *mpx_get_bounds_dir(void) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 5559dcaddd5e..948656069cdd 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -356,7 +356,7 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) } else { struct pci_root_info *info; - info = kzalloc_node(sizeof(*info), GFP_KERNEL, node); + info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) dev_err(&root->device->dev, "pci_bus %04x:%02x: ignored (out of memory)\n", diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 13f4485ca388..30a5111ae5fd 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -629,17 +629,11 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8c10, quirk_apple_mbp_poweroff); static void quirk_no_aersid(struct pci_dev *pdev) { /* VMD Domain */ - if (is_vmd(pdev->bus)) + if (is_vmd(pdev->bus) && pci_is_root_bus(pdev->bus)) pdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_AERSID; } -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2030, quirk_no_aersid); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2031, quirk_no_aersid); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2032, quirk_no_aersid); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334a, quirk_no_aersid); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334b, quirk_no_aersid); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334c, quirk_no_aersid); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x334d, quirk_no_aersid); +DECLARE_PCI_FIXUP_CLASS_EARLY(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, + PCI_CLASS_BRIDGE_PCI, 8, quirk_no_aersid); #ifdef CONFIG_PHYS_ADDR_T_64BIT |