Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/kvm/arm.c                      |   4
-rw-r--r--  arch/mips/kvm/mips.c                    |   4
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c    |   6
-rw-r--r--  arch/powerpc/kvm/powerpc.c              |   4
-rw-r--r--  arch/s390/kvm/kvm-s390.c                |   4
-rw-r--r--  arch/x86/include/asm/kvm_host.h         |   5
-rw-r--r--  arch/x86/kvm/cpuid.c                    |   4
-rw-r--r--  arch/x86/kvm/emulate.c                  |  26
-rw-r--r--  arch/x86/kvm/lapic.c                    |  30
-rw-r--r--  arch/x86/kvm/pmu.c                      |  24
-rw-r--r--  arch/x86/kvm/svm.c                      |   7
-rw-r--r--  arch/x86/kvm/trace.h                    |  30
-rw-r--r--  arch/x86/kvm/vmx.c                      | 142
-rw-r--r--  arch/x86/kvm/x86.c                      |  33
14 files changed, 264 insertions(+), 59 deletions(-)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a99e0cdf8ba2..9f788ebac55b 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -288,6 +288,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
}
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
vcpu->cpu = cpu;
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index cd7114147ae7..2362df2a79f9 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1002,6 +1002,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
}
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr)
{
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 329d7fdd0a6a..b9615ba5b083 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -101,7 +101,7 @@ struct kvm_rma_info *kvm_alloc_rma()
ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
if (!ri)
return NULL;
- page = cma_alloc(kvm_cma, kvm_rma_pages, get_order(kvm_rma_pages));
+ page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages));
if (!page)
goto err_out;
atomic_set(&ri->use_count, 1);
@@ -135,12 +135,12 @@ struct page *kvm_alloc_hpt(unsigned long nr_pages)
{
unsigned long align_pages = HPT_ALIGN_PAGES;
- VM_BUG_ON(get_order(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+ VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
/* Old CPUs require HPT aligned on a multiple of its size */
if (!cpu_has_feature(CPU_FTR_ARCH_206))
align_pages = nr_pages;
- return cma_alloc(kvm_cma, nr_pages, get_order(align_pages));
+ return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages));
}
EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
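
The two cma_alloc() fixes above hinge on the difference between get_order() and order_base_2(): cma_alloc() takes its alignment argument as an order in pages, get_order() interprets its argument as a size in bytes, while order_base_2() is a plain ceil(log2(n)). A standalone sketch, not part of the patch, of the mismatch; PAGE_SHIFT and the page count are illustrative values:

#include <stdio.h>

#define PAGE_SHIFT 12                           /* assume 4 KiB pages for the example */

static int ceil_log2(unsigned long n)           /* behaves like order_base_2(n) for n >= 1 */
{
	int order = 0;

	while ((1UL << order) < n)
		order++;
	return order;
}

static int get_order_bytes(unsigned long size)  /* behaves like get_order(size) */
{
	return ceil_log2((size + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT);
}

int main(void)
{
	unsigned long pages = 256;              /* hypothetical page count, e.g. kvm_rma_pages */

	/* get_order(256) sees "256 bytes", i.e. less than one page: order 0, no alignment. */
	printf("get_order(%lu)    = %d\n", pages, get_order_bytes(pages));
	/* order_base_2(256) = 8: align the CMA allocation to 256 pages, as intended. */
	printf("order_base_2(%lu) = %d\n", pages, ceil_log2(pages));
	return 0;
}
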
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4c79284b58be..cbc432f4f0a6 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -720,6 +720,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
kvmppc_subarch_vcpu_uninit(vcpu);
}
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
#ifdef CONFIG_BOOKE
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 82065dc7948d..36209969bf98 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -557,6 +557,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
/* Nothing todo */
}
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 572460175ba5..ac0f90e26a0b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -95,7 +95,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 80
#define KVM_NR_FIXED_MTRR_REGION 88
-#define KVM_NR_VAR_MTRR 10
+#define KVM_NR_VAR_MTRR 8
#define ASYNC_PF_PER_VCPU 64
@@ -710,7 +710,6 @@ struct kvm_x86_ops {
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
- void (*fpu_activate)(struct kvm_vcpu *vcpu);
void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
void (*tlb_flush)(struct kvm_vcpu *vcpu);
@@ -772,6 +771,8 @@ struct kvm_x86_ops {
bool (*mpx_supported)(void);
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
+
+ void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
};
struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 38a0afe83c6b..f4bad87ef256 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -112,8 +112,8 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
break;
}
}
- if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
- entry->edx &= ~(1 << 20);
+ if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) {
+ entry->edx &= ~bit(X86_FEATURE_NX);
printk(KERN_INFO "kvm: guest NX capability removed\n");
}
}
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 56657b0bb3bb..e5bf13003cd2 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -527,6 +527,7 @@ static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
static int emulate_exception(struct x86_emulate_ctxt *ctxt, int vec,
u32 error, bool valid)
{
+ WARN_ON(vec > 0x1f);
ctxt->exception.vector = vec;
ctxt->exception.error_code = error;
ctxt->exception.error_code_valid = valid;
@@ -1468,7 +1469,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
return ret;
err_code = selector & 0xfffc;
- err_vec = GP_VECTOR;
+ err_vec = in_task_switch ? TS_VECTOR : GP_VECTOR;
/* can't load system descriptor into segment selector */
if (seg <= VCPU_SREG_GS && !seg_desc.s)
@@ -1491,9 +1492,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
goto exception;
break;
case VCPU_SREG_CS:
- if (in_task_switch && rpl != dpl)
- goto exception;
-
if (!(seg_desc.type & 8))
goto exception;
@@ -1552,8 +1550,7 @@ load:
ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
return X86EMUL_CONTINUE;
exception:
- emulate_exception(ctxt, err_vec, err_code, true);
- return X86EMUL_PROPAGATE_FAULT;
+ return emulate_exception(ctxt, err_vec, err_code, true);
}
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
@@ -2726,8 +2723,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
if (!next_tss_desc.p ||
((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
desc_limit < 0x2b)) {
- emulate_ts(ctxt, tss_selector & 0xfffc);
- return X86EMUL_PROPAGATE_FAULT;
+ return emulate_ts(ctxt, tss_selector & 0xfffc);
}
if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
@@ -3019,7 +3015,7 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt)
ctxt->dst.val = swab64(ctxt->src.val);
break;
default:
- return X86EMUL_PROPAGATE_FAULT;
+ BUG();
}
return X86EMUL_CONTINUE;
}
@@ -4394,8 +4390,11 @@ done_prefixes:
ctxt->execute = opcode.u.execute;
+ if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
+ return EMULATION_FAILED;
+
if (unlikely(ctxt->d &
- (NotImpl|EmulateOnUD|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
+ (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
/*
* These are copied unconditionally here, and checked unconditionally
* in x86_emulate_insn.
@@ -4406,9 +4405,6 @@ done_prefixes:
if (ctxt->d & NotImpl)
return EMULATION_FAILED;
- if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
- return EMULATION_FAILED;
-
if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
ctxt->op_bytes = 8;
@@ -4832,8 +4828,10 @@ writeback:
ctxt->eip = ctxt->_eip;
done:
- if (rc == X86EMUL_PROPAGATE_FAULT)
+ if (rc == X86EMUL_PROPAGATE_FAULT) {
+ WARN_ON(ctxt->exception.vector > 0x1f);
ctxt->have_exception = true;
+ }
if (rc == X86EMUL_INTERCEPTED)
return EMULATION_INTERCEPTED;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 08e8a899e005..fb919c574e23 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -112,17 +112,6 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
struct static_key_deferred apic_hw_disabled __read_mostly;
struct static_key_deferred apic_sw_disabled __read_mostly;
-static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
-{
- if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
- if (val & APIC_SPIV_APIC_ENABLED)
- static_key_slow_dec_deferred(&apic_sw_disabled);
- else
- static_key_slow_inc(&apic_sw_disabled.key);
- }
- apic_set_reg(apic, APIC_SPIV, val);
-}
-
static inline int apic_enabled(struct kvm_lapic *apic)
{
return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
@@ -210,6 +199,20 @@ out:
kvm_vcpu_request_scan_ioapic(kvm);
}
+static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
+{
+ u32 prev = kvm_apic_get_reg(apic, APIC_SPIV);
+
+ apic_set_reg(apic, APIC_SPIV, val);
+ if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) {
+ if (val & APIC_SPIV_APIC_ENABLED) {
+ static_key_slow_dec_deferred(&apic_sw_disabled);
+ recalculate_apic_map(apic->vcpu->kvm);
+ } else
+ static_key_slow_inc(&apic_sw_disabled.key);
+ }
+}
+
static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
{
apic_set_reg(apic, APIC_ID, id << 24);
@@ -1352,6 +1355,9 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
return;
hrtimer_cancel(&apic->lapic_timer.timer);
+ /* Inject here so clearing tscdeadline won't override new value */
+ if (apic_has_pending_timer(vcpu))
+ kvm_inject_apic_timer_irqs(vcpu);
apic->lapic_timer.tscdeadline = data;
start_apic_timer(apic);
}
@@ -1639,6 +1645,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
if (atomic_read(&apic->lapic_timer.pending) > 0) {
kvm_apic_local_deliver(apic, APIC_LVTT);
+ if (apic_lvtt_tscdeadline(apic))
+ apic->lapic_timer.tscdeadline = 0;
atomic_set(&apic->lapic_timer.pending, 0);
}
}
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 3dd6accb64ec..8e6b7d869d2f 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -15,6 +15,7 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
+#include <asm/perf_event.h>
#include "x86.h"
#include "cpuid.h"
#include "lapic.h"
@@ -463,7 +464,8 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_cpuid_entry2 *entry;
- unsigned bitmap_len;
+ union cpuid10_eax eax;
+ union cpuid10_edx edx;
pmu->nr_arch_gp_counters = 0;
pmu->nr_arch_fixed_counters = 0;
@@ -475,25 +477,27 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
if (!entry)
return;
+ eax.full = entry->eax;
+ edx.full = entry->edx;
- pmu->version = entry->eax & 0xff;
+ pmu->version = eax.split.version_id;
if (!pmu->version)
return;
- pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff,
- INTEL_PMC_MAX_GENERIC);
- pmu->counter_bitmask[KVM_PMC_GP] =
- ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1;
- bitmap_len = (entry->eax >> 24) & 0xff;
- pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1);
+ pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
+ INTEL_PMC_MAX_GENERIC);
+ pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
+ pmu->available_event_types = ~entry->ebx &
+ ((1ull << eax.split.mask_length) - 1);
if (pmu->version == 1) {
pmu->nr_arch_fixed_counters = 0;
} else {
- pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
+ pmu->nr_arch_fixed_counters =
+ min_t(int, edx.split.num_counters_fixed,
INTEL_PMC_MAX_FIXED);
pmu->counter_bitmask[KVM_PMC_FIXED] =
- ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
+ ((u64)1 << edx.split.bit_width_fixed) - 1;
}
pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
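
The pmu.c change above replaces open-coded shifts with the cpuid10_eax/cpuid10_edx unions from <asm/perf_event.h>. A standalone sketch, not part of the patch, of how the removed shift-and-mask expressions map onto the CPUID leaf 0xA fields those unions name; the struct, helper name, and sample raw values are invented for illustration:

#include <stdio.h>

struct pmu_caps {
	unsigned int version;             /* EAX[7:0]   -> eax.split.version_id */
	unsigned int num_gp_counters;     /* EAX[15:8]  -> eax.split.num_counters */
	unsigned int gp_bit_width;        /* EAX[23:16] -> eax.split.bit_width */
	unsigned int event_mask_length;   /* EAX[31:24] -> eax.split.mask_length */
	unsigned int num_fixed_counters;  /* EDX[4:0]   -> edx.split.num_counters_fixed */
	unsigned int fixed_bit_width;     /* EDX[12:5]  -> edx.split.bit_width_fixed */
};

static struct pmu_caps decode_cpuid_leaf_0xa(unsigned int eax, unsigned int edx)
{
	struct pmu_caps caps = {
		.version            = eax & 0xff,
		.num_gp_counters    = (eax >> 8) & 0xff,
		.gp_bit_width       = (eax >> 16) & 0xff,
		.event_mask_length  = (eax >> 24) & 0xff,
		.num_fixed_counters = edx & 0x1f,
		.fixed_bit_width    = (edx >> 5) & 0xff,
	};
	return caps;
}

int main(void)
{
	/* Made-up raw values: version 2, 4 GP counters of 48 bits,
	 * 7 event-mask bits, 3 fixed counters of 48 bits. */
	struct pmu_caps caps = decode_cpuid_leaf_0xa(0x07300402, 0x00000603);

	printf("version %u, %u GP counters x %u bits, %u fixed counters x %u bits\n",
	       caps.version, caps.num_gp_counters, caps.gp_bit_width,
	       caps.num_fixed_counters, caps.fixed_bit_width);
	return 0;
}
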
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ddf742768ecf..1703aab84a6d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4305,6 +4305,10 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
local_irq_enable();
}
+static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -4349,7 +4353,6 @@ static struct kvm_x86_ops svm_x86_ops = {
.cache_reg = svm_cache_reg,
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
- .fpu_activate = svm_fpu_activate,
.fpu_deactivate = svm_fpu_deactivate,
.tlb_flush = svm_flush_tlb,
@@ -4406,6 +4409,8 @@ static struct kvm_x86_ops svm_x86_ops = {
.check_intercept = svm_check_intercept,
.handle_external_intr = svm_handle_external_intr,
+
+ .sched_in = svm_sched_in,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index e850a7d332be..4c2868f36808 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -850,6 +850,36 @@ TRACE_EVENT(kvm_track_tsc,
#endif /* CONFIG_X86_64 */
+TRACE_EVENT(kvm_ple_window,
+ TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
+ TP_ARGS(grow, vcpu_id, new, old),
+
+ TP_STRUCT__entry(
+ __field( bool, grow )
+ __field( unsigned int, vcpu_id )
+ __field( int, new )
+ __field( int, old )
+ ),
+
+ TP_fast_assign(
+ __entry->grow = grow;
+ __entry->vcpu_id = vcpu_id;
+ __entry->new = new;
+ __entry->old = old;
+ ),
+
+ TP_printk("vcpu %u: ple_window %d (%s %d)",
+ __entry->vcpu_id,
+ __entry->new,
+ __entry->grow ? "grow" : "shrink",
+ __entry->old)
+);
+
+#define trace_kvm_ple_window_grow(vcpu_id, new, old) \
+ trace_kvm_ple_window(true, vcpu_id, new, old)
+#define trace_kvm_ple_window_shrink(vcpu_id, new, old) \
+ trace_kvm_ple_window(false, vcpu_id, new, old)
+
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bfe11cf124a1..661abc2f7049 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -125,14 +125,32 @@ module_param(nested, bool, S_IRUGO);
* Time is measured based on a counter that runs at the same rate as the TSC,
* refer SDM volume 3b section 21.6.13 & 22.1.3.
*/
-#define KVM_VMX_DEFAULT_PLE_GAP 128
-#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
+#define KVM_VMX_DEFAULT_PLE_GAP 128
+#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
+#define KVM_VMX_DEFAULT_PLE_WINDOW_GROW 2
+#define KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK 0
+#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX \
+ INT_MAX / KVM_VMX_DEFAULT_PLE_WINDOW_GROW
+
static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
module_param(ple_gap, int, S_IRUGO);
static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
module_param(ple_window, int, S_IRUGO);
+/* Default doubles per-vcpu window every exit. */
+static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
+module_param(ple_window_grow, int, S_IRUGO);
+
+/* Default resets per-vcpu window every exit to ple_window. */
+static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
+module_param(ple_window_shrink, int, S_IRUGO);
+
+/* Default is to compute the maximum so we can never overflow. */
+static int ple_window_actual_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+static int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
+module_param(ple_window_max, int, S_IRUGO);
+
extern const ulong vmx_return;
#define NR_AUTOLOAD_MSRS 8
@@ -484,6 +502,10 @@ struct vcpu_vmx {
/* Support for a guest hypervisor (nested VMX) */
struct nested_vmx nested;
+
+ /* Dynamic PLE window. */
+ int ple_window;
+ bool ple_window_dirty;
};
enum segment_cache_field {
@@ -4402,7 +4424,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
if (ple_gap) {
vmcs_write32(PLE_GAP, ple_gap);
- vmcs_write32(PLE_WINDOW, ple_window);
+ vmx->ple_window = ple_window;
+ vmx->ple_window_dirty = true;
}
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
@@ -5521,17 +5544,18 @@ static u64 ept_rsvd_mask(u64 spte, int level)
for (i = 51; i > boot_cpu_data.x86_phys_bits; i--)
mask |= (1ULL << i);
- if (level > 2)
+ if (level == 4)
/* bits 7:3 reserved */
mask |= 0xf8;
- else if (level == 2) {
- if (spte & (1ULL << 7))
- /* 2MB ref, bits 20:12 reserved */
- mask |= 0x1ff000;
- else
- /* bits 6:3 reserved */
- mask |= 0x78;
- }
+ else if (spte & (1ULL << 7))
+ /*
+ * 1GB/2MB page, bits 29:12 or 20:12 reserved respectively,
+ * level == 1 if the hypervisor is using the ignored bit 7.
+ */
+ mask |= (PAGE_SIZE << ((level - 1) * 9)) - PAGE_SIZE;
+ else if (level > 1)
+ /* bits 6:3 reserved */
+ mask |= 0x78;
return mask;
}
@@ -5561,7 +5585,8 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
WARN_ON(1);
}
- if (level == 1 || (level == 2 && (spte & (1ULL << 7)))) {
+ /* bits 5:3 are _not_ reserved for large page or leaf page */
+ if ((rsvd_bits & 0x38) == 0) {
u64 ept_mem_type = (spte & 0x38) >> 3;
if (ept_mem_type == 2 || ept_mem_type == 3 ||
@@ -5676,12 +5701,85 @@ out:
return ret;
}
+static int __grow_ple_window(int val)
+{
+ if (ple_window_grow < 1)
+ return ple_window;
+
+ val = min(val, ple_window_actual_max);
+
+ if (ple_window_grow < ple_window)
+ val *= ple_window_grow;
+ else
+ val += ple_window_grow;
+
+ return val;
+}
+
+static int __shrink_ple_window(int val, int modifier, int minimum)
+{
+ if (modifier < 1)
+ return ple_window;
+
+ if (modifier < ple_window)
+ val /= modifier;
+ else
+ val -= modifier;
+
+ return max(val, minimum);
+}
+
+static void grow_ple_window(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int old = vmx->ple_window;
+
+ vmx->ple_window = __grow_ple_window(old);
+
+ if (vmx->ple_window != old)
+ vmx->ple_window_dirty = true;
+
+ trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
+}
+
+static void shrink_ple_window(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int old = vmx->ple_window;
+
+ vmx->ple_window = __shrink_ple_window(old,
+ ple_window_shrink, ple_window);
+
+ if (vmx->ple_window != old)
+ vmx->ple_window_dirty = true;
+
+ trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
+}
+
+/*
+ * ple_window_actual_max is computed to be one grow_ple_window() below
+ * ple_window_max. (See __grow_ple_window for the reason.)
+ * This prevents overflows, because ple_window_max is int.
+ * ple_window_max effectively rounded down to a multiple of ple_window_grow in
+ * this process.
+ * ple_window_max is also prevented from setting vmx->ple_window < ple_window.
+ */
+static void update_ple_window_actual_max(void)
+{
+ ple_window_actual_max =
+ __shrink_ple_window(max(ple_window_max, ple_window),
+ ple_window_grow, INT_MIN);
+}
+
/*
* Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
* exiting, so only get here on cpu with PAUSE-Loop-Exiting.
*/
static int handle_pause(struct kvm_vcpu *vcpu)
{
+ if (ple_gap)
+ grow_ple_window(vcpu);
+
skip_emulated_instruction(vcpu);
kvm_vcpu_on_spin(vcpu);
@@ -7387,6 +7485,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (vmx->emulation_required)
return;
+ if (vmx->ple_window_dirty) {
+ vmx->ple_window_dirty = false;
+ vmcs_write32(PLE_WINDOW, vmx->ple_window);
+ }
+
if (vmx->nested.sync_shadow_vmcs) {
copy_vmcs12_to_shadow(vmx);
vmx->nested.sync_shadow_vmcs = false;
@@ -8846,6 +8949,12 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
return X86EMUL_CONTINUE;
}
+void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+ if (ple_gap)
+ shrink_ple_window(vcpu);
+}
+
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -8890,7 +8999,6 @@ static struct kvm_x86_ops vmx_x86_ops = {
.cache_reg = vmx_cache_reg,
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
- .fpu_activate = vmx_fpu_activate,
.fpu_deactivate = vmx_fpu_deactivate,
.tlb_flush = vmx_flush_tlb,
@@ -8951,6 +9059,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.mpx_supported = vmx_mpx_supported,
.check_nested_events = vmx_check_nested_events,
+
+ .sched_in = vmx_sched_in,
};
static int __init vmx_init(void)
@@ -9065,6 +9175,8 @@ static int __init vmx_init(void)
} else
kvm_disable_tdp();
+ update_ple_window_actual_max();
+
return 0;
out7:
@@ -9098,7 +9210,7 @@ static void __exit vmx_exit(void)
free_page((unsigned long)vmx_vmread_bitmap);
#ifdef CONFIG_KEXEC
- rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);
+ RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
synchronize_rcu();
#endif
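
The dynamic PLE window added to vmx.c grows the window on every PAUSE exit and shrinks it (by default, resets it to ple_window) when the vcpu is scheduled back in. A standalone sketch, not part of the patch, of that arithmetic using the default module-parameter values; the helper names and printf output are simplified stand-ins for __grow_ple_window()/__shrink_ple_window():

#include <limits.h>
#include <stdio.h>

/* Default module-parameter values from the patch. */
static int ple_window = 4096;                 /* KVM_VMX_DEFAULT_PLE_WINDOW */
static int ple_window_grow = 2;               /* KVM_VMX_DEFAULT_PLE_WINDOW_GROW */
static int ple_window_shrink = 0;             /* KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK */
static int ple_window_actual_max = INT_MAX / 2;

static int grow_window(int val)
{
	if (ple_window_grow < 1)
		return ple_window;

	if (val > ple_window_actual_max)      /* clamp first so the grow cannot overflow */
		val = ple_window_actual_max;

	/* A modifier below ple_window scales the window; a larger one is added to it. */
	return ple_window_grow < ple_window ? val * ple_window_grow
					    : val + ple_window_grow;
}

static int shrink_window(int val, int modifier, int minimum)
{
	if (modifier < 1)                     /* shrink == 0: reset to ple_window */
		return ple_window;

	val = modifier < ple_window ? val / modifier : val - modifier;
	return val > minimum ? val : minimum;
}

int main(void)
{
	int window = ple_window;

	window = grow_window(window);         /* PAUSE exit: 4096 -> 8192 */
	printf("after grow:   %d\n", window);

	window = shrink_window(window, ple_window_shrink, ple_window);
	printf("after shrink: %d\n", window); /* default shrink resets to 4096 */
	return 0;
}
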
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8f1e22d3b286..c10408ef9ab1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1726,6 +1726,7 @@ static bool valid_mtrr_type(unsigned t)
static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
int i;
+ u64 mask = 0;
if (!msr_mtrr_valid(msr))
return false;
@@ -1747,7 +1748,24 @@ static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
}
/* variable MTRRs */
- return valid_mtrr_type(data & 0xff);
+ WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));
+
+ for (i = 63; i > boot_cpu_data.x86_phys_bits; i--)
+ mask |= (1ULL << i);
+ if ((msr & 1) == 0) {
+ /* MTRR base */
+ if (!valid_mtrr_type(data & 0xff))
+ return false;
+ mask |= 0xf00;
+ } else
+ /* MTRR mask */
+ mask |= 0x7ff;
+ if (data & mask) {
+ kvm_inject_gp(vcpu, 0);
+ return false;
+ }
+
+ return true;
}
static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
@@ -2419,7 +2437,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_K7_HWCR:
case MSR_VM_HSAVE_PA:
case MSR_K7_EVNTSEL0:
+ case MSR_K7_EVNTSEL1:
+ case MSR_K7_EVNTSEL2:
+ case MSR_K7_EVNTSEL3:
case MSR_K7_PERFCTR0:
+ case MSR_K7_PERFCTR1:
+ case MSR_K7_PERFCTR2:
+ case MSR_K7_PERFCTR3:
case MSR_K8_INT_PENDING_MSG:
case MSR_AMD64_NB_CFG:
case MSR_FAM10H_MMIO_CONF_BASE:
@@ -5224,6 +5248,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
ctxt->interruptibility = 0;
ctxt->have_exception = false;
+ ctxt->exception.vector = -1;
ctxt->perm_ok = false;
ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
@@ -7146,6 +7171,11 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
static_key_slow_dec(&kvm_no_apic_vcpu);
}
+void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
+{
+ kvm_x86_ops->sched_in(vcpu, cpu);
+}
+
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
if (type)
@@ -7643,3 +7673,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
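
The mtrr_valid() change in x86.c tightens variable-MTRR writes by rejecting any set reserved bit instead of only checking the memory type. A standalone sketch, not part of the patch, of the same mask construction; the function names and the phys_bits parameter are illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool valid_mtrr_mem_type(uint8_t type)
{
	/* UC(0), WC(1), WT(4), WP(5), WB(6) are the valid memory types. */
	return type < 8 && ((1u << type) & 0x73);
}

static bool var_mtrr_write_valid(uint32_t msr, uint64_t data, int phys_bits)
{
	uint64_t mask = 0;
	int i;

	for (i = 63; i > phys_bits; i--)        /* bits above the physical address width */
		mask |= 1ULL << i;

	if ((msr & 1) == 0) {                   /* even MSR: MTRRphysBaseN */
		if (!valid_mtrr_mem_type(data & 0xff))
			return false;
		mask |= 0xf00;                  /* bits 11:8 reserved */
	} else {                                /* odd MSR: MTRRphysMaskN */
		mask |= 0x7ff;                  /* bits 10:0 below the valid bit reserved */
	}

	return (data & mask) == 0;              /* any reserved bit set -> reject (#GP) */
}

int main(void)
{
	/* A write-back base at 1 GiB passes; the same value with reserved
	 * bit 9 set is rejected (phys_bits of 36 is an illustrative width). */
	printf("%d\n", var_mtrr_write_valid(0x200, 0x40000006, 36));
	printf("%d\n", var_mtrr_write_valid(0x200, 0x40000206, 36));
	return 0;
}
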