summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm/arm.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm/arm.c')
-rw-r--r--arch/arm64/kvm/arm.c218
1 files changed, 136 insertions, 82 deletions
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index b8e55a441282..38a91bb5d4c7 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -125,6 +125,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
}
mutex_unlock(&kvm->slots_lock);
break;
+ case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS:
+ mutex_lock(&kvm->lock);
+ if (!kvm->created_vcpus) {
+ r = 0;
+ set_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &kvm->arch.flags);
+ }
+ mutex_unlock(&kvm->lock);
+ break;
default:
break;
}
@@ -313,6 +321,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ARM_SYSTEM_SUSPEND:
case KVM_CAP_IRQFD_RESAMPLE:
case KVM_CAP_COUNTER_OFFSET:
+ case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@@ -359,6 +368,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ARM_EL1_32BIT:
r = cpus_have_final_cap(ARM64_HAS_32BIT_EL1);
break;
+ case KVM_CAP_ARM_EL2:
+ r = cpus_have_final_cap(ARM64_HAS_NESTED_VIRT);
+ break;
+ case KVM_CAP_ARM_EL2_E2H0:
+ r = cpus_have_final_cap(ARM64_HAS_HCR_NV1);
+ break;
case KVM_CAP_GUEST_DEBUG_HW_BPS:
r = get_num_brps();
break;
@@ -366,7 +381,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = get_num_wrps();
break;
case KVM_CAP_ARM_PMU_V3:
- r = kvm_arm_support_pmu_v3();
+ r = kvm_supports_guest_pmuv3();
break;
case KVM_CAP_ARM_INJECT_SERROR_ESR:
r = cpus_have_final_cap(ARM64_HAS_RAS_EXTN);
@@ -466,7 +481,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (err)
return err;
- return kvm_share_hyp(vcpu, vcpu + 1);
+ err = kvm_share_hyp(vcpu, vcpu + 1);
+ if (err)
+ kvm_vgic_vcpu_destroy(vcpu);
+
+ return err;
}
void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -560,6 +579,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
/*
+ * Ensure a VMID is allocated for the MMU before programming VTTBR_EL2,
+ * which happens eagerly in VHE.
+ *
+ * Also, the VMID allocator only preserves VMIDs that are active at the
+ * time of rollover, so KVM might need to grab a new VMID for the MMU if
+ * this is called from kvm_sched_in().
+ */
+ kvm_arm_vmid_update(&mmu->vmid);
+
+ /*
* We guarantee that both TLBs and I-cache are private to each
* vcpu. If detecting that a vcpu from the same VM has
* previously run on the same physical CPU, call into the
@@ -576,8 +605,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
nommu:
vcpu->cpu = cpu;
- kvm_vgic_load(vcpu);
+ /*
+ * The timer must be loaded before the vgic to correctly set up physical
+ * interrupt deactivation in nested state (e.g. timer interrupt).
+ */
kvm_timer_vcpu_load(vcpu);
+ kvm_vgic_load(vcpu);
kvm_vcpu_load_debug(vcpu);
if (has_vhe())
kvm_vcpu_load_vhe(vcpu);
@@ -815,6 +848,16 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
if (ret)
return ret;
+ if (vcpu_has_nv(vcpu)) {
+ ret = kvm_vcpu_allocate_vncr_tlb(vcpu);
+ if (ret)
+ return ret;
+
+ ret = kvm_vgic_vcpu_nv_init(vcpu);
+ if (ret)
+ return ret;
+ }
+
/*
* This needs to happen after any restriction has been applied
* to the feature set.
@@ -825,14 +868,20 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
if (ret)
return ret;
- ret = kvm_arm_pmu_v3_enable(vcpu);
- if (ret)
- return ret;
+ if (kvm_vcpu_has_pmu(vcpu)) {
+ ret = kvm_arm_pmu_v3_enable(vcpu);
+ if (ret)
+ return ret;
+ }
if (is_protected_kvm_enabled()) {
ret = pkvm_create_hyp_vm(kvm);
if (ret)
return ret;
+
+ ret = pkvm_create_hyp_vcpu(vcpu);
+ if (ret)
+ return ret;
}
mutex_lock(&kvm->arch.config_lock);
@@ -1138,19 +1187,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
preempt_disable();
- /*
- * The VMID allocator only tracks active VMIDs per
- * physical CPU, and therefore the VMID allocated may not be
- * preserved on VMID roll-over if the task was preempted,
- * making a thread's VMID inactive. So we need to call
- * kvm_arm_vmid_update() in non-premptible context.
- */
- if (kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid) &&
- has_vhe())
- __load_stage2(vcpu->arch.hw_mmu,
- vcpu->arch.hw_mmu->arch);
-
- kvm_pmu_flush_hwstate(vcpu);
+ if (kvm_vcpu_has_pmu(vcpu))
+ kvm_pmu_flush_hwstate(vcpu);
local_irq_disable();
@@ -1169,7 +1207,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (ret <= 0 || kvm_vcpu_exit_request(vcpu, &ret)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
isb(); /* Ensure work in x_flush_hwstate is committed */
- kvm_pmu_sync_hwstate(vcpu);
+ if (kvm_vcpu_has_pmu(vcpu))
+ kvm_pmu_sync_hwstate(vcpu);
if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
kvm_timer_sync_user(vcpu);
kvm_vgic_sync_hwstate(vcpu);
@@ -1199,7 +1238,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
* that the vgic can properly sample the updated state of the
* interrupt line.
*/
- kvm_pmu_sync_hwstate(vcpu);
+ if (kvm_vcpu_has_pmu(vcpu))
+ kvm_pmu_sync_hwstate(vcpu);
/*
* Sync the vgic state before syncing the timer state because
@@ -1388,7 +1428,7 @@ static unsigned long system_supported_vcpu_features(void)
if (!cpus_have_final_cap(ARM64_HAS_32BIT_EL1))
clear_bit(KVM_ARM_VCPU_EL1_32BIT, &features);
- if (!kvm_arm_support_pmu_v3())
+ if (!kvm_supports_guest_pmuv3())
clear_bit(KVM_ARM_VCPU_PMU_V3, &features);
if (!system_supports_sve())
@@ -1884,49 +1924,6 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
}
}
-/* unlocks vcpus from @vcpu_lock_idx and smaller */
-static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
-{
- struct kvm_vcpu *tmp_vcpu;
-
- for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
- tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
- mutex_unlock(&tmp_vcpu->mutex);
- }
-}
-
-void unlock_all_vcpus(struct kvm *kvm)
-{
- lockdep_assert_held(&kvm->lock);
-
- unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
-}
-
-/* Returns true if all vcpus were locked, false otherwise */
-bool lock_all_vcpus(struct kvm *kvm)
-{
- struct kvm_vcpu *tmp_vcpu;
- unsigned long c;
-
- lockdep_assert_held(&kvm->lock);
-
- /*
- * Any time a vcpu is in an ioctl (including running), the
- * core KVM code tries to grab the vcpu->mutex.
- *
- * By grabbing the vcpu->mutex of all VCPUs we ensure that no
- * other VCPUs can fiddle with the state while we access it.
- */
- kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
- if (!mutex_trylock(&tmp_vcpu->mutex)) {
- unlock_vcpus(kvm, c - 1);
- return false;
- }
- }
-
- return true;
-}
-
static unsigned long nvhe_percpu_size(void)
{
return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) -
@@ -1980,7 +1977,7 @@ static int kvm_init_vector_slots(void)
static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
{
struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
- unsigned long tcr, ips;
+ unsigned long tcr;
/*
* Calculate the raw per-cpu offset without a translation from the
@@ -1994,19 +1991,18 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
params->mair_el2 = read_sysreg(mair_el1);
tcr = read_sysreg(tcr_el1);
- ips = FIELD_GET(TCR_IPS_MASK, tcr);
if (cpus_have_final_cap(ARM64_KVM_HVHE)) {
+ tcr &= ~(TCR_HD | TCR_HA | TCR_A1 | TCR_T0SZ_MASK);
tcr |= TCR_EPD1_MASK;
} else {
+ unsigned long ips = FIELD_GET(TCR_IPS_MASK, tcr);
+
tcr &= TCR_EL2_MASK;
- tcr |= TCR_EL2_RES1;
+ tcr |= TCR_EL2_RES1 | FIELD_PREP(TCR_EL2_PS_MASK, ips);
+ if (lpa2_is_enabled())
+ tcr |= TCR_EL2_DS;
}
- tcr &= ~TCR_T0SZ_MASK;
tcr |= TCR_T0SZ(hyp_va_bits);
- tcr &= ~TCR_EL2_PS_MASK;
- tcr |= FIELD_PREP(TCR_EL2_PS_MASK, ips);
- if (lpa2_is_enabled())
- tcr |= TCR_EL2_DS;
params->tcr_el2 = tcr;
params->pgd_pa = kvm_mmu_get_httbr();
@@ -2310,6 +2306,13 @@ static int __init init_subsystems(void)
goto out;
}
+ if (kvm_mode == KVM_MODE_NV &&
+ !(vgic_present && kvm_vgic_global_state.type == VGIC_V3)) {
+ kvm_err("NV support requires GICv3, giving up\n");
+ err = -EINVAL;
+ goto out;
+ }
+
/*
* Init HYP architected timer support
*/
@@ -2414,6 +2417,19 @@ static void kvm_hyp_init_symbols(void)
kvm_nvhe_sym(__icache_flags) = __icache_flags;
kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
+ /* Propagate the FGT state to the the nVHE side */
+ kvm_nvhe_sym(hfgrtr_masks) = hfgrtr_masks;
+ kvm_nvhe_sym(hfgwtr_masks) = hfgwtr_masks;
+ kvm_nvhe_sym(hfgitr_masks) = hfgitr_masks;
+ kvm_nvhe_sym(hdfgrtr_masks) = hdfgrtr_masks;
+ kvm_nvhe_sym(hdfgwtr_masks) = hdfgwtr_masks;
+ kvm_nvhe_sym(hafgrtr_masks) = hafgrtr_masks;
+ kvm_nvhe_sym(hfgrtr2_masks) = hfgrtr2_masks;
+ kvm_nvhe_sym(hfgwtr2_masks) = hfgwtr2_masks;
+ kvm_nvhe_sym(hfgitr2_masks) = hfgitr2_masks;
+ kvm_nvhe_sym(hdfgrtr2_masks)= hdfgrtr2_masks;
+ kvm_nvhe_sym(hdfgwtr2_masks)= hdfgwtr2_masks;
+
/*
* Flush entire BSS since part of its data containing init symbols is read
* while the MMU is off.
@@ -2568,6 +2584,13 @@ static int __init init_hyp_mode(void)
goto out_err;
}
+ err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_start),
+ kvm_ksym_ref(__hyp_data_end), PAGE_HYP);
+ if (err) {
+ kvm_err("Cannot map .hyp.data section\n");
+ goto out_err;
+ }
+
err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start),
kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO);
if (err) {
@@ -2707,28 +2730,58 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
return irqchip_in_kernel(kvm);
}
-bool kvm_arch_has_irq_bypass(void)
-{
- return true;
-}
-
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
struct irq_bypass_producer *prod)
{
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);
+ struct kvm_kernel_irq_routing_entry *irq_entry = &irqfd->irq_entry;
+
+ /*
+ * The only thing we have a chance of directly-injecting is LPIs. Maybe
+ * one day...
+ */
+ if (irq_entry->type != KVM_IRQ_ROUTING_MSI)
+ return 0;
return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
&irqfd->irq_entry);
}
+
void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
struct irq_bypass_producer *prod)
{
struct kvm_kernel_irqfd *irqfd =
container_of(cons, struct kvm_kernel_irqfd, consumer);
+ struct kvm_kernel_irq_routing_entry *irq_entry = &irqfd->irq_entry;
+
+ if (irq_entry->type != KVM_IRQ_ROUTING_MSI)
+ return;
- kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
- &irqfd->irq_entry);
+ kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq);
+}
+
+bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
+ struct kvm_kernel_irq_routing_entry *new)
+{
+ if (old->type != KVM_IRQ_ROUTING_MSI ||
+ new->type != KVM_IRQ_ROUTING_MSI)
+ return true;
+
+ return memcmp(&old->msi, &new->msi, sizeof(new->msi));
+}
+
+int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
+ uint32_t guest_irq, bool set)
+{
+ /*
+ * Remapping the vLPI requires taking the its_lock mutex to resolve
+ * the new translation. We're in spinlock land at this point, so no
+ * chance of resolving the translation.
+ *
+ * Unmap the vLPI and fall back to software LPI injection.
+ */
+ return kvm_vgic_v4_unset_forwarding(kvm, host_irq);
}
void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
@@ -2806,11 +2859,12 @@ static __init int kvm_arm_init(void)
if (err)
goto out_hyp;
- kvm_info("%s%sVHE mode initialized successfully\n",
+ kvm_info("%s%sVHE%s mode initialized successfully\n",
in_hyp_mode ? "" : (is_protected_kvm_enabled() ?
"Protected " : "Hyp "),
in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ?
- "h" : "n"));
+ "h" : "n"),
+ cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) ? "+NV2": "");
/*
* FIXME: Do something reasonable if kvm_init() fails after pKVM