Diffstat (limited to 'arch/arm64/kvm/arm.c')
-rw-r--r-- | arch/arm64/kvm/arm.c | 191
1 file changed, 124 insertions, 67 deletions
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 1a479df5d78e..23dd3f3fc3eb 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -125,6 +125,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		}
 		mutex_unlock(&kvm->slots_lock);
 		break;
+	case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS:
+		mutex_lock(&kvm->lock);
+		if (!kvm->created_vcpus) {
+			r = 0;
+			set_bit(KVM_ARCH_FLAG_WRITABLE_IMP_ID_REGS, &kvm->arch.flags);
+		}
+		mutex_unlock(&kvm->lock);
+		break;
 	default:
 		break;
 	}
@@ -313,6 +321,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ARM_SYSTEM_SUSPEND:
 	case KVM_CAP_IRQFD_RESAMPLE:
 	case KVM_CAP_COUNTER_OFFSET:
+	case KVM_CAP_ARM_WRITABLE_IMP_ID_REGS:
 		r = 1;
 		break;
 	case KVM_CAP_SET_GUEST_DEBUG2:
@@ -359,6 +368,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ARM_EL1_32BIT:
 		r = cpus_have_final_cap(ARM64_HAS_32BIT_EL1);
 		break;
+	case KVM_CAP_ARM_EL2:
+		r = cpus_have_final_cap(ARM64_HAS_NESTED_VIRT);
+		break;
+	case KVM_CAP_ARM_EL2_E2H0:
+		r = cpus_have_final_cap(ARM64_HAS_HCR_NV1);
+		break;
 	case KVM_CAP_GUEST_DEBUG_HW_BPS:
 		r = get_num_brps();
 		break;
@@ -366,7 +381,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = get_num_wrps();
 		break;
 	case KVM_CAP_ARM_PMU_V3:
-		r = kvm_arm_support_pmu_v3();
+		r = kvm_supports_guest_pmuv3();
 		break;
 	case KVM_CAP_ARM_INJECT_SERROR_ESR:
 		r = cpus_have_final_cap(ARM64_HAS_RAS_EXTN);
@@ -590,8 +605,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 nommu:
 	vcpu->cpu = cpu;
 
-	kvm_vgic_load(vcpu);
+	/*
+	 * The timer must be loaded before the vgic to correctly set up physical
+	 * interrupt deactivation in nested state (e.g. timer interrupt).
+	 */
 	kvm_timer_vcpu_load(vcpu);
+	kvm_vgic_load(vcpu);
 	kvm_vcpu_load_debug(vcpu);
 	if (has_vhe())
 		kvm_vcpu_load_vhe(vcpu);
@@ -806,10 +825,6 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 	if (!kvm_arm_vcpu_is_finalized(vcpu))
 		return -EPERM;
 
-	ret = kvm_arch_vcpu_run_map_fp(vcpu);
-	if (ret)
-		return ret;
-
 	if (likely(vcpu_has_run_once(vcpu)))
 		return 0;
 
@@ -829,6 +844,16 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 	if (ret)
 		return ret;
 
+	if (vcpu_has_nv(vcpu)) {
+		ret = kvm_vcpu_allocate_vncr_tlb(vcpu);
+		if (ret)
+			return ret;
+
+		ret = kvm_vgic_vcpu_nv_init(vcpu);
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * This needs to happen after any restriction has been applied
 	 * to the feature set.
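A minimal userspace sketch of how a VMM could use the KVM_CAP_ARM_WRITABLE_IMP_ID_REGS hunks above; this is not part of the patch, and it assumes a uapi linux/kvm.h recent enough to define the capability constant. The cap has to be enabled before the first KVM_CREATE_VCPU, matching the !kvm->created_vcpus check in the enable-cap handler:

/* Hypothetical VMM snippet (not from this patch); error handling kept minimal. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
        int kvm_fd = open("/dev/kvm", O_RDWR);
        int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);

        if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_WRITABLE_IMP_ID_REGS) > 0) {
                struct kvm_enable_cap cap = {
                        .cap = KVM_CAP_ARM_WRITABLE_IMP_ID_REGS,
                };

                /*
                 * Must happen before KVM_CREATE_VCPU: the handler above only
                 * sets the arch flag while kvm->created_vcpus is still zero.
                 */
                if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
                        perror("KVM_ENABLE_CAP");
        }

        /* ... create vCPUs, then adjust the implementation ID registers
         * (MIDR_EL1 etc.) via KVM_SET_ONE_REG ... */
        return 0;
}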
@@ -839,14 +864,20 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 	if (ret)
 		return ret;
 
-	ret = kvm_arm_pmu_v3_enable(vcpu);
-	if (ret)
-		return ret;
+	if (kvm_vcpu_has_pmu(vcpu)) {
+		ret = kvm_arm_pmu_v3_enable(vcpu);
+		if (ret)
+			return ret;
+	}
 
 	if (is_protected_kvm_enabled()) {
 		ret = pkvm_create_hyp_vm(kvm);
 		if (ret)
 			return ret;
+
+		ret = pkvm_create_hyp_vcpu(vcpu);
+		if (ret)
+			return ret;
 	}
 
 	mutex_lock(&kvm->arch.config_lock);
@@ -1152,7 +1183,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 	 */
 	preempt_disable();
 
-	kvm_pmu_flush_hwstate(vcpu);
+	if (kvm_vcpu_has_pmu(vcpu))
+		kvm_pmu_flush_hwstate(vcpu);
 
 	local_irq_disable();
 
@@ -1171,7 +1203,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		if (ret <= 0 || kvm_vcpu_exit_request(vcpu, &ret)) {
 			vcpu->mode = OUTSIDE_GUEST_MODE;
 			isb(); /* Ensure work in x_flush_hwstate is committed */
-			kvm_pmu_sync_hwstate(vcpu);
+			if (kvm_vcpu_has_pmu(vcpu))
+				kvm_pmu_sync_hwstate(vcpu);
 			if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
 				kvm_timer_sync_user(vcpu);
 			kvm_vgic_sync_hwstate(vcpu);
@@ -1201,7 +1234,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		 * that the vgic can properly sample the updated state of the
 		 * interrupt line.
 		 */
-		kvm_pmu_sync_hwstate(vcpu);
+		if (kvm_vcpu_has_pmu(vcpu))
+			kvm_pmu_sync_hwstate(vcpu);
 
 		/*
 		 * Sync the vgic state before syncing the timer state because
@@ -1390,7 +1424,7 @@ static unsigned long system_supported_vcpu_features(void)
 	if (!cpus_have_final_cap(ARM64_HAS_32BIT_EL1))
 		clear_bit(KVM_ARM_VCPU_EL1_32BIT, &features);
 
-	if (!kvm_arm_support_pmu_v3())
+	if (!kvm_supports_guest_pmuv3())
 		clear_bit(KVM_ARM_VCPU_PMU_V3, &features);
 
 	if (!system_supports_sve())
@@ -1886,49 +1920,6 @@ int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
 	}
 }
 
-/* unlocks vcpus from @vcpu_lock_idx and smaller */
-static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
-{
-	struct kvm_vcpu *tmp_vcpu;
-
-	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
-		tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
-		mutex_unlock(&tmp_vcpu->mutex);
-	}
-}
-
-void unlock_all_vcpus(struct kvm *kvm)
-{
-	lockdep_assert_held(&kvm->lock);
-
-	unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
-}
-
-/* Returns true if all vcpus were locked, false otherwise */
-bool lock_all_vcpus(struct kvm *kvm)
-{
-	struct kvm_vcpu *tmp_vcpu;
-	unsigned long c;
-
-	lockdep_assert_held(&kvm->lock);
-
-	/*
-	 * Any time a vcpu is in an ioctl (including running), the
-	 * core KVM code tries to grab the vcpu->mutex.
-	 *
-	 * By grabbing the vcpu->mutex of all VCPUs we ensure that no
-	 * other VCPUs can fiddle with the state while we access it.
-	 */
-	kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
-		if (!mutex_trylock(&tmp_vcpu->mutex)) {
-			unlock_vcpus(kvm, c - 1);
-			return false;
-		}
-	}
-
-	return true;
-}
-
 static unsigned long nvhe_percpu_size(void)
 {
 	return (unsigned long)CHOOSE_NVHE_SYM(__per_cpu_end) -
@@ -2134,7 +2125,7 @@ static void cpu_hyp_init(void *discard)
 
 static void cpu_hyp_uninit(void *discard)
 {
-	if (__this_cpu_read(kvm_hyp_initialized)) {
+	if (!is_protected_kvm_enabled() && __this_cpu_read(kvm_hyp_initialized)) {
 		cpu_hyp_reset();
 		__this_cpu_write(kvm_hyp_initialized, 0);
 	}
@@ -2311,6 +2302,13 @@ static int __init init_subsystems(void)
 		goto out;
 	}
 
+	if (kvm_mode == KVM_MODE_NV &&
+	    !(vgic_present && kvm_vgic_global_state.type == VGIC_V3)) {
+		kvm_err("NV support requires GICv3, giving up\n");
+		err = -EINVAL;
+		goto out;
+	}
+
 	/*
 	 * Init HYP architected timer support
 	 */
@@ -2343,8 +2341,13 @@ static void __init teardown_hyp_mode(void)
 
 	free_hyp_pgds();
 	for_each_possible_cpu(cpu) {
+		if (per_cpu(kvm_hyp_initialized, cpu))
+			continue;
+
 		free_pages(per_cpu(kvm_arm_hyp_stack_base, cpu), NVHE_STACK_SHIFT - PAGE_SHIFT);
-		free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
+
+		if (!kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu])
+			continue;
 
 		if (free_sve) {
 			struct cpu_sve_state *sve_state;
@@ -2352,6 +2355,9 @@ static void __init teardown_hyp_mode(void)
 
 			sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
 			free_pages((unsigned long) sve_state, pkvm_host_sve_state_order());
 		}
+
+		free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
+
 	}
 }
@@ -2415,6 +2421,19 @@ static void kvm_hyp_init_symbols(void)
 	kvm_nvhe_sym(__icache_flags) = __icache_flags;
 	kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
 
+	/* Propagate the FGT state to the nVHE side */
+	kvm_nvhe_sym(hfgrtr_masks) = hfgrtr_masks;
+	kvm_nvhe_sym(hfgwtr_masks) = hfgwtr_masks;
+	kvm_nvhe_sym(hfgitr_masks) = hfgitr_masks;
+	kvm_nvhe_sym(hdfgrtr_masks) = hdfgrtr_masks;
+	kvm_nvhe_sym(hdfgwtr_masks) = hdfgwtr_masks;
+	kvm_nvhe_sym(hafgrtr_masks) = hafgrtr_masks;
+	kvm_nvhe_sym(hfgrtr2_masks) = hfgrtr2_masks;
+	kvm_nvhe_sym(hfgwtr2_masks) = hfgwtr2_masks;
+	kvm_nvhe_sym(hfgitr2_masks) = hfgitr2_masks;
+	kvm_nvhe_sym(hdfgrtr2_masks) = hdfgrtr2_masks;
+	kvm_nvhe_sym(hdfgwtr2_masks) = hdfgwtr2_masks;
+
 	/*
 	 * Flush entire BSS since part of its data containing init symbols is read
 	 * while the MMU is off.
@@ -2569,6 +2588,13 @@ static int __init init_hyp_mode(void)
 		goto out_err;
 	}
 
+	err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_start),
+				  kvm_ksym_ref(__hyp_data_end), PAGE_HYP);
+	if (err) {
+		kvm_err("Cannot map .hyp.data section\n");
+		goto out_err;
+	}
+
 	err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start),
 				  kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO);
 	if (err) {
@@ -2708,28 +2734,58 @@ bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
 	return irqchip_in_kernel(kvm);
 }
 
-bool kvm_arch_has_irq_bypass(void)
-{
-	return true;
-}
-
 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
 				     struct irq_bypass_producer *prod)
 {
 	struct kvm_kernel_irqfd *irqfd =
 		container_of(cons, struct kvm_kernel_irqfd, consumer);
+	struct kvm_kernel_irq_routing_entry *irq_entry = &irqfd->irq_entry;
+
+	/*
+	 * The only thing we have a chance of directly-injecting is LPIs. Maybe
+	 * one day...
+	 */
+	if (irq_entry->type != KVM_IRQ_ROUTING_MSI)
+		return 0;
 
 	return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
 					  &irqfd->irq_entry);
 }
+
 void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
 				      struct irq_bypass_producer *prod)
 {
 	struct kvm_kernel_irqfd *irqfd =
 		container_of(cons, struct kvm_kernel_irqfd, consumer);
+	struct kvm_kernel_irq_routing_entry *irq_entry = &irqfd->irq_entry;
+
+	if (irq_entry->type != KVM_IRQ_ROUTING_MSI)
+		return;
 
-	kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
-				     &irqfd->irq_entry);
+	kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq);
+}
+
+bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
+				  struct kvm_kernel_irq_routing_entry *new)
+{
+	if (old->type != KVM_IRQ_ROUTING_MSI ||
+	    new->type != KVM_IRQ_ROUTING_MSI)
+		return true;
+
+	return memcmp(&old->msi, &new->msi, sizeof(new->msi));
+}
+
+int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
+				  uint32_t guest_irq, bool set)
+{
+	/*
+	 * Remapping the vLPI requires taking the its_lock mutex to resolve
+	 * the new translation. We're in spinlock land at this point, so no
+	 * chance of resolving the translation.
+	 *
+	 * Unmap the vLPI and fall back to software LPI injection.
+	 */
+	return kvm_vgic_v4_unset_forwarding(kvm, host_irq);
+}
 
 void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
@@ -2807,11 +2863,12 @@ static __init int kvm_arm_init(void)
 	if (err)
 		goto out_hyp;
 
-	kvm_info("%s%sVHE mode initialized successfully\n",
+	kvm_info("%s%sVHE%s mode initialized successfully\n",
 		 in_hyp_mode ? "" : (is_protected_kvm_enabled() ?
 				     "Protected " : "Hyp "),
 		 in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ?
-				     "h" : "n"));
+				     "h" : "n"),
+		 cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) ? "+NV2" : "");
 
 	/*
 	 * FIXME: Do something reasonable if kvm_init() fails after pKVM
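Relatedly, the new KVM_CAP_ARM_EL2 / KVM_CAP_ARM_EL2_E2H0 checks added earlier in this diff only report host support; a VMM still has to opt in per vCPU. A rough sketch of that probe follows, assuming the conventional KVM_ARM_PREFERRED_TARGET / KVM_ARM_VCPU_INIT flow and a KVM_ARM_VCPU_HAS_EL2 feature bit in the uapi header (the feature-bit name is an assumption, not something this diff defines):

/* Hypothetical VMM snippet: create an EL2-capable (nested virt) vCPU. */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int create_nv_vcpu(int vm_fd)
{
        struct kvm_vcpu_init init;
        int vcpu_fd;

        /* Host advertises FEAT_NV2 support via the new capability. */
        if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_EL2) <= 0)
                return -1;

        vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
        if (vcpu_fd < 0)
                return -1;

        /* Let KVM pick the preferred target, then request the EL2 feature. */
        memset(&init, 0, sizeof(init));
        if (ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init))
                return -1;
        init.features[KVM_ARM_VCPU_HAS_EL2 / 32] |=
                1U << (KVM_ARM_VCPU_HAS_EL2 % 32);

        if (ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init))
                return -1;

        return vcpu_fd;
}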