Diffstat (limited to 'arch/arm64/kvm/vgic')
-rw-r--r--  arch/arm64/kvm/vgic/vgic-debug.c      | 227
-rw-r--r--  arch/arm64/kvm/vgic/vgic-init.c       | 164
-rw-r--r--  arch/arm64/kvm/vgic/vgic-its.c        |  98
-rw-r--r--  arch/arm64/kvm/vgic/vgic-kvm-device.c |  57
-rw-r--r--  arch/arm64/kvm/vgic/vgic-mmio-v3.c    |  33
-rw-r--r--  arch/arm64/kvm/vgic/vgic-v3-nested.c  | 407
-rw-r--r--  arch/arm64/kvm/vgic/vgic-v3.c         |  57
-rw-r--r--  arch/arm64/kvm/vgic/vgic-v4.c         | 131
-rw-r--r--  arch/arm64/kvm/vgic/vgic-v5.c         |  52
-rw-r--r--  arch/arm64/kvm/vgic/vgic.c            |  42
-rw-r--r--  arch/arm64/kvm/vgic/vgic.h            |  87
11 files changed, 1151 insertions(+), 204 deletions(-)
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index afb018528bc3..2684f273d9e1 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -320,3 +320,230 @@ void vgic_debug_init(struct kvm *kvm)
void vgic_debug_destroy(struct kvm *kvm)
{
}
+
+/**
+ * struct vgic_its_iter - Iterator for traversing VGIC ITS device tables.
+ * @dev: Pointer to the current its_device being processed.
+ * @ite: Pointer to the current its_ite within the device being processed.
+ *
+ * This structure is used to maintain the current position during iteration
+ * over the ITS device tables. It holds pointers to both the current device
+ * and the current ITE within that device.
+ */
+struct vgic_its_iter {
+ struct its_device *dev;
+ struct its_ite *ite;
+};
+
+/**
+ * end_of_iter - Checks if the iterator has reached the end.
+ * @iter: The iterator to check.
+ *
+ * Once the iterator has processed the final ITE in the last device table,
+ * it is marked as finished by setting both its device and ITE pointers
+ * to NULL.
+ * This function checks whether the iterator has been marked that way.
+ *
+ * Return: True if the iterator is marked as end, false otherwise.
+ */
+static inline bool end_of_iter(struct vgic_its_iter *iter)
+{
+ return !iter->dev && !iter->ite;
+}
+
+/**
+ * vgic_its_iter_next - Advances the iterator to the next entry in the ITS tables.
+ * @its: The VGIC ITS structure.
+ * @iter: The iterator to advance.
+ *
+ * This function moves the iterator to the next ITE within the current device,
+ * or to the first ITE of the next device if the current ITE is the last in
+ * the device. If the current device is the last device, the iterator is set
+ * to indicate the end of iteration.
+ */
+static void vgic_its_iter_next(struct vgic_its *its, struct vgic_its_iter *iter)
+{
+ struct its_device *dev = iter->dev;
+ struct its_ite *ite = iter->ite;
+
+ if (!ite || list_is_last(&ite->ite_list, &dev->itt_head)) {
+ if (list_is_last(&dev->dev_list, &its->device_list)) {
+ dev = NULL;
+ ite = NULL;
+ } else {
+ dev = list_next_entry(dev, dev_list);
+ ite = list_first_entry_or_null(&dev->itt_head,
+ struct its_ite,
+ ite_list);
+ }
+ } else {
+ ite = list_next_entry(ite, ite_list);
+ }
+
+ iter->dev = dev;
+ iter->ite = ite;
+}
+
+/**
+ * vgic_its_debug_start - Start function for the seq_file interface.
+ * @s: The seq_file structure.
+ * @pos: The starting position (offset).
+ *
+ * This function initializes the iterator to the beginning of the ITS tables
+ * and advances it to the specified position. It acquires the its_lock mutex
+ * to protect shared data.
+ *
+ * Return: An iterator pointer on success, NULL if no devices are found or
+ * the end of the list is reached, or ERR_PTR(-ENOMEM) on memory
+ * allocation failure.
+ */
+static void *vgic_its_debug_start(struct seq_file *s, loff_t *pos)
+{
+ struct vgic_its *its = s->private;
+ struct vgic_its_iter *iter;
+ struct its_device *dev;
+ loff_t offset = *pos;
+
+ mutex_lock(&its->its_lock);
+
+ dev = list_first_entry_or_null(&its->device_list,
+ struct its_device, dev_list);
+ if (!dev)
+ return NULL;
+
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return ERR_PTR(-ENOMEM);
+
+ iter->dev = dev;
+ iter->ite = list_first_entry_or_null(&dev->itt_head,
+ struct its_ite, ite_list);
+
+ while (!end_of_iter(iter) && offset--)
+ vgic_its_iter_next(its, iter);
+
+ if (end_of_iter(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+
+ return iter;
+}
+
+/**
+ * vgic_its_debug_next - Next function for the seq_file interface.
+ * @s: The seq_file structure.
+ * @v: The current iterator.
+ * @pos: The current position (offset).
+ *
+ * This function advances the iterator to the next entry and increments the
+ * position.
+ *
+ * Return: An iterator pointer on success, or NULL if the end of the list is
+ * reached.
+ */
+static void *vgic_its_debug_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct vgic_its *its = s->private;
+ struct vgic_its_iter *iter = v;
+
+ ++*pos;
+ vgic_its_iter_next(its, iter);
+
+ if (end_of_iter(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+ return iter;
+}
+
+/**
+ * vgic_its_debug_stop - Stop function for the seq_file interface.
+ * @s: The seq_file structure.
+ * @v: The current iterator.
+ *
+ * This function frees the iterator and releases the its_lock mutex.
+ */
+static void vgic_its_debug_stop(struct seq_file *s, void *v)
+{
+ struct vgic_its *its = s->private;
+ struct vgic_its_iter *iter = v;
+
+ if (!IS_ERR_OR_NULL(iter))
+ kfree(iter);
+ mutex_unlock(&its->its_lock);
+}
+
+/**
+ * vgic_its_debug_show - Show function for the seq_file interface.
+ * @s: The seq_file structure.
+ * @v: The current iterator.
+ *
+ * This function formats and prints the ITS table entry information to the
+ * seq_file output.
+ *
+ * Return: 0 on success.
+ */
+static int vgic_its_debug_show(struct seq_file *s, void *v)
+{
+ struct vgic_its_iter *iter = v;
+ struct its_device *dev = iter->dev;
+ struct its_ite *ite = iter->ite;
+
+ if (!ite)
+ return 0;
+
+ if (list_is_first(&ite->ite_list, &dev->itt_head)) {
+ seq_printf(s, "\n");
+ seq_printf(s, "Device ID: 0x%x, Event ID Range: [0 - %llu]\n",
+ dev->device_id, BIT_ULL(dev->num_eventid_bits) - 1);
+ seq_printf(s, "EVENT_ID INTID HWINTID TARGET COL_ID HW\n");
+ seq_printf(s, "-----------------------------------------------\n");
+ }
+
+ if (ite->irq && ite->collection) {
+ seq_printf(s, "%8u %8u %8u %8u %8u %2d\n",
+ ite->event_id, ite->irq->intid, ite->irq->hwintid,
+ ite->collection->target_addr,
+ ite->collection->collection_id, ite->irq->hw);
+ }
+
+ return 0;
+}
+
+static const struct seq_operations vgic_its_debug_sops = {
+ .start = vgic_its_debug_start,
+ .next = vgic_its_debug_next,
+ .stop = vgic_its_debug_stop,
+ .show = vgic_its_debug_show
+};
+
+DEFINE_SEQ_ATTRIBUTE(vgic_its_debug);
+
+/**
+ * vgic_its_debug_init - Initializes the debugfs interface for VGIC ITS.
+ * @dev: The KVM device structure.
+ *
+ * This function creates a debugfs file named "vgic-its-state@<its_base>"
+ * to expose the ITS table information.
+ *
+ * Return: 0 on success, -ENOMEM if the debugfs file name cannot be allocated.
+ */
+int vgic_its_debug_init(struct kvm_device *dev)
+{
+ struct vgic_its *its = dev->private;
+ char *name;
+
+ name = kasprintf(GFP_KERNEL, "vgic-its-state@%llx", (u64)its->vgic_its_base);
+ if (!name)
+ return -ENOMEM;
+
+ debugfs_create_file(name, 0444, dev->kvm->debugfs_dentry, its, &vgic_its_debug_fops);
+
+ kfree(name);
+ return 0;
+}
+
+void vgic_its_debug_destroy(struct kvm_device *dev)
+{
+}
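
For context, the four operations registered above follow the standard seq_file contract: ->start() is called with the saved read position and takes its_lock, ->show() emits one table row per iterator position, ->next() advances, and ->stop() always runs (even when ->start() returned NULL or an error), which is why the lock is released there. A simplified sketch of how the seq_file core drives them on each read of the debugfs file (illustration only, not part of the patch; buffering and error handling omitted):

#include <linux/err.h>
#include <linux/seq_file.h>

/* Roughly what the seq_file core does per read(); simplified. */
static void seq_read_sketch(struct seq_file *m)
{
	loff_t pos = m->index;
	void *v = m->op->start(m, &pos);	/* vgic_its_debug_start(): takes its_lock */

	while (v && !IS_ERR(v)) {
		if (m->op->show(m, v))		/* vgic_its_debug_show(): one ITE row */
			break;
		v = m->op->next(m, v, &pos);	/* vgic_its_debug_next(): advance */
	}
	m->op->stop(m, v);			/* vgic_its_debug_stop(): drops its_lock */
}
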
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index bc7e22ab5d81..1e680ad6e863 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -34,9 +34,9 @@
*
* CPU Interface:
*
- * - kvm_vgic_vcpu_init(): initialization of static data that
- * doesn't depend on any sizing information or emulation type. No
- * allocation is allowed there.
+ * - kvm_vgic_vcpu_init(): initialization of static data that doesn't depend
+ * on any sizing information. Private interrupts are allocated if not
+ * already allocated at vgic-creation time.
*/
/* EARLY INIT */
@@ -58,6 +58,8 @@ void kvm_vgic_early_init(struct kvm *kvm)
/* CREATION */
+static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type);
+
/**
* kvm_vgic_create: triggered by the instantiation of the VGIC device by
* user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only)
@@ -82,15 +84,40 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
!kvm_vgic_global_state.can_emulate_gicv2)
return -ENODEV;
- /* Must be held to avoid race with vCPU creation */
+ /*
+ * Ensure mutual exclusion with vCPU creation and any vCPU ioctls by:
+ *
+ * - Holding kvm->lock to prevent KVM_CREATE_VCPU from reaching
+ * kvm_arch_vcpu_precreate() and ensuring created_vcpus is stable.
+ * This alone is insufficient, as kvm_vm_ioctl_create_vcpu() drops
+ * the kvm->lock before completing the vCPU creation.
+ */
lockdep_assert_held(&kvm->lock);
+ /*
+ * - Acquiring the vCPU mutex for every *online* vCPU to prevent
+ * concurrent vCPU ioctls for vCPUs already visible to userspace.
+ */
ret = -EBUSY;
- if (!lock_all_vcpus(kvm))
+ if (kvm_trylock_all_vcpus(kvm))
return ret;
+ /*
+ * - Taking the config_lock which protects VGIC data structures such
+ * as the per-vCPU arrays of private IRQs (SGIs, PPIs).
+ */
mutex_lock(&kvm->arch.config_lock);
+ /*
+ * - Bailing on the entire thing if a vCPU is in the middle of creation,
+ * has dropped the kvm->lock, but hasn't reached kvm_arch_vcpu_create().
+ *
+ * The whole combination of this guarantees that no vCPU can get into
+ * KVM with a VGIC configuration inconsistent with the VM's VGIC.
+ */
+ if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
+ goto out_unlock;
+
if (irqchip_in_kernel(kvm)) {
ret = -EEXIST;
goto out_unlock;
@@ -112,8 +139,25 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
goto out_unlock;
}
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ ret = vgic_allocate_private_irqs_locked(vcpu, type);
+ if (ret)
+ break;
+ }
+
+ if (ret) {
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ kfree(vgic_cpu->private_irqs);
+ vgic_cpu->private_irqs = NULL;
+ }
+
+ goto out_unlock;
+ }
+
kvm->arch.vgic.in_kernel = true;
kvm->arch.vgic.vgic_model = type;
+ kvm->arch.vgic.implementation_rev = KVM_VGIC_IMP_REV_LATEST;
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
@@ -122,9 +166,12 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
else
INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
+ if (type == KVM_DEV_TYPE_ARM_VGIC_V3)
+ kvm->arch.vgic.nassgicap = system_supports_direct_sgis();
+
out_unlock:
mutex_unlock(&kvm->arch.config_lock);
- unlock_all_vcpus(kvm);
+ kvm_unlock_all_vcpus(kvm);
return ret;
}
@@ -180,7 +227,28 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
return 0;
}
-static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu)
+/* Default GICv3 Maintenance Interrupt INTID, as per SBSA */
+#define DEFAULT_MI_INTID 25
+
+int kvm_vgic_vcpu_nv_init(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ guard(mutex)(&vcpu->kvm->arch.config_lock);
+
+ /*
+ * Matching the tradition established with the timers, provide
+ * a default PPI for the maintenance interrupt. It makes
+ * things easier to reason about.
+ */
+ if (vcpu->kvm->arch.vgic.mi_intid == 0)
+ vcpu->kvm->arch.vgic.mi_intid = DEFAULT_MI_INTID;
+ ret = kvm_vgic_set_owner(vcpu, vcpu->kvm->arch.vgic.mi_intid, vcpu);
+
+ return ret;
+}
+
+static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
int i;
@@ -218,17 +286,28 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu)
/* PPIs */
irq->config = VGIC_CONFIG_LEVEL;
}
+
+ switch (type) {
+ case KVM_DEV_TYPE_ARM_VGIC_V3:
+ irq->group = 1;
+ irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
+ break;
+ case KVM_DEV_TYPE_ARM_VGIC_V2:
+ irq->group = 0;
+ irq->targets = BIT(vcpu->vcpu_id);
+ break;
+ }
}
return 0;
}
-static int vgic_allocate_private_irqs(struct kvm_vcpu *vcpu)
+static int vgic_allocate_private_irqs(struct kvm_vcpu *vcpu, u32 type)
{
int ret;
mutex_lock(&vcpu->kvm->arch.config_lock);
- ret = vgic_allocate_private_irqs_locked(vcpu);
+ ret = vgic_allocate_private_irqs_locked(vcpu, type);
mutex_unlock(&vcpu->kvm->arch.config_lock);
return ret;
@@ -258,7 +337,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
if (!irqchip_in_kernel(vcpu->kvm))
return 0;
- ret = vgic_allocate_private_irqs(vcpu);
+ ret = vgic_allocate_private_irqs(vcpu, dist->vgic_model);
if (ret)
return ret;
@@ -295,7 +374,7 @@ int vgic_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
- int ret = 0, i;
+ int ret = 0;
unsigned long idx;
lockdep_assert_held(&kvm->arch.config_lock);
@@ -315,41 +394,11 @@ int vgic_init(struct kvm *kvm)
if (ret)
goto out;
- /* Initialize groups on CPUs created before the VGIC type was known */
- kvm_for_each_vcpu(idx, vcpu, kvm) {
- ret = vgic_allocate_private_irqs_locked(vcpu);
- if (ret)
- goto out;
-
- for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
- struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i);
-
- switch (dist->vgic_model) {
- case KVM_DEV_TYPE_ARM_VGIC_V3:
- irq->group = 1;
- irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
- break;
- case KVM_DEV_TYPE_ARM_VGIC_V2:
- irq->group = 0;
- irq->targets = 1U << idx;
- break;
- default:
- ret = -EINVAL;
- }
-
- vgic_put_irq(kvm, irq);
-
- if (ret)
- goto out;
- }
- }
-
/*
- * If we have GICv4.1 enabled, unconditionally request enable the
- * v4 support so that we get HW-accelerated vSGIs. Otherwise, only
- * enable it if we present a virtual ITS to the guest.
+ * Ensure vPEs are allocated if direct IRQ injection (e.g. vSGIs,
+ * vLPIs) is supported.
*/
- if (vgic_supports_direct_msis(kvm)) {
+ if (vgic_supports_direct_irqs(kvm)) {
ret = vgic_v4_init(kvm);
if (ret)
goto out;
@@ -363,15 +412,7 @@ int vgic_init(struct kvm *kvm)
goto out;
vgic_debug_init(kvm);
-
- /*
- * If userspace didn't set the GIC implementation revision,
- * default to the latest and greatest. You know want it.
- */
- if (!dist->implementation_rev)
- dist->implementation_rev = KVM_VGIC_IMP_REV_LATEST;
dist->initialized = true;
-
out:
return ret;
}
@@ -397,7 +438,7 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
}
- if (vgic_supports_direct_msis(kvm))
+ if (vgic_supports_direct_irqs(kvm))
vgic_v4_teardown(kvm);
xa_destroy(&dist->lpi_xa);
@@ -588,12 +629,20 @@ void kvm_vgic_cpu_down(void)
static irqreturn_t vgic_maintenance_handler(int irq, void *data)
{
+ struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data;
+
/*
* We cannot rely on the vgic maintenance interrupt to be
* delivered synchronously. This means we can only use it to
* exit the VM, and we perform the handling of EOIed
* interrupts on the exit path (see vgic_fold_lr_state).
+ *
+ * Of course, NV throws a wrench in this plan, and needs
+ * something special.
*/
+ if (vcpu && vgic_state_is_nested(vcpu))
+ vgic_v3_handle_nested_maint_irq(vcpu);
+
return IRQ_HANDLED;
}
@@ -620,10 +669,12 @@ void kvm_vgic_init_cpu_hardware(void)
* We want to make sure the list registers start out clear so that we
* only have the program the used registers.
*/
- if (kvm_vgic_global_state.type == VGIC_V2)
+ if (kvm_vgic_global_state.type == VGIC_V2) {
vgic_v2_init_lrs();
- else
+ } else if (kvm_vgic_global_state.type == VGIC_V3 ||
+ kvm_vgic_global_state.has_gcie_v3_compat) {
kvm_call_hyp(__vgic_v3_init_lrs);
+ }
}
/**
@@ -668,6 +719,9 @@ int kvm_vgic_hyp_init(void)
kvm_info("GIC system register CPU interface enabled\n");
}
break;
+ case GIC_V5:
+ ret = vgic_v5_probe(gic_kvm_info);
+ break;
default:
ret = -ENODEV;
}
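
Note that the switch from the arm64-local lock_all_vcpus() to the generic kvm_trylock_all_vcpus() inverts the check at every call site in this series: the old helper returned true on success, while the new one follows the usual 0-on-success convention and returns an error when any vCPU mutex is already held. A condensed illustration (not part of the patch):

#include <linux/kvm_host.h>

/* Illustration only: same critical section, inverted success check. */
static int grab_all_vcpus_sketch(struct kvm *kvm)
{
	/*
	 * Previously:
	 *	if (!lock_all_vcpus(kvm))
	 *		return -EBUSY;
	 *	...
	 *	unlock_all_vcpus(kvm);
	 */
	if (kvm_trylock_all_vcpus(kvm))		/* non-zero means contention */
		return -EBUSY;

	/* ... work that must exclude all vCPU ioctls ... */

	kvm_unlock_all_vcpus(kvm);
	return 0;
}
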
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index fb96802799c6..7368c13f16b7 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -154,36 +154,6 @@ out_unlock:
return irq;
}
-struct its_device {
- struct list_head dev_list;
-
- /* the head for the list of ITTEs */
- struct list_head itt_head;
- u32 num_eventid_bits;
- gpa_t itt_addr;
- u32 device_id;
-};
-
-#define COLLECTION_NOT_MAPPED ((u32)~0)
-
-struct its_collection {
- struct list_head coll_list;
-
- u32 collection_id;
- u32 target_addr;
-};
-
-#define its_is_collection_mapped(coll) ((coll) && \
- ((coll)->target_addr != COLLECTION_NOT_MAPPED))
-
-struct its_ite {
- struct list_head ite_list;
-
- struct vgic_irq *irq;
- struct its_collection *collection;
- u32 event_id;
-};
-
/**
* struct vgic_its_abi - ITS abi ops and settings
* @cte_esz: collection table entry size
@@ -336,39 +306,34 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
}
}
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
-
if (irq->hw)
- return its_prop_update_vlpi(irq->host_irq, prop, needs_inv);
+ ret = its_prop_update_vlpi(irq->host_irq, prop, needs_inv);
- return 0;
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+ return ret;
}
static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
{
- int ret = 0;
- unsigned long flags;
+ struct its_vlpi_map map;
+ int ret;
- raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ guard(raw_spinlock_irqsave)(&irq->irq_lock);
irq->target_vcpu = vcpu;
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
- if (irq->hw) {
- struct its_vlpi_map map;
-
- ret = its_get_vlpi(irq->host_irq, &map);
- if (ret)
- return ret;
+ if (!irq->hw)
+ return 0;
- if (map.vpe)
- atomic_dec(&map.vpe->vlpi_count);
- map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
- atomic_inc(&map.vpe->vlpi_count);
+ ret = its_get_vlpi(irq->host_irq, &map);
+ if (ret)
+ return ret;
- ret = its_map_vlpi(irq->host_irq, &map);
- }
+ if (map.vpe)
+ atomic_dec(&map.vpe->vlpi_count);
- return ret;
+ map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+ atomic_inc(&map.vpe->vlpi_count);
+ return its_map_vlpi(irq->host_irq, &map);
}
static struct kvm_vcpu *collection_to_vcpu(struct kvm *kvm,
@@ -786,12 +751,17 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
/* Requires the its_lock to be held. */
static void its_free_ite(struct kvm *kvm, struct its_ite *ite)
{
+ struct vgic_irq *irq = ite->irq;
list_del(&ite->ite_list);
/* This put matches the get in vgic_add_lpi. */
- if (ite->irq) {
- if (ite->irq->hw)
- WARN_ON(its_unmap_vlpi(ite->irq->host_irq));
+ if (irq) {
+ scoped_guard(raw_spinlock_irqsave, &irq->irq_lock) {
+ if (irq->hw)
+ its_unmap_vlpi(ite->irq->host_irq);
+
+ irq->hw = false;
+ }
vgic_put_irq(kvm, ite->irq);
}
@@ -1938,6 +1908,8 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
mutex_lock(&its->its_lock);
+ vgic_its_debug_destroy(kvm_dev);
+
vgic_its_free_device_list(kvm, its);
vgic_its_free_collection_list(kvm, its);
vgic_its_invalidate_cache(its);
@@ -1999,7 +1971,7 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev,
mutex_lock(&dev->kvm->lock);
- if (!lock_all_vcpus(dev->kvm)) {
+ if (kvm_trylock_all_vcpus(dev->kvm)) {
mutex_unlock(&dev->kvm->lock);
return -EBUSY;
}
@@ -2034,7 +2006,7 @@ static int vgic_its_attr_regs_access(struct kvm_device *dev,
}
out:
mutex_unlock(&dev->kvm->arch.config_lock);
- unlock_all_vcpus(dev->kvm);
+ kvm_unlock_all_vcpus(dev->kvm);
mutex_unlock(&dev->kvm->lock);
return ret;
}
@@ -2704,7 +2676,7 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
mutex_lock(&kvm->lock);
- if (!lock_all_vcpus(kvm)) {
+ if (kvm_trylock_all_vcpus(kvm)) {
mutex_unlock(&kvm->lock);
return -EBUSY;
}
@@ -2722,11 +2694,14 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
case KVM_DEV_ARM_ITS_RESTORE_TABLES:
ret = abi->restore_tables(its);
break;
+ default:
+ ret = -ENXIO;
+ break;
}
mutex_unlock(&its->its_lock);
mutex_unlock(&kvm->arch.config_lock);
- unlock_all_vcpus(kvm);
+ kvm_unlock_all_vcpus(kvm);
mutex_unlock(&kvm->lock);
return ret;
}
@@ -2771,7 +2746,12 @@ static int vgic_its_set_attr(struct kvm_device *dev,
if (ret)
return ret;
- return vgic_register_its_iodev(dev->kvm, its, addr);
+ ret = vgic_register_its_iodev(dev->kvm, its, addr);
+ if (ret)
+ return ret;
+
+ return vgic_its_debug_init(dev);
+
}
case KVM_DEV_ARM_VGIC_GRP_CTRL:
return vgic_its_ctrl(dev->kvm, its, attr->attr);
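
Several hunks above replace open-coded lock/unlock pairs with the scope-based guards from <linux/cleanup.h>: guard(raw_spinlock_irqsave)(&lock) keeps the lock held until the enclosing function returns (so the early returns in update_affinity() release it automatically), while scoped_guard(raw_spinlock_irqsave, &lock) { ... } limits it to a block, as in its_free_ite(). A minimal standalone illustration (the lock and data below are made up for the example):

#include <linux/cleanup.h>
#include <linux/errno.h>
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(demo_lock);	/* hypothetical lock, example only */
static int demo_state;

static int guarded_update(int val)
{
	/* Held from here until *any* return from this function */
	guard(raw_spinlock_irqsave)(&demo_lock);

	if (val < 0)
		return -EINVAL;		/* lock dropped automatically */

	demo_state = val;
	return 0;			/* and here as well */
}

static void scoped_update(int val)
{
	/* Held only for the duration of the braces */
	scoped_guard(raw_spinlock_irqsave, &demo_lock) {
		demo_state = val;
	}
}
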
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 5f4f57aaa23e..3d1a776b716d 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -5,6 +5,7 @@
* Copyright (C) 2015 ARM Ltd.
* Author: Marc Zyngier <marc.zyngier@arm.com>
*/
+#include <linux/irqchip/arm-gic-v3.h>
#include <linux/kvm_host.h>
#include <kvm/arm_vgic.h>
#include <linux/uaccess.h>
@@ -268,7 +269,7 @@ static int vgic_set_common_attr(struct kvm_device *dev,
return -ENXIO;
mutex_lock(&dev->kvm->lock);
- if (!lock_all_vcpus(dev->kvm)) {
+ if (kvm_trylock_all_vcpus(dev->kvm)) {
mutex_unlock(&dev->kvm->lock);
return -EBUSY;
}
@@ -276,7 +277,7 @@ static int vgic_set_common_attr(struct kvm_device *dev,
mutex_lock(&dev->kvm->arch.config_lock);
r = vgic_v3_save_pending_tables(dev->kvm);
mutex_unlock(&dev->kvm->arch.config_lock);
- unlock_all_vcpus(dev->kvm);
+ kvm_unlock_all_vcpus(dev->kvm);
mutex_unlock(&dev->kvm->lock);
return r;
}
@@ -384,7 +385,7 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev,
mutex_lock(&dev->kvm->lock);
- if (!lock_all_vcpus(dev->kvm)) {
+ if (kvm_trylock_all_vcpus(dev->kvm)) {
mutex_unlock(&dev->kvm->lock);
return -EBUSY;
}
@@ -409,7 +410,7 @@ static int vgic_v2_attr_regs_access(struct kvm_device *dev,
out:
mutex_unlock(&dev->kvm->arch.config_lock);
- unlock_all_vcpus(dev->kvm);
+ kvm_unlock_all_vcpus(dev->kvm);
mutex_unlock(&dev->kvm->lock);
if (!ret && !is_write)
@@ -504,6 +505,24 @@ int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
}
/*
+ * Allow access to certain ID-like registers prior to VGIC initialization,
+ * thereby allowing the VMM to provision the features / sizing of the VGIC.
+ */
+static bool reg_allowed_pre_init(struct kvm_device_attr *attr)
+{
+ if (attr->group != KVM_DEV_ARM_VGIC_GRP_DIST_REGS)
+ return false;
+
+ switch (attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK) {
+ case GICD_IIDR:
+ case GICD_TYPER2:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
* vgic_v3_attr_regs_access - allows user space to access VGIC v3 state
*
* @dev: kvm device handle
@@ -545,14 +564,14 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
mutex_lock(&dev->kvm->lock);
- if (!lock_all_vcpus(dev->kvm)) {
+ if (kvm_trylock_all_vcpus(dev->kvm)) {
mutex_unlock(&dev->kvm->lock);
return -EBUSY;
}
mutex_lock(&dev->kvm->arch.config_lock);
- if (unlikely(!vgic_initialized(dev->kvm))) {
+ if (!(vgic_initialized(dev->kvm) || reg_allowed_pre_init(attr))) {
ret = -EBUSY;
goto out;
}
@@ -589,7 +608,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
out:
mutex_unlock(&dev->kvm->arch.config_lock);
- unlock_all_vcpus(dev->kvm);
+ kvm_unlock_all_vcpus(dev->kvm);
mutex_unlock(&dev->kvm->lock);
if (!ret && uaccess && !is_write) {
@@ -609,6 +628,23 @@ static int vgic_v3_set_attr(struct kvm_device *dev,
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO:
return vgic_v3_attr_regs_access(dev, attr, true);
+ case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: {
+ u32 __user *uaddr = (u32 __user *)attr->addr;
+ u32 val;
+
+ if (get_user(val, uaddr))
+ return -EFAULT;
+
+ guard(mutex)(&dev->kvm->arch.config_lock);
+ if (vgic_initialized(dev->kvm))
+ return -EBUSY;
+
+ if (!irq_is_ppi(val))
+ return -EINVAL;
+
+ dev->kvm->arch.vgic.mi_intid = val;
+ return 0;
+ }
default:
return vgic_set_common_attr(dev, attr);
}
@@ -623,6 +659,12 @@ static int vgic_v3_get_attr(struct kvm_device *dev,
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO:
return vgic_v3_attr_regs_access(dev, attr, false);
+ case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+
+ guard(mutex)(&dev->kvm->arch.config_lock);
+ return put_user(dev->kvm->arch.vgic.mi_intid, uaddr);
+ }
default:
return vgic_get_common_attr(dev, attr);
}
@@ -645,6 +687,7 @@ static int vgic_v3_has_attr(struct kvm_device *dev,
case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
return vgic_v3_has_attr_regs(dev, attr);
case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+ case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
return 0;
case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
if (((attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >>
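
From the VMM side, the KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ group added above is driven through the normal device-attribute ioctls, and the write is only accepted before the VGIC is initialised and only for a PPI. A rough userspace sketch (error handling and the exact choice of INTID are left to the caller):

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

/* vgic_fd: fd returned by KVM_CREATE_DEVICE for the GICv3 device */
static int set_maintenance_ppi(int vgic_fd, uint32_t intid)
{
	struct kvm_device_attr attr = {
		.group	= KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ,
		.addr	= (uint64_t)(unsigned long)&intid,	/* must be a PPI (16..31) */
	};

	/* Fails with EBUSY once the VGIC has been initialised */
	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}
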
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index ae4c0593d114..a3ef185209e9 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -50,8 +50,17 @@ bool vgic_has_its(struct kvm *kvm)
bool vgic_supports_direct_msis(struct kvm *kvm)
{
- return (kvm_vgic_global_state.has_gicv4_1 ||
- (kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm)));
+ return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm);
+}
+
+bool system_supports_direct_sgis(void)
+{
+ return kvm_vgic_global_state.has_gicv4_1 && gic_cpuif_has_vsgi();
+}
+
+bool vgic_supports_direct_sgis(struct kvm *kvm)
+{
+ return kvm->arch.vgic.nassgicap;
}
/*
@@ -86,7 +95,7 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
}
break;
case GICD_TYPER2:
- if (kvm_vgic_global_state.has_gicv4_1 && gic_cpuif_has_vsgi())
+ if (vgic_supports_direct_sgis(vcpu->kvm))
value = GICD_TYPER2_nASSGIcap;
break;
case GICD_IIDR:
@@ -119,7 +128,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
/* Not a GICv4.1? No HW SGIs */
- if (!kvm_vgic_global_state.has_gicv4_1 || !gic_cpuif_has_vsgi())
+ if (!vgic_supports_direct_sgis(vcpu->kvm))
val &= ~GICD_CTLR_nASSGIreq;
/* Dist stays enabled? nASSGIreq is RO */
@@ -133,7 +142,7 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
if (is_hwsgi != dist->nassgireq)
vgic_v4_configure_vsgis(vcpu->kvm);
- if (kvm_vgic_global_state.has_gicv4_1 &&
+ if (vgic_supports_direct_sgis(vcpu->kvm) &&
was_enabled != dist->enabled)
kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_RELOAD_GICv4);
else if (!was_enabled && dist->enabled)
@@ -159,8 +168,18 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
switch (addr & 0x0c) {
case GICD_TYPER2:
- if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
+ reg = vgic_mmio_read_v3_misc(vcpu, addr, len);
+
+ if (reg == val)
+ return 0;
+ if (vgic_initialized(vcpu->kvm))
+ return -EBUSY;
+ if ((reg ^ val) & ~GICD_TYPER2_nASSGIcap)
return -EINVAL;
+ if (!system_supports_direct_sgis() && val)
+ return -EINVAL;
+
+ dist->nassgicap = val & GICD_TYPER2_nASSGIcap;
return 0;
case GICD_IIDR:
reg = vgic_mmio_read_v3_misc(vcpu, addr, len);
@@ -178,7 +197,7 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
}
case GICD_CTLR:
/* Not a GICv4.1? No HW SGIs */
- if (!kvm_vgic_global_state.has_gicv4_1)
+ if (!vgic_supports_direct_sgis(vcpu->kvm))
val &= ~GICD_CTLR_nASSGIreq;
dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c
new file mode 100644
index 000000000000..7f1259b49c50
--- /dev/null
+++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c
@@ -0,0 +1,407 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+
+#include <kvm/arm_vgic.h>
+
+#include <asm/kvm_arm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_nested.h>
+
+#include "vgic.h"
+
+#define ICH_LRN(n) (ICH_LR0_EL2 + (n))
+#define ICH_AP0RN(n) (ICH_AP0R0_EL2 + (n))
+#define ICH_AP1RN(n) (ICH_AP1R0_EL2 + (n))
+
+struct mi_state {
+ u16 eisr;
+ u16 elrsr;
+ bool pend;
+};
+
+/*
+ * The shadow registers loaded to the hardware when running a L2 guest
+ * with the virtual IMO/FMO bits set.
+ */
+struct shadow_if {
+ struct vgic_v3_cpu_if cpuif;
+ unsigned long lr_map;
+};
+
+static DEFINE_PER_CPU(struct shadow_if, shadow_if);
+
+static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx)
+{
+ return hweight16(shadow_if->lr_map & (BIT(idx) - 1));
+}
+
+/*
+ * Nesting GICv3 support
+ *
+ * On a non-nesting VM (only running at EL0/EL1), the host hypervisor
+ * completely controls the interrupts injected via the list registers.
+ * Consequently, most of the state that is modified by the guest (by ACK-ing
+ * and EOI-ing interrupts) is synced by KVM on each entry/exit, so that we
+ * keep a semi-consistent view of the interrupts.
+ *
+ * This still applies for a NV guest, but only while "InHost" (either
+ * running at EL2, or at EL0 with HCR_EL2.{E2H,TGE} == {1,1}).
+ *
+ * When running a L2 guest ("not InHost"), things are radically different,
+ * as the L1 guest is in charge of provisioning the interrupts via its own
+ * view of the ICH_LR*_EL2 registers, which conveniently live in the VNCR
+ * page. This means that the flow described above does work (there is no
+ * state to rebuild in the L0 hypervisor), and that most things happen on L2
+ * load/put:
+ *
+ * - on L2 load: move the in-memory L1 vGIC configuration into a shadow,
+ * per-CPU data structure that is used to populate the actual LRs. This is
+ * an extra copy that we could avoid, but life is short. In the process,
+ * we remap any interrupt that has the HW bit set to the mapped interrupt
+ * on the host, should the host consider it a HW one. This allows the HW
+ * deactivation to take its course, such as for the timer.
+ *
+ * - on L2 put: perform the inverse transformation, so that the result of L2
+ * running becomes visible to L1 in the VNCR-accessible registers.
+ *
+ * - there is nothing to do on L2 entry, as everything will have happened
+ * on load. However, this is the point where we detect an interrupt
+ * targeting L1 and prepare the grand switcheroo.
+ *
+ * - on L2 exit: emulate the HW bit, and deactivate the corresponding L1
+ * interrupt. The L0 active state will be cleared by the HW if the L1
+ * interrupt was itself backed by a HW interrupt.
+ *
+ * Maintenance Interrupt (MI) management:
+ *
+ * Since the L2 guest runs the vgic in its full glory, MIs get delivered and
+ * used as a handover point between L2 and L1.
+ *
+ * - on delivery of a MI to L0 while L2 is running: make the L1 MI pending,
+ * and let it rip. This will initiate a vcpu_put() on L2, and allow L1 to
+ * run and process the MI.
+ *
+ * - L1 MI is a fully virtual interrupt, not linked to the host's MI. Its
+ * state must be computed at each entry/exit of the guest, much like we do
+ * it for the PMU interrupt.
+ *
+ * - because most of the ICH_*_EL2 registers live in the VNCR page, the
+ * quality of emulation is poor: L1 can setup the vgic so that an MI would
+ * immediately fire, and not observe anything until the next exit. Trying
+ * to read ICH_MISR_EL2 would do the trick, for example.
+ *
+ * System register emulation:
+ *
+ * We get two classes of registers:
+ *
+ * - those backed by memory (LRs, APRs, HCR, VMCR): L1 can freely access
+ * them, and L0 doesn't see a thing.
+ *
+ * - those that always trap (ELRSR, EISR, MISR): these are status registers
+ * that are built on the fly based on the in-memory state.
+ *
+ * Only L1 can access the ICH_*_EL2 registers. A non-NV L2 obviously cannot,
+ * and a NV L2 would either access the VNCR page provided by L1 (memory
+ * based registers), or see the access redirected to L1 (registers that
+ * trap) thanks to NV being set by L1.
+ */
+
+bool vgic_state_is_nested(struct kvm_vcpu *vcpu)
+{
+ u64 xmo;
+
+ if (is_nested_ctxt(vcpu)) {
+ xmo = __vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO);
+ WARN_ONCE(xmo && xmo != (HCR_IMO | HCR_FMO),
+ "Separate virtual IRQ/FIQ settings not supported\n");
+
+ return !!xmo;
+ }
+
+ return false;
+}
+
+static struct shadow_if *get_shadow_if(void)
+{
+ return this_cpu_ptr(&shadow_if);
+}
+
+static bool lr_triggers_eoi(u64 lr)
+{
+ return !(lr & (ICH_LR_STATE | ICH_LR_HW)) && (lr & ICH_LR_EOI);
+}
+
+static void vgic_compute_mi_state(struct kvm_vcpu *vcpu, struct mi_state *mi_state)
+{
+ u16 eisr = 0, elrsr = 0;
+ bool pend = false;
+
+ for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
+ u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
+
+ if (lr_triggers_eoi(lr))
+ eisr |= BIT(i);
+ if (!(lr & ICH_LR_STATE))
+ elrsr |= BIT(i);
+ pend |= (lr & ICH_LR_PENDING_BIT);
+ }
+
+ mi_state->eisr = eisr;
+ mi_state->elrsr = elrsr;
+ mi_state->pend = pend;
+}
+
+u16 vgic_v3_get_eisr(struct kvm_vcpu *vcpu)
+{
+ struct mi_state mi_state;
+
+ vgic_compute_mi_state(vcpu, &mi_state);
+ return mi_state.eisr;
+}
+
+u16 vgic_v3_get_elrsr(struct kvm_vcpu *vcpu)
+{
+ struct mi_state mi_state;
+
+ vgic_compute_mi_state(vcpu, &mi_state);
+ return mi_state.elrsr;
+}
+
+u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
+{
+ struct mi_state mi_state;
+ u64 reg = 0, hcr, vmcr;
+
+ hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
+ vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2);
+
+ vgic_compute_mi_state(vcpu, &mi_state);
+
+ if (mi_state.eisr)
+ reg |= ICH_MISR_EL2_EOI;
+
+ if (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_UIE) {
+ int used_lrs = kvm_vgic_global_state.nr_lr;
+
+ used_lrs -= hweight16(mi_state.elrsr);
+ reg |= (used_lrs <= 1) ? ICH_MISR_EL2_U : 0;
+ }
+
+ if ((hcr & ICH_HCR_EL2_LRENPIE) && FIELD_GET(ICH_HCR_EL2_EOIcount_MASK, hcr))
+ reg |= ICH_MISR_EL2_LRENP;
+
+ if ((hcr & ICH_HCR_EL2_NPIE) && !mi_state.pend)
+ reg |= ICH_MISR_EL2_NP;
+
+ if ((hcr & ICH_HCR_EL2_VGrp0EIE) && (vmcr & ICH_VMCR_ENG0_MASK))
+ reg |= ICH_MISR_EL2_VGrp0E;
+
+ if ((hcr & ICH_HCR_EL2_VGrp0DIE) && !(vmcr & ICH_VMCR_ENG0_MASK))
+ reg |= ICH_MISR_EL2_VGrp0D;
+
+ if ((hcr & ICH_HCR_EL2_VGrp1EIE) && (vmcr & ICH_VMCR_ENG1_MASK))
+ reg |= ICH_MISR_EL2_VGrp1E;
+
+ if ((hcr & ICH_HCR_EL2_VGrp1DIE) && !(vmcr & ICH_VMCR_ENG1_MASK))
+ reg |= ICH_MISR_EL2_VGrp1D;
+
+ return reg;
+}
+
+static u64 translate_lr_pintid(struct kvm_vcpu *vcpu, u64 lr)
+{
+ struct vgic_irq *irq;
+
+ if (!(lr & ICH_LR_HW))
+ return lr;
+
+ /* We have the HW bit set, check for validity of pINTID */
+ irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
+ /* If there was no real mapping, nuke the HW bit */
+ if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI)
+ lr &= ~ICH_LR_HW;
+
+ /* Translate the virtual mapping to the real one, even if invalid */
+ if (irq) {
+ lr &= ~ICH_LR_PHYS_ID_MASK;
+ lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+
+ return lr;
+}
+
+/*
+ * For LRs which have HW bit set such as timer interrupts, we modify them to
+ * have the host hardware interrupt number instead of the virtual one programmed
+ * by the guest hypervisor.
+ */
+static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu,
+ struct vgic_v3_cpu_if *s_cpu_if)
+{
+ struct shadow_if *shadow_if;
+
+ shadow_if = container_of(s_cpu_if, struct shadow_if, cpuif);
+ shadow_if->lr_map = 0;
+
+ for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
+ u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
+
+ if (!(lr & ICH_LR_STATE))
+ continue;
+
+ lr = translate_lr_pintid(vcpu, lr);
+
+ s_cpu_if->vgic_lr[hweight16(shadow_if->lr_map)] = lr;
+ shadow_if->lr_map |= BIT(i);
+ }
+
+ s_cpu_if->used_lrs = hweight16(shadow_if->lr_map);
+}
+
+void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
+{
+ struct shadow_if *shadow_if = get_shadow_if();
+ int i;
+
+ for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
+ u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
+ struct vgic_irq *irq;
+
+ if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE))
+ continue;
+
+ /*
+ * If we had a HW lr programmed by the guest hypervisor, we
+ * need to emulate the HW effect between the guest hypervisor
+ * and the nested guest.
+ */
+ irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
+ if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */
+ continue;
+
+ lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i));
+ if (!(lr & ICH_LR_STATE))
+ irq->active = false;
+
+ vgic_put_irq(vcpu->kvm, irq);
+ }
+}
+
+static void vgic_v3_create_shadow_state(struct kvm_vcpu *vcpu,
+ struct vgic_v3_cpu_if *s_cpu_if)
+{
+ struct vgic_v3_cpu_if *host_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ u64 val = 0;
+ int i;
+
+ /*
+ * If we're on a system with a broken vgic that requires
+ * trapping, propagate the trapping requirements.
+ *
+ * Ah, the smell of rotten fruits...
+ */
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap))
+ val = host_if->vgic_hcr & (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
+ ICH_HCR_EL2_TC | ICH_HCR_EL2_TDIR);
+ s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) | val;
+ s_cpu_if->vgic_vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2);
+ s_cpu_if->vgic_sre = host_if->vgic_sre;
+
+ for (i = 0; i < 4; i++) {
+ s_cpu_if->vgic_ap0r[i] = __vcpu_sys_reg(vcpu, ICH_AP0RN(i));
+ s_cpu_if->vgic_ap1r[i] = __vcpu_sys_reg(vcpu, ICH_AP1RN(i));
+ }
+
+ vgic_v3_create_shadow_lr(vcpu, s_cpu_if);
+}
+
+void vgic_v3_load_nested(struct kvm_vcpu *vcpu)
+{
+ struct shadow_if *shadow_if = get_shadow_if();
+ struct vgic_v3_cpu_if *cpu_if = &shadow_if->cpuif;
+
+ BUG_ON(!vgic_state_is_nested(vcpu));
+
+ vgic_v3_create_shadow_state(vcpu, cpu_if);
+
+ __vgic_v3_restore_vmcr_aprs(cpu_if);
+ __vgic_v3_activate_traps(cpu_if);
+
+ __vgic_v3_restore_state(cpu_if);
+
+ /*
+ * Propagate the number of used LRs for the benefit of the HYP
+ * GICv3 emulation code. Yes, this is a pretty sorry hack.
+ */
+ vcpu->arch.vgic_cpu.vgic_v3.used_lrs = cpu_if->used_lrs;
+}
+
+void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
+{
+ struct shadow_if *shadow_if = get_shadow_if();
+ struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif;
+ u64 val;
+ int i;
+
+ __vgic_v3_save_vmcr_aprs(s_cpu_if);
+ __vgic_v3_deactivate_traps(s_cpu_if);
+ __vgic_v3_save_state(s_cpu_if);
+
+ /*
+ * Translate the shadow state HW fields back to the virtual ones
+ * before copying the shadow struct back to the nested one.
+ */
+ val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
+ val &= ~ICH_HCR_EL2_EOIcount_MASK;
+ val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK);
+ __vcpu_assign_sys_reg(vcpu, ICH_HCR_EL2, val);
+ __vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, s_cpu_if->vgic_vmcr);
+
+ for (i = 0; i < 4; i++) {
+ __vcpu_assign_sys_reg(vcpu, ICH_AP0RN(i), s_cpu_if->vgic_ap0r[i]);
+ __vcpu_assign_sys_reg(vcpu, ICH_AP1RN(i), s_cpu_if->vgic_ap1r[i]);
+ }
+
+ for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
+ val = __vcpu_sys_reg(vcpu, ICH_LRN(i));
+
+ val &= ~ICH_LR_STATE;
+ val |= s_cpu_if->vgic_lr[lr_map_idx_to_shadow_idx(shadow_if, i)] & ICH_LR_STATE;
+
+ __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val);
+ }
+
+ vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0;
+}
+
+/*
+ * If we exit a L2 VM with a pending maintenance interrupt from the GIC,
+ * then we need to forward this to L1 so that it can re-sync the appropriate
+ * LRs and sample level triggered interrupts again.
+ */
+void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu)
+{
+ bool state = read_sysreg_s(SYS_ICH_MISR_EL2);
+
+ /* This will force a switch back to L1 if the level is high */
+ kvm_vgic_inject_irq(vcpu->kvm, vcpu,
+ vcpu->kvm->arch.vgic.mi_intid, state, vcpu);
+
+ sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EL2_En, 0);
+}
+
+void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu)
+{
+ bool level;
+
+ level = (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En) && vgic_v3_get_misr(vcpu);
+ kvm_vgic_inject_irq(vcpu->kvm, vcpu,
+ vcpu->kvm->arch.vgic.mi_intid, level, vcpu);
+}
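
To make the lr_map bookkeeping above concrete: vgic_v3_create_shadow_lr() copies only the LRs that carry state into the shadow interface, packing them densely, and lr_map records which guest-hypervisor LR each shadow slot came from; lr_map_idx_to_shadow_idx() then recovers the shadow slot by counting the mapped LRs below a given index. A tiny standalone example of the arithmetic (userspace-style, illustration only):

#include <stdio.h>

/* Same idea as lr_map_idx_to_shadow_idx(): popcount of the mapped LRs below idx */
static int map_idx_to_shadow_idx(unsigned long lr_map, int idx)
{
	return __builtin_popcountl(lr_map & ((1UL << idx) - 1));
}

int main(void)
{
	/* Say the L1 hypervisor populated LRs 1, 4 and 5: lr_map = 0b110010 */
	unsigned long lr_map = (1UL << 1) | (1UL << 4) | (1UL << 5);

	/* LR1 -> shadow slot 0, LR4 -> slot 1, LR5 -> slot 2 */
	printf("%d %d %d\n",
	       map_idx_to_shadow_idx(lr_map, 1),
	       map_idx_to_shadow_idx(lr_map, 4),
	       map_idx_to_shadow_idx(lr_map, 5));
	return 0;
}
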
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index f267bc2486a1..b9ad7c42c5b0 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -6,6 +6,7 @@
#include <linux/kstrtox.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/string_choices.h>
#include <kvm/arm_vgic.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
@@ -23,7 +24,7 @@ void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
- cpuif->vgic_hcr |= ICH_HCR_UIE;
+ cpuif->vgic_hcr |= ICH_HCR_EL2_UIE;
}
static bool lr_signals_eoi_mi(u64 lr_val)
@@ -41,7 +42,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
- cpuif->vgic_hcr &= ~ICH_HCR_UIE;
+ cpuif->vgic_hcr &= ~ICH_HCR_EL2_UIE;
for (lr = 0; lr < cpuif->used_lrs; lr++) {
u64 val = cpuif->vgic_lr[lr];
@@ -283,15 +284,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu)
vgic_v3->vgic_sre = 0;
}
- vcpu->arch.vgic_cpu.num_id_bits = (kvm_vgic_global_state.ich_vtr_el2 &
- ICH_VTR_ID_BITS_MASK) >>
- ICH_VTR_ID_BITS_SHIFT;
- vcpu->arch.vgic_cpu.num_pri_bits = ((kvm_vgic_global_state.ich_vtr_el2 &
- ICH_VTR_PRI_BITS_MASK) >>
- ICH_VTR_PRI_BITS_SHIFT) + 1;
+ vcpu->arch.vgic_cpu.num_id_bits = FIELD_GET(ICH_VTR_EL2_IDbits,
+ kvm_vgic_global_state.ich_vtr_el2);
+ vcpu->arch.vgic_cpu.num_pri_bits = FIELD_GET(ICH_VTR_EL2_PRIbits,
+ kvm_vgic_global_state.ich_vtr_el2) + 1;
/* Get the show on the road... */
- vgic_v3->vgic_hcr = ICH_HCR_EN;
+ vgic_v3->vgic_hcr = ICH_HCR_EL2_En;
}
void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
@@ -300,18 +299,19 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu)
/* Hide GICv3 sysreg if necessary */
if (!kvm_has_gicv3(vcpu->kvm)) {
- vgic_v3->vgic_hcr |= ICH_HCR_TALL0 | ICH_HCR_TALL1 | ICH_HCR_TC;
+ vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 |
+ ICH_HCR_EL2_TC);
return;
}
if (group0_trap)
- vgic_v3->vgic_hcr |= ICH_HCR_TALL0;
+ vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL0;
if (group1_trap)
- vgic_v3->vgic_hcr |= ICH_HCR_TALL1;
+ vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL1;
if (common_trap)
- vgic_v3->vgic_hcr |= ICH_HCR_TC;
+ vgic_v3->vgic_hcr |= ICH_HCR_EL2_TC;
if (dir_trap)
- vgic_v3->vgic_hcr |= ICH_HCR_TDIR;
+ vgic_v3->vgic_hcr |= ICH_HCR_EL2_TDIR;
}
int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
@@ -631,8 +631,8 @@ static const struct midr_range broken_seis[] = {
static bool vgic_v3_broken_seis(void)
{
- return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) &&
- is_midr_in_range_list(read_cpuid_id(), broken_seis));
+ return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_EL2_SEIS) &&
+ is_midr_in_range_list(broken_seis));
}
/**
@@ -663,9 +663,9 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
if (info->has_v4) {
kvm_vgic_global_state.has_gicv4 = gicv4_enable;
kvm_vgic_global_state.has_gicv4_1 = info->has_v4_1 && gicv4_enable;
- kvm_info("GICv4%s support %sabled\n",
+ kvm_info("GICv4%s support %s\n",
kvm_vgic_global_state.has_gicv4_1 ? ".1" : "",
- gicv4_enable ? "en" : "dis");
+ str_enabled_disabled(gicv4_enable));
}
kvm_vgic_global_state.vcpu_base = 0;
@@ -705,10 +705,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
if (vgic_v3_broken_seis()) {
kvm_info("GICv3 with broken locally generated SEI\n");
- kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK;
+ kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_EL2_SEIS;
group0_trap = true;
group1_trap = true;
- if (ich_vtr_el2 & ICH_VTR_TDS_MASK)
+ if (ich_vtr_el2 & ICH_VTR_EL2_TDS)
dir_trap = true;
else
common_trap = true;
@@ -734,7 +734,14 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);
+ /* If the vgic is nested, perform the full state loading */
+ if (vgic_state_is_nested(vcpu)) {
+ vgic_v3_load_nested(vcpu);
+ return;
+ }
+
+ if (likely(!is_protected_kvm_enabled()))
+ kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);
if (has_vhe())
__vgic_v3_activate_traps(cpu_if);
@@ -746,7 +753,13 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if);
+ if (vgic_state_is_nested(vcpu)) {
+ vgic_v3_put_nested(vcpu);
+ return;
+ }
+
+ if (likely(!is_protected_kvm_enabled()))
+ kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if);
WARN_ON(vgic_v4_put(vcpu));
if (has_vhe())
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index eedecbbbcf31..4d9343d2b0b1 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -336,14 +336,30 @@ void vgic_v4_teardown(struct kvm *kvm)
its_vm->vpes = NULL;
}
+static inline bool vgic_v4_want_doorbell(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_get_flag(vcpu, IN_WFI))
+ return true;
+
+ if (likely(!vcpu_has_nv(vcpu)))
+ return false;
+
+ /*
+ * GICv4 hardware is only ever used for the L1. Mark the vPE (i.e. the
+ * L1 context) nonresident and request a doorbell to kick us out of the
+ * L2 when an IRQ becomes pending.
+ */
+ return vcpu_get_flag(vcpu, IN_NESTED_ERET);
+}
+
int vgic_v4_put(struct kvm_vcpu *vcpu)
{
struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
- if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
+ if (!vgic_supports_direct_irqs(vcpu->kvm) || !vpe->resident)
return 0;
- return its_make_vpe_non_resident(vpe, !!vcpu_get_flag(vcpu, IN_WFI));
+ return its_make_vpe_non_resident(vpe, vgic_v4_want_doorbell(vcpu));
}
int vgic_v4_load(struct kvm_vcpu *vcpu)
@@ -351,7 +367,7 @@ int vgic_v4_load(struct kvm_vcpu *vcpu)
struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
int err;
- if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
+ if (!vgic_supports_direct_irqs(vcpu->kvm) || vpe->resident)
return 0;
if (vcpu_get_flag(vcpu, IN_WFI))
@@ -415,7 +431,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
struct vgic_irq *irq;
struct its_vlpi_map map;
unsigned long flags;
- int ret;
+ int ret = 0;
if (!vgic_supports_direct_msis(kvm))
return 0;
@@ -428,17 +444,24 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
if (IS_ERR(its))
return 0;
- mutex_lock(&its->its_lock);
+ guard(mutex)(&its->its_lock);
- /* Perform the actual DevID/EventID -> LPI translation. */
- ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
- irq_entry->msi.data, &irq);
- if (ret)
- goto out;
+ /*
+ * Perform the actual DevID/EventID -> LPI translation.
+ *
+ * Silently exit if translation fails as the guest (or userspace!) has
+ * managed to do something stupid. Emulated LPI injection will still
+ * work if the guest figures itself out at a later time.
+ */
+ if (vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
+ irq_entry->msi.data, &irq))
+ return 0;
+
+ raw_spin_lock_irqsave(&irq->irq_lock, flags);
/* Silently exit if the vLPI is already mapped */
if (irq->hw)
- goto out;
+ goto out_unlock_irq;
/*
* Emit the mapping request. If it fails, the ITS probably
@@ -458,68 +481,72 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
ret = its_map_vlpi(virq, &map);
if (ret)
- goto out;
+ goto out_unlock_irq;
irq->hw = true;
irq->host_irq = virq;
atomic_inc(&map.vpe->vlpi_count);
/* Transfer pending state */
- raw_spin_lock_irqsave(&irq->irq_lock, flags);
- if (irq->pending_latch) {
- ret = irq_set_irqchip_state(irq->host_irq,
- IRQCHIP_STATE_PENDING,
- irq->pending_latch);
- WARN_RATELIMIT(ret, "IRQ %d", irq->host_irq);
+ if (!irq->pending_latch)
+ goto out_unlock_irq;
- /*
- * Clear pending_latch and communicate this state
- * change via vgic_queue_irq_unlock.
- */
- irq->pending_latch = false;
- vgic_queue_irq_unlock(kvm, irq, flags);
- } else {
- raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
- }
+ ret = irq_set_irqchip_state(irq->host_irq, IRQCHIP_STATE_PENDING,
+ irq->pending_latch);
+ WARN_RATELIMIT(ret, "IRQ %d", irq->host_irq);
+
+ /*
+ * Clear pending_latch and communicate this state
+ * change via vgic_queue_irq_unlock.
+ */
+ irq->pending_latch = false;
+ vgic_queue_irq_unlock(kvm, irq, flags);
+ return ret;
-out:
- mutex_unlock(&its->its_lock);
+out_unlock_irq:
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
return ret;
}
-int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
- struct kvm_kernel_irq_routing_entry *irq_entry)
+static struct vgic_irq *__vgic_host_irq_get_vlpi(struct kvm *kvm, int host_irq)
{
- struct vgic_its *its;
struct vgic_irq *irq;
- int ret;
+ unsigned long idx;
- if (!vgic_supports_direct_msis(kvm))
- return 0;
+ guard(rcu)();
+ xa_for_each(&kvm->arch.vgic.lpi_xa, idx, irq) {
+ if (!irq->hw || irq->host_irq != host_irq)
+ continue;
- /*
- * Get the ITS, and escape early on error (not a valid
- * doorbell for any of our vITSs).
- */
- its = vgic_get_its(kvm, irq_entry);
- if (IS_ERR(its))
- return 0;
+ if (!vgic_try_get_irq_kref(irq))
+ return NULL;
- mutex_lock(&its->its_lock);
+ return irq;
+ }
- ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
- irq_entry->msi.data, &irq);
- if (ret)
- goto out;
+ return NULL;
+}
+
+void kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int host_irq)
+{
+ struct vgic_irq *irq;
+ unsigned long flags;
+
+ if (!vgic_supports_direct_msis(kvm))
+ return;
+
+ irq = __vgic_host_irq_get_vlpi(kvm, host_irq);
+ if (!irq)
+ return;
- WARN_ON(!(irq->hw && irq->host_irq == virq));
+ raw_spin_lock_irqsave(&irq->irq_lock, flags);
+ WARN_ON(irq->hw && irq->host_irq != host_irq);
if (irq->hw) {
atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
irq->hw = false;
- ret = its_unmap_vlpi(virq);
+ its_unmap_vlpi(host_irq);
}
-out:
- mutex_unlock(&its->its_lock);
- return ret;
+ raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
+ vgic_put_irq(kvm, irq);
}
diff --git a/arch/arm64/kvm/vgic/vgic-v5.c b/arch/arm64/kvm/vgic/vgic-v5.c
new file mode 100644
index 000000000000..6bdbb221bcde
--- /dev/null
+++ b/arch/arm64/kvm/vgic/vgic-v5.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <kvm/arm_vgic.h>
+#include <linux/irqchip/arm-vgic-info.h>
+
+#include "vgic.h"
+
+/*
+ * Probe for a vGICv5 compatible interrupt controller, returning 0 on success.
+ * Currently only supports GICv3-based VMs on a GICv5 host, and hence only
+ * registers a VGIC_V3 device.
+ */
+int vgic_v5_probe(const struct gic_kvm_info *info)
+{
+ u64 ich_vtr_el2;
+ int ret;
+
+ if (!info->has_gcie_v3_compat)
+ return -ENODEV;
+
+ kvm_vgic_global_state.type = VGIC_V5;
+ kvm_vgic_global_state.has_gcie_v3_compat = true;
+
+ /* We only support v3 compat mode - use vGICv3 limits */
+ kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;
+
+ kvm_vgic_global_state.vcpu_base = 0;
+ kvm_vgic_global_state.vctrl_base = NULL;
+ kvm_vgic_global_state.can_emulate_gicv2 = false;
+ kvm_vgic_global_state.has_gicv4 = false;
+ kvm_vgic_global_state.has_gicv4_1 = false;
+
+ ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
+ kvm_vgic_global_state.ich_vtr_el2 = (u32)ich_vtr_el2;
+
+ /*
+ * The ListRegs field is 5 bits, but there is an architectural
+ * maximum of 16 list registers. Just ignore bit 4...
+ */
+ kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
+
+ ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3);
+ if (ret) {
+ kvm_err("Cannot register GICv3-legacy KVM device.\n");
+ return ret;
+ }
+
+ static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif);
+ kvm_info("GCIE legacy system register CPU interface\n");
+
+ return 0;
+}
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index cc8c6b9b5dd8..f5148b38120a 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -872,6 +872,15 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
int used_lrs;
+ /* If nesting, emulate the HW effect from L0 to L1 */
+ if (vgic_state_is_nested(vcpu)) {
+ vgic_v3_sync_nested(vcpu);
+ return;
+ }
+
+ if (vcpu_has_nv(vcpu))
+ vgic_v3_nested_update_mi(vcpu);
+
/* An empty ap_list_head implies used_lrs == 0 */
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
return;
@@ -901,6 +910,35 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
/*
+ * If in a nested state, we must return early. Two possibilities:
+ *
+ * - If we have any pending IRQ for the guest and the guest
+ * expects IRQs to be handled in its virtual EL2 mode (the
+ * virtual IMO bit is set) and it is not already running in
+ * virtual EL2 mode, then we have to emulate an IRQ
+ * exception to virtual EL2.
+ *
+ * We do that by placing a request to ourselves which will
+ * abort the entry procedure and inject the exception at the
+ * beginning of the run loop.
+ *
+ * - Otherwise, do exactly *NOTHING*. The guest state is
+ * already loaded, and we can carry on with running it.
+ *
+ * If we have NV, but are not in a nested state, compute the
+ * maintenance interrupt state, as it may fire.
+ */
+ if (vgic_state_is_nested(vcpu)) {
+ if (kvm_vgic_vcpu_pending_irq(vcpu))
+ kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu);
+
+ return;
+ }
+
+ if (vcpu_has_nv(vcpu))
+ vgic_v3_nested_update_mi(vcpu);
+
+ /*
* If there are no virtual interrupts active or pending for this
* VCPU, then there is no work to do and we can bail out without
* taking any lock. There is a potential race with someone injecting
@@ -913,7 +951,7 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
* can be directly injected (GICv4).
*/
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) &&
- !vgic_supports_direct_msis(vcpu->kvm))
+ !vgic_supports_direct_irqs(vcpu->kvm))
return;
DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
@@ -927,7 +965,7 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
if (can_access_vgic_from_kernel())
vgic_restore_state(vcpu);
- if (vgic_supports_direct_msis(vcpu->kvm))
+ if (vgic_supports_direct_irqs(vcpu->kvm))
vgic_v4_commit(vcpu);
}
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 122d95b4e284..1384a04c0784 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -64,6 +64,24 @@
KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \
KVM_REG_ARM_VGIC_SYSREG_OP2_MASK)
+#define KVM_ICC_SRE_EL2 (ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE | \
+ ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB)
+#define KVM_ICH_VTR_EL2_RES0 (ICH_VTR_EL2_DVIM | \
+ ICH_VTR_EL2_A3V | \
+ ICH_VTR_EL2_IDbits)
+#define KVM_ICH_VTR_EL2_RES1 ICH_VTR_EL2_nV4
+
+static inline u64 kvm_get_guest_vtr_el2(void)
+{
+ u64 vtr;
+
+ vtr = kvm_vgic_global_state.ich_vtr_el2;
+ vtr &= ~KVM_ICH_VTR_EL2_RES0;
+ vtr |= KVM_ICH_VTR_EL2_RES1;
+
+ return vtr;
+}
+
/*
* As per Documentation/virt/kvm/devices/arm-vgic-its.rst,
* below macros are defined for ITS table entry encoding.
@@ -172,6 +190,36 @@ struct vgic_reg_attr {
gpa_t addr;
};
+struct its_device {
+ struct list_head dev_list;
+
+ /* the head for the list of ITTEs */
+ struct list_head itt_head;
+ u32 num_eventid_bits;
+ gpa_t itt_addr;
+ u32 device_id;
+};
+
+#define COLLECTION_NOT_MAPPED ((u32)~0)
+
+struct its_collection {
+ struct list_head coll_list;
+
+ u32 collection_id;
+ u32 target_addr;
+};
+
+#define its_is_collection_mapped(coll) ((coll) && \
+ ((coll)->target_addr != COLLECTION_NOT_MAPPED))
+
+struct its_ite {
+ struct list_head ite_list;
+
+ struct vgic_irq *irq;
+ struct its_collection *collection;
+ u32 event_id;
+};
+
int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
struct vgic_reg_attr *reg_attr);
int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr,
@@ -267,6 +315,7 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr, bool is_write);
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
+const struct sys_reg_desc *vgic_v3_get_sysreg_table(unsigned int *sz);
int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
u32 intid, u32 *val);
int kvm_register_vgic_device(unsigned long type);
@@ -278,6 +327,8 @@ int vgic_init(struct kvm *kvm);
void vgic_debug_init(struct kvm *kvm);
void vgic_debug_destroy(struct kvm *kvm);
+int vgic_v5_probe(const struct gic_kvm_info *info);
+
static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu;
@@ -339,7 +390,23 @@ void vgic_its_invalidate_all_caches(struct kvm *kvm);
int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq);
int vgic_its_invall(struct kvm_vcpu *vcpu);
+bool system_supports_direct_sgis(void);
bool vgic_supports_direct_msis(struct kvm *kvm);
+bool vgic_supports_direct_sgis(struct kvm *kvm);
+
+static inline bool vgic_supports_direct_irqs(struct kvm *kvm)
+{
+ /*
+ * Deliberately conflate vLPI and vSGI support on GICv4.1 hardware,
+ * indirectly allowing userspace to control whether or not vPEs are
+ * allocated for the VM.
+ */
+ if (system_supports_direct_sgis())
+ return vgic_supports_direct_sgis(kvm);
+
+ return vgic_supports_direct_msis(kvm);
+}
+
int vgic_v4_init(struct kvm *kvm);
void vgic_v4_teardown(struct kvm *kvm);
void vgic_v4_configure_vsgis(struct kvm *kvm);
@@ -353,4 +420,24 @@ static inline bool kvm_has_gicv3(struct kvm *kvm)
return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP);
}
+void vgic_v3_sync_nested(struct kvm_vcpu *vcpu);
+void vgic_v3_load_nested(struct kvm_vcpu *vcpu);
+void vgic_v3_put_nested(struct kvm_vcpu *vcpu);
+void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu);
+void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu);
+
+static inline bool vgic_is_v3_compat(struct kvm *kvm)
+{
+ return cpus_have_final_cap(ARM64_HAS_GICV5_CPUIF) &&
+ kvm_vgic_global_state.has_gcie_v3_compat;
+}
+
+static inline bool vgic_is_v3(struct kvm *kvm)
+{
+ return kvm_vgic_global_state.type == VGIC_V3 || vgic_is_v3_compat(kvm);
+}
+
+int vgic_its_debug_init(struct kvm_device *dev);
+void vgic_its_debug_destroy(struct kvm_device *dev);
+
#endif