diff options
Diffstat (limited to 'drivers/iommu/intel/irq_remapping.c')
-rw-r--r-- | drivers/iommu/intel/irq_remapping.c | 80 |
1 files changed, 42 insertions, 38 deletions
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 466c1412dd45..3bc2a03cceca 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -24,12 +24,6 @@ #include "iommu.h" #include "../irq_remapping.h" #include "../iommu-pages.h" -#include "cap_audit.h" - -enum irq_mode { - IRQ_REMAPPING, - IRQ_POSTING, -}; struct ioapic_scope { struct intel_iommu *iommu; @@ -50,8 +44,8 @@ struct irq_2_iommu { u16 irte_index; u16 sub_handle; u8 irte_mask; - enum irq_mode mode; bool posted_msi; + bool posted_vcpu; }; struct intel_ir_data { @@ -139,7 +133,6 @@ static int alloc_irte(struct intel_iommu *iommu, irq_iommu->irte_index = index; irq_iommu->sub_handle = 0; irq_iommu->irte_mask = mask; - irq_iommu->mode = IRQ_REMAPPING; } raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); @@ -194,8 +187,6 @@ static int modify_irte(struct irq_2_iommu *irq_iommu, rc = qi_flush_iec(iommu, index, 0); - /* Update iommu mode according to the IRTE mode */ - irq_iommu->mode = irte->pst ? IRQ_POSTING : IRQ_REMAPPING; raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); return rc; @@ -727,9 +718,6 @@ static int __init intel_prepare_irq_remapping(void) if (dmar_table_init() < 0) return -ENODEV; - if (intel_cap_audit(CAP_AUDIT_STATIC_IRQR, NULL)) - return -ENODEV; - if (!dmar_ir_support()) return -ENODEV; @@ -1173,7 +1161,26 @@ static void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) static inline void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) {} #endif -static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) +static void __intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host) +{ + struct intel_ir_data *ir_data = irqd->chip_data; + + /* + * Don't modify IRTEs for IRQs that are being posted to vCPUs if the + * host CPU affinity changes. + */ + if (ir_data->irq_2_iommu.posted_vcpu && !force_host) + return; + + ir_data->irq_2_iommu.posted_vcpu = false; + + if (ir_data->irq_2_iommu.posted_msi) + intel_ir_reconfigure_irte_posted(irqd); + else + modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry); +} + +static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host) { struct intel_ir_data *ir_data = irqd->chip_data; struct irte *irte = &ir_data->irte_entry; @@ -1186,10 +1193,7 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force) irte->vector = cfg->vector; irte->dest_id = IRTE_DEST(cfg->dest_apicid); - if (ir_data->irq_2_iommu.posted_msi) - intel_ir_reconfigure_irte_posted(irqd); - else if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING) - modify_irte(&ir_data->irq_2_iommu, irte); + __intel_ir_reconfigure_irte(irqd, force_host); } /* @@ -1244,7 +1248,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) /* stop posting interrupts, back to the default mode */ if (!vcpu_pi_info) { - modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry); + __intel_ir_reconfigure_irte(data, true); } else { struct irte irte_pi; @@ -1267,6 +1271,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info) irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) & ~(-1UL << PDA_HIGH_BIT); + ir_data->irq_2_iommu.posted_vcpu = true; modify_irte(&ir_data->irq_2_iommu, &irte_pi); } @@ -1282,43 +1287,44 @@ static struct irq_chip intel_ir_chip = { }; /* - * With posted MSIs, all vectors are multiplexed into a single notification - * vector. Devices MSIs are then dispatched in a demux loop where - * EOIs can be coalesced as well. + * With posted MSIs, the MSI vectors are multiplexed into a single notification + * vector, and only the notification vector is sent to the APIC IRR. Device + * MSIs are then dispatched in a demux loop that harvests the MSIs from the + * CPU's Posted Interrupt Request bitmap. I.e. Posted MSIs never get sent to + * the APIC IRR, and thus do not need an EOI. The notification handler instead + * performs a single EOI after processing the PIR. * - * "INTEL-IR-POST" IRQ chip does not do EOI on ACK, thus the dummy irq_ack() - * function. Instead EOI is performed by the posted interrupt notification - * handler. + * Note! Pending SMP/CPU affinity changes, which are per MSI, must still be + * honored, only the APIC EOI is omitted. * * For the example below, 3 MSIs are coalesced into one CPU notification. Only - * one apic_eoi() is needed. + * one apic_eoi() is needed, but each MSI needs to process pending changes to + * its CPU affinity. * * __sysvec_posted_msi_notification() * irq_enter(); * handle_edge_irq() * irq_chip_ack_parent() - * dummy(); // No EOI + * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * handle_edge_irq() * irq_chip_ack_parent() - * dummy(); // No EOI + * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * handle_edge_irq() * irq_chip_ack_parent() - * dummy(); // No EOI + * irq_move_irq(); // No EOI * handle_irq_event() * driver_handler() * apic_eoi() * irq_exit() + * */ - -static void dummy_ack(struct irq_data *d) { } - static struct irq_chip intel_ir_chip_post_msi = { .name = "INTEL-IR-POST", - .irq_ack = dummy_ack, + .irq_ack = irq_move_irq, .irq_set_affinity = intel_ir_set_affinity, .irq_compose_msi_msg = intel_ir_compose_msi_msg, .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity, @@ -1463,7 +1469,6 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain, else irq_data->chip = &intel_ir_chip; intel_irq_remapping_prepare_irte(ird, irq_cfg, info, index, i); - irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT); } return 0; @@ -1494,6 +1499,9 @@ static void intel_irq_remapping_deactivate(struct irq_domain *domain, struct intel_ir_data *data = irq_data->chip_data; struct irte entry; + WARN_ON_ONCE(data->irq_2_iommu.posted_vcpu); + data->irq_2_iommu.posted_vcpu = false; + memset(&entry, 0, sizeof(entry)); modify_irte(&data->irq_2_iommu, &entry); } @@ -1534,10 +1542,6 @@ static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu) int ret; int eim = x2apic_enabled(); - ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_IRQR, iommu); - if (ret) - return ret; - if (eim && !ecap_eim_support(iommu->ecap)) { pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n", iommu->reg_phys, iommu->ecap); |