summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Gunthorpe <jgg@nvidia.com>2023-01-23 21:24:04 +0300
committerJason Gunthorpe <jgg@nvidia.com>2023-01-23 21:24:04 +0300
commitfc3873095a09ce969543fa4a17fee271c8ca3566 (patch)
treedb46969d11afe94250112f72f178d6dae3bfccaa
parentb7bfaa761d760e72a969d116517eaa12e404c262 (diff)
parentb062007c63eb4452f1122384e86d402531fb1d52 (diff)
downloadlinux-fc3873095a09ce969543fa4a17fee271c8ca3566.tar.xz
Merge branch 'isolated_msi' into iommufd.git for-next
Jason Gunthorpe says: ==================== Harmonize these into a single irq_domain based check under msi_device_has_isolated_msi(). In real HW "isolated MSI" is implemented in a few different ways: - x86 uses "interrupt remapping" which is a block that sits between the device and APIC, that can "remap" the MSI MemWr. AMD uses per-RID tables to implement isolation while Intel stores the authorized RID in each IRTE entry. Part of the remapping is discarding, HW will not forward MSIs that don't positively match the tables. - ARM GICv3 ITS integrates the concept of an out-of-band "device ID" directly into the interrupt controller logic. The tables the GIC checks that determine how to deliver the interrupt through the ITS device table and interrupt translation tables allow limiting which interrupts device IDs can trigger. - S390 has unconditionally claimed it has isolated MSI through the iommu driver. This is a weaker version of the other arches in that it only works between "gisa" domains. See zpci_set_airq() and https://lore.kernel.org/r/31af8174-35e9-ebeb-b9ef-74c90d4bfd93@linux.ibm.com/ After this series the "isolated MSI" is tagged based only on the irq_domains that the interrupt travels through. For x86 enabling interrupt remapping causes IR irq_domains to be installed in the path, and they can carry the IRQ_DOMAIN_FLAG_ISOLATED_MSI. For ARM the GICv3 ITS itself already sets the flag when it is running in a isolated mode, and S390 simply sets it always through an arch hook since it doesn't use irq_domains at all. This removes the intrusion of IRQ subsystem information into the iommu drivers. Linux's iommu_domains abstraction has no bearing at all on the security of MSI. Even if HW linked to the IOMMU may implement the security on x86 implementations, Linux models that HW through the irq_domain, not the iommu_domain. ==================== * branch 'isolated_msi': iommu: Remove IOMMU_CAP_INTR_REMAP irq/s390: Add arch_is_isolated_msi() for s390 iommu/x86: Replace IOMMU_CAP_INTR_REMAP with IRQ_DOMAIN_FLAG_ISOLATED_MSI genirq/msi: Rename IRQ_DOMAIN_MSI_REMAP to IRQ_DOMAIN_ISOLATED_MSI genirq/irqdomain: Remove unused irq_domain_check_msi_remap() code iommufd: Convert to msi_device_has_isolated_msi() vfio/type1: Convert to iommu_group_has_isolated_msi() iommu: Add iommu_group_has_isolated_msi() genirq/msi: Add msi_device_has_isolated_msi() Link: https://lore.kernel.org/r/0-v3-3313bb5dd3a3+10f11-secure_msi_jgg@nvidia.com Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
-rw-r--r--arch/s390/include/asm/msi.h17
-rw-r--r--drivers/iommu/amd/iommu.c5
-rw-r--r--drivers/iommu/intel/iommu.c2
-rw-r--r--drivers/iommu/intel/irq_remapping.c3
-rw-r--r--drivers/iommu/iommu.c24
-rw-r--r--drivers/iommu/iommufd/device.c4
-rw-r--r--drivers/iommu/s390-iommu.c2
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c4
-rw-r--r--drivers/vfio/vfio_iommu_type1.c16
-rw-r--r--include/linux/iommu.h2
-rw-r--r--include/linux/irqdomain.h29
-rw-r--r--include/linux/msi.h17
-rw-r--r--kernel/irq/irqdomain.c39
-rw-r--r--kernel/irq/msi.c27
14 files changed, 100 insertions, 91 deletions
diff --git a/arch/s390/include/asm/msi.h b/arch/s390/include/asm/msi.h
new file mode 100644
index 000000000000..399343ed9ffb
--- /dev/null
+++ b/arch/s390/include/asm/msi.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_MSI_H
+#define _ASM_S390_MSI_H
+#include <asm-generic/msi.h>
+
+/*
+ * Work around S390 not using irq_domain at all so we can't set
+ * IRQ_DOMAIN_FLAG_ISOLATED_MSI. See for an explanation how it works:
+ *
+ * https://lore.kernel.org/r/31af8174-35e9-ebeb-b9ef-74c90d4bfd93@linux.ibm.com/
+ *
+ * Note this is less isolated than the ARM/x86 versions as userspace can trigger
+ * MSI belonging to kernel devices within the same gisa.
+ */
+#define arch_is_isolated_msi() true
+
+#endif
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index cbeaab55c0db..321d50e9df5b 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2271,8 +2271,6 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
return true;
- case IOMMU_CAP_INTR_REMAP:
- return (irq_remapping_enabled == 1);
case IOMMU_CAP_NOEXEC:
return false;
case IOMMU_CAP_PRE_BOOT_PROTECTION:
@@ -3671,7 +3669,8 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
}
irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI);
- iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
+ iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
+ IRQ_DOMAIN_FLAG_ISOLATED_MSI;
if (amd_iommu_np_cache)
iommu->ir_domain->msi_parent_ops = &virt_amdvi_msi_parent_ops;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 59df7e42fd53..7cfab5fd5e59 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4464,8 +4464,6 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
return true;
- case IOMMU_CAP_INTR_REMAP:
- return irq_remapping_enabled == 1;
case IOMMU_CAP_PRE_BOOT_PROTECTION:
return dmar_platform_optin();
case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index f58f5f57af78..6d01fa078c36 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -573,7 +573,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
}
irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR);
- iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
+ iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
+ IRQ_DOMAIN_FLAG_ISOLATED_MSI;
if (cap_caching_mode(iommu->cap))
iommu->ir_domain->msi_parent_ops = &virt_dmar_msi_parent_ops;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index de91dd88705b..834e6ecf3e51 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -30,6 +30,7 @@
#include <linux/cc_platform.h>
#include <trace/events/iommu.h>
#include <linux/sched/mm.h>
+#include <linux/msi.h>
#include "dma-iommu.h"
@@ -1898,6 +1899,29 @@ bool device_iommu_capable(struct device *dev, enum iommu_cap cap)
EXPORT_SYMBOL_GPL(device_iommu_capable);
/**
+ * iommu_group_has_isolated_msi() - Compute msi_device_has_isolated_msi()
+ * for a group
+ * @group: Group to query
+ *
+ * IOMMU groups should not have differing values of
+ * msi_device_has_isolated_msi() for devices in a group. However nothing
+ * directly prevents this, so ensure mistakes don't result in isolation failures
+ * by checking that all the devices are the same.
+ */
+bool iommu_group_has_isolated_msi(struct iommu_group *group)
+{
+ struct group_device *group_dev;
+ bool ret = true;
+
+ mutex_lock(&group->mutex);
+ list_for_each_entry(group_dev, &group->devices, list)
+ ret &= msi_device_has_isolated_msi(group_dev->dev);
+ mutex_unlock(&group->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_group_has_isolated_msi);
+
+/**
* iommu_set_fault_handler() - set a fault handler for an iommu domain
* @domain: iommu domain
* @handler: fault handler
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index d81f93a321af..9f3b9674d72e 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -4,7 +4,6 @@
#include <linux/iommufd.h>
#include <linux/slab.h>
#include <linux/iommu.h>
-#include <linux/irqdomain.h>
#include "io_pagetable.h"
#include "iommufd_private.h"
@@ -169,8 +168,7 @@ static int iommufd_device_setup_msi(struct iommufd_device *idev,
* operation from the device (eg a simple DMA) cannot trigger an
* interrupt outside this iommufd context.
*/
- if (!device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP) &&
- !irq_domain_check_msi_remap()) {
+ if (!iommu_group_has_isolated_msi(idev->group)) {
if (!allow_unsafe_interrupts)
return -EPERM;
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index ed33c6cce083..bb00580a30d8 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -34,8 +34,6 @@ static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
return true;
- case IOMMU_CAP_INTR_REMAP:
- return true;
default:
return false;
}
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 973ede0197e3..b4069f825a9b 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -4692,7 +4692,7 @@ static bool __maybe_unused its_enable_quirk_socionext_synquacer(void *data)
}
/* the pre-ITS breaks isolation, so disable MSI remapping */
- its->msi_domain_flags &= ~IRQ_DOMAIN_FLAG_MSI_REMAP;
+ its->msi_domain_flags &= ~IRQ_DOMAIN_FLAG_ISOLATED_MSI;
return true;
}
return false;
@@ -5074,7 +5074,7 @@ static int __init its_probe_one(struct resource *res,
its->cmd_write = its->cmd_base;
its->fwnode_handle = handle;
its->get_msi_base = its_irq_get_msi_base;
- its->msi_domain_flags = IRQ_DOMAIN_FLAG_MSI_REMAP;
+ its->msi_domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI;
its_enable_quirks(its);
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 23c24fe98c00..393b27a3bd87 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -37,7 +37,6 @@
#include <linux/vfio.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
-#include <linux/irqdomain.h>
#include "vfio.h"
#define DRIVER_VERSION "0.2"
@@ -2160,12 +2159,6 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu,
list_splice_tail(iova_copy, iova);
}
-/* Redundantly walks non-present capabilities to simplify caller */
-static int vfio_iommu_device_capable(struct device *dev, void *data)
-{
- return device_iommu_capable(dev, (enum iommu_cap)data);
-}
-
static int vfio_iommu_domain_alloc(struct device *dev, void *data)
{
struct iommu_domain **domain = data;
@@ -2180,7 +2173,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
struct vfio_iommu *iommu = iommu_data;
struct vfio_iommu_group *group;
struct vfio_domain *domain, *d;
- bool resv_msi, msi_remap;
+ bool resv_msi;
phys_addr_t resv_msi_base = 0;
struct iommu_domain_geometry *geo;
LIST_HEAD(iova_copy);
@@ -2278,11 +2271,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
INIT_LIST_HEAD(&domain->group_list);
list_add(&group->next, &domain->group_list);
- msi_remap = irq_domain_check_msi_remap() ||
- iommu_group_for_each_dev(iommu_group, (void *)IOMMU_CAP_INTR_REMAP,
- vfio_iommu_device_capable);
-
- if (!allow_unsafe_interrupts && !msi_remap) {
+ if (!allow_unsafe_interrupts &&
+ !iommu_group_has_isolated_msi(iommu_group)) {
pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n",
__func__);
ret = -EPERM;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 46e1347bfa22..933cc57bfc48 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -120,7 +120,6 @@ static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
enum iommu_cap {
IOMMU_CAP_CACHE_COHERENCY, /* IOMMU_CACHE is supported */
- IOMMU_CAP_INTR_REMAP, /* IOMMU supports interrupt isolation */
IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */
IOMMU_CAP_PRE_BOOT_PROTECTION, /* Firmware says it used the IOMMU for
DMA protection and we should too */
@@ -455,6 +454,7 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
extern int bus_iommu_probe(struct bus_type *bus);
extern bool iommu_present(struct bus_type *bus);
extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
+extern bool iommu_group_has_isolated_msi(struct iommu_group *group);
extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus);
extern struct iommu_group *iommu_group_get_by_id(int id);
extern void iommu_domain_free(struct iommu_domain *domain);
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index a372086750ca..0a3e974b7288 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -192,8 +192,10 @@ enum {
/* Irq domain implements MSIs */
IRQ_DOMAIN_FLAG_MSI = (1 << 4),
- /* Irq domain implements MSI remapping */
- IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5),
+ /*
+ * Irq domain implements isolated MSI, see msi_device_has_isolated_msi()
+ */
+ IRQ_DOMAIN_FLAG_ISOLATED_MSI = (1 << 5),
/* Irq domain doesn't translate anything */
IRQ_DOMAIN_FLAG_NO_MAP = (1 << 6),
@@ -276,7 +278,6 @@ struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode,
void *host_data);
extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
enum irq_domain_bus_token bus_token);
-extern bool irq_domain_check_msi_remap(void);
extern void irq_set_default_host(struct irq_domain *host);
extern struct irq_domain *irq_get_default_host(void);
extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
@@ -559,13 +560,6 @@ static inline bool irq_domain_is_msi(struct irq_domain *domain)
return domain->flags & IRQ_DOMAIN_FLAG_MSI;
}
-static inline bool irq_domain_is_msi_remap(struct irq_domain *domain)
-{
- return domain->flags & IRQ_DOMAIN_FLAG_MSI_REMAP;
-}
-
-extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain);
-
static inline bool irq_domain_is_msi_parent(struct irq_domain *domain)
{
return domain->flags & IRQ_DOMAIN_FLAG_MSI_PARENT;
@@ -611,17 +605,6 @@ static inline bool irq_domain_is_msi(struct irq_domain *domain)
return false;
}
-static inline bool irq_domain_is_msi_remap(struct irq_domain *domain)
-{
- return false;
-}
-
-static inline bool
-irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain)
-{
- return false;
-}
-
static inline bool irq_domain_is_msi_parent(struct irq_domain *domain)
{
return false;
@@ -641,10 +624,6 @@ static inline struct irq_domain *irq_find_matching_fwnode(
{
return NULL;
}
-static inline bool irq_domain_check_msi_remap(void)
-{
- return false;
-}
#endif /* !CONFIG_IRQ_DOMAIN */
#endif /* _LINUX_IRQDOMAIN_H */
diff --git a/include/linux/msi.h b/include/linux/msi.h
index a112b913fff9..13c9b74a4575 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -48,6 +48,10 @@ typedef struct arch_msi_msg_data {
} __attribute__ ((packed)) arch_msi_msg_data_t;
#endif
+#ifndef arch_is_isolated_msi
+#define arch_is_isolated_msi() false
+#endif
+
/**
* msi_msg - Representation of a MSI message
* @address_lo: Low 32 bits of msi message address
@@ -649,6 +653,19 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir
void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq,
unsigned int nvec);
void *platform_msi_get_host_data(struct irq_domain *domain);
+
+bool msi_device_has_isolated_msi(struct device *dev);
+#else /* CONFIG_GENERIC_MSI_IRQ */
+static inline bool msi_device_has_isolated_msi(struct device *dev)
+{
+ /*
+ * Arguably if the platform does not enable MSI support then it has
+ * "isolated MSI", as an interrupt controller that cannot receive MSIs
+ * is inherently isolated by our definition. The default definition for
+ * arch_is_isolated_msi() is conservative and returns false anyhow.
+ */
+ return arch_is_isolated_msi();
+}
#endif /* CONFIG_GENERIC_MSI_IRQ */
/* PCI specific interfaces */
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8fe1da9614ee..104954951582 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -437,31 +437,6 @@ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
EXPORT_SYMBOL_GPL(irq_find_matching_fwspec);
/**
- * irq_domain_check_msi_remap - Check whether all MSI irq domains implement
- * IRQ remapping
- *
- * Return: false if any MSI irq domain does not support IRQ remapping,
- * true otherwise (including if there is no MSI irq domain)
- */
-bool irq_domain_check_msi_remap(void)
-{
- struct irq_domain *h;
- bool ret = true;
-
- mutex_lock(&irq_domain_mutex);
- list_for_each_entry(h, &irq_domain_list, link) {
- if (irq_domain_is_msi(h) &&
- !irq_domain_hierarchical_is_msi_remap(h)) {
- ret = false;
- break;
- }
- }
- mutex_unlock(&irq_domain_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(irq_domain_check_msi_remap);
-
-/**
* irq_set_default_host() - Set a "default" irq domain
* @domain: default domain pointer
*
@@ -1815,20 +1790,6 @@ static void irq_domain_check_hierarchy(struct irq_domain *domain)
if (domain->ops->alloc)
domain->flags |= IRQ_DOMAIN_FLAG_HIERARCHY;
}
-
-/**
- * irq_domain_hierarchical_is_msi_remap - Check if the domain or any
- * parent has MSI remapping support
- * @domain: domain pointer
- */
-bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain)
-{
- for (; domain; domain = domain->parent) {
- if (irq_domain_is_msi_remap(domain))
- return true;
- }
- return false;
-}
#else /* CONFIG_IRQ_DOMAIN_HIERARCHY */
/**
* irq_domain_get_irq_data - Get irq_data associated with @virq and @domain
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 955267bbc2be..4dec57fc4ea6 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -1623,3 +1623,30 @@ struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain)
{
return (struct msi_domain_info *)domain->host_data;
}
+
+/**
+ * msi_device_has_isolated_msi - True if the device has isolated MSI
+ * @dev: The device to check
+ *
+ * Isolated MSI means that HW modeled by an irq_domain on the path from the
+ * initiating device to the CPU will validate that the MSI message specifies an
+ * interrupt number that the device is authorized to trigger. This must block
+ * devices from triggering interrupts they are not authorized to trigger.
+ * Currently authorization means the MSI vector is one assigned to the device.
+ *
+ * This is interesting for securing VFIO use cases where a rouge MSI (eg created
+ * by abusing a normal PCI MemWr DMA) must not allow the VFIO userspace to
+ * impact outside its security domain, eg userspace triggering interrupts on
+ * kernel drivers, a VM triggering interrupts on the hypervisor, or a VM
+ * triggering interrupts on another VM.
+ */
+bool msi_device_has_isolated_msi(struct device *dev)
+{
+ struct irq_domain *domain = dev_get_msi_domain(dev);
+
+ for (; domain; domain = domain->parent)
+ if (domain->flags & IRQ_DOMAIN_FLAG_ISOLATED_MSI)
+ return true;
+ return arch_is_isolated_msi();
+}
+EXPORT_SYMBOL_GPL(msi_device_has_isolated_msi);