summaryrefslogtreecommitdiff
path: root/drivers/iommu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu')
-rw-r--r--drivers/iommu/Kconfig17
-rw-r--r--drivers/iommu/amd/amd_iommu.h8
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h71
-rw-r--r--drivers/iommu/amd/init.c340
-rw-r--r--drivers/iommu/amd/io_pgtable.c7
-rw-r--r--drivers/iommu/amd/io_pgtable_v2.c2
-rw-r--r--drivers/iommu/amd/iommu.c556
-rw-r--r--drivers/iommu/amd/pasid.c4
-rw-r--r--drivers/iommu/apple-dart.c22
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c68
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c21
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c347
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h67
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c32
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-impl.c5
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c2
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c120
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h3
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.c54
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu.h2
-rw-r--r--drivers/iommu/dma-iommu.c256
-rw-r--r--drivers/iommu/dma-iommu.h14
-rw-r--r--drivers/iommu/exynos-iommu.c10
-rw-r--r--drivers/iommu/hyperv-iommu.c12
-rw-r--r--drivers/iommu/intel/Makefile2
-rw-r--r--drivers/iommu/intel/cache.c16
-rw-r--r--drivers/iommu/intel/cap_audit.c217
-rw-r--r--drivers/iommu/intel/cap_audit.h131
-rw-r--r--drivers/iommu/intel/iommu.c335
-rw-r--r--drivers/iommu/intel/iommu.h31
-rw-r--r--drivers/iommu/intel/irq_remapping.c80
-rw-r--r--drivers/iommu/intel/nested.c6
-rw-r--r--drivers/iommu/intel/pasid.c43
-rw-r--r--drivers/iommu/intel/prq.c2
-rw-r--r--drivers/iommu/intel/svm.c43
-rw-r--r--drivers/iommu/io-pgtable-arm.c240
-rw-r--r--drivers/iommu/io-pgtable-dart.c2
-rw-r--r--drivers/iommu/iommu-priv.h21
-rw-r--r--drivers/iommu/iommu-sva.c1
-rw-r--r--drivers/iommu/iommu.c481
-rw-r--r--drivers/iommu/iommufd/Kconfig2
-rw-r--r--drivers/iommu/iommufd/Makefile2
-rw-r--r--drivers/iommu/iommufd/device.c503
-rw-r--r--drivers/iommu/iommufd/driver.c198
-rw-r--r--drivers/iommu/iommufd/eventq.c598
-rw-r--r--drivers/iommu/iommufd/fault.c462
-rw-r--r--drivers/iommu/iommufd/hw_pagetable.c47
-rw-r--r--drivers/iommu/iommufd/iommufd_private.h196
-rw-r--r--drivers/iommu/iommufd/iommufd_test.h40
-rw-r--r--drivers/iommu/iommufd/main.c46
-rw-r--r--drivers/iommu/iommufd/selftest.c340
-rw-r--r--drivers/iommu/iommufd/viommu.c2
-rw-r--r--drivers/iommu/ipmmu-vmsa.c28
-rw-r--r--drivers/iommu/msm_iommu.c51
-rw-r--r--drivers/iommu/mtk_iommu.c35
-rw-r--r--drivers/iommu/mtk_iommu_v1.c28
-rw-r--r--drivers/iommu/of_iommu.c15
-rw-r--r--drivers/iommu/riscv/iommu-pci.c8
-rw-r--r--drivers/iommu/riscv/iommu-platform.c108
-rw-r--r--drivers/iommu/riscv/iommu.c12
-rw-r--r--drivers/iommu/riscv/iommu.h1
-rw-r--r--drivers/iommu/rockchip-iommu.c67
-rw-r--r--drivers/iommu/s390-iommu.c138
-rw-r--r--drivers/iommu/tegra-smmu.c1
64 files changed, 4195 insertions, 2424 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 47c46e4b739e..bad585b45a31 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -154,7 +154,6 @@ config IOMMU_DMA
select DMA_OPS_HELPERS
select IOMMU_API
select IOMMU_IOVA
- select IRQ_MSI_IOMMU
select NEED_SG_DMA_LENGTH
select NEED_SG_DMA_FLAGS if SWIOTLB
@@ -200,7 +199,6 @@ source "drivers/iommu/riscv/Kconfig"
config IRQ_REMAP
bool "Support for Interrupt Remapping"
depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI
- select DMAR_TABLE if INTEL_IOMMU
help
Supports Interrupt remapping for IO-APIC and MSI devices.
To use x2apic mode in the CPU's which support x2APIC enhancements or
@@ -367,6 +365,18 @@ config ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT
'arm-smmu.disable_bypass' will continue to override this
config.
+config ARM_SMMU_MMU_500_CPRE_ERRATA
+ bool "Enable errata workaround for CPRE in SMMU reset path"
+ depends on ARM_SMMU
+ default y
+ help
+ Say Y here (by default) to apply workaround to disable
+ MMU-500's next-page prefetcher for sake of 4 known errata.
+
+ Say N here only when it is sure that any errata related to
+ prefetch enablement are not applicable on the platform.
+ Refer silicon-errata.rst for info on errata IDs.
+
config ARM_SMMU_QCOM
def_tristate y
depends on ARM_SMMU && ARCH_QCOM
@@ -471,8 +481,7 @@ config MTK_IOMMU
config MTK_IOMMU_V1
tristate "MediaTek IOMMU Version 1 (M4U gen1) Support"
- depends on ARM
- depends on ARCH_MEDIATEK || COMPILE_TEST
+ depends on (ARCH_MEDIATEK && ARM) || COMPILE_TEST
select ARM_DMA_USE_IOMMU
select IOMMU_API
select MEMORY
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 6eb0af2e0339..220c598b7e14 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -16,7 +16,6 @@ irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data);
irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data);
irqreturn_t amd_iommu_int_thread_galog(int irq, void *data);
irqreturn_t amd_iommu_int_handler(int irq, void *data);
-void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid);
void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type,
u8 cntrl_intr, u8 cntrl_log,
u32 status_run_mask, u32 status_overflow_mask);
@@ -47,8 +46,7 @@ extern unsigned long amd_iommu_pgsize_bitmap;
/* Protection domain ops */
void amd_iommu_init_identity_domain(void);
-struct protection_domain *protection_domain_alloc(int nid);
-void protection_domain_free(struct protection_domain *domain);
+struct protection_domain *protection_domain_alloc(void);
struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
struct mm_struct *mm);
void amd_iommu_domain_free(struct iommu_domain *dom);
@@ -177,9 +175,11 @@ void amd_iommu_apply_ivrs_quirks(void);
#else
static inline void amd_iommu_apply_ivrs_quirks(void) { }
#endif
+struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid);
void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
u64 *root, int mode);
struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
+struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid);
-#endif
+#endif /* AMD_IOMMU_H */
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 903b426c9f89..5089b58e528a 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -112,6 +112,10 @@
#define FEATURE_SNPAVICSUP_GAM(x) \
(FIELD_GET(FEATURE_SNPAVICSUP, x) == 0x1)
+#define FEATURE_NUM_INT_REMAP_SUP GENMASK_ULL(9, 8)
+#define FEATURE_NUM_INT_REMAP_SUP_2K(x) \
+ (FIELD_GET(FEATURE_NUM_INT_REMAP_SUP, x) == 0x1)
+
/* Note:
* The current driver only support 16-bit PASID.
* Currently, hardware only implement upto 16-bit PASID
@@ -175,13 +179,16 @@
#define CONTROL_GAM_EN 25
#define CONTROL_GALOG_EN 28
#define CONTROL_GAINT_EN 29
+#define CONTROL_NUM_INT_REMAP_MODE 43
+#define CONTROL_NUM_INT_REMAP_MODE_MASK 0x03
+#define CONTROL_NUM_INT_REMAP_MODE_2K 0x01
#define CONTROL_EPH_EN 45
#define CONTROL_XT_EN 50
#define CONTROL_INTCAPXT_EN 51
#define CONTROL_IRTCACHEDIS 59
#define CONTROL_SNPAVIC_EN 61
-#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT)
+#define CTRL_INV_TO_MASK 7
#define CTRL_INV_TO_NONE 0
#define CTRL_INV_TO_1MS 1
#define CTRL_INV_TO_10MS 2
@@ -221,6 +228,8 @@
#define DEV_ENTRY_EX 0x67
#define DEV_ENTRY_SYSMGT1 0x68
#define DEV_ENTRY_SYSMGT2 0x69
+#define DTE_DATA1_SYSMGT_MASK GENMASK_ULL(41, 40)
+
#define DEV_ENTRY_IRQ_TBL_EN 0x80
#define DEV_ENTRY_INIT_PASS 0xb8
#define DEV_ENTRY_EINT_PASS 0xb9
@@ -307,15 +316,13 @@
#define DTE_IRQ_REMAP_INTCTL (2ULL << 60)
#define DTE_IRQ_REMAP_ENABLE 1ULL
-/*
- * AMD IOMMU hardware only support 512 IRTEs despite
- * the architectural limitation of 2048 entries.
- */
-#define DTE_INTTAB_ALIGNMENT 128
-#define DTE_INTTABLEN_VALUE 9ULL
-#define DTE_INTTABLEN (DTE_INTTABLEN_VALUE << 1)
#define DTE_INTTABLEN_MASK (0xfULL << 1)
-#define MAX_IRQS_PER_TABLE (1 << DTE_INTTABLEN_VALUE)
+#define DTE_INTTABLEN_VALUE_512 9ULL
+#define DTE_INTTABLEN_512 (DTE_INTTABLEN_VALUE_512 << 1)
+#define MAX_IRQS_PER_TABLE_512 BIT(DTE_INTTABLEN_VALUE_512)
+#define DTE_INTTABLEN_VALUE_2K 11ULL
+#define DTE_INTTABLEN_2K (DTE_INTTABLEN_VALUE_2K << 1)
+#define MAX_IRQS_PER_TABLE_2K BIT(DTE_INTTABLEN_VALUE_2K)
#define PAGE_MODE_NONE 0x00
#define PAGE_MODE_1_LEVEL 0x01
@@ -408,8 +415,7 @@
#define DTE_FLAG_HAD (3ULL << 7)
#define DTE_FLAG_GIOV BIT_ULL(54)
#define DTE_FLAG_GV BIT_ULL(55)
-#define DTE_GLX_SHIFT (56)
-#define DTE_GLX_MASK (3)
+#define DTE_GLX GENMASK_ULL(57, 56)
#define DTE_FLAG_IR BIT_ULL(61)
#define DTE_FLAG_IW BIT_ULL(62)
@@ -417,18 +423,18 @@
#define DTE_FLAG_MASK (0x3ffULL << 32)
#define DEV_DOMID_MASK 0xffffULL
-#define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL)
-#define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL)
-#define DTE_GCR3_VAL_C(x) (((x) >> 31) & 0x1fffffULL)
-
-#define DTE_GCR3_SHIFT_A 58
-#define DTE_GCR3_SHIFT_B 16
-#define DTE_GCR3_SHIFT_C 43
+#define DTE_GCR3_14_12 GENMASK_ULL(60, 58)
+#define DTE_GCR3_30_15 GENMASK_ULL(31, 16)
+#define DTE_GCR3_51_31 GENMASK_ULL(63, 43)
#define DTE_GPT_LEVEL_SHIFT 54
+#define DTE_GPT_LEVEL_MASK GENMASK_ULL(55, 54)
#define GCR3_VALID 0x01ULL
+/* DTE[128:179] | DTE[184:191] */
+#define DTE_DATA2_INTR_MASK ~GENMASK_ULL(55, 52)
+
#define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR)
#define IOMMU_PTE_DIRTY(pte) ((pte) & IOMMU_PTE_HD)
@@ -469,7 +475,7 @@ extern bool amd_iommu_dump;
#define DUMP_printk(format, arg...) \
do { \
if (amd_iommu_dump) \
- pr_info("AMD-Vi: " format, ## arg); \
+ pr_info(format, ## arg); \
} while(0);
/* global flag if IOMMUs cache non-present entries */
@@ -491,9 +497,6 @@ extern const struct iommu_ops amd_iommu_ops;
/* IVRS indicates that pre-boot remapping was enabled */
extern bool amdr_ivrs_remap_support;
-/* kmem_cache to get tables with 128 byte alignement */
-extern struct kmem_cache *amd_iommu_irq_cache;
-
#define PCI_SBDF_TO_SEGID(sbdf) (((sbdf) >> 16) & 0xffff)
#define PCI_SBDF_TO_DEVID(sbdf) ((sbdf) & 0xffff)
#define PCI_SEG_DEVID_TO_SBDF(seg, devid) ((((u32)(seg) & 0xffff) << 16) | \
@@ -517,6 +520,9 @@ extern struct kmem_cache *amd_iommu_irq_cache;
#define for_each_pdom_dev_data_safe(pdom_dev_data, next, pdom) \
list_for_each_entry_safe((pdom_dev_data), (next), &pdom->dev_data_list, list)
+#define for_each_ivhd_dte_flags(entry) \
+ list_for_each_entry((entry), &amd_ivhd_dev_flags_list, list)
+
struct amd_iommu;
struct iommu_domain;
struct irq_domain;
@@ -838,6 +844,7 @@ struct devid_map {
struct iommu_dev_data {
/*Protect against attach/detach races */
struct mutex mutex;
+ spinlock_t dte_lock; /* DTE lock for 256-bit access */
struct list_head list; /* For domain->dev_list */
struct llist_node dev_data_list; /* For global dev_data_list */
@@ -846,6 +853,7 @@ struct iommu_dev_data {
struct device *dev;
u16 devid; /* PCI Device ID */
+ unsigned int max_irqs; /* Maximum IRQs supported by device */
u32 max_pasids; /* Max supported PASIDs */
u32 flags; /* Holds AMD_IOMMU_DEVICE_FLAG_<*> */
int ats_qdep;
@@ -882,7 +890,21 @@ extern struct list_head amd_iommu_list;
* Structure defining one entry in the device table
*/
struct dev_table_entry {
- u64 data[4];
+ union {
+ u64 data[4];
+ u128 data128[2];
+ };
+};
+
+/*
+ * Structure to sture persistent DTE flags from IVHD
+ */
+struct ivhd_dte_flags {
+ struct list_head list;
+ u16 segid;
+ u16 devid_first;
+ u16 devid_last;
+ struct dev_table_entry dte;
};
/*
@@ -909,9 +931,6 @@ struct unity_map_entry {
* Data structures for device handling
*/
-/* size of the dma_ops aperture as power of 2 */
-extern unsigned amd_iommu_aperture_order;
-
extern bool amd_iommu_force_isolation;
/* Max levels of glxval supported */
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 4c0f876445de..14aa0d77df26 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -12,7 +12,6 @@
#include <linux/acpi.h>
#include <linux/list.h>
#include <linux/bitmap.h>
-#include <linux/slab.h>
#include <linux/syscore_ops.h>
#include <linux/interrupt.h>
#include <linux/msi.h>
@@ -174,8 +173,8 @@ bool amd_iommu_snp_en;
EXPORT_SYMBOL(amd_iommu_snp_en);
LIST_HEAD(amd_iommu_pci_seg_list); /* list of all PCI segments */
-LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
- system */
+LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */
+LIST_HEAD(amd_ivhd_dev_flags_list); /* list of all IVHD device entry settings */
/* Number of IOMMUs present in the system */
static int amd_iommus_present;
@@ -219,7 +218,6 @@ static bool __initdata cmdline_maps;
static enum iommu_init_state init_state = IOMMU_START_STATE;
static int amd_iommu_enable_interrupts(void);
-static int __init iommu_go_to_state(enum iommu_init_state state);
static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg);
static bool amd_iommu_pre_enabled = true;
@@ -412,33 +410,26 @@ static void iommu_set_device_table(struct amd_iommu *iommu)
&entry, sizeof(entry));
}
-/* Generic functions to enable/disable certain features of the IOMMU. */
-void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
+static void iommu_feature_set(struct amd_iommu *iommu, u64 val, u64 mask, u8 shift)
{
u64 ctrl;
ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
- ctrl |= (1ULL << bit);
+ mask <<= shift;
+ ctrl &= ~mask;
+ ctrl |= (val << shift) & mask;
writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
}
-static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
+/* Generic functions to enable/disable certain features of the IOMMU. */
+void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
{
- u64 ctrl;
-
- ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
- ctrl &= ~(1ULL << bit);
- writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
+ iommu_feature_set(iommu, 1ULL, 1ULL, bit);
}
-static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
+static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
{
- u64 ctrl;
-
- ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
- ctrl &= ~CTRL_INV_TO_MASK;
- ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
- writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
+ iommu_feature_set(iommu, 0ULL, 1ULL, bit);
}
/* Function to enable the hardware */
@@ -984,36 +975,12 @@ static void iommu_enable_gt(struct amd_iommu *iommu)
}
/* sets a specific bit in the device table entry. */
-static void __set_dev_entry_bit(struct dev_table_entry *dev_table,
- u16 devid, u8 bit)
+static void set_dte_bit(struct dev_table_entry *dte, u8 bit)
{
int i = (bit >> 6) & 0x03;
int _bit = bit & 0x3f;
- dev_table[devid].data[i] |= (1UL << _bit);
-}
-
-static void set_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
-{
- struct dev_table_entry *dev_table = get_dev_table(iommu);
-
- return __set_dev_entry_bit(dev_table, devid, bit);
-}
-
-static int __get_dev_entry_bit(struct dev_table_entry *dev_table,
- u16 devid, u8 bit)
-{
- int i = (bit >> 6) & 0x03;
- int _bit = bit & 0x3f;
-
- return (dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
-}
-
-static int get_dev_entry_bit(struct amd_iommu *iommu, u16 devid, u8 bit)
-{
- struct dev_table_entry *dev_table = get_dev_table(iommu);
-
- return __get_dev_entry_bit(dev_table, devid, bit);
+ dte->data[i] |= (1UL << _bit);
}
static bool __copy_device_table(struct amd_iommu *iommu)
@@ -1081,11 +1048,9 @@ static bool __copy_device_table(struct amd_iommu *iommu)
}
/* If gcr3 table existed, mask it out */
if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
- tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
- tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
+ tmp = (DTE_GCR3_30_15 | DTE_GCR3_51_31);
pci_seg->old_dev_tbl_cpy[devid].data[1] &= ~tmp;
- tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
- tmp |= DTE_FLAG_GV;
+ tmp = (DTE_GCR3_14_12 | DTE_FLAG_GV);
pci_seg->old_dev_tbl_cpy[devid].data[0] &= ~tmp;
}
}
@@ -1095,7 +1060,8 @@ static bool __copy_device_table(struct amd_iommu *iommu)
int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
if (irq_v && (int_ctl || int_tab_len)) {
if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
- (int_tab_len != DTE_INTTABLEN)) {
+ (int_tab_len != DTE_INTTABLEN_512 &&
+ int_tab_len != DTE_INTTABLEN_2K)) {
pr_err("Wrong old irq remapping flag: %#x\n", devid);
memunmap(old_devtb);
return false;
@@ -1136,42 +1102,107 @@ static bool copy_device_table(void)
return true;
}
-void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid)
+struct dev_table_entry *amd_iommu_get_ivhd_dte_flags(u16 segid, u16 devid)
{
- int sysmgt;
+ struct ivhd_dte_flags *e;
+ unsigned int best_len = UINT_MAX;
+ struct dev_table_entry *dte = NULL;
- sysmgt = get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1) |
- (get_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2) << 1);
+ for_each_ivhd_dte_flags(e) {
+ /*
+ * Need to go through the whole list to find the smallest range,
+ * which contains the devid.
+ */
+ if ((e->segid == segid) &&
+ (e->devid_first <= devid) && (devid <= e->devid_last)) {
+ unsigned int len = e->devid_last - e->devid_first;
- if (sysmgt == 0x01)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_IW);
+ if (len < best_len) {
+ dte = &(e->dte);
+ best_len = len;
+ }
+ }
+ }
+ return dte;
+}
+
+static bool search_ivhd_dte_flags(u16 segid, u16 first, u16 last)
+{
+ struct ivhd_dte_flags *e;
+
+ for_each_ivhd_dte_flags(e) {
+ if ((e->segid == segid) &&
+ (e->devid_first == first) &&
+ (e->devid_last == last))
+ return true;
+ }
+ return false;
}
/*
* This function takes the device specific flags read from the ACPI
* table and sets up the device table entry with that information
*/
-static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
- u16 devid, u32 flags, u32 ext_flags)
+static void __init
+set_dev_entry_from_acpi_range(struct amd_iommu *iommu, u16 first, u16 last,
+ u32 flags, u32 ext_flags)
{
- if (flags & ACPI_DEVFLAG_INITPASS)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_INIT_PASS);
- if (flags & ACPI_DEVFLAG_EXTINT)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_EINT_PASS);
- if (flags & ACPI_DEVFLAG_NMI)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_NMI_PASS);
- if (flags & ACPI_DEVFLAG_SYSMGT1)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT1);
- if (flags & ACPI_DEVFLAG_SYSMGT2)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_SYSMGT2);
- if (flags & ACPI_DEVFLAG_LINT0)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT0_PASS);
- if (flags & ACPI_DEVFLAG_LINT1)
- set_dev_entry_bit(iommu, devid, DEV_ENTRY_LINT1_PASS);
+ int i;
+ struct dev_table_entry dte = {};
+
+ /* Parse IVHD DTE setting flags and store information */
+ if (flags) {
+ struct ivhd_dte_flags *d;
- amd_iommu_apply_erratum_63(iommu, devid);
+ if (search_ivhd_dte_flags(iommu->pci_seg->id, first, last))
+ return;
+
+ d = kzalloc(sizeof(struct ivhd_dte_flags), GFP_KERNEL);
+ if (!d)
+ return;
+
+ pr_debug("%s: devid range %#x:%#x\n", __func__, first, last);
+
+ if (flags & ACPI_DEVFLAG_INITPASS)
+ set_dte_bit(&dte, DEV_ENTRY_INIT_PASS);
+ if (flags & ACPI_DEVFLAG_EXTINT)
+ set_dte_bit(&dte, DEV_ENTRY_EINT_PASS);
+ if (flags & ACPI_DEVFLAG_NMI)
+ set_dte_bit(&dte, DEV_ENTRY_NMI_PASS);
+ if (flags & ACPI_DEVFLAG_SYSMGT1)
+ set_dte_bit(&dte, DEV_ENTRY_SYSMGT1);
+ if (flags & ACPI_DEVFLAG_SYSMGT2)
+ set_dte_bit(&dte, DEV_ENTRY_SYSMGT2);
+ if (flags & ACPI_DEVFLAG_LINT0)
+ set_dte_bit(&dte, DEV_ENTRY_LINT0_PASS);
+ if (flags & ACPI_DEVFLAG_LINT1)
+ set_dte_bit(&dte, DEV_ENTRY_LINT1_PASS);
+
+ /* Apply erratum 63, which needs info in initial_dte */
+ if (FIELD_GET(DTE_DATA1_SYSMGT_MASK, dte.data[1]) == 0x1)
+ dte.data[0] |= DTE_FLAG_IW;
+
+ memcpy(&d->dte, &dte, sizeof(dte));
+ d->segid = iommu->pci_seg->id;
+ d->devid_first = first;
+ d->devid_last = last;
+ list_add_tail(&d->list, &amd_ivhd_dev_flags_list);
+ }
+
+ for (i = first; i <= last; i++) {
+ if (flags) {
+ struct dev_table_entry *dev_table = get_dev_table(iommu);
+
+ memcpy(&dev_table[i], &dte, sizeof(dte));
+ }
+ amd_iommu_set_rlookup_table(iommu, i);
+ }
+}
- amd_iommu_set_rlookup_table(iommu, devid);
+static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
+ u16 devid, u32 flags, u32 ext_flags)
+{
+ set_dev_entry_from_acpi_range(iommu, devid, devid, flags, ext_flags);
}
int __init add_special_device(u8 type, u8 id, u32 *devid, bool cmd_line)
@@ -1239,7 +1270,7 @@ static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u32 *devid,
entry->cmd_line = cmd_line;
entry->root_devid = (entry->devid & (~0x7));
- pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
+ pr_info("%s, add hid:%s, uid:%s, rdevid:%#x\n",
entry->cmd_line ? "cmd" : "ivrs",
entry->hid, entry->uid, entry->root_devid);
@@ -1331,15 +1362,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
switch (e->type) {
case IVHD_DEV_ALL:
- DUMP_printk(" DEV_ALL\t\t\tflags: %02x\n", e->flags);
-
- for (dev_i = 0; dev_i <= pci_seg->last_bdf; ++dev_i)
- set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
+ DUMP_printk(" DEV_ALL\t\t\tsetting: %#02x\n", e->flags);
+ set_dev_entry_from_acpi_range(iommu, 0, pci_seg->last_bdf, e->flags, 0);
break;
case IVHD_DEV_SELECT:
- DUMP_printk(" DEV_SELECT\t\t\t devid: %04x:%02x:%02x.%x "
- "flags: %02x\n",
+ DUMP_printk(" DEV_SELECT\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@@ -1350,8 +1378,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_SELECT_RANGE_START:
- DUMP_printk(" DEV_SELECT_RANGE_START\t "
- "devid: %04x:%02x:%02x.%x flags: %02x\n",
+ DUMP_printk(" DEV_SELECT_RANGE_START\tdevid: %04x:%02x:%02x.%x flags: %#02x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@@ -1364,8 +1391,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_ALIAS:
- DUMP_printk(" DEV_ALIAS\t\t\t devid: %04x:%02x:%02x.%x "
- "flags: %02x devid_to: %02x:%02x.%x\n",
+ DUMP_printk(" DEV_ALIAS\t\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %02x:%02x.%x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@@ -1382,9 +1408,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_ALIAS_RANGE:
- DUMP_printk(" DEV_ALIAS_RANGE\t\t "
- "devid: %04x:%02x:%02x.%x flags: %02x "
- "devid_to: %04x:%02x:%02x.%x\n",
+ DUMP_printk(" DEV_ALIAS_RANGE\t\tdevid: %04x:%02x:%02x.%x flags: %#02x devid_to: %04x:%02x:%02x.%x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@@ -1401,8 +1425,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_EXT_SELECT:
- DUMP_printk(" DEV_EXT_SELECT\t\t devid: %04x:%02x:%02x.%x "
- "flags: %02x ext: %08x\n",
+ DUMP_printk(" DEV_EXT_SELECT\t\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@@ -1414,8 +1437,7 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_EXT_SELECT_RANGE:
- DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: "
- "%04x:%02x:%02x.%x flags: %02x ext: %08x\n",
+ DUMP_printk(" DEV_EXT_SELECT_RANGE\tdevid: %04x:%02x:%02x.%x flags: %#02x ext: %08x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid),
@@ -1428,21 +1450,18 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
break;
case IVHD_DEV_RANGE_END:
- DUMP_printk(" DEV_RANGE_END\t\t devid: %04x:%02x:%02x.%x\n",
+ DUMP_printk(" DEV_RANGE_END\t\tdevid: %04x:%02x:%02x.%x\n",
seg_id, PCI_BUS_NUM(e->devid),
PCI_SLOT(e->devid),
PCI_FUNC(e->devid));
devid = e->devid;
for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
- if (alias) {
+ if (alias)
pci_seg->alias_table[dev_i] = devid_to;
- set_dev_entry_from_acpi(iommu,
- devid_to, flags, ext_flags);
- }
- set_dev_entry_from_acpi(iommu, dev_i,
- flags, ext_flags);
}
+ set_dev_entry_from_acpi_range(iommu, devid_start, devid, flags, ext_flags);
+ set_dev_entry_from_acpi(iommu, devid_to, flags, ext_flags);
break;
case IVHD_DEV_SPECIAL: {
u8 handle, type;
@@ -1461,11 +1480,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
else
var = "UNKNOWN";
- DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x\n",
+ DUMP_printk(" DEV_SPECIAL(%s[%d])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n",
var, (int)handle,
seg_id, PCI_BUS_NUM(devid),
PCI_SLOT(devid),
- PCI_FUNC(devid));
+ PCI_FUNC(devid),
+ e->flags);
ret = add_special_device(type, handle, &devid, false);
if (ret)
@@ -1525,11 +1545,12 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
}
devid = PCI_SEG_DEVID_TO_SBDF(seg_id, e->devid);
- DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x\n",
+ DUMP_printk(" DEV_ACPI_HID(%s[%s])\t\tdevid: %04x:%02x:%02x.%x, flags: %#02x\n",
hid, uid, seg_id,
PCI_BUS_NUM(devid),
PCI_SLOT(devid),
- PCI_FUNC(devid));
+ PCI_FUNC(devid),
+ e->flags);
flags = e->flags;
@@ -1757,13 +1778,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
else
iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
- /*
- * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
- * GAM also requires GA mode. Therefore, we need to
- * check cmpxchg16b support before enabling it.
- */
- if (!boot_cpu_has(X86_FEATURE_CX16) ||
- ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
+ /* GAM requires GA mode. */
+ if ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
break;
case 0x11:
@@ -1773,13 +1789,8 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
else
iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
- /*
- * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
- * XT, GAM also requires GA mode. Therefore, we need to
- * check cmpxchg16b support before enabling them.
- */
- if (!boot_cpu_has(X86_FEATURE_CX16) ||
- ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) {
+ /* XT and GAM require GA mode. */
+ if ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0) {
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
break;
}
@@ -2332,7 +2343,7 @@ static struct irq_chip intcapxt_controller = {
.irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_set_affinity = intcapxt_set_affinity,
.irq_set_wake = intcapxt_set_wake,
- .flags = IRQCHIP_MASK_ON_SUSPEND,
+ .flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_MOVE_DEFERRED,
};
static const struct irq_domain_ops intcapxt_domain_ops = {
@@ -2575,9 +2586,9 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
return;
for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
- __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_VALID);
+ set_dte_bit(&dev_table[devid], DEV_ENTRY_VALID);
if (!amd_iommu_snp_en)
- __set_dev_entry_bit(dev_table, devid, DEV_ENTRY_TRANSLATION);
+ set_dte_bit(&dev_table[devid], DEV_ENTRY_TRANSLATION);
}
}
@@ -2605,8 +2616,7 @@ static void init_device_table(void)
for_each_pci_segment(pci_seg) {
for (devid = 0; devid <= pci_seg->last_bdf; ++devid)
- __set_dev_entry_bit(pci_seg->dev_table,
- devid, DEV_ENTRY_IRQ_TBL_EN);
+ set_dte_bit(&pci_seg->dev_table[devid], DEV_ENTRY_IRQ_TBL_EN);
}
}
@@ -2634,7 +2644,7 @@ static void iommu_init_flags(struct amd_iommu *iommu)
iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
/* Set IOTLB invalidation timeout to 1s */
- iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
+ iommu_feature_set(iommu, CTRL_INV_TO_1S, CTRL_INV_TO_MASK, CONTROL_INV_TIMEOUT);
/* Enable Enhanced Peripheral Page Request Handling */
if (check_feature(FEATURE_EPHSUP))
@@ -2727,6 +2737,17 @@ static void iommu_enable_irtcachedis(struct amd_iommu *iommu)
iommu->irtcachedis_enabled ? "disabled" : "enabled");
}
+static void iommu_enable_2k_int(struct amd_iommu *iommu)
+{
+ if (!FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2))
+ return;
+
+ iommu_feature_set(iommu,
+ CONTROL_NUM_INT_REMAP_MODE_2K,
+ CONTROL_NUM_INT_REMAP_MODE_MASK,
+ CONTROL_NUM_INT_REMAP_MODE);
+}
+
static void early_enable_iommu(struct amd_iommu *iommu)
{
iommu_disable(iommu);
@@ -2739,6 +2760,7 @@ static void early_enable_iommu(struct amd_iommu *iommu)
iommu_enable_ga(iommu);
iommu_enable_xt(iommu);
iommu_enable_irtcachedis(iommu);
+ iommu_enable_2k_int(iommu);
iommu_enable(iommu);
amd_iommu_flush_all_caches(iommu);
}
@@ -2795,6 +2817,7 @@ static void early_enable_iommus(void)
iommu_enable_ga(iommu);
iommu_enable_xt(iommu);
iommu_enable_irtcachedis(iommu);
+ iommu_enable_2k_int(iommu);
iommu_set_device_table(iommu);
amd_iommu_flush_all_caches(iommu);
}
@@ -2921,9 +2944,6 @@ static struct syscore_ops amd_iommu_syscore_ops = {
static void __init free_iommu_resources(void)
{
- kmem_cache_destroy(amd_iommu_irq_cache);
- amd_iommu_irq_cache = NULL;
-
free_iommu_all();
free_pci_segments();
}
@@ -3022,7 +3042,7 @@ static void __init ivinfo_init(void *ivrs)
static int __init early_amd_iommu_init(void)
{
struct acpi_table_header *ivrs_base;
- int remap_cache_sz, ret;
+ int ret;
acpi_status status;
if (!amd_iommu_detected)
@@ -3037,6 +3057,11 @@ static int __init early_amd_iommu_init(void)
return -EINVAL;
}
+ if (!boot_cpu_has(X86_FEATURE_CX16)) {
+ pr_err("Failed to initialize. The CMPXCHG16B feature is required.\n");
+ return -EINVAL;
+ }
+
/*
* Validate checksum here so we don't need to do it when
* we actually parse the table
@@ -3079,22 +3104,7 @@ static int __init early_amd_iommu_init(void)
if (amd_iommu_irq_remap) {
struct amd_iommu_pci_seg *pci_seg;
- /*
- * Interrupt remapping enabled, create kmem_cache for the
- * remapping tables.
- */
ret = -ENOMEM;
- if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
- remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
- else
- remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
- amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
- remap_cache_sz,
- DTE_INTTAB_ALIGNMENT,
- 0, NULL);
- if (!amd_iommu_irq_cache)
- goto out;
-
for_each_pci_segment(pci_seg) {
if (alloc_irq_lookup_table(pci_seg))
goto out;
@@ -3175,7 +3185,7 @@ out:
return true;
}
-static void iommu_snp_enable(void)
+static __init void iommu_snp_enable(void)
{
#ifdef CONFIG_KVM_AMD_SEV
if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
@@ -3200,6 +3210,14 @@ static void iommu_snp_enable(void)
goto disable_snp;
}
+ /*
+ * Enable host SNP support once SNP support is checked on IOMMU.
+ */
+ if (snp_rmptable_init()) {
+ pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n");
+ goto disable_snp;
+ }
+
pr_info("IOMMU SNP support enabled.\n");
return;
@@ -3299,6 +3317,19 @@ static int __init iommu_go_to_state(enum iommu_init_state state)
ret = state_next();
}
+ /*
+ * SNP platform initilazation requires IOMMUs to be fully configured.
+ * If the SNP support on IOMMUs has NOT been checked, simply mark SNP
+ * as unsupported. If the SNP support on IOMMUs has been checked and
+ * host SNP support enabled but RMP enforcement has not been enabled
+ * in IOMMUs, then the system is in a half-baked state, but can limp
+ * along as all memory should be Hypervisor-Owned in the RMP. WARN,
+ * but leave SNP as "supported" to avoid confusing the kernel.
+ */
+ if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) &&
+ !WARN_ON_ONCE(amd_iommu_snp_en))
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
+
return ret;
}
@@ -3402,25 +3433,28 @@ static bool amd_iommu_sme_check(void)
* IOMMUs
*
****************************************************************************/
-int __init amd_iommu_detect(void)
+void __init amd_iommu_detect(void)
{
int ret;
if (no_iommu || (iommu_detected && !gart_iommu_aperture))
- return -ENODEV;
+ goto disable_snp;
if (!amd_iommu_sme_check())
- return -ENODEV;
+ goto disable_snp;
ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
if (ret)
- return ret;
+ goto disable_snp;
amd_iommu_detected = true;
iommu_detected = 1;
x86_init.iommu.iommu_init = amd_iommu_init;
+ return;
- return 1;
+disable_snp:
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
}
/****************************************************************************
@@ -3630,6 +3664,14 @@ found:
while (*uid == '0' && *(uid + 1))
uid++;
+ if (strlen(hid) >= ACPIHID_HID_LEN) {
+ pr_err("Invalid command line: hid is too long\n");
+ return 1;
+ } else if (strlen(uid) >= ACPIHID_UID_LEN) {
+ pr_err("Invalid command line: uid is too long\n");
+ return 1;
+ }
+
i = early_acpihid_map_size++;
memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index f3399087859f..26cf562dde11 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -47,13 +47,6 @@ static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
return fpte;
}
-/****************************************************************************
- *
- * The functions below are used the create the page table mappings for
- * unity mapped regions.
- *
- ****************************************************************************/
-
static void free_pt_page(u64 *pt, struct list_head *freelist)
{
struct page *p = virt_to_page(pt);
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
index c616de2c5926..a56a27396305 100644
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -254,7 +254,7 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
pte = v2_alloc_pte(cfg->amd.nid, pgtable->pgd,
iova, map_size, gfp, &updated);
if (!pte) {
- ret = -EINVAL;
+ ret = -ENOMEM;
goto out;
}
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 80b2c9eb438f..31f8d208dedb 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -75,20 +75,148 @@ struct iommu_cmd {
*/
DEFINE_IDA(pdom_ids);
-struct kmem_cache *amd_iommu_irq_cache;
-
static int amd_iommu_attach_device(struct iommu_domain *dom,
struct device *dev);
static void set_dte_entry(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data);
+static void iommu_flush_dte_sync(struct amd_iommu *iommu, u16 devid);
+
+static struct iommu_dev_data *find_dev_data(struct amd_iommu *iommu, u16 devid);
+
/****************************************************************************
*
* Helper functions
*
****************************************************************************/
+static __always_inline void amd_iommu_atomic128_set(__int128 *ptr, __int128 val)
+{
+ /*
+ * Note:
+ * We use arch_cmpxchg128_local() because:
+ * - Need cmpxchg16b instruction mainly for 128-bit store to DTE
+ * (not necessary for cmpxchg since this function is already
+ * protected by a spin_lock for this DTE).
+ * - Neither need LOCK_PREFIX nor try loop because of the spin_lock.
+ */
+ arch_cmpxchg128_local(ptr, *ptr, val);
+}
+
+static void write_dte_upper128(struct dev_table_entry *ptr, struct dev_table_entry *new)
+{
+ struct dev_table_entry old;
+
+ old.data128[1] = ptr->data128[1];
+ /*
+ * Preserve DTE_DATA2_INTR_MASK. This needs to be
+ * done here since it requires to be inside
+ * spin_lock(&dev_data->dte_lock) context.
+ */
+ new->data[2] &= ~DTE_DATA2_INTR_MASK;
+ new->data[2] |= old.data[2] & DTE_DATA2_INTR_MASK;
+
+ amd_iommu_atomic128_set(&ptr->data128[1], new->data128[1]);
+}
+
+static void write_dte_lower128(struct dev_table_entry *ptr, struct dev_table_entry *new)
+{
+ amd_iommu_atomic128_set(&ptr->data128[0], new->data128[0]);
+}
+
+/*
+ * Note:
+ * IOMMU reads the entire Device Table entry in a single 256-bit transaction
+ * but the driver is programming DTE using 2 128-bit cmpxchg. So, the driver
+ * need to ensure the following:
+ * - DTE[V|GV] bit is being written last when setting.
+ * - DTE[V|GV] bit is being written first when clearing.
+ *
+ * This function is used only by code, which updates DMA translation part of the DTE.
+ * So, only consider control bits related to DMA when updating the entry.
+ */
+static void update_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_data,
+ struct dev_table_entry *new)
+{
+ unsigned long flags;
+ struct dev_table_entry *dev_table = get_dev_table(iommu);
+ struct dev_table_entry *ptr = &dev_table[dev_data->devid];
+
+ spin_lock_irqsave(&dev_data->dte_lock, flags);
+
+ if (!(ptr->data[0] & DTE_FLAG_V)) {
+ /* Existing DTE is not valid. */
+ write_dte_upper128(ptr, new);
+ write_dte_lower128(ptr, new);
+ iommu_flush_dte_sync(iommu, dev_data->devid);
+ } else if (!(new->data[0] & DTE_FLAG_V)) {
+ /* Existing DTE is valid. New DTE is not valid. */
+ write_dte_lower128(ptr, new);
+ write_dte_upper128(ptr, new);
+ iommu_flush_dte_sync(iommu, dev_data->devid);
+ } else if (!FIELD_GET(DTE_FLAG_GV, ptr->data[0])) {
+ /*
+ * Both DTEs are valid.
+ * Existing DTE has no guest page table.
+ */
+ write_dte_upper128(ptr, new);
+ write_dte_lower128(ptr, new);
+ iommu_flush_dte_sync(iommu, dev_data->devid);
+ } else if (!FIELD_GET(DTE_FLAG_GV, new->data[0])) {
+ /*
+ * Both DTEs are valid.
+ * Existing DTE has guest page table,
+ * new DTE has no guest page table,
+ */
+ write_dte_lower128(ptr, new);
+ write_dte_upper128(ptr, new);
+ iommu_flush_dte_sync(iommu, dev_data->devid);
+ } else if (FIELD_GET(DTE_GPT_LEVEL_MASK, ptr->data[2]) !=
+ FIELD_GET(DTE_GPT_LEVEL_MASK, new->data[2])) {
+ /*
+ * Both DTEs are valid and have guest page table,
+ * but have different number of levels. So, we need
+ * to upadte both upper and lower 128-bit value, which
+ * require disabling and flushing.
+ */
+ struct dev_table_entry clear = {};
+
+ /* First disable DTE */
+ write_dte_lower128(ptr, &clear);
+ iommu_flush_dte_sync(iommu, dev_data->devid);
+
+ /* Then update DTE */
+ write_dte_upper128(ptr, new);
+ write_dte_lower128(ptr, new);
+ iommu_flush_dte_sync(iommu, dev_data->devid);
+ } else {
+ /*
+ * Both DTEs are valid and have guest page table,
+ * and same number of levels. We just need to only
+ * update the lower 128-bit. So no need to disable DTE.
+ */
+ write_dte_lower128(ptr, new);
+ }
+
+ spin_unlock_irqrestore(&dev_data->dte_lock, flags);
+}
+
+static void get_dte256(struct amd_iommu *iommu, struct iommu_dev_data *dev_data,
+ struct dev_table_entry *dte)
+{
+ unsigned long flags;
+ struct dev_table_entry *ptr;
+ struct dev_table_entry *dev_table = get_dev_table(iommu);
+
+ ptr = &dev_table[dev_data->devid];
+
+ spin_lock_irqsave(&dev_data->dte_lock, flags);
+ dte->data128[0] = ptr->data128[0];
+ dte->data128[1] = ptr->data128[1];
+ spin_unlock_irqrestore(&dev_data->dte_lock, flags);
+}
+
static inline bool pdom_is_v2_pgtbl_mode(struct protection_domain *pdom)
{
return (pdom && (pdom->pd_mode == PD_MODE_V2));
@@ -113,7 +241,9 @@ static inline int get_acpihid_device_id(struct device *dev,
struct acpihid_map_entry **entry)
{
struct acpi_device *adev = ACPI_COMPANION(dev);
- struct acpihid_map_entry *p;
+ struct acpihid_map_entry *p, *p1 = NULL;
+ int hid_count = 0;
+ bool fw_bug;
if (!adev)
return -ENODEV;
@@ -121,12 +251,33 @@ static inline int get_acpihid_device_id(struct device *dev,
list_for_each_entry(p, &acpihid_map, list) {
if (acpi_dev_hid_uid_match(adev, p->hid,
p->uid[0] ? p->uid : NULL)) {
- if (entry)
- *entry = p;
- return p->devid;
+ p1 = p;
+ fw_bug = false;
+ hid_count = 1;
+ break;
+ }
+
+ /*
+ * Count HID matches w/o UID, raise FW_BUG but allow exactly one match
+ */
+ if (acpi_dev_hid_match(adev, p->hid)) {
+ p1 = p;
+ hid_count++;
+ fw_bug = true;
}
}
- return -EINVAL;
+
+ if (!p1)
+ return -EINVAL;
+ if (fw_bug)
+ dev_err_once(dev, FW_BUG "No ACPI device matched UID, but %d device%s matched HID.\n",
+ hid_count, hid_count > 1 ? "s" : "");
+ if (hid_count > 1)
+ return -EINVAL;
+ if (entry)
+ *entry = p1;
+
+ return p1->devid;
}
static inline int get_device_sbdf_id(struct device *dev)
@@ -209,6 +360,7 @@ static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid)
return NULL;
mutex_init(&dev_data->mutex);
+ spin_lock_init(&dev_data->dte_lock);
dev_data->devid = devid;
ratelimit_default_init(&dev_data->rs);
@@ -216,7 +368,7 @@ static struct iommu_dev_data *alloc_dev_data(struct amd_iommu *iommu, u16 devid)
return dev_data;
}
-static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid)
+struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid)
{
struct iommu_dev_data *dev_data;
struct llist_node *node;
@@ -236,9 +388,11 @@ static struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid
static int clone_alias(struct pci_dev *pdev, u16 alias, void *data)
{
+ struct dev_table_entry new;
struct amd_iommu *iommu;
- struct dev_table_entry *dev_table;
+ struct iommu_dev_data *dev_data, *alias_data;
u16 devid = pci_dev_id(pdev);
+ int ret = 0;
if (devid == alias)
return 0;
@@ -247,13 +401,27 @@ static int clone_alias(struct pci_dev *pdev, u16 alias, void *data)
if (!iommu)
return 0;
- amd_iommu_set_rlookup_table(iommu, alias);
- dev_table = get_dev_table(iommu);
- memcpy(dev_table[alias].data,
- dev_table[devid].data,
- sizeof(dev_table[alias].data));
+ /* Copy the data from pdev */
+ dev_data = dev_iommu_priv_get(&pdev->dev);
+ if (!dev_data) {
+ pr_err("%s : Failed to get dev_data for 0x%x\n", __func__, devid);
+ ret = -EINVAL;
+ goto out;
+ }
+ get_dte256(iommu, dev_data, &new);
- return 0;
+ /* Setup alias */
+ alias_data = find_dev_data(iommu, alias);
+ if (!alias_data) {
+ pr_err("%s : Failed to get alias dev_data for 0x%x\n", __func__, alias);
+ ret = -EINVAL;
+ goto out;
+ }
+ update_dte256(iommu, alias_data, &new);
+
+ amd_iommu_set_rlookup_table(iommu, alias);
+out:
+ return ret;
}
static void clone_aliases(struct amd_iommu *iommu, struct device *dev)
@@ -526,6 +694,12 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev)
return -ENOMEM;
dev_data->dev = dev;
+
+ /*
+ * The dev_iommu_priv_set() needes to be called before setup_aliases.
+ * Otherwise, subsequent call to dev_iommu_priv_get() will fail.
+ */
+ dev_iommu_priv_set(dev, dev_data);
setup_aliases(iommu, dev);
/*
@@ -539,8 +713,6 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev)
dev_data->flags = pdev_get_caps(to_pci_dev(dev));
}
- dev_iommu_priv_set(dev, dev_data);
-
return 0;
}
@@ -571,10 +743,13 @@ static void iommu_ignore_device(struct amd_iommu *iommu, struct device *dev)
static void dump_dte_entry(struct amd_iommu *iommu, u16 devid)
{
int i;
- struct dev_table_entry *dev_table = get_dev_table(iommu);
+ struct dev_table_entry dte;
+ struct iommu_dev_data *dev_data = find_dev_data(iommu, devid);
+
+ get_dte256(iommu, dev_data, &dte);
for (i = 0; i < 4; ++i)
- pr_err("DTE[%d]: %016llx\n", i, dev_table[devid].data[i]);
+ pr_err("DTE[%d]: %016llx\n", i, dte.data[i]);
}
static void dump_command(unsigned long phys_addr)
@@ -714,7 +889,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
int type, devid, flags, tag;
volatile u32 *event = __evt;
int count = 0;
- u64 address;
+ u64 address, ctrl;
u32 pasid;
retry:
@@ -724,6 +899,7 @@ retry:
(event[1] & EVENT_DOMID_MASK_LO);
flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
address = (u64)(((u64)event[3]) << 32) | event[2];
+ ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
if (type == 0) {
/* Did we hit the erratum? */
@@ -745,6 +921,7 @@ retry:
dev_err(dev, "Event logged [ILLEGAL_DEV_TABLE_ENTRY device=%04x:%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x]\n",
iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
+ dev_err(dev, "Control Reg : 0x%llx\n", ctrl);
dump_dte_entry(iommu, devid);
break;
case EVENT_TYPE_DEV_TAB_ERR:
@@ -828,6 +1005,14 @@ int amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
{
iommu_ga_log_notifier = notifier;
+ /*
+ * Ensure all in-flight IRQ handlers run to completion before returning
+ * to the caller, e.g. to ensure module code isn't unloaded while it's
+ * being executed in the IRQ handler.
+ */
+ if (!notifier)
+ synchronize_rcu();
+
return 0;
}
EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier);
@@ -1261,6 +1446,15 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
return iommu_queue_command(iommu, &cmd);
}
+static void iommu_flush_dte_sync(struct amd_iommu *iommu, u16 devid)
+{
+ int ret;
+
+ ret = iommu_flush_dte(iommu, devid);
+ if (!ret)
+ iommu_completion_wait(iommu);
+}
+
static void amd_iommu_flush_dte_all(struct amd_iommu *iommu)
{
u32 devid;
@@ -1817,90 +2011,109 @@ int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid)
return ret;
}
+static void make_clear_dte(struct iommu_dev_data *dev_data, struct dev_table_entry *ptr,
+ struct dev_table_entry *new)
+{
+ /* All existing DTE must have V bit set */
+ new->data128[0] = DTE_FLAG_V;
+ new->data128[1] = 0;
+}
+
+/*
+ * Note:
+ * The old value for GCR3 table and GPT have been cleared from caller.
+ */
+static void set_dte_gcr3_table(struct amd_iommu *iommu,
+ struct iommu_dev_data *dev_data,
+ struct dev_table_entry *target)
+{
+ struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
+ u64 gcr3;
+
+ if (!gcr3_info->gcr3_tbl)
+ return;
+
+ pr_debug("%s: devid=%#x, glx=%#x, gcr3_tbl=%#llx\n",
+ __func__, dev_data->devid, gcr3_info->glx,
+ (unsigned long long)gcr3_info->gcr3_tbl);
+
+ gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);
+
+ target->data[0] |= DTE_FLAG_GV |
+ FIELD_PREP(DTE_GLX, gcr3_info->glx) |
+ FIELD_PREP(DTE_GCR3_14_12, gcr3 >> 12);
+ if (pdom_is_v2_pgtbl_mode(dev_data->domain))
+ target->data[0] |= DTE_FLAG_GIOV;
+
+ target->data[1] |= FIELD_PREP(DTE_GCR3_30_15, gcr3 >> 15) |
+ FIELD_PREP(DTE_GCR3_51_31, gcr3 >> 31);
+
+ /* Guest page table can only support 4 and 5 levels */
+ if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL)
+ target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_5_LEVEL);
+ else
+ target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_4_LEVEL);
+}
+
static void set_dte_entry(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data)
{
- u64 pte_root = 0;
- u64 flags = 0;
- u32 old_domid;
- u16 devid = dev_data->devid;
u16 domid;
+ u32 old_domid;
+ struct dev_table_entry *initial_dte;
+ struct dev_table_entry new = {};
struct protection_domain *domain = dev_data->domain;
- struct dev_table_entry *dev_table = get_dev_table(iommu);
struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
+ struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];
if (gcr3_info && gcr3_info->gcr3_tbl)
domid = dev_data->gcr3_info.domid;
else
domid = domain->id;
+ make_clear_dte(dev_data, dte, &new);
+
if (domain->iop.mode != PAGE_MODE_NONE)
- pte_root = iommu_virt_to_phys(domain->iop.root);
+ new.data[0] |= iommu_virt_to_phys(domain->iop.root);
- pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
+ new.data[0] |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
- pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V;
+ new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW;
/*
- * When SNP is enabled, Only set TV bit when IOMMU
- * page translation is in use.
+ * When SNP is enabled, we can only support TV=1 with non-zero domain ID.
+ * This is prevented by the SNP-enable and IOMMU_DOMAIN_IDENTITY check in
+ * do_iommu_domain_alloc().
*/
- if (!amd_iommu_snp_en || (domid != 0))
- pte_root |= DTE_FLAG_TV;
-
- flags = dev_table[devid].data[1];
-
- if (dev_data->ats_enabled)
- flags |= DTE_FLAG_IOTLB;
+ WARN_ON(amd_iommu_snp_en && (domid == 0));
+ new.data[0] |= DTE_FLAG_TV;
if (dev_data->ppr)
- pte_root |= 1ULL << DEV_ENTRY_PPR;
+ new.data[0] |= 1ULL << DEV_ENTRY_PPR;
if (domain->dirty_tracking)
- pte_root |= DTE_FLAG_HAD;
-
- if (gcr3_info && gcr3_info->gcr3_tbl) {
- u64 gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);
- u64 glx = gcr3_info->glx;
- u64 tmp;
+ new.data[0] |= DTE_FLAG_HAD;
- pte_root |= DTE_FLAG_GV;
- pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT;
-
- /* First mask out possible old values for GCR3 table */
- tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
- flags &= ~tmp;
-
- tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
- flags &= ~tmp;
-
- /* Encode GCR3 table into DTE */
- tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
- pte_root |= tmp;
-
- tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
- flags |= tmp;
+ if (dev_data->ats_enabled)
+ new.data[1] |= DTE_FLAG_IOTLB;
- tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
- flags |= tmp;
+ old_domid = READ_ONCE(dte->data[1]) & DEV_DOMID_MASK;
+ new.data[1] |= domid;
- if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) {
- dev_table[devid].data[2] |=
- ((u64)GUEST_PGTABLE_5_LEVEL << DTE_GPT_LEVEL_SHIFT);
- }
-
- /* GIOV is supported with V2 page table mode only */
- if (pdom_is_v2_pgtbl_mode(domain))
- pte_root |= DTE_FLAG_GIOV;
+ /*
+ * Restore cached persistent DTE bits, which can be set by information
+ * in IVRS table. See set_dev_entry_from_acpi().
+ */
+ initial_dte = amd_iommu_get_ivhd_dte_flags(iommu->pci_seg->id, dev_data->devid);
+ if (initial_dte) {
+ new.data128[0] |= initial_dte->data128[0];
+ new.data128[1] |= initial_dte->data128[1];
}
- flags &= ~DEV_DOMID_MASK;
- flags |= domid;
+ set_dte_gcr3_table(iommu, dev_data, &new);
- old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK;
- dev_table[devid].data[1] = flags;
- dev_table[devid].data[0] = pte_root;
+ update_dte256(iommu, dev_data, &new);
/*
* A kdump kernel might be replacing a domain ID that was copied from
@@ -1912,19 +2125,16 @@ static void set_dte_entry(struct amd_iommu *iommu,
}
}
-static void clear_dte_entry(struct amd_iommu *iommu, u16 devid)
+/*
+ * Clear DMA-remap related flags to block all DMA (blockeded domain)
+ */
+static void clear_dte_entry(struct amd_iommu *iommu, struct iommu_dev_data *dev_data)
{
- struct dev_table_entry *dev_table = get_dev_table(iommu);
-
- /* remove entry from the device table seen by the hardware */
- dev_table[devid].data[0] = DTE_FLAG_V;
-
- if (!amd_iommu_snp_en)
- dev_table[devid].data[0] |= DTE_FLAG_TV;
-
- dev_table[devid].data[1] &= DTE_FLAG_MASK;
+ struct dev_table_entry new = {};
+ struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];
- amd_iommu_apply_erratum_63(iommu, devid);
+ make_clear_dte(dev_data, dte, &new);
+ update_dte256(iommu, dev_data, &new);
}
/* Update and flush DTE for the given device */
@@ -1935,7 +2145,7 @@ static void dev_update_dte(struct iommu_dev_data *dev_data, bool set)
if (set)
set_dte_entry(iommu, dev_data);
else
- clear_dte_entry(iommu, dev_data->devid);
+ clear_dte_entry(iommu, dev_data);
clone_aliases(iommu, dev_data->dev);
device_flush_dte(dev_data);
@@ -1998,7 +2208,6 @@ static int pdom_attach_iommu(struct amd_iommu *iommu,
struct protection_domain *pdom)
{
struct pdom_iommu_info *pdom_iommu_info, *curr;
- struct io_pgtable_cfg *cfg = &pdom->iop.pgtbl.cfg;
unsigned long flags;
int ret = 0;
@@ -2027,10 +2236,6 @@ static int pdom_attach_iommu(struct amd_iommu *iommu,
goto out_unlock;
}
- /* Update NUMA Node ID */
- if (cfg->amd.nid == NUMA_NO_NODE)
- cfg->amd.nid = dev_to_node(&iommu->dev->dev);
-
out_unlock:
spin_unlock_irqrestore(&pdom->lock, flags);
return ret;
@@ -2220,8 +2425,14 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
}
out_err:
+
iommu_completion_wait(iommu);
+ if (FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2))
+ dev_data->max_irqs = MAX_IRQS_PER_TABLE_2K;
+ else
+ dev_data->max_irqs = MAX_IRQS_PER_TABLE_512;
+
if (dev_is_pci(dev))
pci_prepare_ats(to_pci_dev(dev), PAGE_SHIFT);
@@ -2258,25 +2469,15 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
*
*****************************************************************************/
-void protection_domain_free(struct protection_domain *domain)
-{
- WARN_ON(!list_empty(&domain->dev_list));
- if (domain->domain.type & __IOMMU_DOMAIN_PAGING)
- free_io_pgtable_ops(&domain->iop.pgtbl.ops);
- pdom_id_free(domain->id);
- kfree(domain);
-}
-
-static void protection_domain_init(struct protection_domain *domain, int nid)
+static void protection_domain_init(struct protection_domain *domain)
{
spin_lock_init(&domain->lock);
INIT_LIST_HEAD(&domain->dev_list);
INIT_LIST_HEAD(&domain->dev_data_list);
xa_init(&domain->iommu_array);
- domain->iop.pgtbl.cfg.amd.nid = nid;
}
-struct protection_domain *protection_domain_alloc(int nid)
+struct protection_domain *protection_domain_alloc(void)
{
struct protection_domain *domain;
int domid;
@@ -2292,12 +2493,13 @@ struct protection_domain *protection_domain_alloc(int nid)
}
domain->id = domid;
- protection_domain_init(domain, nid);
+ protection_domain_init(domain);
return domain;
}
-static int pdom_setup_pgtable(struct protection_domain *domain)
+static int pdom_setup_pgtable(struct protection_domain *domain,
+ struct device *dev)
{
struct io_pgtable_ops *pgtbl_ops;
enum io_pgtable_fmt fmt;
@@ -2311,6 +2513,7 @@ static int pdom_setup_pgtable(struct protection_domain *domain)
break;
}
+ domain->iop.pgtbl.cfg.amd.nid = dev_to_node(dev);
pgtbl_ops = alloc_io_pgtable_ops(fmt, &domain->iop.pgtbl.cfg, domain);
if (!pgtbl_ops)
return -ENOMEM;
@@ -2341,12 +2544,12 @@ do_iommu_domain_alloc(struct device *dev, u32 flags,
struct protection_domain *domain;
int ret;
- domain = protection_domain_alloc(dev_to_node(dev));
+ domain = protection_domain_alloc();
if (!domain)
return ERR_PTR(-ENOMEM);
domain->pd_mode = pgtable;
- ret = pdom_setup_pgtable(domain);
+ ret = pdom_setup_pgtable(domain, dev);
if (ret) {
pdom_id_free(domain->id);
kfree(domain);
@@ -2403,7 +2606,11 @@ void amd_iommu_domain_free(struct iommu_domain *dom)
{
struct protection_domain *domain = to_pdomain(dom);
- protection_domain_free(domain);
+ WARN_ON(!list_empty(&domain->dev_list));
+ if (domain->domain.type & __IOMMU_DOMAIN_PAGING)
+ free_io_pgtable_ops(&domain->iop.pgtbl.ops);
+ pdom_id_free(domain->id);
+ kfree(domain);
}
static int blocked_domain_attach_device(struct iommu_domain *domain,
@@ -2422,10 +2629,19 @@ static int blocked_domain_attach_device(struct iommu_domain *domain,
return 0;
}
+static int blocked_domain_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
+{
+ amd_iommu_remove_dev_pasid(dev, pasid, old);
+ return 0;
+}
+
static struct iommu_domain blocked_domain = {
.type = IOMMU_DOMAIN_BLOCKED,
.ops = &(const struct iommu_domain_ops) {
.attach_dev = blocked_domain_attach_device,
+ .set_dev_pasid = blocked_domain_set_dev_pasid,
}
};
@@ -2445,7 +2661,7 @@ void amd_iommu_init_identity_domain(void)
identity_domain.id = pdom_id_alloc();
- protection_domain_init(&identity_domain, NUMA_NO_NODE);
+ protection_domain_init(&identity_domain);
}
/* Same as blocked domain except it supports only ops->attach_dev() */
@@ -2613,12 +2829,12 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
bool enable)
{
struct protection_domain *pdomain = to_pdomain(domain);
- struct dev_table_entry *dev_table;
+ struct dev_table_entry *dte;
struct iommu_dev_data *dev_data;
bool domain_flush = false;
struct amd_iommu *iommu;
unsigned long flags;
- u64 pte_root;
+ u64 new;
spin_lock_irqsave(&pdomain->lock, flags);
if (!(pdomain->dirty_tracking ^ enable)) {
@@ -2627,16 +2843,15 @@ static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
}
list_for_each_entry(dev_data, &pdomain->dev_list, list) {
+ spin_lock(&dev_data->dte_lock);
iommu = get_amd_iommu_from_dev_data(dev_data);
-
- dev_table = get_dev_table(iommu);
- pte_root = dev_table[dev_data->devid].data[0];
-
- pte_root = (enable ? pte_root | DTE_FLAG_HAD :
- pte_root & ~DTE_FLAG_HAD);
+ dte = &get_dev_table(iommu)[dev_data->devid];
+ new = dte->data[0];
+ new = (enable ? new | DTE_FLAG_HAD : new & ~DTE_FLAG_HAD);
+ dte->data[0] = new;
+ spin_unlock(&dev_data->dte_lock);
/* Flush device DTE */
- dev_table[dev_data->devid].data[0] = pte_root;
device_flush_dte(dev_data);
domain_flush = true;
}
@@ -2847,7 +3062,6 @@ const struct iommu_ops amd_iommu_ops = {
.def_domain_type = amd_iommu_def_domain_type,
.dev_enable_feat = amd_iommu_dev_enable_feature,
.dev_disable_feat = amd_iommu_dev_disable_feature,
- .remove_dev_pasid = amd_iommu_remove_dev_pasid,
.page_response = amd_iommu_page_response,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = amd_iommu_attach_device,
@@ -2899,20 +3113,33 @@ out:
raw_spin_unlock_irqrestore(&iommu->lock, flags);
}
+static inline u8 iommu_get_int_tablen(struct iommu_dev_data *dev_data)
+{
+ if (dev_data && dev_data->max_irqs == MAX_IRQS_PER_TABLE_2K)
+ return DTE_INTTABLEN_2K;
+ return DTE_INTTABLEN_512;
+}
+
static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,
struct irq_remap_table *table)
{
- u64 dte;
- struct dev_table_entry *dev_table = get_dev_table(iommu);
+ u64 new;
+ struct dev_table_entry *dte = &get_dev_table(iommu)[devid];
+ struct iommu_dev_data *dev_data = search_dev_data(iommu, devid);
- dte = dev_table[devid].data[2];
- dte &= ~DTE_IRQ_PHYS_ADDR_MASK;
- dte |= iommu_virt_to_phys(table->table);
- dte |= DTE_IRQ_REMAP_INTCTL;
- dte |= DTE_INTTABLEN;
- dte |= DTE_IRQ_REMAP_ENABLE;
+ if (dev_data)
+ spin_lock(&dev_data->dte_lock);
- dev_table[devid].data[2] = dte;
+ new = READ_ONCE(dte->data[2]);
+ new &= ~DTE_IRQ_PHYS_ADDR_MASK;
+ new |= iommu_virt_to_phys(table->table);
+ new |= DTE_IRQ_REMAP_INTCTL;
+ new |= iommu_get_int_tablen(dev_data);
+ new |= DTE_IRQ_REMAP_ENABLE;
+ WRITE_ONCE(dte->data[2], new);
+
+ if (dev_data)
+ spin_unlock(&dev_data->dte_lock);
}
static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid)
@@ -2933,7 +3160,7 @@ static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid)
return table;
}
-static struct irq_remap_table *__alloc_irq_table(void)
+static struct irq_remap_table *__alloc_irq_table(int nid, int order)
{
struct irq_remap_table *table;
@@ -2941,19 +3168,13 @@ static struct irq_remap_table *__alloc_irq_table(void)
if (!table)
return NULL;
- table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL);
+ table->table = iommu_alloc_pages_node(nid, GFP_KERNEL, order);
if (!table->table) {
kfree(table);
return NULL;
}
raw_spin_lock_init(&table->lock);
- if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
- memset(table->table, 0,
- MAX_IRQS_PER_TABLE * sizeof(u32));
- else
- memset(table->table, 0,
- (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2)));
return table;
}
@@ -2985,13 +3206,24 @@ static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias,
return 0;
}
+static inline size_t get_irq_table_size(unsigned int max_irqs)
+{
+ if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+ return max_irqs * sizeof(u32);
+
+ return max_irqs * (sizeof(u64) * 2);
+}
+
static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu,
- u16 devid, struct pci_dev *pdev)
+ u16 devid, struct pci_dev *pdev,
+ unsigned int max_irqs)
{
struct irq_remap_table *table = NULL;
struct irq_remap_table *new_table = NULL;
struct amd_iommu_pci_seg *pci_seg;
unsigned long flags;
+ int order = get_order(get_irq_table_size(max_irqs));
+ int nid = iommu && iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
u16 alias;
spin_lock_irqsave(&iommu_table_lock, flags);
@@ -3010,7 +3242,7 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu,
spin_unlock_irqrestore(&iommu_table_lock, flags);
/* Nothing there yet, allocate new irq remapping table */
- new_table = __alloc_irq_table();
+ new_table = __alloc_irq_table(nid, order);
if (!new_table)
return NULL;
@@ -3045,20 +3277,21 @@ out_unlock:
spin_unlock_irqrestore(&iommu_table_lock, flags);
if (new_table) {
- kmem_cache_free(amd_iommu_irq_cache, new_table->table);
+ iommu_free_pages(new_table->table, order);
kfree(new_table);
}
return table;
}
static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count,
- bool align, struct pci_dev *pdev)
+ bool align, struct pci_dev *pdev,
+ unsigned long max_irqs)
{
struct irq_remap_table *table;
int index, c, alignment = 1;
unsigned long flags;
- table = alloc_irq_table(iommu, devid, pdev);
+ table = alloc_irq_table(iommu, devid, pdev, max_irqs);
if (!table)
return -ENODEV;
@@ -3069,7 +3302,7 @@ static int alloc_irq_index(struct amd_iommu *iommu, u16 devid, int count,
/* Scan table for free entries */
for (index = ALIGN(table->min_index, alignment), c = 0;
- index < MAX_IRQS_PER_TABLE;) {
+ index < max_irqs;) {
if (!iommu->irte_ops->is_allocated(table, index)) {
c += 1;
} else {
@@ -3339,6 +3572,14 @@ static void fill_msi_msg(struct msi_msg *msg, u32 index)
msg->data = index;
msg->address_lo = 0;
msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
+ /*
+ * The struct msi_msg.dest_mode_logical is used to set the DM bit
+ * in MSI Message Address Register. For device w/ 2K int-remap support,
+ * this is bit must be set to 1 regardless of the actual destination
+ * mode, which is signified by the IRTE[DM].
+ */
+ if (FEATURE_NUM_INT_REMAP_SUP_2K(amd_iommu_efr2))
+ msg->arch_addr_lo.dest_mode_logical = true;
msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
}
@@ -3401,6 +3642,8 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
struct amd_ir_data *data = NULL;
struct amd_iommu *iommu;
struct irq_cfg *cfg;
+ struct iommu_dev_data *dev_data;
+ unsigned long max_irqs;
int i, ret, devid, seg, sbdf;
int index;
@@ -3419,6 +3662,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
if (!iommu)
return -EINVAL;
+ dev_data = search_dev_data(iommu, devid);
+ max_irqs = dev_data ? dev_data->max_irqs : MAX_IRQS_PER_TABLE_512;
+
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
if (ret < 0)
return ret;
@@ -3426,7 +3672,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) {
struct irq_remap_table *table;
- table = alloc_irq_table(iommu, devid, NULL);
+ table = alloc_irq_table(iommu, devid, NULL, max_irqs);
if (table) {
if (!table->min_index) {
/*
@@ -3447,9 +3693,11 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
bool align = (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI);
index = alloc_irq_index(iommu, devid, nr_irqs, align,
- msi_desc_to_pci_dev(info->desc));
+ msi_desc_to_pci_dev(info->desc),
+ max_irqs);
} else {
- index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL);
+ index = alloc_irq_index(iommu, devid, nr_irqs, false, NULL,
+ max_irqs);
}
if (index < 0) {
@@ -3486,7 +3734,6 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
irq_data->chip_data = data;
irq_data->chip = &amd_ir_chip;
irq_remapping_prepare_irte(data, cfg, info, devid, index, i);
- irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
}
return 0;
@@ -3653,6 +3900,9 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
struct iommu_dev_data *dev_data;
+ if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)))
+ return -EINVAL;
+
if (ir_data->iommu == NULL)
return -EINVAL;
@@ -3663,21 +3913,11 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
* we should not modify the IRTE
*/
if (!dev_data || !dev_data->use_vapic)
- return 0;
+ return -EINVAL;
ir_data->cfg = irqd_cfg(data);
pi_data->ir_data = ir_data;
- /* Note:
- * SVM tries to set up for VAPIC mode, but we are in
- * legacy mode. So, we force legacy mode instead.
- */
- if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) {
- pr_debug("%s: Fall back to using intr legacy remap\n",
- __func__);
- pi_data->is_guest_mode = false;
- }
-
pi_data->prev_ga_tag = ir_data->cached_ga_tag;
if (pi_data->is_guest_mode) {
ir_data->ga_root_ptr = (pi_data->base >> 12);
diff --git a/drivers/iommu/amd/pasid.c b/drivers/iommu/amd/pasid.c
index 9101d07b11d3..77c8e9a91cbc 100644
--- a/drivers/iommu/amd/pasid.c
+++ b/drivers/iommu/amd/pasid.c
@@ -185,7 +185,7 @@ struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
struct protection_domain *pdom;
int ret;
- pdom = protection_domain_alloc(dev_to_node(dev));
+ pdom = protection_domain_alloc();
if (!pdom)
return ERR_PTR(-ENOMEM);
@@ -195,7 +195,7 @@ struct iommu_domain *amd_iommu_domain_alloc_sva(struct device *dev,
ret = mmu_notifier_register(&pdom->mn, mm);
if (ret) {
- protection_domain_free(pdom);
+ amd_iommu_domain_free(&pdom->domain);
return ERR_PTR(ret);
}
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index 95ba3caeb401..e13501541fdd 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -36,7 +36,7 @@
#define DART_MAX_STREAMS 256
#define DART_MAX_TTBR 4
-#define MAX_DARTS_PER_DEVICE 2
+#define MAX_DARTS_PER_DEVICE 3
/* Common registers */
@@ -277,6 +277,9 @@ struct apple_dart_domain {
* @streams: streams for this device
*/
struct apple_dart_master_cfg {
+ /* Intersection of DART capabilitles */
+ u32 supports_bypass : 1;
+
struct apple_dart_stream_map stream_maps[MAX_DARTS_PER_DEVICE];
};
@@ -684,7 +687,7 @@ static int apple_dart_attach_dev_identity(struct iommu_domain *domain,
struct apple_dart_stream_map *stream_map;
int i;
- if (!cfg->stream_maps[0].dart->supports_bypass)
+ if (!cfg->supports_bypass)
return -EINVAL;
for_each_stream_map(i, cfg, stream_map)
@@ -792,20 +795,23 @@ static int apple_dart_of_xlate(struct device *dev,
return -EINVAL;
sid = args->args[0];
- if (!cfg)
+ if (!cfg) {
cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
- if (!cfg)
- return -ENOMEM;
+ if (!cfg)
+ return -ENOMEM;
+ /* Will be ANDed with DART capabilities */
+ cfg->supports_bypass = true;
+ }
dev_iommu_priv_set(dev, cfg);
cfg_dart = cfg->stream_maps[0].dart;
if (cfg_dart) {
- if (cfg_dart->supports_bypass != dart->supports_bypass)
- return -EINVAL;
if (cfg_dart->pgsize != dart->pgsize)
return -EINVAL;
}
+ cfg->supports_bypass &= dart->supports_bypass;
+
for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) {
if (cfg->stream_maps[i].dart == dart) {
set_bit(sid, cfg->stream_maps[i].sidmap);
@@ -945,7 +951,7 @@ static int apple_dart_def_domain_type(struct device *dev)
if (cfg->stream_maps[0].dart->pgsize > PAGE_SIZE)
return IOMMU_DOMAIN_IDENTITY;
- if (!cfg->stream_maps[0].dart->supports_bypass)
+ if (!cfg->supports_bypass)
return IOMMU_DOMAIN_DMA;
return 0;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
index c7cc613050d9..e4fd8d522af8 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
@@ -43,6 +43,8 @@ static void arm_smmu_make_nested_cd_table_ste(
target->data[0] |= nested_domain->ste[0] &
~cpu_to_le64(STRTAB_STE_0_CFG);
target->data[1] |= nested_domain->ste[1];
+ /* Merge events for DoS mitigations on eventq */
+ target->data[1] |= cpu_to_le64(STRTAB_STE_1_MEV);
}
/*
@@ -85,6 +87,47 @@ static void arm_smmu_make_nested_domain_ste(
}
}
+int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
+ struct arm_smmu_nested_domain *nested_domain)
+{
+ struct arm_smmu_vmaster *vmaster;
+ unsigned long vsid;
+ int ret;
+
+ iommu_group_mutex_assert(state->master->dev);
+
+ ret = iommufd_viommu_get_vdev_id(&nested_domain->vsmmu->core,
+ state->master->dev, &vsid);
+ if (ret)
+ return ret;
+
+ vmaster = kzalloc(sizeof(*vmaster), GFP_KERNEL);
+ if (!vmaster)
+ return -ENOMEM;
+ vmaster->vsmmu = nested_domain->vsmmu;
+ vmaster->vsid = vsid;
+ state->vmaster = vmaster;
+
+ return 0;
+}
+
+void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state)
+{
+ struct arm_smmu_master *master = state->master;
+
+ mutex_lock(&master->smmu->streams_mutex);
+ kfree(master->vmaster);
+ master->vmaster = state->vmaster;
+ mutex_unlock(&master->smmu->streams_mutex);
+}
+
+void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master)
+{
+ struct arm_smmu_attach_state state = { .master = master };
+
+ arm_smmu_attach_commit_vmaster(&state);
+}
+
static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
struct device *dev)
{
@@ -178,18 +221,12 @@ arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
const struct iommu_user_data *user_data)
{
struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
- const u32 SUPPORTED_FLAGS = IOMMU_HWPT_FAULT_ID_VALID;
struct arm_smmu_nested_domain *nested_domain;
struct iommu_hwpt_arm_smmuv3 arg;
bool enable_ats = false;
int ret;
- /*
- * Faults delivered to the nested domain are faults that originated by
- * the S1 in the domain. The core code will match all PASIDs when
- * delivering the fault due to user_pasid_table
- */
- if (flags & ~SUPPORTED_FLAGS)
+ if (flags)
return ERR_PTR(-EOPNOTSUPP);
ret = iommu_copy_struct_from_user(&arg, user_data,
@@ -398,4 +435,21 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
return &vsmmu->core;
}
+int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt)
+{
+ struct iommu_vevent_arm_smmuv3 vevt;
+ int i;
+
+ lockdep_assert_held(&vmaster->vsmmu->smmu->streams_mutex);
+
+ vevt.evt[0] = cpu_to_le64((evt[0] & ~EVTQ_0_SID) |
+ FIELD_PREP(EVTQ_0_SID, vmaster->vsid));
+ for (i = 1; i < EVTQ_ENT_DWORDS; i++)
+ vevt.evt[i] = cpu_to_le64(evt[i]);
+
+ return iommufd_viommu_report_event(&vmaster->vsmmu->core,
+ IOMMU_VEVENTQ_TYPE_ARM_SMMUV3, &vevt,
+ sizeof(vevt));
+}
+
MODULE_IMPORT_NS("IOMMUFD");
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 1d3e71569775..980cc6b33c43 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -112,6 +112,15 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
* from the current CPU register
*/
target->data[3] = cpu_to_le64(read_sysreg(mair_el1));
+
+ /*
+ * Note that we don't bother with S1PIE on the SMMU, we just rely on
+ * our default encoding scheme matching direct permissions anyway.
+ * SMMU has no notion of S1POE nor GCS, so make sure that is clear if
+ * either is enabled for CPUs, just in case anyone imagines otherwise.
+ */
+ if (system_supports_poe() || system_supports_gcs())
+ dev_warn_once(master->smmu->dev, "SVA devices ignore permission overlays and GCS\n");
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_sva_cd);
@@ -206,8 +215,12 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
unsigned long asid_bits;
u32 feat_mask = ARM_SMMU_FEAT_COHERENCY;
- if (vabits_actual == 52)
+ if (vabits_actual == 52) {
+ /* We don't support LPA2 */
+ if (PAGE_SIZE != SZ_64K)
+ return false;
feat_mask |= ARM_SMMU_FEAT_VAX;
+ }
if ((smmu->features & feat_mask) != feat_mask)
return false;
@@ -398,6 +411,12 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
return ERR_CAST(smmu_domain);
smmu_domain->domain.type = IOMMU_DOMAIN_SVA;
smmu_domain->domain.ops = &arm_smmu_sva_domain_ops;
+
+ /*
+ * Choose page_size as the leaf page size for invalidation when
+ * ARM_SMMU_FEAT_RANGE_INV is present
+ */
+ smmu_domain->domain.pgsize_bitmap = PAGE_SIZE;
smmu_domain->smmu = smmu;
ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 42a89d499cda..be8d0f7db617 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -26,6 +26,7 @@
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/platform_device.h>
+#include <linux/string_choices.h>
#include <kunit/visibility.h>
#include <uapi/linux/iommufd.h>
@@ -83,8 +84,28 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
{ 0, NULL},
};
-static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
- struct arm_smmu_device *smmu, u32 flags);
+static const char * const event_str[] = {
+ [EVT_ID_BAD_STREAMID_CONFIG] = "C_BAD_STREAMID",
+ [EVT_ID_STE_FETCH_FAULT] = "F_STE_FETCH",
+ [EVT_ID_BAD_STE_CONFIG] = "C_BAD_STE",
+ [EVT_ID_STREAM_DISABLED_FAULT] = "F_STREAM_DISABLED",
+ [EVT_ID_BAD_SUBSTREAMID_CONFIG] = "C_BAD_SUBSTREAMID",
+ [EVT_ID_CD_FETCH_FAULT] = "F_CD_FETCH",
+ [EVT_ID_BAD_CD_CONFIG] = "C_BAD_CD",
+ [EVT_ID_TRANSLATION_FAULT] = "F_TRANSLATION",
+ [EVT_ID_ADDR_SIZE_FAULT] = "F_ADDR_SIZE",
+ [EVT_ID_ACCESS_FAULT] = "F_ACCESS",
+ [EVT_ID_PERMISSION_FAULT] = "F_PERMISSION",
+ [EVT_ID_VMS_FETCH_FAULT] = "F_VMS_FETCH",
+};
+
+static const char * const event_class_str[] = {
+ [0] = "CD fetch",
+ [1] = "Stage 1 translation table fetch",
+ [2] = "Input address caused fault",
+ [3] = "Reserved",
+};
+
static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
static void parse_driver_options(struct arm_smmu_device *smmu)
@@ -1031,7 +1052,7 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
- STRTAB_STE_1_EATS);
+ STRTAB_STE_1_EATS | STRTAB_STE_1_MEV);
used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
/*
@@ -1047,7 +1068,7 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
if (cfg & BIT(1)) {
used_bits[1] |=
cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS |
- STRTAB_STE_1_SHCFG);
+ STRTAB_STE_1_SHCFG | STRTAB_STE_1_MEV);
used_bits[2] |=
cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
@@ -1759,17 +1780,53 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
}
/* IRQ and event handlers */
-static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
+static void arm_smmu_decode_event(struct arm_smmu_device *smmu, u64 *raw,
+ struct arm_smmu_event *event)
+{
+ struct arm_smmu_master *master;
+
+ event->id = FIELD_GET(EVTQ_0_ID, raw[0]);
+ event->sid = FIELD_GET(EVTQ_0_SID, raw[0]);
+ event->ssv = FIELD_GET(EVTQ_0_SSV, raw[0]);
+ event->ssid = event->ssv ? FIELD_GET(EVTQ_0_SSID, raw[0]) : IOMMU_NO_PASID;
+ event->privileged = FIELD_GET(EVTQ_1_PnU, raw[1]);
+ event->instruction = FIELD_GET(EVTQ_1_InD, raw[1]);
+ event->s2 = FIELD_GET(EVTQ_1_S2, raw[1]);
+ event->read = FIELD_GET(EVTQ_1_RnW, raw[1]);
+ event->stag = FIELD_GET(EVTQ_1_STAG, raw[1]);
+ event->stall = FIELD_GET(EVTQ_1_STALL, raw[1]);
+ event->class = FIELD_GET(EVTQ_1_CLASS, raw[1]);
+ event->iova = FIELD_GET(EVTQ_2_ADDR, raw[2]);
+ event->ipa = raw[3] & EVTQ_3_IPA;
+ event->fetch_addr = raw[3] & EVTQ_3_FETCH_ADDR;
+ event->ttrnw = FIELD_GET(EVTQ_1_TT_READ, raw[1]);
+ event->class_tt = false;
+ event->dev = NULL;
+
+ if (event->id == EVT_ID_PERMISSION_FAULT)
+ event->class_tt = (event->class == EVTQ_1_CLASS_TT);
+
+ mutex_lock(&smmu->streams_mutex);
+ master = arm_smmu_find_master(smmu, event->sid);
+ if (master)
+ event->dev = get_device(master->dev);
+ mutex_unlock(&smmu->streams_mutex);
+}
+
+static int arm_smmu_handle_event(struct arm_smmu_device *smmu, u64 *evt,
+ struct arm_smmu_event *event)
{
int ret = 0;
u32 perm = 0;
struct arm_smmu_master *master;
- bool ssid_valid = evt[0] & EVTQ_0_SSV;
- u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
struct iopf_fault fault_evt = { };
struct iommu_fault *flt = &fault_evt.fault;
- switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
+ switch (event->id) {
+ case EVT_ID_BAD_STE_CONFIG:
+ case EVT_ID_STREAM_DISABLED_FAULT:
+ case EVT_ID_BAD_SUBSTREAMID_CONFIG:
+ case EVT_ID_BAD_CD_CONFIG:
case EVT_ID_TRANSLATION_FAULT:
case EVT_ID_ADDR_SIZE_FAULT:
case EVT_ID_ACCESS_FAULT:
@@ -1779,69 +1836,126 @@ static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
return -EOPNOTSUPP;
}
- if (!(evt[1] & EVTQ_1_STALL))
- return -EOPNOTSUPP;
-
- if (evt[1] & EVTQ_1_RnW)
- perm |= IOMMU_FAULT_PERM_READ;
- else
- perm |= IOMMU_FAULT_PERM_WRITE;
+ if (event->stall) {
+ if (event->read)
+ perm |= IOMMU_FAULT_PERM_READ;
+ else
+ perm |= IOMMU_FAULT_PERM_WRITE;
- if (evt[1] & EVTQ_1_InD)
- perm |= IOMMU_FAULT_PERM_EXEC;
+ if (event->instruction)
+ perm |= IOMMU_FAULT_PERM_EXEC;
- if (evt[1] & EVTQ_1_PnU)
- perm |= IOMMU_FAULT_PERM_PRIV;
+ if (event->privileged)
+ perm |= IOMMU_FAULT_PERM_PRIV;
- flt->type = IOMMU_FAULT_PAGE_REQ;
- flt->prm = (struct iommu_fault_page_request) {
- .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
- .grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
- .perm = perm,
- .addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
- };
+ flt->type = IOMMU_FAULT_PAGE_REQ;
+ flt->prm = (struct iommu_fault_page_request){
+ .flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
+ .grpid = event->stag,
+ .perm = perm,
+ .addr = event->iova,
+ };
- if (ssid_valid) {
- flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
- flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
+ if (event->ssv) {
+ flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
+ flt->prm.pasid = event->ssid;
+ }
}
mutex_lock(&smmu->streams_mutex);
- master = arm_smmu_find_master(smmu, sid);
+ master = arm_smmu_find_master(smmu, event->sid);
if (!master) {
ret = -EINVAL;
goto out_unlock;
}
- ret = iommu_report_device_fault(master->dev, &fault_evt);
+ if (event->stall)
+ ret = iommu_report_device_fault(master->dev, &fault_evt);
+ else if (master->vmaster && !event->s2)
+ ret = arm_vmaster_report_event(master->vmaster, evt);
+ else
+ ret = -EOPNOTSUPP; /* Unhandled events should be pinned */
out_unlock:
mutex_unlock(&smmu->streams_mutex);
return ret;
}
+static void arm_smmu_dump_raw_event(struct arm_smmu_device *smmu, u64 *raw,
+ struct arm_smmu_event *event)
+{
+ int i;
+
+ dev_err(smmu->dev, "event 0x%02x received:\n", event->id);
+
+ for (i = 0; i < EVTQ_ENT_DWORDS; ++i)
+ dev_err(smmu->dev, "\t0x%016llx\n", raw[i]);
+}
+
+#define ARM_SMMU_EVT_KNOWN(e) ((e)->id < ARRAY_SIZE(event_str) && event_str[(e)->id])
+#define ARM_SMMU_LOG_EVT_STR(e) ARM_SMMU_EVT_KNOWN(e) ? event_str[(e)->id] : "UNKNOWN"
+#define ARM_SMMU_LOG_CLIENT(e) (e)->dev ? dev_name((e)->dev) : "(unassigned sid)"
+
+static void arm_smmu_dump_event(struct arm_smmu_device *smmu, u64 *raw,
+ struct arm_smmu_event *evt,
+ struct ratelimit_state *rs)
+{
+ if (!__ratelimit(rs))
+ return;
+
+ arm_smmu_dump_raw_event(smmu, raw, evt);
+
+ switch (evt->id) {
+ case EVT_ID_TRANSLATION_FAULT:
+ case EVT_ID_ADDR_SIZE_FAULT:
+ case EVT_ID_ACCESS_FAULT:
+ case EVT_ID_PERMISSION_FAULT:
+ dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x iova: %#llx ipa: %#llx",
+ ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
+ evt->sid, evt->ssid, evt->iova, evt->ipa);
+
+ dev_err(smmu->dev, "%s %s %s %s \"%s\"%s%s stag: %#x",
+ evt->privileged ? "priv" : "unpriv",
+ evt->instruction ? "inst" : "data",
+ str_read_write(evt->read),
+ evt->s2 ? "s2" : "s1", event_class_str[evt->class],
+ evt->class_tt ? (evt->ttrnw ? " ttd_read" : " ttd_write") : "",
+ evt->stall ? " stall" : "", evt->stag);
+
+ break;
+
+ case EVT_ID_STE_FETCH_FAULT:
+ case EVT_ID_CD_FETCH_FAULT:
+ case EVT_ID_VMS_FETCH_FAULT:
+ dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x fetch_addr: %#llx",
+ ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
+ evt->sid, evt->ssid, evt->fetch_addr);
+
+ break;
+
+ default:
+ dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x",
+ ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
+ evt->sid, evt->ssid);
+ }
+}
+
static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
{
- int i, ret;
+ u64 evt[EVTQ_ENT_DWORDS];
+ struct arm_smmu_event event = {0};
struct arm_smmu_device *smmu = dev;
struct arm_smmu_queue *q = &smmu->evtq.q;
struct arm_smmu_ll_queue *llq = &q->llq;
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
- u64 evt[EVTQ_ENT_DWORDS];
do {
while (!queue_remove_raw(q, evt)) {
- u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
-
- ret = arm_smmu_handle_evt(smmu, evt);
- if (!ret || !__ratelimit(&rs))
- continue;
-
- dev_info(smmu->dev, "event 0x%02x received:\n", id);
- for (i = 0; i < ARRAY_SIZE(evt); ++i)
- dev_info(smmu->dev, "\t0x%016llx\n",
- (unsigned long long)evt[i]);
+ arm_smmu_decode_event(smmu, evt, &event);
+ if (arm_smmu_handle_event(smmu, evt, &event))
+ arm_smmu_dump_event(smmu, evt, &event, &rs);
+ put_device(event.dev);
cond_resched();
}
@@ -2353,39 +2467,12 @@ struct arm_smmu_domain *arm_smmu_domain_alloc(void)
if (!smmu_domain)
return ERR_PTR(-ENOMEM);
- mutex_init(&smmu_domain->init_mutex);
INIT_LIST_HEAD(&smmu_domain->devices);
spin_lock_init(&smmu_domain->devices_lock);
return smmu_domain;
}
-static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
-{
- struct arm_smmu_domain *smmu_domain;
-
- /*
- * Allocate the domain and initialise some of its data structures.
- * We can't really do anything meaningful until we've added a
- * master.
- */
- smmu_domain = arm_smmu_domain_alloc();
- if (IS_ERR(smmu_domain))
- return ERR_CAST(smmu_domain);
-
- if (dev) {
- struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- int ret;
-
- ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0);
- if (ret) {
- kfree(smmu_domain);
- return ERR_PTR(ret);
- }
- }
- return &smmu_domain->domain;
-}
-
static void arm_smmu_domain_free_paging(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -2451,12 +2538,6 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
struct arm_smmu_domain *smmu_domain);
bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
- /* Restrict the stage to what we can actually support */
- if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
- smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
- if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
- smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
-
pgtbl_cfg = (struct io_pgtable_cfg) {
.pgsize_bitmap = smmu->pgsize_bitmap,
.coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
@@ -2730,6 +2811,7 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
struct arm_smmu_domain *smmu_domain =
to_smmu_domain_devices(new_domain);
unsigned long flags;
+ int ret;
/*
* arm_smmu_share_asid() must not see two domains pointing to the same
@@ -2759,9 +2841,18 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
}
if (smmu_domain) {
+ if (new_domain->type == IOMMU_DOMAIN_NESTED) {
+ ret = arm_smmu_attach_prepare_vmaster(
+ state, to_smmu_nested_domain(new_domain));
+ if (ret)
+ return ret;
+ }
+
master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
- if (!master_domain)
+ if (!master_domain) {
+ kfree(state->vmaster);
return -ENOMEM;
+ }
master_domain->master = master;
master_domain->ssid = state->ssid;
if (new_domain->type == IOMMU_DOMAIN_NESTED)
@@ -2788,6 +2879,7 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
spin_unlock_irqrestore(&smmu_domain->devices_lock,
flags);
kfree(master_domain);
+ kfree(state->vmaster);
return -EINVAL;
}
@@ -2820,6 +2912,8 @@ void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
lockdep_assert_held(&arm_smmu_asid_lock);
+ arm_smmu_attach_commit_vmaster(state);
+
if (state->ats_enabled && !master->ats_enabled) {
arm_smmu_enable_ats(master);
} else if (state->ats_enabled && master->ats_enabled) {
@@ -2858,16 +2952,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
state.master = master = dev_iommu_priv_get(dev);
smmu = master->smmu;
- mutex_lock(&smmu_domain->init_mutex);
-
- if (!smmu_domain->smmu) {
- ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0);
- } else if (smmu_domain->smmu != smmu)
- ret = -EINVAL;
-
- mutex_unlock(&smmu_domain->init_mutex);
- if (ret)
- return ret;
+ if (smmu_domain->smmu != smmu)
+ return -EINVAL;
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
@@ -2923,16 +3009,9 @@ static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_cd target_cd;
- int ret = 0;
- mutex_lock(&smmu_domain->init_mutex);
- if (!smmu_domain->smmu)
- ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0);
- else if (smmu_domain->smmu != smmu)
- ret = -EINVAL;
- mutex_unlock(&smmu_domain->init_mutex);
- if (ret)
- return ret;
+ if (smmu_domain->smmu != smmu)
+ return -EINVAL;
if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
return -EINVAL;
@@ -3021,13 +3100,12 @@ out_unlock:
return ret;
}
-static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
- struct iommu_domain *domain)
+static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old_domain)
{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(old_domain);
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
- struct arm_smmu_domain *smmu_domain;
-
- smmu_domain = to_smmu_domain(domain);
mutex_lock(&arm_smmu_asid_lock);
arm_smmu_clear_cd(master, pasid);
@@ -3048,6 +3126,7 @@ static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
sid_domain->type == IOMMU_DOMAIN_BLOCKED)
sid_domain->ops->attach_dev(sid_domain, dev);
}
+ return 0;
}
static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
@@ -3104,6 +3183,7 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
struct arm_smmu_ste ste;
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ arm_smmu_master_clear_vmaster(master);
arm_smmu_make_bypass_ste(master->smmu, &ste);
arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS);
return 0;
@@ -3122,7 +3202,9 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
struct device *dev)
{
struct arm_smmu_ste ste;
+ struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ arm_smmu_master_clear_vmaster(master);
arm_smmu_make_abort_ste(&ste);
arm_smmu_attach_dev_ste(domain, dev, &ste,
STRTAB_STE_1_S1DSS_TERMINATE);
@@ -3131,6 +3213,7 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
static const struct iommu_domain_ops arm_smmu_blocked_ops = {
.attach_dev = arm_smmu_attach_dev_blocked,
+ .set_dev_pasid = arm_smmu_blocking_set_dev_pasid,
};
static struct iommu_domain arm_smmu_blocked_domain = {
@@ -3143,6 +3226,7 @@ arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags,
const struct iommu_user_data *user_data)
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+ struct arm_smmu_device *smmu = master->smmu;
const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
IOMMU_HWPT_ALLOC_PASID |
IOMMU_HWPT_ALLOC_NEST_PARENT;
@@ -3154,25 +3238,43 @@ arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags,
if (user_data)
return ERR_PTR(-EOPNOTSUPP);
- if (flags & IOMMU_HWPT_ALLOC_PASID)
- return arm_smmu_domain_alloc_paging(dev);
-
smmu_domain = arm_smmu_domain_alloc();
if (IS_ERR(smmu_domain))
return ERR_CAST(smmu_domain);
- if (flags & IOMMU_HWPT_ALLOC_NEST_PARENT) {
- if (!(master->smmu->features & ARM_SMMU_FEAT_NESTING)) {
+ switch (flags) {
+ case 0:
+ /* Prefer S1 if available */
+ if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ else
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
+ break;
+ case IOMMU_HWPT_ALLOC_NEST_PARENT:
+ if (!(smmu->features & ARM_SMMU_FEAT_NESTING)) {
ret = -EOPNOTSUPP;
goto err_free;
}
smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
smmu_domain->nest_parent = true;
+ break;
+ case IOMMU_HWPT_ALLOC_DIRTY_TRACKING:
+ case IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID:
+ case IOMMU_HWPT_ALLOC_PASID:
+ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) {
+ ret = -EOPNOTSUPP;
+ goto err_free;
+ }
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ goto err_free;
}
smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
- ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags);
+ ret = arm_smmu_domain_finalise(smmu_domain, smmu, flags);
if (ret)
goto err_free;
return &smmu_domain->domain;
@@ -3244,8 +3346,8 @@ static struct platform_driver arm_smmu_driver;
static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
- struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
- fwnode);
+ struct device *dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
+
put_device(dev);
return dev ? dev_get_drvdata(dev) : NULL;
}
@@ -3286,6 +3388,7 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
mutex_lock(&smmu->streams_mutex);
for (i = 0; i < fwspec->num_ids; i++) {
struct arm_smmu_stream *new_stream = &master->streams[i];
+ struct rb_node *existing;
u32 sid = fwspec->ids[i];
new_stream->id = sid;
@@ -3296,11 +3399,21 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
break;
/* Insert into SID tree */
- if (rb_find_add(&new_stream->node, &smmu->streams,
- arm_smmu_streams_cmp_node)) {
- dev_warn(master->dev, "stream %u already in tree\n",
- sid);
- ret = -EINVAL;
+ existing = rb_find_add(&new_stream->node, &smmu->streams,
+ arm_smmu_streams_cmp_node);
+ if (existing) {
+ struct arm_smmu_master *existing_master =
+ rb_entry(existing, struct arm_smmu_stream, node)
+ ->master;
+
+ /* Bridged PCI devices may end up with duplicated IDs */
+ if (existing_master == master)
+ continue;
+
+ dev_warn(master->dev,
+ "Aliasing StreamID 0x%x (from %s) unsupported, expect DMA to be broken\n",
+ sid, dev_name(existing_master->dev));
+ ret = -ENODEV;
break;
}
}
@@ -3550,7 +3663,6 @@ static struct iommu_ops arm_smmu_ops = {
.blocked_domain = &arm_smmu_blocked_domain,
.capable = arm_smmu_capable,
.hw_info = arm_smmu_hw_info,
- .domain_alloc_paging = arm_smmu_domain_alloc_paging,
.domain_alloc_sva = arm_smmu_sva_domain_alloc,
.domain_alloc_paging_flags = arm_smmu_domain_alloc_paging_flags,
.probe_device = arm_smmu_probe_device,
@@ -3558,7 +3670,6 @@ static struct iommu_ops arm_smmu_ops = {
.device_group = arm_smmu_device_group,
.of_xlate = arm_smmu_of_xlate,
.get_resv_regions = arm_smmu_get_resv_regions,
- .remove_dev_pasid = arm_smmu_remove_dev_pasid,
.dev_enable_feat = arm_smmu_dev_enable_feature,
.dev_disable_feat = arm_smmu_dev_disable_feature,
.page_response = arm_smmu_page_response,
@@ -4246,7 +4357,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
*/
if (!!(reg & IDR0_COHACC) != coherent)
dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
- coherent ? "true" : "false");
+ str_true_false(coherent));
switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
case IDR0_STALL_MODEL_FORCE:
@@ -4329,6 +4440,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
if (FIELD_GET(IDR3_RIL, reg))
smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
+ if (FIELD_GET(IDR3_FWB, reg))
+ smmu->features |= ARM_SMMU_FEAT_S2FWB;
/* IDR5 */
reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 0107d3f333a1..dd1ad56ce863 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -266,6 +266,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid)
#define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
#define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
+#define STRTAB_STE_1_MEV (1UL << 19)
#define STRTAB_STE_1_S2FWB (1UL << 25)
#define STRTAB_STE_1_S1STALLD (1UL << 27)
@@ -452,10 +453,18 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid)
#define EVTQ_0_ID GENMASK_ULL(7, 0)
+#define EVT_ID_BAD_STREAMID_CONFIG 0x02
+#define EVT_ID_STE_FETCH_FAULT 0x03
+#define EVT_ID_BAD_STE_CONFIG 0x04
+#define EVT_ID_STREAM_DISABLED_FAULT 0x06
+#define EVT_ID_BAD_SUBSTREAMID_CONFIG 0x08
+#define EVT_ID_CD_FETCH_FAULT 0x09
+#define EVT_ID_BAD_CD_CONFIG 0x0a
#define EVT_ID_TRANSLATION_FAULT 0x10
#define EVT_ID_ADDR_SIZE_FAULT 0x11
#define EVT_ID_ACCESS_FAULT 0x12
#define EVT_ID_PERMISSION_FAULT 0x13
+#define EVT_ID_VMS_FETCH_FAULT 0x25
#define EVTQ_0_SSV (1UL << 11)
#define EVTQ_0_SSID GENMASK_ULL(31, 12)
@@ -467,9 +476,11 @@ static inline unsigned int arm_smmu_cdtab_l2_idx(unsigned int ssid)
#define EVTQ_1_RnW (1UL << 35)
#define EVTQ_1_S2 (1UL << 39)
#define EVTQ_1_CLASS GENMASK_ULL(41, 40)
+#define EVTQ_1_CLASS_TT 0x01
#define EVTQ_1_TT_READ (1UL << 44)
#define EVTQ_2_ADDR GENMASK_ULL(63, 0)
#define EVTQ_3_IPA GENMASK_ULL(51, 12)
+#define EVTQ_3_FETCH_ADDR GENMASK_ULL(51, 3)
/* PRI queue */
#define PRIQ_ENT_SZ_SHIFT 4
@@ -789,11 +800,37 @@ struct arm_smmu_stream {
struct rb_node node;
};
+struct arm_smmu_vmaster {
+ struct arm_vsmmu *vsmmu;
+ unsigned long vsid;
+};
+
+struct arm_smmu_event {
+ u8 stall : 1,
+ ssv : 1,
+ privileged : 1,
+ instruction : 1,
+ s2 : 1,
+ read : 1,
+ ttrnw : 1,
+ class_tt : 1;
+ u8 id;
+ u8 class;
+ u16 stag;
+ u32 sid;
+ u32 ssid;
+ u64 iova;
+ u64 ipa;
+ u64 fetch_addr;
+ struct device *dev;
+};
+
/* SMMU private data for each master */
struct arm_smmu_master {
struct arm_smmu_device *smmu;
struct device *dev;
struct arm_smmu_stream *streams;
+ struct arm_smmu_vmaster *vmaster; /* use smmu->streams_mutex */
/* Locked by the iommu core using the group mutex */
struct arm_smmu_ctx_desc_cfg cd_table;
unsigned int num_streams;
@@ -813,7 +850,6 @@ enum arm_smmu_domain_stage {
struct arm_smmu_domain {
struct arm_smmu_device *smmu;
- struct mutex init_mutex; /* Protects smmu pointer */
struct io_pgtable_ops *pgtbl_ops;
atomic_t nr_ats_masters;
@@ -943,6 +979,7 @@ struct arm_smmu_attach_state {
bool disable_ats;
ioasid_t ssid;
/* Resulting state */
+ struct arm_smmu_vmaster *vmaster;
bool ats_enabled;
};
@@ -1026,9 +1063,37 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
struct iommu_domain *parent,
struct iommufd_ctx *ictx,
unsigned int viommu_type);
+int arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
+ struct arm_smmu_nested_domain *nested_domain);
+void arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state);
+void arm_smmu_master_clear_vmaster(struct arm_smmu_master *master);
+int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster, u64 *evt);
#else
#define arm_smmu_hw_info NULL
#define arm_vsmmu_alloc NULL
+
+static inline int
+arm_smmu_attach_prepare_vmaster(struct arm_smmu_attach_state *state,
+ struct arm_smmu_nested_domain *nested_domain)
+{
+ return 0;
+}
+
+static inline void
+arm_smmu_attach_commit_vmaster(struct arm_smmu_attach_state *state)
+{
+}
+
+static inline void
+arm_smmu_master_clear_vmaster(struct arm_smmu_master *master)
+{
+}
+
+static inline int arm_vmaster_report_event(struct arm_smmu_vmaster *vmaster,
+ u64 *evt)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_ARM_SMMU_V3_IOMMUFD */
#endif /* _ARM_SMMU_V3_H */
diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
index d525ab43a4ae..dd7d030d2e89 100644
--- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
+++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
@@ -487,17 +487,6 @@ static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu)
/* VCMDQ Resource Helpers */
-static void tegra241_vcmdq_free_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
-{
- struct arm_smmu_queue *q = &vcmdq->cmdq.q;
- size_t nents = 1 << q->llq.max_n_shift;
- size_t qsz = nents << CMDQ_ENT_SZ_SHIFT;
-
- if (!q->base)
- return;
- dmam_free_coherent(vcmdq->cmdqv->smmu.dev, qsz, q->base, q->base_dma);
-}
-
static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
{
struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu;
@@ -560,7 +549,8 @@ static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx];
char header[64];
- tegra241_vcmdq_free_smmu_cmdq(vcmdq);
+ /* Note that the lvcmdq queue memory space is managed by devres */
+
tegra241_vintf_deinit_lvcmdq(vintf, lidx);
dev_dbg(vintf->cmdqv->dev,
@@ -768,13 +758,13 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu)
vintf = kzalloc(sizeof(*vintf), GFP_KERNEL);
if (!vintf)
- goto out_fallback;
+ return -ENOMEM;
/* Init VINTF0 for in-kernel use */
ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf);
if (ret) {
dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret);
- goto free_vintf;
+ return ret;
}
/* Preallocate logical VCMDQs to VINTF0 */
@@ -783,24 +773,12 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu)
vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx);
if (IS_ERR(vcmdq))
- goto free_lvcmdq;
+ return PTR_ERR(vcmdq);
}
/* Now, we are ready to run all the impl ops */
smmu->impl_ops = &tegra241_cmdqv_impl_ops;
return 0;
-
-free_lvcmdq:
- for (lidx--; lidx >= 0; lidx--)
- tegra241_vintf_free_lvcmdq(vintf, lidx);
- tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx);
-free_vintf:
- kfree(vintf);
-out_fallback:
- dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n");
- smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV;
- tegra241_cmdqv_remove(smmu);
- return 0;
}
#ifdef CONFIG_IOMMU_DEBUGFS
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
index 99030e6b16e7..db9b9a8e139c 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
@@ -110,7 +110,6 @@ static struct arm_smmu_device *cavium_smmu_impl_init(struct arm_smmu_device *smm
int arm_mmu500_reset(struct arm_smmu_device *smmu)
{
u32 reg, major;
- int i;
/*
* On MMU-500 r2p0 onwards we need to clear ACR.CACHE_LOCK before
* writes to the context bank ACTLRs will stick. And we just hope that
@@ -128,11 +127,12 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu)
reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sACR, reg);
+#ifdef CONFIG_ARM_SMMU_MMU_500_CPRE_ERRATA
/*
* Disable MMU-500's not-particularly-beneficial next-page
* prefetcher for the sake of at least 5 known errata.
*/
- for (i = 0; i < smmu->num_context_banks; ++i) {
+ for (int i = 0; i < smmu->num_context_banks; ++i) {
reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
reg &= ~ARM_MMU500_ACTLR_CPRE;
arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, reg);
@@ -140,6 +140,7 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu)
if (reg & ARM_MMU500_ACTLR_CPRE)
dev_warn_once(smmu->dev, "Failed to disable prefetcher for errata workarounds, check SACR.CACHE_LOCK\n");
}
+#endif
return 0;
}
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
index 548783f3f8e8..d03b2239baad 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
@@ -73,7 +73,7 @@ void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu)
if (__ratelimit(&rs)) {
dev_err(smmu->dev, "TLB sync timed out -- SMMU may be deadlocked\n");
- cfg = qsmmu->cfg;
+ cfg = qsmmu->data->cfg;
if (!cfg)
return;
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 601fb878d0ef..59d02687280e 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -16,6 +16,40 @@
#define QCOM_DUMMY_VAL -1
+/*
+ * SMMU-500 TRM defines BIT(0) as CMTLB (Enable context caching in the
+ * macro TLB) and BIT(1) as CPRE (Enable context caching in the prefetch
+ * buffer). The remaining bits are implementation defined and vary across
+ * SoCs.
+ */
+
+#define CPRE (1 << 1)
+#define CMTLB (1 << 0)
+#define PREFETCH_SHIFT 8
+#define PREFETCH_DEFAULT 0
+#define PREFETCH_SHALLOW (1 << PREFETCH_SHIFT)
+#define PREFETCH_MODERATE (2 << PREFETCH_SHIFT)
+#define PREFETCH_DEEP (3 << PREFETCH_SHIFT)
+#define GFX_ACTLR_PRR (1 << 5)
+
+static const struct of_device_id qcom_smmu_actlr_client_of_match[] = {
+ { .compatible = "qcom,adreno",
+ .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
+ { .compatible = "qcom,adreno-gmu",
+ .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
+ { .compatible = "qcom,adreno-smmu",
+ .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
+ { .compatible = "qcom,fastrpc",
+ .data = (const void *) (PREFETCH_DEEP | CPRE | CMTLB) },
+ { .compatible = "qcom,sc7280-mdss",
+ .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+ { .compatible = "qcom,sc7280-venus",
+ .data = (const void *) (PREFETCH_SHALLOW | CPRE | CMTLB) },
+ { .compatible = "qcom,sm8550-mdss",
+ .data = (const void *) (PREFETCH_DEFAULT | CMTLB) },
+ { }
+};
+
static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
{
return container_of(smmu, struct qcom_smmu, smmu);
@@ -99,6 +133,47 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
}
+static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
+{
+ struct arm_smmu_domain *smmu_domain = (void *)cookie;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+ u32 reg = 0;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(smmu->dev);
+ if (ret < 0) {
+ dev_err(smmu->dev, "failed to get runtime PM: %d\n", ret);
+ return;
+ }
+
+ reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
+ reg &= ~GFX_ACTLR_PRR;
+ if (set)
+ reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
+ arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
+ pm_runtime_put_autosuspend(smmu->dev);
+}
+
+static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
+{
+ struct arm_smmu_domain *smmu_domain = (void *)cookie;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ int ret;
+
+ ret = pm_runtime_resume_and_get(smmu->dev);
+ if (ret < 0) {
+ dev_err(smmu->dev, "failed to get runtime PM: %d\n", ret);
+ return;
+ }
+
+ writel_relaxed(lower_32_bits(page_addr),
+ smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
+ writel_relaxed(upper_32_bits(page_addr),
+ smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
+ pm_runtime_put_autosuspend(smmu->dev);
+}
+
#define QCOM_ADRENO_SMMU_GPU_SID 0
static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
@@ -207,13 +282,37 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
return true;
}
+static void qcom_smmu_set_actlr_dev(struct device *dev, struct arm_smmu_device *smmu, int cbndx,
+ const struct of_device_id *client_match)
+{
+ const struct of_device_id *match =
+ of_match_device(client_match, dev);
+
+ if (!match) {
+ dev_dbg(dev, "no ACTLR settings present\n");
+ return;
+ }
+
+ arm_smmu_cb_write(smmu, cbndx, ARM_SMMU_CB_ACTLR, (unsigned long)match->data);
+}
+
static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
{
+ const struct device_node *np = smmu_domain->smmu->dev->of_node;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
+ const struct of_device_id *client_match;
+ int cbndx = smmu_domain->cfg.cbndx;
struct adreno_smmu_priv *priv;
smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
+ client_match = qsmmu->data->client_match;
+
+ if (client_match)
+ qcom_smmu_set_actlr_dev(dev, smmu, cbndx, client_match);
+
/* Only enable split pagetables for the GPU device (SID 0) */
if (!qcom_adreno_smmu_is_gpu_device(dev))
return 0;
@@ -239,6 +338,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
priv->set_stall = qcom_adreno_smmu_set_stall;
priv->resume_translation = qcom_adreno_smmu_resume_translation;
+ priv->set_prr_bit = NULL;
+ priv->set_prr_addr = NULL;
+
+ if (of_device_is_compatible(np, "qcom,smmu-500") &&
+ of_device_is_compatible(np, "qcom,adreno-smmu")) {
+ priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
+ priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
+ }
return 0;
}
@@ -269,8 +376,18 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
static int qcom_smmu_init_context(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
{
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct qcom_smmu *qsmmu = to_qcom_smmu(smmu);
+ const struct of_device_id *client_match;
+ int cbndx = smmu_domain->cfg.cbndx;
+
smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
+ client_match = qsmmu->data->client_match;
+
+ if (client_match)
+ qcom_smmu_set_actlr_dev(dev, smmu, cbndx, client_match);
+
return 0;
}
@@ -507,7 +624,7 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
return ERR_PTR(-ENOMEM);
qsmmu->smmu.impl = impl;
- qsmmu->cfg = data->cfg;
+ qsmmu->data = data;
return &qsmmu->smmu;
}
@@ -550,6 +667,7 @@ static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = {
.impl = &qcom_smmu_500_impl,
.adreno_impl = &qcom_adreno_smmu_500_impl,
.cfg = &qcom_smmu_impl0_cfg,
+ .client_match = qcom_smmu_actlr_client_of_match,
};
/*
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
index 3c134d1a6277..8addd453f5f1 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
@@ -8,7 +8,7 @@
struct qcom_smmu {
struct arm_smmu_device smmu;
- const struct qcom_smmu_config *cfg;
+ const struct qcom_smmu_match_data *data;
bool bypass_quirk;
u8 bypass_cbndx;
u32 stall_enabled;
@@ -28,6 +28,7 @@ struct qcom_smmu_match_data {
const struct qcom_smmu_config *cfg;
const struct arm_smmu_impl *impl;
const struct arm_smmu_impl *adreno_impl;
+ const struct of_device_id * const client_match;
};
irqreturn_t qcom_smmu_context_fault(int irq, void *dev);
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 650664e0f6e3..8f439c265a23 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -34,6 +34,7 @@
#include <linux/pm_runtime.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>
+#include <linux/string_choices.h>
#include <linux/fsl/mc.h>
@@ -78,8 +79,11 @@ static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
- if (pm_runtime_enabled(smmu->dev))
- pm_runtime_put_autosuspend(smmu->dev);
+ if (pm_runtime_enabled(smmu->dev)) {
+ pm_runtime_mark_last_busy(smmu->dev);
+ __pm_runtime_put_autosuspend(smmu->dev);
+
+ }
}
static void arm_smmu_rpm_use_autosuspend(struct arm_smmu_device *smmu)
@@ -1194,7 +1198,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
/* Looks ok, so add the device to the domain */
arm_smmu_master_install_s2crs(cfg, S2CR_TYPE_TRANS,
smmu_domain->cfg.cbndx, fwspec);
- arm_smmu_rpm_use_autosuspend(smmu);
rpm_put:
arm_smmu_rpm_put(smmu);
return ret;
@@ -1217,7 +1220,6 @@ static int arm_smmu_attach_dev_type(struct device *dev,
return ret;
arm_smmu_master_install_s2crs(cfg, type, 0, fwspec);
- arm_smmu_rpm_use_autosuspend(smmu);
arm_smmu_rpm_put(smmu);
return 0;
}
@@ -1411,8 +1413,8 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
- struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
- fwnode);
+ struct device *dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
+
put_device(dev);
return dev ? dev_get_drvdata(dev) : NULL;
}
@@ -1437,17 +1439,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
goto out_free;
} else {
smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
-
- /*
- * Defer probe if the relevant SMMU instance hasn't finished
- * probing yet. This is a fragile hack and we'd ideally
- * avoid this race in the core code. Until that's ironed
- * out, however, this is the most pragmatic option on the
- * table.
- */
- if (!smmu)
- return ERR_PTR(dev_err_probe(dev, -EPROBE_DEFER,
- "smmu dev has not bound yet\n"));
}
ret = -EINVAL;
@@ -1496,7 +1487,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
out_cfg_free:
kfree(cfg);
out_free:
- iommu_fwspec_free(dev);
return ERR_PTR(ret);
}
@@ -2117,7 +2107,7 @@ static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu)
}
dev_notice(smmu->dev, "\tpreserved %d boot mapping%s\n", cnt,
- cnt == 1 ? "" : "s");
+ str_plural(cnt));
iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
}
@@ -2227,29 +2217,26 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
i, irq);
}
+ platform_set_drvdata(pdev, smmu);
+
+ /* Check for RMRs and install bypass SMRs if any */
+ arm_smmu_rmr_install_bypass_smr(smmu);
+
+ arm_smmu_device_reset(smmu);
+ arm_smmu_test_smr_masks(smmu);
+
err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
"smmu.%pa", &smmu->ioaddr);
- if (err) {
- dev_err(dev, "Failed to register iommu in sysfs\n");
- return err;
- }
+ if (err)
+ return dev_err_probe(dev, err, "Failed to register iommu in sysfs\n");
err = iommu_device_register(&smmu->iommu, &arm_smmu_ops,
using_legacy_binding ? NULL : dev);
if (err) {
- dev_err(dev, "Failed to register iommu\n");
iommu_device_sysfs_remove(&smmu->iommu);
- return err;
+ return dev_err_probe(dev, err, "Failed to register iommu\n");
}
- platform_set_drvdata(pdev, smmu);
-
- /* Check for RMRs and install bypass SMRs if any */
- arm_smmu_rmr_install_bypass_smr(smmu);
-
- arm_smmu_device_reset(smmu);
- arm_smmu_test_smr_masks(smmu);
-
/*
* We want to avoid touching dev->power.lock in fastpaths unless
* it's really going to do something useful - pm_runtime_enabled()
@@ -2259,6 +2246,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
if (dev->pm_domain) {
pm_runtime_set_active(dev);
pm_runtime_enable(dev);
+ arm_smmu_rpm_use_autosuspend(smmu);
}
return 0;
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index e2aeb511ae90..2dbf3243b5ad 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
#define ARM_SMMU_SCTLR_M BIT(0)
#define ARM_SMMU_CB_ACTLR 0x4
+#define ARM_SMMU_GFX_PRR_CFG_LADDR 0x6008
+#define ARM_SMMU_GFX_PRR_CFG_UADDR 0x600C
#define ARM_SMMU_CB_RESUME 0x8
#define ARM_SMMU_RESUME_TERMINATE BIT(0)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 2a9fa0c8cc00..a775e4dbe06f 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -24,6 +24,7 @@
#include <linux/memremap.h>
#include <linux/mm.h>
#include <linux/mutex.h>
+#include <linux/msi.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>
@@ -41,11 +42,6 @@ struct iommu_dma_msi_page {
phys_addr_t phys;
};
-enum iommu_dma_cookie_type {
- IOMMU_DMA_IOVA_COOKIE,
- IOMMU_DMA_MSI_COOKIE,
-};
-
enum iommu_dma_queue_type {
IOMMU_DMA_OPTS_PER_CPU_QUEUE,
IOMMU_DMA_OPTS_SINGLE_QUEUE,
@@ -58,35 +54,30 @@ struct iommu_dma_options {
};
struct iommu_dma_cookie {
- enum iommu_dma_cookie_type type;
+ struct iova_domain iovad;
+ struct list_head msi_page_list;
+ /* Flush queue */
union {
- /* Full allocator for IOMMU_DMA_IOVA_COOKIE */
- struct {
- struct iova_domain iovad;
- /* Flush queue */
- union {
- struct iova_fq *single_fq;
- struct iova_fq __percpu *percpu_fq;
- };
- /* Number of TLB flushes that have been started */
- atomic64_t fq_flush_start_cnt;
- /* Number of TLB flushes that have been finished */
- atomic64_t fq_flush_finish_cnt;
- /* Timer to regularily empty the flush queues */
- struct timer_list fq_timer;
- /* 1 when timer is active, 0 when not */
- atomic_t fq_timer_on;
- };
- /* Trivial linear page allocator for IOMMU_DMA_MSI_COOKIE */
- dma_addr_t msi_iova;
+ struct iova_fq *single_fq;
+ struct iova_fq __percpu *percpu_fq;
};
- struct list_head msi_page_list;
-
+ /* Number of TLB flushes that have been started */
+ atomic64_t fq_flush_start_cnt;
+ /* Number of TLB flushes that have been finished */
+ atomic64_t fq_flush_finish_cnt;
+ /* Timer to regularily empty the flush queues */
+ struct timer_list fq_timer;
+ /* 1 when timer is active, 0 when not */
+ atomic_t fq_timer_on;
/* Domain for flush queue callback; NULL if flush queue not in use */
- struct iommu_domain *fq_domain;
+ struct iommu_domain *fq_domain;
/* Options for dma-iommu use */
- struct iommu_dma_options options;
- struct mutex mutex;
+ struct iommu_dma_options options;
+};
+
+struct iommu_dma_msi_cookie {
+ dma_addr_t msi_iova;
+ struct list_head msi_page_list;
};
static DEFINE_STATIC_KEY_FALSE(iommu_deferred_attach_enabled);
@@ -280,7 +271,7 @@ static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
if (!cookie->fq_domain)
return;
- del_timer_sync(&cookie->fq_timer);
+ timer_delete_sync(&cookie->fq_timer);
if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE)
iommu_dma_free_fq_single(cookie->single_fq);
else
@@ -365,39 +356,24 @@ int iommu_dma_init_fq(struct iommu_domain *domain)
return 0;
}
-static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
-{
- if (cookie->type == IOMMU_DMA_IOVA_COOKIE)
- return cookie->iovad.granule;
- return PAGE_SIZE;
-}
-
-static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
-{
- struct iommu_dma_cookie *cookie;
-
- cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
- if (cookie) {
- INIT_LIST_HEAD(&cookie->msi_page_list);
- cookie->type = type;
- }
- return cookie;
-}
-
/**
* iommu_get_dma_cookie - Acquire DMA-API resources for a domain
* @domain: IOMMU domain to prepare for DMA-API usage
*/
int iommu_get_dma_cookie(struct iommu_domain *domain)
{
- if (domain->iova_cookie)
+ struct iommu_dma_cookie *cookie;
+
+ if (domain->cookie_type != IOMMU_COOKIE_NONE)
return -EEXIST;
- domain->iova_cookie = cookie_alloc(IOMMU_DMA_IOVA_COOKIE);
- if (!domain->iova_cookie)
+ cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
+ if (!cookie)
return -ENOMEM;
- mutex_init(&domain->iova_cookie->mutex);
+ INIT_LIST_HEAD(&cookie->msi_page_list);
+ domain->cookie_type = IOMMU_COOKIE_DMA_IOVA;
+ domain->iova_cookie = cookie;
return 0;
}
@@ -415,48 +391,56 @@ int iommu_get_dma_cookie(struct iommu_domain *domain)
*/
int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
{
- struct iommu_dma_cookie *cookie;
+ struct iommu_dma_msi_cookie *cookie;
if (domain->type != IOMMU_DOMAIN_UNMANAGED)
return -EINVAL;
- if (domain->iova_cookie)
+ if (domain->cookie_type != IOMMU_COOKIE_NONE)
return -EEXIST;
- cookie = cookie_alloc(IOMMU_DMA_MSI_COOKIE);
+ cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
if (!cookie)
return -ENOMEM;
cookie->msi_iova = base;
- domain->iova_cookie = cookie;
+ INIT_LIST_HEAD(&cookie->msi_page_list);
+ domain->cookie_type = IOMMU_COOKIE_DMA_MSI;
+ domain->msi_cookie = cookie;
return 0;
}
EXPORT_SYMBOL(iommu_get_msi_cookie);
/**
* iommu_put_dma_cookie - Release a domain's DMA mapping resources
- * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() or
- * iommu_get_msi_cookie()
+ * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
*/
void iommu_put_dma_cookie(struct iommu_domain *domain)
{
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iommu_dma_msi_page *msi, *tmp;
- if (!cookie)
- return;
-
- if (cookie->type == IOMMU_DMA_IOVA_COOKIE && cookie->iovad.granule) {
+ if (cookie->iovad.granule) {
iommu_dma_free_fq(cookie);
put_iova_domain(&cookie->iovad);
}
+ list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list)
+ kfree(msi);
+ kfree(cookie);
+}
- list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
- list_del(&msi->list);
+/**
+ * iommu_put_msi_cookie - Release a domain's MSI mapping resources
+ * @domain: IOMMU domain previously prepared by iommu_get_msi_cookie()
+ */
+void iommu_put_msi_cookie(struct iommu_domain *domain)
+{
+ struct iommu_dma_msi_cookie *cookie = domain->msi_cookie;
+ struct iommu_dma_msi_page *msi, *tmp;
+
+ list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list)
kfree(msi);
- }
kfree(cookie);
- domain->iova_cookie = NULL;
}
/**
@@ -676,7 +660,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev
struct iova_domain *iovad;
int ret;
- if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
+ if (!cookie || domain->cookie_type != IOMMU_COOKIE_DMA_IOVA)
return -EINVAL;
iovad = &cookie->iovad;
@@ -698,23 +682,20 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev
domain->geometry.aperture_start >> order);
/* start_pfn is always nonzero for an already-initialised domain */
- mutex_lock(&cookie->mutex);
if (iovad->start_pfn) {
if (1UL << order != iovad->granule ||
base_pfn != iovad->start_pfn) {
pr_warn("Incompatible range for DMA domain\n");
- ret = -EFAULT;
- goto done_unlock;
+ return -EFAULT;
}
- ret = 0;
- goto done_unlock;
+ return 0;
}
init_iova_domain(iovad, 1UL << order, base_pfn);
ret = iova_domain_init_rcaches(iovad);
if (ret)
- goto done_unlock;
+ return ret;
iommu_dma_init_options(&cookie->options, dev);
@@ -723,11 +704,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, struct device *dev
(!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain)))
domain->type = IOMMU_DOMAIN_DMA;
- ret = iova_reserve_iommu_regions(dev, domain);
-
-done_unlock:
- mutex_unlock(&cookie->mutex);
- return ret;
+ return iova_reserve_iommu_regions(dev, domain);
}
/**
@@ -766,9 +743,9 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
struct iova_domain *iovad = &cookie->iovad;
unsigned long shift, iova_len, iova;
- if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
- cookie->msi_iova += size;
- return cookie->msi_iova - size;
+ if (domain->cookie_type == IOMMU_COOKIE_DMA_MSI) {
+ domain->msi_cookie->msi_iova += size;
+ return domain->msi_cookie->msi_iova - size;
}
shift = iova_shift(iovad);
@@ -805,16 +782,16 @@ done:
return (dma_addr_t)iova << shift;
}
-static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
- dma_addr_t iova, size_t size, struct iommu_iotlb_gather *gather)
+static void iommu_dma_free_iova(struct iommu_domain *domain, dma_addr_t iova,
+ size_t size, struct iommu_iotlb_gather *gather)
{
- struct iova_domain *iovad = &cookie->iovad;
+ struct iova_domain *iovad = &domain->iova_cookie->iovad;
/* The MSI case is only ever cleaning up its most recent allocation */
- if (cookie->type == IOMMU_DMA_MSI_COOKIE)
- cookie->msi_iova -= size;
+ if (domain->cookie_type == IOMMU_COOKIE_DMA_MSI)
+ domain->msi_cookie->msi_iova -= size;
else if (gather && gather->queued)
- queue_iova(cookie, iova_pfn(iovad, iova),
+ queue_iova(domain->iova_cookie, iova_pfn(iovad, iova),
size >> iova_shift(iovad),
&gather->freelist);
else
@@ -842,7 +819,7 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
if (!iotlb_gather.queued)
iommu_iotlb_sync(domain, &iotlb_gather);
- iommu_dma_free_iova(cookie, dma_addr, size, &iotlb_gather);
+ iommu_dma_free_iova(domain, dma_addr, size, &iotlb_gather);
}
static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
@@ -870,7 +847,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
return DMA_MAPPING_ERROR;
if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) {
- iommu_dma_free_iova(cookie, iova, size, NULL);
+ iommu_dma_free_iova(domain, iova, size, NULL);
return DMA_MAPPING_ERROR;
}
return iova + iova_off;
@@ -1007,7 +984,7 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
out_free_sg:
sg_free_table(sgt);
out_free_iova:
- iommu_dma_free_iova(cookie, iova, size, NULL);
+ iommu_dma_free_iova(domain, iova, size, NULL);
out_free_pages:
__iommu_dma_free_pages(pages, count);
return NULL;
@@ -1484,7 +1461,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
return __finalise_sg(dev, sg, nents, iova);
out_free_iova:
- iommu_dma_free_iova(cookie, iova, iova_len, NULL);
+ iommu_dma_free_iova(domain, iova, iova_len, NULL);
out_restore_sg:
__invalidate_sg(sg, nents);
out:
@@ -1762,17 +1739,47 @@ out_err:
dev->dma_iommu = false;
}
+static bool has_msi_cookie(const struct iommu_domain *domain)
+{
+ return domain && (domain->cookie_type == IOMMU_COOKIE_DMA_IOVA ||
+ domain->cookie_type == IOMMU_COOKIE_DMA_MSI);
+}
+
+static size_t cookie_msi_granule(const struct iommu_domain *domain)
+{
+ switch (domain->cookie_type) {
+ case IOMMU_COOKIE_DMA_IOVA:
+ return domain->iova_cookie->iovad.granule;
+ case IOMMU_COOKIE_DMA_MSI:
+ return PAGE_SIZE;
+ default:
+ BUG();
+ }
+}
+
+static struct list_head *cookie_msi_pages(const struct iommu_domain *domain)
+{
+ switch (domain->cookie_type) {
+ case IOMMU_COOKIE_DMA_IOVA:
+ return &domain->iova_cookie->msi_page_list;
+ case IOMMU_COOKIE_DMA_MSI:
+ return &domain->msi_cookie->msi_page_list;
+ default:
+ BUG();
+ }
+}
+
static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
phys_addr_t msi_addr, struct iommu_domain *domain)
{
- struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct list_head *msi_page_list = cookie_msi_pages(domain);
struct iommu_dma_msi_page *msi_page;
dma_addr_t iova;
int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
- size_t size = cookie_msi_granule(cookie);
+ size_t size = cookie_msi_granule(domain);
msi_addr &= ~(phys_addr_t)(size - 1);
- list_for_each_entry(msi_page, &cookie->msi_page_list, list)
+ list_for_each_entry(msi_page, msi_page_list, list)
if (msi_page->phys == msi_addr)
return msi_page;
@@ -1790,70 +1797,35 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
INIT_LIST_HEAD(&msi_page->list);
msi_page->phys = msi_addr;
msi_page->iova = iova;
- list_add(&msi_page->list, &cookie->msi_page_list);
+ list_add(&msi_page->list, msi_page_list);
return msi_page;
out_free_iova:
- iommu_dma_free_iova(cookie, iova, size, NULL);
+ iommu_dma_free_iova(domain, iova, size, NULL);
out_free_page:
kfree(msi_page);
return NULL;
}
-/**
- * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain
- * @desc: MSI descriptor, will store the MSI page
- * @msi_addr: MSI target address to be mapped
- *
- * Return: 0 on success or negative error code if the mapping failed.
- */
-int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
+int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+ phys_addr_t msi_addr)
{
struct device *dev = msi_desc_to_dev(desc);
- struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct iommu_dma_msi_page *msi_page;
- static DEFINE_MUTEX(msi_prepare_lock); /* see below */
+ const struct iommu_dma_msi_page *msi_page;
- if (!domain || !domain->iova_cookie) {
- desc->iommu_cookie = NULL;
+ if (!has_msi_cookie(domain)) {
+ msi_desc_set_iommu_msi_iova(desc, 0, 0);
return 0;
}
- /*
- * In fact the whole prepare operation should already be serialised by
- * irq_domain_mutex further up the callchain, but that's pretty subtle
- * on its own, so consider this locking as failsafe documentation...
- */
- mutex_lock(&msi_prepare_lock);
+ iommu_group_mutex_assert(dev);
msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
- mutex_unlock(&msi_prepare_lock);
-
- msi_desc_set_iommu_cookie(desc, msi_page);
-
if (!msi_page)
return -ENOMEM;
- return 0;
-}
-/**
- * iommu_dma_compose_msi_msg() - Apply translation to an MSI message
- * @desc: MSI descriptor prepared by iommu_dma_prepare_msi()
- * @msg: MSI message containing target physical address
- */
-void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
-{
- struct device *dev = msi_desc_to_dev(desc);
- const struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- const struct iommu_dma_msi_page *msi_page;
-
- msi_page = msi_desc_get_iommu_cookie(desc);
-
- if (!domain || !domain->iova_cookie || WARN_ON(!msi_page))
- return;
-
- msg->address_hi = upper_32_bits(msi_page->iova);
- msg->address_lo &= cookie_msi_granule(domain->iova_cookie) - 1;
- msg->address_lo += lower_32_bits(msi_page->iova);
+ msi_desc_set_iommu_msi_iova(desc, msi_page->iova,
+ ilog2(cookie_msi_granule(domain)));
+ return 0;
}
static int iommu_dma_init(void)
diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h
index c12d63457c76..eca201c1f963 100644
--- a/drivers/iommu/dma-iommu.h
+++ b/drivers/iommu/dma-iommu.h
@@ -13,11 +13,15 @@ void iommu_setup_dma_ops(struct device *dev);
int iommu_get_dma_cookie(struct iommu_domain *domain);
void iommu_put_dma_cookie(struct iommu_domain *domain);
+void iommu_put_msi_cookie(struct iommu_domain *domain);
int iommu_dma_init_fq(struct iommu_domain *domain);
void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
+int iommu_dma_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+ phys_addr_t msi_addr);
+
extern bool iommu_dma_forcedac;
#else /* CONFIG_IOMMU_DMA */
@@ -40,9 +44,19 @@ static inline void iommu_put_dma_cookie(struct iommu_domain *domain)
{
}
+static inline void iommu_put_msi_cookie(struct iommu_domain *domain)
+{
+}
+
static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list)
{
}
+static inline int iommu_dma_sw_msi(struct iommu_domain *domain,
+ struct msi_desc *desc, phys_addr_t msi_addr)
+{
+ return -ENODEV;
+}
+
#endif /* CONFIG_IOMMU_DMA */
#endif /* __DMA_IOMMU_H */
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index c666ecab955d..317266aca6e2 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -249,7 +249,7 @@ struct exynos_iommu_domain {
struct list_head clients; /* list of sysmmu_drvdata.domain_node */
sysmmu_pte_t *pgtable; /* lv1 page table, 16KB */
short *lv2entcnt; /* free lv2 entry counter for each section */
- spinlock_t lock; /* lock for modyfying list of clients */
+ spinlock_t lock; /* lock for modifying list of clients */
spinlock_t pgtablelock; /* lock for modifying page table @ pgtable */
struct iommu_domain domain; /* generic domain data structure */
};
@@ -292,7 +292,7 @@ struct sysmmu_drvdata {
struct clk *aclk; /* SYSMMU's aclk clock */
struct clk *pclk; /* SYSMMU's pclk clock */
struct clk *clk_master; /* master's device clock */
- spinlock_t lock; /* lock for modyfying state */
+ spinlock_t lock; /* lock for modifying state */
bool active; /* current status */
struct exynos_iommu_domain *domain; /* domain we belong to */
struct list_head domain_node; /* node for domain clients list */
@@ -746,7 +746,7 @@ static int exynos_sysmmu_probe(struct platform_device *pdev)
ret = devm_request_irq(dev, irq, exynos_sysmmu_irq, 0,
dev_name(dev), data);
if (ret) {
- dev_err(dev, "Unabled to register handler of irq %d\n", irq);
+ dev_err(dev, "Unable to register handler of irq %d\n", irq);
return ret;
}
@@ -832,7 +832,7 @@ static int __maybe_unused exynos_sysmmu_suspend(struct device *dev)
struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
mutex_lock(&owner->rpm_lock);
- if (&data->domain->domain != &exynos_identity_domain) {
+ if (data->domain) {
dev_dbg(data->sysmmu, "saving state\n");
__sysmmu_disable(data);
}
@@ -850,7 +850,7 @@ static int __maybe_unused exynos_sysmmu_resume(struct device *dev)
struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
mutex_lock(&owner->rpm_lock);
- if (&data->domain->domain != &exynos_identity_domain) {
+ if (data->domain) {
dev_dbg(data->sysmmu, "restoring state\n");
__sysmmu_enable(data);
}
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index 8a5c17b97310..761ab647f372 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -130,7 +130,7 @@ static int __init hyperv_prepare_irq_remapping(void)
x86_init.hyper.msi_ext_dest_id())
return -ENODEV;
- if (hv_root_partition) {
+ if (hv_root_partition()) {
name = "HYPERV-ROOT-IR";
ops = &hyperv_root_ir_domain_ops;
} else {
@@ -151,7 +151,7 @@ static int __init hyperv_prepare_irq_remapping(void)
return -ENOMEM;
}
- if (hv_root_partition)
+ if (hv_root_partition())
return 0; /* The rest is only relevant to guests */
/*
@@ -164,8 +164,8 @@ static int __init hyperv_prepare_irq_remapping(void)
* max cpu affinity for IOAPIC irqs. Scan cpu 0-255 and set cpu
* into ioapic_max_cpumask if its APIC ID is less than 256.
*/
- for (i = min_t(unsigned int, num_possible_cpus() - 1, 255); i >= 0; i--)
- if (cpu_physical_id(i) < 256)
+ for (i = min_t(unsigned int, nr_cpu_ids - 1, 255); i >= 0; i--)
+ if (cpu_possible(i) && cpu_physical_id(i) < 256)
cpumask_set_cpu(i, &ioapic_max_cpumask);
return 0;
@@ -217,7 +217,7 @@ hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
status = hv_unmap_ioapic_interrupt(ioapic_id, &entry);
if (status != HV_STATUS_SUCCESS)
- pr_debug("%s: unexpected unmap status %lld\n", __func__, status);
+ hv_status_debug(status, "failed to unmap\n");
data->entry.ioapic_rte.as_uint64 = 0;
data->entry.source = 0; /* Invalid source */
@@ -228,7 +228,7 @@ hyperv_root_ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
vector, &entry);
if (status != HV_STATUS_SUCCESS) {
- pr_err("%s: map hypercall failed, status %lld\n", __func__, status);
+ hv_status_err(status, "map failed\n");
return;
}
diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile
index d3bb0798092d..6c7528130cf9 100644
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o nested.o cache.o prq.o
-obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
+obj-$(CONFIG_DMAR_TABLE) += trace.o
obj-$(CONFIG_DMAR_PERF) += perf.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c
index 09694cca8752..47692cbfaabd 100644
--- a/drivers/iommu/intel/cache.c
+++ b/drivers/iommu/intel/cache.c
@@ -40,13 +40,13 @@ static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
}
/* Assign a cache tag with specified type to domain. */
-static int cache_tag_assign(struct dmar_domain *domain, u16 did,
- struct device *dev, ioasid_t pasid,
- enum cache_tag_type type)
+int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev,
+ ioasid_t pasid, enum cache_tag_type type)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
struct cache_tag *tag, *temp;
+ struct list_head *prev;
unsigned long flags;
tag = kzalloc(sizeof(*tag), GFP_KERNEL);
@@ -65,6 +65,7 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
tag->dev = iommu->iommu.dev;
spin_lock_irqsave(&domain->cache_lock, flags);
+ prev = &domain->cache_tags;
list_for_each_entry(temp, &domain->cache_tags, node) {
if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
temp->users++;
@@ -73,8 +74,15 @@ static int cache_tag_assign(struct dmar_domain *domain, u16 did,
trace_cache_tag_assign(temp);
return 0;
}
+ if (temp->iommu == iommu)
+ prev = &temp->node;
}
- list_add_tail(&tag->node, &domain->cache_tags);
+ /*
+ * Link cache tags of same iommu unit together, so corresponding
+ * flush ops can be batched for iommu unit.
+ */
+ list_add(&tag->node, prev);
+
spin_unlock_irqrestore(&domain->cache_lock, flags);
trace_cache_tag_assign(tag);
diff --git a/drivers/iommu/intel/cap_audit.c b/drivers/iommu/intel/cap_audit.c
deleted file mode 100644
index 9862dc20b35e..000000000000
--- a/drivers/iommu/intel/cap_audit.c
+++ /dev/null
@@ -1,217 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * cap_audit.c - audit iommu capabilities for boot time and hot plug
- *
- * Copyright (C) 2021 Intel Corporation
- *
- * Author: Kyung Min Park <kyung.min.park@intel.com>
- * Lu Baolu <baolu.lu@linux.intel.com>
- */
-
-#define pr_fmt(fmt) "DMAR: " fmt
-
-#include "iommu.h"
-#include "cap_audit.h"
-
-static u64 intel_iommu_cap_sanity;
-static u64 intel_iommu_ecap_sanity;
-
-static inline void check_irq_capabilities(struct intel_iommu *a,
- struct intel_iommu *b)
-{
- CHECK_FEATURE_MISMATCH(a, b, cap, pi_support, CAP_PI_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, eim_support, ECAP_EIM_MASK);
-}
-
-static inline void check_dmar_capabilities(struct intel_iommu *a,
- struct intel_iommu *b)
-{
- MINIMAL_FEATURE_IOMMU(b, cap, CAP_MAMV_MASK);
- MINIMAL_FEATURE_IOMMU(b, cap, CAP_NFR_MASK);
- MINIMAL_FEATURE_IOMMU(b, cap, CAP_SLLPS_MASK);
- MINIMAL_FEATURE_IOMMU(b, cap, CAP_FRO_MASK);
- MINIMAL_FEATURE_IOMMU(b, cap, CAP_MGAW_MASK);
- MINIMAL_FEATURE_IOMMU(b, cap, CAP_SAGAW_MASK);
- MINIMAL_FEATURE_IOMMU(b, cap, CAP_NDOMS_MASK);
- MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_PSS_MASK);
- MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_MHMV_MASK);
- MINIMAL_FEATURE_IOMMU(b, ecap, ECAP_IRO_MASK);
-
- CHECK_FEATURE_MISMATCH(a, b, cap, fl5lp_support, CAP_FL5LP_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, fl1gp_support, CAP_FL1GP_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, read_drain, CAP_RD_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, write_drain, CAP_WD_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, pgsel_inv, CAP_PSI_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, zlr, CAP_ZLR_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, caching_mode, CAP_CM_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, phmr, CAP_PHMR_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, plmr, CAP_PLMR_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, rwbf, CAP_RWBF_MASK);
- CHECK_FEATURE_MISMATCH(a, b, cap, afl, CAP_AFL_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, rps, ECAP_RPS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, smpwc, ECAP_SMPWC_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, flts, ECAP_FLTS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, slts, ECAP_SLTS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, nwfs, ECAP_NWFS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, slads, ECAP_SLADS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, smts, ECAP_SMTS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, pds, ECAP_PDS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, dit, ECAP_DIT_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, pasid, ECAP_PASID_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, eafs, ECAP_EAFS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, srs, ECAP_SRS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, ers, ECAP_ERS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, prs, ECAP_PRS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, nest, ECAP_NEST_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, mts, ECAP_MTS_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, sc_support, ECAP_SC_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, pass_through, ECAP_PT_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, dev_iotlb_support, ECAP_DT_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, qis, ECAP_QI_MASK);
- CHECK_FEATURE_MISMATCH(a, b, ecap, coherent, ECAP_C_MASK);
-}
-
-static int cap_audit_hotplug(struct intel_iommu *iommu, enum cap_audit_type type)
-{
- bool mismatch = false;
- u64 old_cap = intel_iommu_cap_sanity;
- u64 old_ecap = intel_iommu_ecap_sanity;
-
- if (type == CAP_AUDIT_HOTPLUG_IRQR) {
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pi_support, CAP_PI_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eim_support, ECAP_EIM_MASK);
- goto out;
- }
-
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, fl5lp_support, CAP_FL5LP_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, fl1gp_support, CAP_FL1GP_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, read_drain, CAP_RD_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, write_drain, CAP_WD_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, pgsel_inv, CAP_PSI_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, zlr, CAP_ZLR_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, caching_mode, CAP_CM_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, phmr, CAP_PHMR_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, plmr, CAP_PLMR_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, rwbf, CAP_RWBF_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, cap, afl, CAP_AFL_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, rps, ECAP_RPS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smpwc, ECAP_SMPWC_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, flts, ECAP_FLTS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slts, ECAP_SLTS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nwfs, ECAP_NWFS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, slads, ECAP_SLADS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, smts, ECAP_SMTS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pds, ECAP_PDS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dit, ECAP_DIT_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pasid, ECAP_PASID_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, eafs, ECAP_EAFS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, srs, ECAP_SRS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, ers, ECAP_ERS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, prs, ECAP_PRS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, nest, ECAP_NEST_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, mts, ECAP_MTS_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, sc_support, ECAP_SC_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, pass_through, ECAP_PT_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, dev_iotlb_support, ECAP_DT_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, qis, ECAP_QI_MASK);
- CHECK_FEATURE_MISMATCH_HOTPLUG(iommu, ecap, coherent, ECAP_C_MASK);
-
- /* Abort hot plug if the hot plug iommu feature is smaller than global */
- MINIMAL_FEATURE_HOTPLUG(iommu, cap, max_amask_val, CAP_MAMV_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, cap, num_fault_regs, CAP_NFR_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, cap, super_page_val, CAP_SLLPS_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, cap, fault_reg_offset, CAP_FRO_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, cap, mgaw, CAP_MGAW_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, cap, sagaw, CAP_SAGAW_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, cap, ndoms, CAP_NDOMS_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, ecap, pss, ECAP_PSS_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, ecap, max_handle_mask, ECAP_MHMV_MASK, mismatch);
- MINIMAL_FEATURE_HOTPLUG(iommu, ecap, iotlb_offset, ECAP_IRO_MASK, mismatch);
-
-out:
- if (mismatch) {
- intel_iommu_cap_sanity = old_cap;
- intel_iommu_ecap_sanity = old_ecap;
- return -EFAULT;
- }
-
- return 0;
-}
-
-static int cap_audit_static(struct intel_iommu *iommu, enum cap_audit_type type)
-{
- struct dmar_drhd_unit *d;
- struct intel_iommu *i;
- int rc = 0;
-
- rcu_read_lock();
- if (list_empty(&dmar_drhd_units))
- goto out;
-
- for_each_active_iommu(i, d) {
- if (!iommu) {
- intel_iommu_ecap_sanity = i->ecap;
- intel_iommu_cap_sanity = i->cap;
- iommu = i;
- continue;
- }
-
- if (type == CAP_AUDIT_STATIC_DMAR)
- check_dmar_capabilities(iommu, i);
- else
- check_irq_capabilities(iommu, i);
- }
-
- /*
- * If the system is sane to support scalable mode, either SL or FL
- * should be sane.
- */
- if (intel_cap_smts_sanity() &&
- !intel_cap_flts_sanity() && !intel_cap_slts_sanity())
- rc = -EOPNOTSUPP;
-
-out:
- rcu_read_unlock();
- return rc;
-}
-
-int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu)
-{
- switch (type) {
- case CAP_AUDIT_STATIC_DMAR:
- case CAP_AUDIT_STATIC_IRQR:
- return cap_audit_static(iommu, type);
- case CAP_AUDIT_HOTPLUG_DMAR:
- case CAP_AUDIT_HOTPLUG_IRQR:
- return cap_audit_hotplug(iommu, type);
- default:
- break;
- }
-
- return -EFAULT;
-}
-
-bool intel_cap_smts_sanity(void)
-{
- return ecap_smts(intel_iommu_ecap_sanity);
-}
-
-bool intel_cap_pasid_sanity(void)
-{
- return ecap_pasid(intel_iommu_ecap_sanity);
-}
-
-bool intel_cap_nest_sanity(void)
-{
- return ecap_nest(intel_iommu_ecap_sanity);
-}
-
-bool intel_cap_flts_sanity(void)
-{
- return ecap_flts(intel_iommu_ecap_sanity);
-}
-
-bool intel_cap_slts_sanity(void)
-{
- return ecap_slts(intel_iommu_ecap_sanity);
-}
diff --git a/drivers/iommu/intel/cap_audit.h b/drivers/iommu/intel/cap_audit.h
deleted file mode 100644
index d07b75938961..000000000000
--- a/drivers/iommu/intel/cap_audit.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * cap_audit.h - audit iommu capabilities header
- *
- * Copyright (C) 2021 Intel Corporation
- *
- * Author: Kyung Min Park <kyung.min.park@intel.com>
- */
-
-/*
- * Capability Register Mask
- */
-#define CAP_FL5LP_MASK BIT_ULL(60)
-#define CAP_PI_MASK BIT_ULL(59)
-#define CAP_FL1GP_MASK BIT_ULL(56)
-#define CAP_RD_MASK BIT_ULL(55)
-#define CAP_WD_MASK BIT_ULL(54)
-#define CAP_MAMV_MASK GENMASK_ULL(53, 48)
-#define CAP_NFR_MASK GENMASK_ULL(47, 40)
-#define CAP_PSI_MASK BIT_ULL(39)
-#define CAP_SLLPS_MASK GENMASK_ULL(37, 34)
-#define CAP_FRO_MASK GENMASK_ULL(33, 24)
-#define CAP_ZLR_MASK BIT_ULL(22)
-#define CAP_MGAW_MASK GENMASK_ULL(21, 16)
-#define CAP_SAGAW_MASK GENMASK_ULL(12, 8)
-#define CAP_CM_MASK BIT_ULL(7)
-#define CAP_PHMR_MASK BIT_ULL(6)
-#define CAP_PLMR_MASK BIT_ULL(5)
-#define CAP_RWBF_MASK BIT_ULL(4)
-#define CAP_AFL_MASK BIT_ULL(3)
-#define CAP_NDOMS_MASK GENMASK_ULL(2, 0)
-
-/*
- * Extended Capability Register Mask
- */
-#define ECAP_RPS_MASK BIT_ULL(49)
-#define ECAP_SMPWC_MASK BIT_ULL(48)
-#define ECAP_FLTS_MASK BIT_ULL(47)
-#define ECAP_SLTS_MASK BIT_ULL(46)
-#define ECAP_SLADS_MASK BIT_ULL(45)
-#define ECAP_VCS_MASK BIT_ULL(44)
-#define ECAP_SMTS_MASK BIT_ULL(43)
-#define ECAP_PDS_MASK BIT_ULL(42)
-#define ECAP_DIT_MASK BIT_ULL(41)
-#define ECAP_PASID_MASK BIT_ULL(40)
-#define ECAP_PSS_MASK GENMASK_ULL(39, 35)
-#define ECAP_EAFS_MASK BIT_ULL(34)
-#define ECAP_NWFS_MASK BIT_ULL(33)
-#define ECAP_SRS_MASK BIT_ULL(31)
-#define ECAP_ERS_MASK BIT_ULL(30)
-#define ECAP_PRS_MASK BIT_ULL(29)
-#define ECAP_NEST_MASK BIT_ULL(26)
-#define ECAP_MTS_MASK BIT_ULL(25)
-#define ECAP_MHMV_MASK GENMASK_ULL(23, 20)
-#define ECAP_IRO_MASK GENMASK_ULL(17, 8)
-#define ECAP_SC_MASK BIT_ULL(7)
-#define ECAP_PT_MASK BIT_ULL(6)
-#define ECAP_EIM_MASK BIT_ULL(4)
-#define ECAP_DT_MASK BIT_ULL(2)
-#define ECAP_QI_MASK BIT_ULL(1)
-#define ECAP_C_MASK BIT_ULL(0)
-
-/*
- * u64 intel_iommu_cap_sanity, intel_iommu_ecap_sanity will be adjusted as each
- * IOMMU gets audited.
- */
-#define DO_CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \
-do { \
- if (cap##_##feature(a) != cap##_##feature(b)) { \
- intel_iommu_##cap##_sanity &= ~(MASK); \
- pr_info("IOMMU feature %s inconsistent", #feature); \
- } \
-} while (0)
-
-#define CHECK_FEATURE_MISMATCH(a, b, cap, feature, MASK) \
- DO_CHECK_FEATURE_MISMATCH((a)->cap, (b)->cap, cap, feature, MASK)
-
-#define CHECK_FEATURE_MISMATCH_HOTPLUG(b, cap, feature, MASK) \
-do { \
- if (cap##_##feature(intel_iommu_##cap##_sanity)) \
- DO_CHECK_FEATURE_MISMATCH(intel_iommu_##cap##_sanity, \
- (b)->cap, cap, feature, MASK); \
-} while (0)
-
-#define MINIMAL_FEATURE_IOMMU(iommu, cap, MASK) \
-do { \
- u64 min_feature = intel_iommu_##cap##_sanity & (MASK); \
- min_feature = min_t(u64, min_feature, (iommu)->cap & (MASK)); \
- intel_iommu_##cap##_sanity = (intel_iommu_##cap##_sanity & ~(MASK)) | \
- min_feature; \
-} while (0)
-
-#define MINIMAL_FEATURE_HOTPLUG(iommu, cap, feature, MASK, mismatch) \
-do { \
- if ((intel_iommu_##cap##_sanity & (MASK)) > \
- (cap##_##feature((iommu)->cap))) \
- mismatch = true; \
- else \
- (iommu)->cap = ((iommu)->cap & ~(MASK)) | \
- (intel_iommu_##cap##_sanity & (MASK)); \
-} while (0)
-
-enum cap_audit_type {
- CAP_AUDIT_STATIC_DMAR,
- CAP_AUDIT_STATIC_IRQR,
- CAP_AUDIT_HOTPLUG_DMAR,
- CAP_AUDIT_HOTPLUG_IRQR,
-};
-
-bool intel_cap_smts_sanity(void);
-bool intel_cap_pasid_sanity(void);
-bool intel_cap_nest_sanity(void);
-bool intel_cap_flts_sanity(void);
-bool intel_cap_slts_sanity(void);
-
-static inline bool scalable_mode_support(void)
-{
- return (intel_iommu_sm && intel_cap_smts_sanity());
-}
-
-static inline bool pasid_mode_support(void)
-{
- return scalable_mode_support() && intel_cap_pasid_sanity();
-}
-
-static inline bool nested_mode_support(void)
-{
- return scalable_mode_support() && intel_cap_nest_sanity();
-}
-
-int intel_cap_audit(enum cap_audit_type type, struct intel_iommu *iommu);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 9ab5371c3538..ff07ee2940f5 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -29,7 +29,6 @@
#include "../irq_remapping.h"
#include "../iommu-pages.h"
#include "pasid.h"
-#include "cap_audit.h"
#include "perfmon.h"
#define ROOT_SIZE VTD_PAGE_SIZE
@@ -738,7 +737,8 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
return NULL;
domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
- pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
+ pteval = virt_to_phys(tmp_page) | DMA_PTE_READ |
+ DMA_PTE_WRITE;
if (domain->use_first_level)
pteval |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
@@ -1173,32 +1173,59 @@ static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev)
return true;
}
-static void iommu_enable_pci_caps(struct device_domain_info *info)
+static void iommu_enable_pci_ats(struct device_domain_info *info)
{
struct pci_dev *pdev;
- if (!dev_is_pci(info->dev))
+ if (!info->ats_supported)
return;
pdev = to_pci_dev(info->dev);
- if (info->ats_supported && pci_ats_page_aligned(pdev) &&
- !pci_enable_ats(pdev, VTD_PAGE_SHIFT))
+ if (!pci_ats_page_aligned(pdev))
+ return;
+
+ if (!pci_enable_ats(pdev, VTD_PAGE_SHIFT))
info->ats_enabled = 1;
}
-static void iommu_disable_pci_caps(struct device_domain_info *info)
+static void iommu_disable_pci_ats(struct device_domain_info *info)
+{
+ if (!info->ats_enabled)
+ return;
+
+ pci_disable_ats(to_pci_dev(info->dev));
+ info->ats_enabled = 0;
+}
+
+static void iommu_enable_pci_pri(struct device_domain_info *info)
{
struct pci_dev *pdev;
- if (!dev_is_pci(info->dev))
+ if (!info->ats_enabled || !info->pri_supported)
return;
pdev = to_pci_dev(info->dev);
+ /* PASID is required in PRG Response Message. */
+ if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
+ return;
- if (info->ats_enabled) {
- pci_disable_ats(pdev);
- info->ats_enabled = 0;
- }
+ if (pci_reset_pri(pdev))
+ return;
+
+ if (!pci_enable_pri(pdev, PRQ_DEPTH))
+ info->pri_enabled = 1;
+}
+
+static void iommu_disable_pci_pri(struct device_domain_info *info)
+{
+ if (!info->pri_enabled)
+ return;
+
+ if (WARN_ON(info->iopf_refcount))
+ iopf_queue_remove_device(info->iommu->iopf_queue, info->dev);
+
+ pci_disable_pri(to_pci_dev(info->dev));
+ info->pri_enabled = 0;
}
static void intel_flush_iotlb_all(struct iommu_domain *domain)
@@ -1557,12 +1584,19 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev)
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
u8 bus = info->bus, devfn = info->devfn;
+ int ret;
if (!dev_is_pci(dev))
return domain_context_mapping_one(domain, iommu, bus, devfn);
- return pci_for_each_dma_alias(to_pci_dev(dev),
- domain_context_mapping_cb, domain);
+ ret = pci_for_each_dma_alias(to_pci_dev(dev),
+ domain_context_mapping_cb, domain);
+ if (ret)
+ return ret;
+
+ iommu_enable_pci_ats(info);
+
+ return 0;
}
/* Return largest possible superpage level for a given mapping */
@@ -1749,7 +1783,7 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8
context_clear_entry(context);
__iommu_flush_cache(iommu, context, sizeof(*context));
spin_unlock(&iommu->lock);
- intel_context_flush_present(info, context, did, true);
+ intel_context_flush_no_pasid(info, context, did);
}
int __domain_setup_first_level(struct intel_iommu *iommu,
@@ -1825,6 +1859,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
return ret;
info->domain = domain;
+ info->domain_attached = true;
spin_lock_irqsave(&domain->lock, flags);
list_add(&info->link, &domain->devices);
spin_unlock_irqrestore(&domain->lock, flags);
@@ -1844,8 +1879,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
if (ret)
goto out_block_translation;
- iommu_enable_pci_caps(info);
-
ret = cache_tag_assign_domain(domain, dev, IOMMU_NO_PASID);
if (ret)
goto out_block_translation;
@@ -2118,10 +2151,6 @@ static int __init init_dmars(void)
struct intel_iommu *iommu;
int ret;
- ret = intel_cap_audit(CAP_AUDIT_STATIC_DMAR, NULL);
- if (ret)
- goto free_iommu;
-
for_each_iommu(iommu, drhd) {
if (drhd->ignored) {
iommu_disable_translation(iommu);
@@ -2617,10 +2646,6 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
struct intel_iommu *iommu = dmaru->iommu;
int ret;
- ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_DMAR, iommu);
- if (ret)
- goto out;
-
/*
* Disable translation if already enabled prior to OS handover.
*/
@@ -2880,16 +2905,19 @@ void intel_iommu_shutdown(void)
if (no_iommu || dmar_disabled)
return;
- down_write(&dmar_global_lock);
+ /*
+ * All other CPUs were brought down, hotplug interrupts were disabled,
+ * no lock and RCU checking needed anymore
+ */
+ list_for_each_entry(drhd, &dmar_drhd_units, list) {
+ iommu = drhd->iommu;
- /* Disable PMRs explicitly here. */
- for_each_iommu(iommu, drhd)
+ /* Disable PMRs explicitly here. */
iommu_disable_protect_mem_regions(iommu);
- /* Make sure the IOMMUs are switched off */
- intel_disable_iommus();
-
- up_write(&dmar_global_lock);
+ /* Make sure the IOMMUs are switched off */
+ iommu_disable_translation(iommu);
+ }
}
static struct intel_iommu *dev_to_intel_iommu(struct device *dev)
@@ -3022,6 +3050,7 @@ static int __init probe_acpi_namespace_devices(void)
if (dev->bus != &acpi_bus_type)
continue;
+ up_read(&dmar_global_lock);
adev = to_acpi_device(dev);
mutex_lock(&adev->physical_node_lock);
list_for_each_entry(pn,
@@ -3031,6 +3060,7 @@ static int __init probe_acpi_namespace_devices(void)
break;
}
mutex_unlock(&adev->physical_node_lock);
+ down_read(&dmar_global_lock);
if (ret)
return ret;
@@ -3214,6 +3244,7 @@ static void domain_context_clear(struct device_domain_info *info)
pci_for_each_dma_alias(to_pci_dev(info->dev),
&domain_context_clear_one_cb, info);
+ iommu_disable_pci_ats(info);
}
/*
@@ -3227,10 +3258,13 @@ void device_block_translation(struct device *dev)
struct intel_iommu *iommu = info->iommu;
unsigned long flags;
+ /* Device in DMA blocking state. Noting to do. */
+ if (!info->domain_attached)
+ return;
+
if (info->domain)
cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID);
- iommu_disable_pci_caps(info);
if (!dev_is_real_dma_subdevice(dev)) {
if (sm_supported(iommu))
intel_pasid_tear_down_entry(iommu, dev,
@@ -3239,6 +3273,9 @@ void device_block_translation(struct device *dev)
domain_context_clear(info);
}
+ /* Device now in DMA blocking state. */
+ info->domain_attached = false;
+
if (!info->domain)
return;
@@ -3257,10 +3294,15 @@ static int blocking_domain_attach_dev(struct iommu_domain *domain,
return 0;
}
+static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old);
+
static struct iommu_domain blocking_domain = {
.type = IOMMU_DOMAIN_BLOCKED,
.ops = &(const struct iommu_domain_ops) {
.attach_dev = blocking_domain_attach_dev,
+ .set_dev_pasid = blocking_domain_set_dev_pasid,
}
};
@@ -3349,8 +3391,8 @@ intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
bool first_stage;
if (flags &
- (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING
- | IOMMU_HWPT_FAULT_ID_VALID)))
+ (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
+ IOMMU_HWPT_ALLOC_PASID)))
return ERR_PTR(-EOPNOTSUPP);
if (nested_parent && !nested_supported(iommu))
return ERR_PTR(-EOPNOTSUPP);
@@ -3751,16 +3793,6 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
intel_iommu_debugfs_create_dev(info);
- /*
- * The PCIe spec, in its wisdom, declares that the behaviour of the
- * device is undefined if you enable PASID support after ATS support.
- * So always enable PASID support on devices which have it, even if
- * we can't yet know if we're ever going to use it.
- */
- if (info->pasid_supported &&
- !pci_enable_pasid(pdev, info->pasid_supported & ~1))
- info->pasid_enabled = 1;
-
return &iommu->iommu;
free_table:
intel_pasid_free_table(dev);
@@ -3772,11 +3804,43 @@ free:
return ERR_PTR(ret);
}
+static void intel_iommu_probe_finalize(struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu = info->iommu;
+
+ /*
+ * The PCIe spec, in its wisdom, declares that the behaviour of the
+ * device is undefined if you enable PASID support after ATS support.
+ * So always enable PASID support on devices which have it, even if
+ * we can't yet know if we're ever going to use it.
+ */
+ if (info->pasid_supported &&
+ !pci_enable_pasid(to_pci_dev(dev), info->pasid_supported & ~1))
+ info->pasid_enabled = 1;
+
+ if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
+ iommu_enable_pci_ats(info);
+ /* Assign a DEVTLB cache tag to the default domain. */
+ if (info->ats_enabled && info->domain) {
+ u16 did = domain_id_iommu(info->domain, iommu);
+
+ if (cache_tag_assign(info->domain, did, dev,
+ IOMMU_NO_PASID, CACHE_TAG_DEVTLB))
+ iommu_disable_pci_ats(info);
+ }
+ }
+ iommu_enable_pci_pri(info);
+}
+
static void intel_iommu_release_device(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
+ iommu_disable_pci_pri(info);
+ iommu_disable_pci_ats(info);
+
if (info->pasid_enabled) {
pci_disable_pasid(to_pci_dev(dev));
info->pasid_enabled = 0;
@@ -3794,7 +3858,6 @@ static void intel_iommu_release_device(struct device *dev)
intel_pasid_free_table(dev);
intel_iommu_debugfs_remove_dev(info);
kfree(info);
- set_dma_ops(dev, NULL);
}
static void intel_iommu_get_resv_regions(struct device *device,
@@ -3863,151 +3926,41 @@ static struct iommu_group *intel_iommu_device_group(struct device *dev)
return generic_device_group(dev);
}
-static int intel_iommu_enable_sva(struct device *dev)
+int intel_iommu_enable_iopf(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct intel_iommu *iommu;
-
- if (!info || dmar_disabled)
- return -EINVAL;
-
- iommu = info->iommu;
- if (!iommu)
- return -EINVAL;
-
- if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
- return -ENODEV;
-
- if (!info->pasid_enabled || !info->ats_enabled)
- return -EINVAL;
-
- /*
- * Devices having device-specific I/O fault handling should not
- * support PCI/PRI. The IOMMU side has no means to check the
- * capability of device-specific IOPF. Therefore, IOMMU can only
- * default that if the device driver enables SVA on a non-PRI
- * device, it will handle IOPF in its own way.
- */
- if (!info->pri_supported)
- return 0;
-
- /* Devices supporting PRI should have it enabled. */
- if (!info->pri_enabled)
- return -EINVAL;
-
- return 0;
-}
-
-static int context_flip_pri(struct device_domain_info *info, bool enable)
-{
struct intel_iommu *iommu = info->iommu;
- u8 bus = info->bus, devfn = info->devfn;
- struct context_entry *context;
- u16 did;
-
- spin_lock(&iommu->lock);
- if (context_copied(iommu, bus, devfn)) {
- spin_unlock(&iommu->lock);
- return -EINVAL;
- }
-
- context = iommu_context_addr(iommu, bus, devfn, false);
- if (!context || !context_present(context)) {
- spin_unlock(&iommu->lock);
- return -ENODEV;
- }
- did = context_domain_id(context);
-
- if (enable)
- context_set_sm_pre(context);
- else
- context_clear_sm_pre(context);
-
- if (!ecap_coherent(iommu->ecap))
- clflush_cache_range(context, sizeof(*context));
- intel_context_flush_present(info, context, did, true);
- spin_unlock(&iommu->lock);
-
- return 0;
-}
-
-static int intel_iommu_enable_iopf(struct device *dev)
-{
- struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL;
- struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct intel_iommu *iommu;
int ret;
- if (!pdev || !info || !info->ats_enabled || !info->pri_supported)
+ if (!info->pri_enabled)
return -ENODEV;
- if (info->pri_enabled)
- return -EBUSY;
-
- iommu = info->iommu;
- if (!iommu)
- return -EINVAL;
-
- /* PASID is required in PRG Response Message. */
- if (info->pasid_enabled && !pci_prg_resp_pasid_required(pdev))
- return -EINVAL;
-
- ret = pci_reset_pri(pdev);
- if (ret)
- return ret;
+ if (info->iopf_refcount) {
+ info->iopf_refcount++;
+ return 0;
+ }
ret = iopf_queue_add_device(iommu->iopf_queue, dev);
if (ret)
return ret;
- ret = context_flip_pri(info, true);
- if (ret)
- goto err_remove_device;
-
- ret = pci_enable_pri(pdev, PRQ_DEPTH);
- if (ret)
- goto err_clear_pri;
-
- info->pri_enabled = 1;
+ info->iopf_refcount = 1;
return 0;
-err_clear_pri:
- context_flip_pri(info, false);
-err_remove_device:
- iopf_queue_remove_device(iommu->iopf_queue, dev);
-
- return ret;
}
-static int intel_iommu_disable_iopf(struct device *dev)
+void intel_iommu_disable_iopf(struct device *dev)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
struct intel_iommu *iommu = info->iommu;
- if (!info->pri_enabled)
- return -EINVAL;
+ if (WARN_ON(!info->pri_enabled || !info->iopf_refcount))
+ return;
- /* Disable new PRI reception: */
- context_flip_pri(info, false);
+ if (--info->iopf_refcount)
+ return;
- /*
- * Remove device from fault queue and acknowledge all outstanding
- * PRQs to the device:
- */
iopf_queue_remove_device(iommu->iopf_queue, dev);
-
- /*
- * PCIe spec states that by clearing PRI enable bit, the Page
- * Request Interface will not issue new page requests, but has
- * outstanding page requests that have been transmitted or are
- * queued for transmission. This is supposed to be called after
- * the device driver has stopped DMA, all PASIDs have been
- * unbound and the outstanding PRQs have been drained.
- */
- pci_disable_pri(to_pci_dev(dev));
- info->pri_enabled = 0;
-
- return 0;
}
static int
@@ -4018,7 +3971,7 @@ intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
return intel_iommu_enable_iopf(dev);
case IOMMU_DEV_FEAT_SVA:
- return intel_iommu_enable_sva(dev);
+ return 0;
default:
return -ENODEV;
@@ -4030,7 +3983,8 @@ intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
{
switch (feat) {
case IOMMU_DEV_FEAT_IOPF:
- return intel_iommu_disable_iopf(dev);
+ intel_iommu_disable_iopf(dev);
+ return 0;
case IOMMU_DEV_FEAT_SVA:
return 0;
@@ -4107,13 +4061,16 @@ void domain_remove_dev_pasid(struct iommu_domain *domain,
}
}
-static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
- struct iommu_domain *domain)
+static int blocking_domain_set_dev_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
{
struct device_domain_info *info = dev_iommu_priv_get(dev);
intel_pasid_tear_down_entry(info->iommu, dev, pasid, false);
- domain_remove_dev_pasid(domain, dev, pasid);
+ domain_remove_dev_pasid(old, dev, pasid);
+
+ return 0;
}
struct dev_pasid_info *
@@ -4412,13 +4369,13 @@ static int identity_domain_attach_dev(struct iommu_domain *domain, struct device
if (dev_is_real_dma_subdevice(dev))
return 0;
- if (sm_supported(iommu)) {
+ if (sm_supported(iommu))
ret = intel_pasid_setup_pass_through(iommu, dev, IOMMU_NO_PASID);
- if (!ret)
- iommu_enable_pci_caps(info);
- } else {
+ else
ret = device_setup_pass_through(dev);
- }
+
+ if (!ret)
+ info->domain_attached = true;
return ret;
}
@@ -4450,21 +4407,6 @@ static struct iommu_domain identity_domain = {
},
};
-static struct iommu_domain *intel_iommu_domain_alloc_paging(struct device *dev)
-{
- struct device_domain_info *info = dev_iommu_priv_get(dev);
- struct intel_iommu *iommu = info->iommu;
- struct dmar_domain *dmar_domain;
- bool first_stage;
-
- first_stage = first_level_by_default(iommu);
- dmar_domain = paging_domain_alloc(dev, first_stage);
- if (IS_ERR(dmar_domain))
- return ERR_CAST(dmar_domain);
-
- return &dmar_domain->domain;
-}
-
const struct iommu_ops intel_iommu_ops = {
.blocked_domain = &blocking_domain,
.release_domain = &blocking_domain,
@@ -4473,9 +4415,9 @@ const struct iommu_ops intel_iommu_ops = {
.hw_info = intel_iommu_hw_info,
.domain_alloc_paging_flags = intel_iommu_domain_alloc_paging_flags,
.domain_alloc_sva = intel_svm_domain_alloc,
- .domain_alloc_paging = intel_iommu_domain_alloc_paging,
.domain_alloc_nested = intel_iommu_domain_alloc_nested,
.probe_device = intel_iommu_probe_device,
+ .probe_finalize = intel_iommu_probe_finalize,
.release_device = intel_iommu_release_device,
.get_resv_regions = intel_iommu_get_resv_regions,
.device_group = intel_iommu_device_group,
@@ -4483,7 +4425,6 @@ const struct iommu_ops intel_iommu_ops = {
.dev_disable_feat = intel_iommu_dev_disable_feat,
.is_attach_deferred = intel_iommu_is_attach_deferred,
.def_domain_type = device_def_domain_type,
- .remove_dev_pasid = intel_iommu_remove_dev_pasid,
.pgsize_bitmap = SZ_4K,
.page_response = intel_iommu_page_response,
.default_domain_ops = &(const struct iommu_domain_ops) {
@@ -4518,6 +4459,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_igfx);
+/* QM57/QS57 integrated gfx malfunctions with dmar */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_iommu_igfx);
+
/* Broadwell igfx malfunctions with dmar */
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1606, quirk_iommu_igfx);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x160B, quirk_iommu_igfx);
@@ -4595,7 +4539,6 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 6ea7bbe26b19..814f5ed20018 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -773,7 +773,9 @@ struct device_domain_info {
u8 ats_supported:1;
u8 ats_enabled:1;
u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */
+ u8 domain_attached:1; /* Device has domain attached */
u8 ats_qdep;
+ unsigned int iopf_refcount;
struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
struct intel_iommu *iommu; /* IOMMU used by this device */
struct dmar_domain *domain; /* pointer to domain */
@@ -953,25 +955,6 @@ static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
return 1UL << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}
-/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
- are never going to work. */
-static inline unsigned long mm_to_dma_pfn_start(unsigned long mm_pfn)
-{
- return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
-}
-static inline unsigned long mm_to_dma_pfn_end(unsigned long mm_pfn)
-{
- return ((mm_pfn + 1) << (PAGE_SHIFT - VTD_PAGE_SHIFT)) - 1;
-}
-static inline unsigned long page_to_dma_pfn(struct page *pg)
-{
- return mm_to_dma_pfn_start(page_to_pfn(pg));
-}
-static inline unsigned long virt_to_dma_pfn(void *p)
-{
- return page_to_dma_pfn(virt_to_page(p));
-}
-
static inline void context_set_present(struct context_entry *context)
{
context->lo |= 1;
@@ -1294,6 +1277,8 @@ struct cache_tag {
unsigned int users;
};
+int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev,
+ ioasid_t pasid, enum cache_tag_type type);
int cache_tag_assign_domain(struct dmar_domain *domain,
struct device *dev, ioasid_t pasid);
void cache_tag_unassign_domain(struct dmar_domain *domain,
@@ -1304,9 +1289,8 @@ void cache_tag_flush_all(struct dmar_domain *domain);
void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
unsigned long end);
-void intel_context_flush_present(struct device_domain_info *info,
- struct context_entry *context,
- u16 did, bool affect_domains);
+void intel_context_flush_no_pasid(struct device_domain_info *info,
+ struct context_entry *context, u16 did);
int intel_iommu_enable_prq(struct intel_iommu *iommu);
int intel_iommu_finish_prq(struct intel_iommu *iommu);
@@ -1314,6 +1298,9 @@ void intel_iommu_page_response(struct device *dev, struct iopf_fault *evt,
struct iommu_page_response *msg);
void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid);
+int intel_iommu_enable_iopf(struct device *dev);
+void intel_iommu_disable_iopf(struct device *dev);
+
#ifdef CONFIG_INTEL_IOMMU_SVM
void intel_svm_check(struct intel_iommu *iommu);
struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 466c1412dd45..3bc2a03cceca 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -24,12 +24,6 @@
#include "iommu.h"
#include "../irq_remapping.h"
#include "../iommu-pages.h"
-#include "cap_audit.h"
-
-enum irq_mode {
- IRQ_REMAPPING,
- IRQ_POSTING,
-};
struct ioapic_scope {
struct intel_iommu *iommu;
@@ -50,8 +44,8 @@ struct irq_2_iommu {
u16 irte_index;
u16 sub_handle;
u8 irte_mask;
- enum irq_mode mode;
bool posted_msi;
+ bool posted_vcpu;
};
struct intel_ir_data {
@@ -139,7 +133,6 @@ static int alloc_irte(struct intel_iommu *iommu,
irq_iommu->irte_index = index;
irq_iommu->sub_handle = 0;
irq_iommu->irte_mask = mask;
- irq_iommu->mode = IRQ_REMAPPING;
}
raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
@@ -194,8 +187,6 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
rc = qi_flush_iec(iommu, index, 0);
- /* Update iommu mode according to the IRTE mode */
- irq_iommu->mode = irte->pst ? IRQ_POSTING : IRQ_REMAPPING;
raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return rc;
@@ -727,9 +718,6 @@ static int __init intel_prepare_irq_remapping(void)
if (dmar_table_init() < 0)
return -ENODEV;
- if (intel_cap_audit(CAP_AUDIT_STATIC_IRQR, NULL))
- return -ENODEV;
-
if (!dmar_ir_support())
return -ENODEV;
@@ -1173,7 +1161,26 @@ static void intel_ir_reconfigure_irte_posted(struct irq_data *irqd)
static inline void intel_ir_reconfigure_irte_posted(struct irq_data *irqd) {}
#endif
-static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
+static void __intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host)
+{
+ struct intel_ir_data *ir_data = irqd->chip_data;
+
+ /*
+ * Don't modify IRTEs for IRQs that are being posted to vCPUs if the
+ * host CPU affinity changes.
+ */
+ if (ir_data->irq_2_iommu.posted_vcpu && !force_host)
+ return;
+
+ ir_data->irq_2_iommu.posted_vcpu = false;
+
+ if (ir_data->irq_2_iommu.posted_msi)
+ intel_ir_reconfigure_irte_posted(irqd);
+ else
+ modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry);
+}
+
+static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force_host)
{
struct intel_ir_data *ir_data = irqd->chip_data;
struct irte *irte = &ir_data->irte_entry;
@@ -1186,10 +1193,7 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
irte->vector = cfg->vector;
irte->dest_id = IRTE_DEST(cfg->dest_apicid);
- if (ir_data->irq_2_iommu.posted_msi)
- intel_ir_reconfigure_irte_posted(irqd);
- else if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
- modify_irte(&ir_data->irq_2_iommu, irte);
+ __intel_ir_reconfigure_irte(irqd, force_host);
}
/*
@@ -1244,7 +1248,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
/* stop posting interrupts, back to the default mode */
if (!vcpu_pi_info) {
- modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry);
+ __intel_ir_reconfigure_irte(data, true);
} else {
struct irte irte_pi;
@@ -1267,6 +1271,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
irte_pi.pda_h = (vcpu_pi_info->pi_desc_addr >> 32) &
~(-1UL << PDA_HIGH_BIT);
+ ir_data->irq_2_iommu.posted_vcpu = true;
modify_irte(&ir_data->irq_2_iommu, &irte_pi);
}
@@ -1282,43 +1287,44 @@ static struct irq_chip intel_ir_chip = {
};
/*
- * With posted MSIs, all vectors are multiplexed into a single notification
- * vector. Devices MSIs are then dispatched in a demux loop where
- * EOIs can be coalesced as well.
+ * With posted MSIs, the MSI vectors are multiplexed into a single notification
+ * vector, and only the notification vector is sent to the APIC IRR. Device
+ * MSIs are then dispatched in a demux loop that harvests the MSIs from the
+ * CPU's Posted Interrupt Request bitmap. I.e. Posted MSIs never get sent to
+ * the APIC IRR, and thus do not need an EOI. The notification handler instead
+ * performs a single EOI after processing the PIR.
*
- * "INTEL-IR-POST" IRQ chip does not do EOI on ACK, thus the dummy irq_ack()
- * function. Instead EOI is performed by the posted interrupt notification
- * handler.
+ * Note! Pending SMP/CPU affinity changes, which are per MSI, must still be
+ * honored, only the APIC EOI is omitted.
*
* For the example below, 3 MSIs are coalesced into one CPU notification. Only
- * one apic_eoi() is needed.
+ * one apic_eoi() is needed, but each MSI needs to process pending changes to
+ * its CPU affinity.
*
* __sysvec_posted_msi_notification()
* irq_enter();
* handle_edge_irq()
* irq_chip_ack_parent()
- * dummy(); // No EOI
+ * irq_move_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
- * dummy(); // No EOI
+ * irq_move_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* handle_edge_irq()
* irq_chip_ack_parent()
- * dummy(); // No EOI
+ * irq_move_irq(); // No EOI
* handle_irq_event()
* driver_handler()
* apic_eoi()
* irq_exit()
+ *
*/
-
-static void dummy_ack(struct irq_data *d) { }
-
static struct irq_chip intel_ir_chip_post_msi = {
.name = "INTEL-IR-POST",
- .irq_ack = dummy_ack,
+ .irq_ack = irq_move_irq,
.irq_set_affinity = intel_ir_set_affinity,
.irq_compose_msi_msg = intel_ir_compose_msi_msg,
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
@@ -1463,7 +1469,6 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain,
else
irq_data->chip = &intel_ir_chip;
intel_irq_remapping_prepare_irte(ird, irq_cfg, info, index, i);
- irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
}
return 0;
@@ -1494,6 +1499,9 @@ static void intel_irq_remapping_deactivate(struct irq_domain *domain,
struct intel_ir_data *data = irq_data->chip_data;
struct irte entry;
+ WARN_ON_ONCE(data->irq_2_iommu.posted_vcpu);
+ data->irq_2_iommu.posted_vcpu = false;
+
memset(&entry, 0, sizeof(entry));
modify_irte(&data->irq_2_iommu, &entry);
}
@@ -1534,10 +1542,6 @@ static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu)
int ret;
int eim = x2apic_enabled();
- ret = intel_cap_audit(CAP_AUDIT_HOTPLUG_IRQR, iommu);
- if (ret)
- return ret;
-
if (eim && !ecap_eim_support(iommu->ecap)) {
pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n",
iommu->reg_phys, iommu->ecap);
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c
index aba92c00b427..1e149169ee77 100644
--- a/drivers/iommu/intel/nested.c
+++ b/drivers/iommu/intel/nested.c
@@ -27,8 +27,7 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
unsigned long flags;
int ret = 0;
- if (info->domain)
- device_block_translation(dev);
+ device_block_translation(dev);
if (iommu->agaw < dmar_domain->s2_domain->agaw) {
dev_err_ratelimited(dev, "Adjusted guest address width not compatible\n");
@@ -62,6 +61,7 @@ static int intel_nested_attach_dev(struct iommu_domain *domain,
goto unassign_tag;
info->domain = dmar_domain;
+ info->domain_attached = true;
spin_lock_irqsave(&dmar_domain->lock, flags);
list_add(&info->link, &dmar_domain->devices);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
@@ -198,7 +198,7 @@ intel_iommu_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
struct dmar_domain *domain;
int ret;
- if (!nested_supported(iommu) || flags)
+ if (!nested_supported(iommu) || flags & ~IOMMU_HWPT_ALLOC_PASID)
return ERR_PTR(-EOPNOTSUPP);
/* Must be nested domain */
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index fb59a7d35958..7ee18bb48bd4 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -932,7 +932,7 @@ static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn)
context_clear_entry(context);
__iommu_flush_cache(iommu, context, sizeof(*context));
spin_unlock(&iommu->lock);
- intel_context_flush_present(info, context, did, false);
+ intel_context_flush_no_pasid(info, context, did);
}
static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data)
@@ -992,6 +992,8 @@ static int context_entry_set_pasid_table(struct context_entry *context,
context_set_sm_dte(context);
if (info->pasid_supported)
context_set_pasid(context);
+ if (info->pri_supported)
+ context_set_sm_pre(context);
context_set_fault_enable(context);
context_set_present(context);
@@ -1117,17 +1119,15 @@ static void __context_flush_dev_iotlb(struct device_domain_info *info)
/*
* Cache invalidations after change in a context table entry that was present
- * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). If
- * IOMMU is in scalable mode and all PASID table entries of the device were
- * non-present, set flush_domains to false. Otherwise, true.
+ * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations).
+ * This helper can only be used when IOMMU is working in the legacy mode or
+ * IOMMU is in scalable mode but all PASID table entries of the device are
+ * non-present.
*/
-void intel_context_flush_present(struct device_domain_info *info,
- struct context_entry *context,
- u16 did, bool flush_domains)
+void intel_context_flush_no_pasid(struct device_domain_info *info,
+ struct context_entry *context, u16 did)
{
struct intel_iommu *iommu = info->iommu;
- struct pasid_entry *pte;
- int i;
/*
* Device-selective context-cache invalidation. The Domain-ID field
@@ -1150,30 +1150,5 @@ void intel_context_flush_present(struct device_domain_info *info,
return;
}
- /*
- * For scalable mode:
- * - Domain-selective PASID-cache invalidation to affected domains
- * - Domain-selective IOTLB invalidation to affected domains
- * - Global Device-TLB invalidation to affected functions
- */
- if (flush_domains) {
- /*
- * If the IOMMU is running in scalable mode and there might
- * be potential PASID translations, the caller should hold
- * the lock to ensure that context changes and cache flushes
- * are atomic.
- */
- assert_spin_locked(&iommu->lock);
- for (i = 0; i < info->pasid_table->max_pasid; i++) {
- pte = intel_pasid_get_entry(info->dev, i);
- if (!pte || !pasid_pte_is_present(pte))
- continue;
-
- did = pasid_get_domain_id(pte);
- qi_flush_pasid_cache(iommu, did, QI_PC_ALL_PASIDS, 0);
- iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
- }
- }
-
__context_flush_dev_iotlb(info);
}
diff --git a/drivers/iommu/intel/prq.c b/drivers/iommu/intel/prq.c
index 064194399b38..5b6a64d96850 100644
--- a/drivers/iommu/intel/prq.c
+++ b/drivers/iommu/intel/prq.c
@@ -67,7 +67,7 @@ void intel_iommu_drain_pasid_prq(struct device *dev, u32 pasid)
u16 sid, did;
info = dev_iommu_priv_get(dev);
- if (!info->pri_enabled)
+ if (!info->iopf_refcount)
return;
iommu = info->iommu;
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index f5569347591f..ba93123cb4eb 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -110,6 +110,41 @@ static const struct mmu_notifier_ops intel_mmuops = {
.free_notifier = intel_mm_free_notifier,
};
+static int intel_iommu_sva_supported(struct device *dev)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev);
+ struct intel_iommu *iommu;
+
+ if (!info || dmar_disabled)
+ return -EINVAL;
+
+ iommu = info->iommu;
+ if (!iommu)
+ return -EINVAL;
+
+ if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
+ return -ENODEV;
+
+ if (!info->pasid_enabled || !info->ats_enabled)
+ return -EINVAL;
+
+ /*
+ * Devices having device-specific I/O fault handling should not
+ * support PCI/PRI. The IOMMU side has no means to check the
+ * capability of device-specific IOPF. Therefore, IOMMU can only
+ * default that if the device driver enables SVA on a non-PRI
+ * device, it will handle IOPF in its own way.
+ */
+ if (!info->pri_supported)
+ return 0;
+
+ /* Devices supporting PRI should have it enabled. */
+ if (!info->pri_enabled)
+ return -EINVAL;
+
+ return 0;
+}
+
static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid,
struct iommu_domain *old)
@@ -121,6 +156,10 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
unsigned long sflags;
int ret = 0;
+ ret = intel_iommu_sva_supported(dev);
+ if (ret)
+ return ret;
+
dev_pasid = domain_add_dev_pasid(domain, dev, pasid);
if (IS_ERR(dev_pasid))
return PTR_ERR(dev_pasid);
@@ -161,6 +200,10 @@ struct iommu_domain *intel_svm_domain_alloc(struct device *dev,
struct dmar_domain *domain;
int ret;
+ ret = intel_iommu_sva_supported(dev);
+ if (ret)
+ return ERR_PTR(ret);
+
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 6b9bb58a414f..396c4f6f5a5b 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -13,6 +13,7 @@
#include <linux/bitops.h>
#include <linux/io-pgtable.h>
#include <linux/kernel.h>
+#include <linux/device/faux.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -223,6 +224,34 @@ static inline int arm_lpae_max_entries(int i, struct arm_lpae_io_pgtable *data)
return ptes_per_table - (i & (ptes_per_table - 1));
}
+/*
+ * Check if concatenated PGDs are mandatory according to Arm DDI0487 (K.a)
+ * 1) R_DXBSH: For 16KB, and 48-bit input size, use level 1 instead of 0.
+ * 2) R_SRKBC: After de-ciphering the table for PA size and valid initial lookup
+ * a) 40 bits PA size with 4K: use level 1 instead of level 0 (2 tables for ias = oas)
+ * b) 40 bits PA size with 16K: use level 2 instead of level 1 (16 tables for ias = oas)
+ * c) 42 bits PA size with 4K: use level 1 instead of level 0 (8 tables for ias = oas)
+ * d) 48 bits PA size with 16K: use level 1 instead of level 0 (2 tables for ias = oas)
+ */
+static inline bool arm_lpae_concat_mandatory(struct io_pgtable_cfg *cfg,
+ struct arm_lpae_io_pgtable *data)
+{
+ unsigned int ias = cfg->ias;
+ unsigned int oas = cfg->oas;
+
+ /* Covers 1 and 2.d */
+ if ((ARM_LPAE_GRANULE(data) == SZ_16K) && (data->start_level == 0))
+ return (oas == 48) || (ias == 48);
+
+ /* Covers 2.a and 2.c */
+ if ((ARM_LPAE_GRANULE(data) == SZ_4K) && (data->start_level == 0))
+ return (oas == 40) || (oas == 42);
+
+ /* Case 2.b */
+ return (ARM_LPAE_GRANULE(data) == SZ_16K) &&
+ (data->start_level == 1) && (oas == 40);
+}
+
static bool selftest_running = false;
static dma_addr_t __arm_lpae_dma_addr(void *pages)
@@ -676,85 +705,107 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov
data->start_level, ptep);
}
+struct io_pgtable_walk_data {
+ struct io_pgtable *iop;
+ void *data;
+ int (*visit)(struct io_pgtable_walk_data *walk_data, int lvl,
+ arm_lpae_iopte *ptep, size_t size);
+ unsigned long flags;
+ u64 addr;
+ const u64 end;
+};
+
+static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data,
+ struct io_pgtable_walk_data *walk_data,
+ arm_lpae_iopte *ptep,
+ int lvl);
+
+struct iova_to_phys_data {
+ arm_lpae_iopte pte;
+ int lvl;
+};
+
+static int visit_iova_to_phys(struct io_pgtable_walk_data *walk_data, int lvl,
+ arm_lpae_iopte *ptep, size_t size)
+{
+ struct iova_to_phys_data *data = walk_data->data;
+ data->pte = *ptep;
+ data->lvl = lvl;
+ return 0;
+}
+
static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
unsigned long iova)
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
- arm_lpae_iopte pte, *ptep = data->pgd;
- int lvl = data->start_level;
-
- do {
- /* Valid IOPTE pointer? */
- if (!ptep)
- return 0;
-
- /* Grab the IOPTE we're interested in */
- ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
- pte = READ_ONCE(*ptep);
-
- /* Valid entry? */
- if (!pte)
- return 0;
+ struct iova_to_phys_data d;
+ struct io_pgtable_walk_data walk_data = {
+ .data = &d,
+ .visit = visit_iova_to_phys,
+ .addr = iova,
+ .end = iova + 1,
+ };
+ int ret;
- /* Leaf entry? */
- if (iopte_leaf(pte, lvl, data->iop.fmt))
- goto found_translation;
+ ret = __arm_lpae_iopte_walk(data, &walk_data, data->pgd, data->start_level);
+ if (ret)
+ return 0;
- /* Take it to the next level */
- ptep = iopte_deref(pte, data);
- } while (++lvl < ARM_LPAE_MAX_LEVELS);
+ iova &= (ARM_LPAE_BLOCK_SIZE(d.lvl, data) - 1);
+ return iopte_to_paddr(d.pte, data) | iova;
+}
- /* Ran out of page tables to walk */
+static int visit_pgtable_walk(struct io_pgtable_walk_data *walk_data, int lvl,
+ arm_lpae_iopte *ptep, size_t size)
+{
+ struct arm_lpae_io_pgtable_walk_data *data = walk_data->data;
+ data->ptes[lvl] = *ptep;
return 0;
-
-found_translation:
- iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
- return iopte_to_paddr(pte, data) | iova;
}
-struct io_pgtable_walk_data {
- struct iommu_dirty_bitmap *dirty;
- unsigned long flags;
- u64 addr;
- const u64 end;
-};
+static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova,
+ void *wd)
+{
+ struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+ struct io_pgtable_walk_data walk_data = {
+ .data = wd,
+ .visit = visit_pgtable_walk,
+ .addr = iova,
+ .end = iova + 1,
+ };
-static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
- struct io_pgtable_walk_data *walk_data,
- arm_lpae_iopte *ptep,
- int lvl);
+ return __arm_lpae_iopte_walk(data, &walk_data, data->pgd, data->start_level);
+}
-static int io_pgtable_visit_dirty(struct arm_lpae_io_pgtable *data,
- struct io_pgtable_walk_data *walk_data,
- arm_lpae_iopte *ptep, int lvl)
+static int io_pgtable_visit(struct arm_lpae_io_pgtable *data,
+ struct io_pgtable_walk_data *walk_data,
+ arm_lpae_iopte *ptep, int lvl)
{
struct io_pgtable *iop = &data->iop;
arm_lpae_iopte pte = READ_ONCE(*ptep);
- if (iopte_leaf(pte, lvl, iop->fmt)) {
- size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data);
+ size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data);
+ int ret = walk_data->visit(walk_data, lvl, ptep, size);
+ if (ret)
+ return ret;
- if (iopte_writeable_dirty(pte)) {
- iommu_dirty_bitmap_record(walk_data->dirty,
- walk_data->addr, size);
- if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR))
- iopte_set_writeable_clean(ptep);
- }
+ if (iopte_leaf(pte, lvl, iop->fmt)) {
walk_data->addr += size;
return 0;
}
- if (WARN_ON(!iopte_table(pte, lvl)))
+ if (!iopte_table(pte, lvl)) {
return -EINVAL;
+ }
ptep = iopte_deref(pte, data);
- return __arm_lpae_iopte_walk_dirty(data, walk_data, ptep, lvl + 1);
+ return __arm_lpae_iopte_walk(data, walk_data, ptep, lvl + 1);
}
-static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
- struct io_pgtable_walk_data *walk_data,
- arm_lpae_iopte *ptep,
- int lvl)
+static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data,
+ struct io_pgtable_walk_data *walk_data,
+ arm_lpae_iopte *ptep,
+ int lvl)
{
u32 idx;
int max_entries, ret;
@@ -769,7 +820,7 @@ static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data);
(idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) {
- ret = io_pgtable_visit_dirty(data, walk_data, ptep + idx, lvl);
+ ret = io_pgtable_visit(data, walk_data, ptep + idx, lvl);
if (ret)
return ret;
}
@@ -777,6 +828,23 @@ static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data,
return 0;
}
+static int visit_dirty(struct io_pgtable_walk_data *walk_data, int lvl,
+ arm_lpae_iopte *ptep, size_t size)
+{
+ struct iommu_dirty_bitmap *dirty = walk_data->data;
+
+ if (!iopte_leaf(*ptep, lvl, walk_data->iop->fmt))
+ return 0;
+
+ if (iopte_writeable_dirty(*ptep)) {
+ iommu_dirty_bitmap_record(dirty, walk_data->addr, size);
+ if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR))
+ iopte_set_writeable_clean(ptep);
+ }
+
+ return 0;
+}
+
static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
unsigned long iova, size_t size,
unsigned long flags,
@@ -785,7 +853,9 @@ static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
struct io_pgtable_cfg *cfg = &data->iop.cfg;
struct io_pgtable_walk_data walk_data = {
- .dirty = dirty,
+ .iop = &data->iop,
+ .data = dirty,
+ .visit = visit_dirty,
.flags = flags,
.addr = iova,
.end = iova + size,
@@ -800,7 +870,7 @@ static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
if (data->iop.fmt != ARM_64_LPAE_S1)
return -EINVAL;
- return __arm_lpae_iopte_walk_dirty(data, &walk_data, ptep, lvl);
+ return __arm_lpae_iopte_walk(data, &walk_data, ptep, lvl);
}
static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
@@ -882,6 +952,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
.unmap_pages = arm_lpae_unmap_pages,
.iova_to_phys = arm_lpae_iova_to_phys,
.read_and_clear_dirty = arm_lpae_read_and_clear_dirty,
+ .pgtable_walk = arm_lpae_pgtable_walk,
};
return data;
@@ -1006,18 +1077,12 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
if (!data)
return NULL;
- /*
- * Concatenate PGDs at level 1 if possible in order to reduce
- * the depth of the stage-2 walk.
- */
- if (data->start_level == 0) {
- unsigned long pgd_pages;
-
- pgd_pages = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte);
- if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) {
- data->pgd_bits += data->bits_per_level;
- data->start_level++;
- }
+ if (arm_lpae_concat_mandatory(cfg, data)) {
+ if (WARN_ON((ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte)) >
+ ARM_LPAE_S2_MAX_CONCAT_PAGES))
+ return NULL;
+ data->pgd_bits += data->bits_per_level;
+ data->start_level++;
}
/* VTCR */
@@ -1364,36 +1429,43 @@ static int __init arm_lpae_do_selftests(void)
SZ_64K | SZ_512M,
};
- static const unsigned int ias[] __initconst = {
+ static const unsigned int address_size[] __initconst = {
32, 36, 40, 42, 44, 48,
};
- int i, j, pass = 0, fail = 0;
- struct device dev;
+ int i, j, k, pass = 0, fail = 0;
+ struct faux_device *dev;
struct io_pgtable_cfg cfg = {
.tlb = &dummy_tlb_ops,
- .oas = 48,
.coherent_walk = true,
- .iommu_dev = &dev,
};
- /* __arm_lpae_alloc_pages() merely needs dev_to_node() to work */
- set_dev_node(&dev, NUMA_NO_NODE);
+ dev = faux_device_create("io-pgtable-test", NULL, 0);
+ if (!dev)
+ return -ENOMEM;
+
+ cfg.iommu_dev = &dev->dev;
for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
- for (j = 0; j < ARRAY_SIZE(ias); ++j) {
- cfg.pgsize_bitmap = pgsize[i];
- cfg.ias = ias[j];
- pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n",
- pgsize[i], ias[j]);
- if (arm_lpae_run_tests(&cfg))
- fail++;
- else
- pass++;
+ for (j = 0; j < ARRAY_SIZE(address_size); ++j) {
+ /* Don't use ias > oas as it is not valid for stage-2. */
+ for (k = 0; k <= j; ++k) {
+ cfg.pgsize_bitmap = pgsize[i];
+ cfg.ias = address_size[k];
+ cfg.oas = address_size[j];
+ pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u OAS %u\n",
+ pgsize[i], cfg.ias, cfg.oas);
+ if (arm_lpae_run_tests(&cfg))
+ fail++;
+ else
+ pass++;
+ }
}
}
pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail);
+ faux_device_destroy(dev);
+
return fail ? -EFAULT : 0;
}
subsys_initcall(arm_lpae_do_selftests);
diff --git a/drivers/iommu/io-pgtable-dart.c b/drivers/iommu/io-pgtable-dart.c
index c004640640ee..06aca9ab52f9 100644
--- a/drivers/iommu/io-pgtable-dart.c
+++ b/drivers/iommu/io-pgtable-dart.c
@@ -135,7 +135,6 @@ static int dart_init_pte(struct dart_io_pgtable *data,
pte |= FIELD_PREP(APPLE_DART_PTE_SUBPAGE_START, 0);
pte |= FIELD_PREP(APPLE_DART_PTE_SUBPAGE_END, 0xfff);
- pte |= APPLE_DART1_PTE_PROT_SP_DIS;
pte |= APPLE_DART_PTE_VALID;
for (i = 0; i < num_entries; i++)
@@ -211,6 +210,7 @@ static dart_iopte dart_prot_to_pte(struct dart_io_pgtable *data,
dart_iopte pte = 0;
if (data->iop.fmt == APPLE_DART) {
+ pte |= APPLE_DART1_PTE_PROT_SP_DIS;
if (!(prot & IOMMU_WRITE))
pte |= APPLE_DART1_PTE_PROT_NO_WRITE;
if (!(prot & IOMMU_READ))
diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h
index de5b54eaa8bf..e236b932e766 100644
--- a/drivers/iommu/iommu-priv.h
+++ b/drivers/iommu/iommu-priv.h
@@ -5,6 +5,7 @@
#define __LINUX_IOMMU_PRIV_H
#include <linux/iommu.h>
+#include <linux/msi.h>
static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
{
@@ -17,6 +18,8 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
return dev->iommu->iommu_dev->ops;
}
+void dev_iommu_free(struct device *dev);
+
const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode);
static inline const struct iommu_ops *iommu_fwspec_ops(struct iommu_fwspec *fwspec)
@@ -24,8 +27,7 @@ static inline const struct iommu_ops *iommu_fwspec_ops(struct iommu_fwspec *fwsp
return iommu_ops_from_fwnode(fwspec ? fwspec->iommu_fwnode : NULL);
}
-int iommu_group_replace_domain(struct iommu_group *group,
- struct iommu_domain *new_domain);
+void iommu_fwspec_free(struct device *dev);
int iommu_device_register_bus(struct iommu_device *iommu,
const struct iommu_ops *ops,
@@ -46,4 +48,19 @@ void iommu_detach_group_handle(struct iommu_domain *domain,
int iommu_replace_group_handle(struct iommu_group *group,
struct iommu_domain *new_domain,
struct iommu_attach_handle *handle);
+
+#if IS_ENABLED(CONFIG_IOMMUFD_DRIVER_CORE) && IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
+int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+ phys_addr_t msi_addr);
+#else /* !CONFIG_IOMMUFD_DRIVER_CORE || !CONFIG_IRQ_MSI_IOMMU */
+static inline int iommufd_sw_msi(struct iommu_domain *domain,
+ struct msi_desc *desc, phys_addr_t msi_addr)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_IOMMUFD_DRIVER_CORE && CONFIG_IRQ_MSI_IOMMU */
+
+int iommu_replace_device_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_attach_handle *handle);
#endif /* __LINUX_IOMMU_PRIV_H */
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index 503c5d23c1ea..ab18bc494eef 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -310,6 +310,7 @@ static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
}
domain->type = IOMMU_DOMAIN_SVA;
+ domain->cookie_type = IOMMU_COOKIE_SVA;
mmgrab(mm);
domain->mm = mm;
domain->owner = ops;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 599030e1e890..df871a500b28 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -18,6 +18,7 @@
#include <linux/errno.h>
#include <linux/host1x_context_bus.h>
#include <linux/iommu.h>
+#include <linux/iommufd.h>
#include <linux/idr.h>
#include <linux/err.h>
#include <linux/pci.h>
@@ -45,6 +46,9 @@ static unsigned int iommu_def_domain_type __read_mostly;
static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
static u32 iommu_cmd_line __read_mostly;
+/* Tags used with xa_tag_pointer() in group->pasid_array */
+enum { IOMMU_PASID_ARRAY_DOMAIN = 0, IOMMU_PASID_ARRAY_HANDLE = 1 };
+
struct iommu_group {
struct kobject kobj;
struct kobject *devices_kobj;
@@ -273,6 +277,8 @@ int iommu_device_register(struct iommu_device *iommu,
err = bus_iommu_probe(iommu_buses[i]);
if (err)
iommu_device_unregister(iommu);
+ else
+ WRITE_ONCE(iommu->ready, true);
return err;
}
EXPORT_SYMBOL_GPL(iommu_device_register);
@@ -352,7 +358,7 @@ static struct dev_iommu *dev_iommu_get(struct device *dev)
return param;
}
-static void dev_iommu_free(struct device *dev)
+void dev_iommu_free(struct device *dev)
{
struct dev_iommu *param = dev->iommu;
@@ -404,14 +410,42 @@ EXPORT_SYMBOL_GPL(dev_iommu_priv_set);
* Init the dev->iommu and dev->iommu_group in the struct device and get the
* driver probed
*/
-static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
+static int iommu_init_device(struct device *dev)
{
+ const struct iommu_ops *ops;
struct iommu_device *iommu_dev;
struct iommu_group *group;
int ret;
if (!dev_iommu_get(dev))
return -ENOMEM;
+ /*
+ * For FDT-based systems and ACPI IORT/VIOT, the common firmware parsing
+ * is buried in the bus dma_configure path. Properly unpicking that is
+ * still a big job, so for now just invoke the whole thing. The device
+ * already having a driver bound means dma_configure has already run and
+ * found no IOMMU to wait for, so there's no point calling it again.
+ */
+ if (!dev->iommu->fwspec && !dev->driver && dev->bus->dma_configure) {
+ mutex_unlock(&iommu_probe_device_lock);
+ dev->bus->dma_configure(dev);
+ mutex_lock(&iommu_probe_device_lock);
+ /* If another instance finished the job for us, skip it */
+ if (!dev->iommu || dev->iommu_group)
+ return -ENODEV;
+ }
+ /*
+ * At this point, relevant devices either now have a fwspec which will
+ * match ops registered with a non-NULL fwnode, or we can reasonably
+ * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can
+ * be present, and that any of their registered instances has suitable
+ * ops for probing, and thus cheekily co-opt the same mechanism.
+ */
+ ops = iommu_fwspec_ops(dev->iommu->fwspec);
+ if (!ops) {
+ ret = -ENODEV;
+ goto err_free;
+ }
if (!try_module_get(ops->owner)) {
ret = -EINVAL;
@@ -508,29 +542,27 @@ static void iommu_deinit_device(struct device *dev)
dev->iommu_group = NULL;
module_put(ops->owner);
dev_iommu_free(dev);
+#ifdef CONFIG_IOMMU_DMA
+ dev->dma_iommu = false;
+#endif
+}
+
+static struct iommu_domain *pasid_array_entry_to_domain(void *entry)
+{
+ if (xa_pointer_tag(entry) == IOMMU_PASID_ARRAY_DOMAIN)
+ return xa_untag_pointer(entry);
+ return ((struct iommu_attach_handle *)xa_untag_pointer(entry))->domain;
}
DEFINE_MUTEX(iommu_probe_device_lock);
static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
{
- const struct iommu_ops *ops;
struct iommu_group *group;
struct group_device *gdev;
int ret;
/*
- * For FDT-based systems and ACPI IORT/VIOT, drivers register IOMMU
- * instances with non-NULL fwnodes, and client devices should have been
- * identified with a fwspec by this point. Otherwise, we can currently
- * assume that only one of Intel, AMD, s390, PAMU or legacy SMMUv2 can
- * be present, and that any of their registered instances has suitable
- * ops for probing, and thus cheekily co-opt the same mechanism.
- */
- ops = iommu_fwspec_ops(dev_iommu_fwspec_get(dev));
- if (!ops)
- return -ENODEV;
- /*
* Serialise to avoid races between IOMMU drivers registering in
* parallel and/or the "replay" calls from ACPI/OF code via client
* driver probe. Once the latter have been cleaned up we should
@@ -543,9 +575,15 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
if (dev->iommu_group)
return 0;
- ret = iommu_init_device(dev, ops);
+ ret = iommu_init_device(dev);
if (ret)
return ret;
+ /*
+ * And if we do now see any replay calls, they would indicate someone
+ * misusing the dma_configure path outside bus code.
+ */
+ if (dev->driver)
+ dev_WARN(dev, "late IOMMU probe at driver bind, something fishy here!\n");
group = dev->iommu_group;
gdev = iommu_group_alloc_device(group, dev);
@@ -1756,7 +1794,7 @@ static int iommu_get_def_domain_type(struct iommu_group *group,
group->id);
/*
- * Try to recover, drivers are allowed to force IDENITY or DMA, IDENTITY
+ * Try to recover, drivers are allowed to force IDENTITY or DMA, IDENTITY
* takes precedence.
*/
if (type == IOMMU_DOMAIN_IDENTITY)
@@ -1950,8 +1988,10 @@ void iommu_set_fault_handler(struct iommu_domain *domain,
iommu_fault_handler_t handler,
void *token)
{
- BUG_ON(!domain);
+ if (WARN_ON(!domain || domain->cookie_type != IOMMU_COOKIE_NONE))
+ return;
+ domain->cookie_type = IOMMU_COOKIE_FAULT_HANDLER;
domain->handler = handler;
domain->handler_token = token;
}
@@ -2021,9 +2061,19 @@ EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc_flags);
void iommu_domain_free(struct iommu_domain *domain)
{
- if (domain->type == IOMMU_DOMAIN_SVA)
+ switch (domain->cookie_type) {
+ case IOMMU_COOKIE_DMA_IOVA:
+ iommu_put_dma_cookie(domain);
+ break;
+ case IOMMU_COOKIE_DMA_MSI:
+ iommu_put_msi_cookie(domain);
+ break;
+ case IOMMU_COOKIE_SVA:
mmdrop(domain->mm);
- iommu_put_dma_cookie(domain);
+ break;
+ default:
+ break;
+ }
if (domain->ops->free)
domain->ops->free(domain);
}
@@ -2147,6 +2197,30 @@ struct iommu_domain *iommu_get_dma_domain(struct device *dev)
return dev->iommu_group->default_domain;
}
+static void *iommu_make_pasid_array_entry(struct iommu_domain *domain,
+ struct iommu_attach_handle *handle)
+{
+ if (handle) {
+ handle->domain = domain;
+ return xa_tag_pointer(handle, IOMMU_PASID_ARRAY_HANDLE);
+ }
+
+ return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN);
+}
+
+static bool domain_iommu_ops_compatible(const struct iommu_ops *ops,
+ struct iommu_domain *domain)
+{
+ if (domain->owner == ops)
+ return true;
+
+ /* For static domains, owner isn't set. */
+ if (domain == ops->blocked_domain || domain == ops->identity_domain)
+ return true;
+
+ return false;
+}
+
static int __iommu_attach_group(struct iommu_domain *domain,
struct iommu_group *group)
{
@@ -2157,7 +2231,8 @@ static int __iommu_attach_group(struct iommu_domain *domain,
return -EBUSY;
dev = iommu_group_first_dev(group);
- if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner)
+ if (!dev_has_iommu(dev) ||
+ !domain_iommu_ops_compatible(dev_iommu_ops(dev), domain))
return -EINVAL;
return __iommu_group_set_domain(group, domain);
@@ -2187,32 +2262,6 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
}
EXPORT_SYMBOL_GPL(iommu_attach_group);
-/**
- * iommu_group_replace_domain - replace the domain that a group is attached to
- * @group: IOMMU group that will be attached to the new domain
- * @new_domain: new IOMMU domain to replace with
- *
- * This API allows the group to switch domains without being forced to go to
- * the blocking domain in-between.
- *
- * If the currently attached domain is a core domain (e.g. a default_domain),
- * it will act just like the iommu_attach_group().
- */
-int iommu_group_replace_domain(struct iommu_group *group,
- struct iommu_domain *new_domain)
-{
- int ret;
-
- if (!new_domain)
- return -EINVAL;
-
- mutex_lock(&group->mutex);
- ret = __iommu_group_set_domain(group, new_domain);
- mutex_unlock(&group->mutex);
- return ret;
-}
-EXPORT_SYMBOL_NS_GPL(iommu_group_replace_domain, "IOMMUFD_INTERNAL");
-
static int __iommu_device_set_domain(struct iommu_group *group,
struct device *dev,
struct iommu_domain *new_domain,
@@ -2364,6 +2413,7 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
unsigned int pgsize_idx, pgsize_idx_next;
unsigned long pgsizes;
size_t offset, pgsize, pgsize_next;
+ size_t offset_end;
unsigned long addr_merge = paddr | iova;
/* Page sizes supported by the hardware and small enough for @size */
@@ -2404,7 +2454,8 @@ static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long iova,
* If size is big enough to accommodate the larger page, reduce
* the number of smaller pages.
*/
- if (offset + pgsize_next <= size)
+ if (!check_add_overflow(offset, pgsize_next, &offset_end) &&
+ offset_end <= size)
size = offset;
out_set_count:
@@ -2689,7 +2740,8 @@ int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
* if upper layers showed interest and installed a fault handler,
* invoke it.
*/
- if (domain->handler)
+ if (domain->cookie_type == IOMMU_COOKIE_FAULT_HANDLER &&
+ domain->handler)
ret = domain->handler(domain, dev, iova, flags,
domain->handler_token);
@@ -2798,31 +2850,39 @@ bool iommu_default_passthrough(void)
}
EXPORT_SYMBOL_GPL(iommu_default_passthrough);
-const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode)
+static const struct iommu_device *iommu_from_fwnode(const struct fwnode_handle *fwnode)
{
- const struct iommu_ops *ops = NULL;
- struct iommu_device *iommu;
+ const struct iommu_device *iommu, *ret = NULL;
spin_lock(&iommu_device_lock);
list_for_each_entry(iommu, &iommu_device_list, list)
if (iommu->fwnode == fwnode) {
- ops = iommu->ops;
+ ret = iommu;
break;
}
spin_unlock(&iommu_device_lock);
- return ops;
+ return ret;
+}
+
+const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode)
+{
+ const struct iommu_device *iommu = iommu_from_fwnode(fwnode);
+
+ return iommu ? iommu->ops : NULL;
}
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode)
{
- const struct iommu_ops *ops = iommu_ops_from_fwnode(iommu_fwnode);
+ const struct iommu_device *iommu = iommu_from_fwnode(iommu_fwnode);
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- if (!ops)
+ if (!iommu)
+ return driver_deferred_probe_check_state(dev);
+ if (!dev->iommu && !READ_ONCE(iommu->ready))
return -EPROBE_DEFER;
if (fwspec)
- return ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL;
+ return iommu->ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL;
if (!dev_iommu_get(dev))
return -ENOMEM;
@@ -2849,7 +2909,6 @@ void iommu_fwspec_free(struct device *dev)
dev_iommu_fwspec_set(dev, NULL);
}
}
-EXPORT_SYMBOL_GPL(iommu_fwspec_free);
int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids)
{
@@ -3097,6 +3156,11 @@ int iommu_device_use_default_domain(struct device *dev)
return 0;
mutex_lock(&group->mutex);
+ /* We may race against bus_iommu_probe() finalising groups here */
+ if (!group->default_domain) {
+ ret = -EPROBE_DEFER;
+ goto unlock_out;
+ }
if (group->owner_cnt) {
if (group->domain != group->default_domain || group->owner ||
!xa_empty(&group->pasid_array)) {
@@ -3312,17 +3376,30 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group)
}
EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
+static void iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
+ struct iommu_domain *domain)
+{
+ const struct iommu_ops *ops = dev_iommu_ops(dev);
+ struct iommu_domain *blocked_domain = ops->blocked_domain;
+
+ WARN_ON(blocked_domain->ops->set_dev_pasid(blocked_domain,
+ dev, pasid, domain));
+}
+
static int __iommu_set_group_pasid(struct iommu_domain *domain,
- struct iommu_group *group, ioasid_t pasid)
+ struct iommu_group *group, ioasid_t pasid,
+ struct iommu_domain *old)
{
struct group_device *device, *last_gdev;
int ret;
for_each_group_device(group, device) {
- ret = domain->ops->set_dev_pasid(domain, device->dev,
- pasid, NULL);
- if (ret)
- goto err_revert;
+ if (device->dev->iommu->max_pasids > 0) {
+ ret = domain->ops->set_dev_pasid(domain, device->dev,
+ pasid, old);
+ if (ret)
+ goto err_revert;
+ }
}
return 0;
@@ -3330,11 +3407,20 @@ static int __iommu_set_group_pasid(struct iommu_domain *domain,
err_revert:
last_gdev = device;
for_each_group_device(group, device) {
- const struct iommu_ops *ops = dev_iommu_ops(device->dev);
-
if (device == last_gdev)
break;
- ops->remove_dev_pasid(device->dev, pasid, domain);
+ if (device->dev->iommu->max_pasids > 0) {
+ /*
+ * If no old domain, undo the succeeded devices/pasid.
+ * Otherwise, rollback the succeeded devices/pasid to
+ * the old domain. And it is a driver bug to fail
+ * attaching with a previously good domain.
+ */
+ if (!old ||
+ WARN_ON(old->ops->set_dev_pasid(old, device->dev,
+ pasid, domain)))
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
+ }
}
return ret;
}
@@ -3344,11 +3430,10 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
struct iommu_domain *domain)
{
struct group_device *device;
- const struct iommu_ops *ops;
for_each_group_device(group, device) {
- ops = dev_iommu_ops(device->dev);
- ops->remove_dev_pasid(device->dev, pasid, domain);
+ if (device->dev->iommu->max_pasids > 0)
+ iommu_remove_dev_pasid(device->dev, pasid, domain);
}
}
@@ -3359,6 +3444,9 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
* @pasid: the pasid of the device.
* @handle: the attach handle.
*
+ * Caller should always provide a new handle to avoid race with the paths
+ * that have lockless reference to handle if it intends to pass a valid handle.
+ *
* Return: 0 on success, or an error.
*/
int iommu_attach_device_pasid(struct iommu_domain *domain,
@@ -3368,42 +3456,160 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
struct group_device *device;
+ const struct iommu_ops *ops;
+ void *entry;
int ret;
- if (!domain->ops->set_dev_pasid)
- return -EOPNOTSUPP;
-
if (!group)
return -ENODEV;
- if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner ||
+ ops = dev_iommu_ops(dev);
+
+ if (!domain->ops->set_dev_pasid ||
+ !ops->blocked_domain ||
+ !ops->blocked_domain->ops->set_dev_pasid)
+ return -EOPNOTSUPP;
+
+ if (!domain_iommu_ops_compatible(ops, domain) ||
pasid == IOMMU_NO_PASID)
return -EINVAL;
mutex_lock(&group->mutex);
for_each_group_device(group, device) {
- if (pasid >= device->dev->iommu->max_pasids) {
+ /*
+ * Skip PASID validation for devices without PASID support
+ * (max_pasids = 0). These devices cannot issue transactions
+ * with PASID, so they don't affect group's PASID usage.
+ */
+ if ((device->dev->iommu->max_pasids > 0) &&
+ (pasid >= device->dev->iommu->max_pasids)) {
ret = -EINVAL;
goto out_unlock;
}
}
- if (handle)
- handle->domain = domain;
+ entry = iommu_make_pasid_array_entry(domain, handle);
- ret = xa_insert(&group->pasid_array, pasid, handle, GFP_KERNEL);
+ /*
+ * Entry present is a failure case. Use xa_insert() instead of
+ * xa_reserve().
+ */
+ ret = xa_insert(&group->pasid_array, pasid, XA_ZERO_ENTRY, GFP_KERNEL);
if (ret)
goto out_unlock;
- ret = __iommu_set_group_pasid(domain, group, pasid);
- if (ret)
- xa_erase(&group->pasid_array, pasid);
+ ret = __iommu_set_group_pasid(domain, group, pasid, NULL);
+ if (ret) {
+ xa_release(&group->pasid_array, pasid);
+ goto out_unlock;
+ }
+
+ /*
+ * The xa_insert() above reserved the memory, and the group->mutex is
+ * held, this cannot fail. The new domain cannot be visible until the
+ * operation succeeds as we cannot tolerate PRIs becoming concurrently
+ * queued and then failing attach.
+ */
+ WARN_ON(xa_is_err(xa_store(&group->pasid_array,
+ pasid, entry, GFP_KERNEL)));
+
out_unlock:
mutex_unlock(&group->mutex);
return ret;
}
EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
+/**
+ * iommu_replace_device_pasid - Replace the domain that a specific pasid
+ * of the device is attached to
+ * @domain: the new iommu domain
+ * @dev: the attached device.
+ * @pasid: the pasid of the device.
+ * @handle: the attach handle.
+ *
+ * This API allows the pasid to switch domains. The @pasid should have been
+ * attached. Otherwise, this fails. The pasid will keep the old configuration
+ * if replacement failed.
+ *
+ * Caller should always provide a new handle to avoid race with the paths
+ * that have lockless reference to handle if it intends to pass a valid handle.
+ *
+ * Return 0 on success, or an error.
+ */
+int iommu_replace_device_pasid(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_attach_handle *handle)
+{
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
+ struct iommu_attach_handle *entry;
+ struct iommu_domain *curr_domain;
+ void *curr;
+ int ret;
+
+ if (!group)
+ return -ENODEV;
+
+ if (!domain->ops->set_dev_pasid)
+ return -EOPNOTSUPP;
+
+ if (!domain_iommu_ops_compatible(dev_iommu_ops(dev), domain) ||
+ pasid == IOMMU_NO_PASID || !handle)
+ return -EINVAL;
+
+ mutex_lock(&group->mutex);
+ entry = iommu_make_pasid_array_entry(domain, handle);
+ curr = xa_cmpxchg(&group->pasid_array, pasid, NULL,
+ XA_ZERO_ENTRY, GFP_KERNEL);
+ if (xa_is_err(curr)) {
+ ret = xa_err(curr);
+ goto out_unlock;
+ }
+
+ /*
+ * No domain (with or without handle) attached, hence not
+ * a replace case.
+ */
+ if (!curr) {
+ xa_release(&group->pasid_array, pasid);
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ /*
+ * Reusing handle is problematic as there are paths that refers
+ * the handle without lock. To avoid race, reject the callers that
+ * attempt it.
+ */
+ if (curr == entry) {
+ WARN_ON(1);
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ curr_domain = pasid_array_entry_to_domain(curr);
+ ret = 0;
+
+ if (curr_domain != domain) {
+ ret = __iommu_set_group_pasid(domain, group,
+ pasid, curr_domain);
+ if (ret)
+ goto out_unlock;
+ }
+
+ /*
+ * The above xa_cmpxchg() reserved the memory, and the
+ * group->mutex is held, this cannot fail.
+ */
+ WARN_ON(xa_is_err(xa_store(&group->pasid_array,
+ pasid, entry, GFP_KERNEL)));
+
+out_unlock:
+ mutex_unlock(&group->mutex);
+ return ret;
+}
+EXPORT_SYMBOL_NS_GPL(iommu_replace_device_pasid, "IOMMUFD_INTERNAL");
+
/*
* iommu_detach_device_pasid() - Detach the domain from pasid of device
* @domain: the iommu domain.
@@ -3471,13 +3677,17 @@ struct iommu_attach_handle *
iommu_attach_handle_get(struct iommu_group *group, ioasid_t pasid, unsigned int type)
{
struct iommu_attach_handle *handle;
+ void *entry;
xa_lock(&group->pasid_array);
- handle = xa_load(&group->pasid_array, pasid);
- if (!handle)
+ entry = xa_load(&group->pasid_array, pasid);
+ if (!entry || xa_pointer_tag(entry) != IOMMU_PASID_ARRAY_HANDLE) {
handle = ERR_PTR(-ENOENT);
- else if (type && handle->domain->type != type)
- handle = ERR_PTR(-EBUSY);
+ } else {
+ handle = xa_untag_pointer(entry);
+ if (type && handle->domain->type != type)
+ handle = ERR_PTR(-EBUSY);
+ }
xa_unlock(&group->pasid_array);
return handle;
@@ -3495,30 +3705,43 @@ EXPORT_SYMBOL_NS_GPL(iommu_attach_handle_get, "IOMMUFD_INTERNAL");
* This is a variant of iommu_attach_group(). It allows the caller to provide
* an attach handle and use it when the domain is attached. This is currently
* used by IOMMUFD to deliver the I/O page faults.
+ *
+ * Caller should always provide a new handle to avoid race with the paths
+ * that have lockless reference to handle.
*/
int iommu_attach_group_handle(struct iommu_domain *domain,
struct iommu_group *group,
struct iommu_attach_handle *handle)
{
+ void *entry;
int ret;
- if (handle)
- handle->domain = domain;
+ if (!handle)
+ return -EINVAL;
mutex_lock(&group->mutex);
- ret = xa_insert(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL);
+ entry = iommu_make_pasid_array_entry(domain, handle);
+ ret = xa_insert(&group->pasid_array,
+ IOMMU_NO_PASID, XA_ZERO_ENTRY, GFP_KERNEL);
if (ret)
- goto err_unlock;
+ goto out_unlock;
ret = __iommu_attach_group(domain, group);
- if (ret)
- goto err_erase;
- mutex_unlock(&group->mutex);
+ if (ret) {
+ xa_release(&group->pasid_array, IOMMU_NO_PASID);
+ goto out_unlock;
+ }
- return 0;
-err_erase:
- xa_erase(&group->pasid_array, IOMMU_NO_PASID);
-err_unlock:
+ /*
+ * The xa_insert() above reserved the memory, and the group->mutex is
+ * held, this cannot fail. The new domain cannot be visible until the
+ * operation succeeds as we cannot tolerate PRIs becoming concurrently
+ * queued and then failing attach.
+ */
+ WARN_ON(xa_is_err(xa_store(&group->pasid_array,
+ IOMMU_NO_PASID, entry, GFP_KERNEL)));
+
+out_unlock:
mutex_unlock(&group->mutex);
return ret;
}
@@ -3548,33 +3771,37 @@ EXPORT_SYMBOL_NS_GPL(iommu_detach_group_handle, "IOMMUFD_INTERNAL");
* @new_domain: new IOMMU domain to replace with
* @handle: attach handle
*
- * This is a variant of iommu_group_replace_domain(). It allows the caller to
- * provide an attach handle for the new domain and use it when the domain is
- * attached.
+ * This API allows the group to switch domains without being forced to go to
+ * the blocking domain in-between. It allows the caller to provide an attach
+ * handle for the new domain and use it when the domain is attached.
+ *
+ * If the currently attached domain is a core domain (e.g. a default_domain),
+ * it will act just like the iommu_attach_group_handle().
+ *
+ * Caller should always provide a new handle to avoid race with the paths
+ * that have lockless reference to handle.
*/
int iommu_replace_group_handle(struct iommu_group *group,
struct iommu_domain *new_domain,
struct iommu_attach_handle *handle)
{
- void *curr;
+ void *curr, *entry;
int ret;
- if (!new_domain)
+ if (!new_domain || !handle)
return -EINVAL;
mutex_lock(&group->mutex);
- if (handle) {
- ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL);
- if (ret)
- goto err_unlock;
- handle->domain = new_domain;
- }
+ entry = iommu_make_pasid_array_entry(new_domain, handle);
+ ret = xa_reserve(&group->pasid_array, IOMMU_NO_PASID, GFP_KERNEL);
+ if (ret)
+ goto err_unlock;
ret = __iommu_group_set_domain(group, new_domain);
if (ret)
goto err_release;
- curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, handle, GFP_KERNEL);
+ curr = xa_store(&group->pasid_array, IOMMU_NO_PASID, entry, GFP_KERNEL);
WARN_ON(xa_is_err(curr));
mutex_unlock(&group->mutex);
@@ -3587,3 +3814,45 @@ err_unlock:
return ret;
}
EXPORT_SYMBOL_NS_GPL(iommu_replace_group_handle, "IOMMUFD_INTERNAL");
+
+#if IS_ENABLED(CONFIG_IRQ_MSI_IOMMU)
+/**
+ * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain
+ * @desc: MSI descriptor, will store the MSI page
+ * @msi_addr: MSI target address to be mapped
+ *
+ * The implementation of sw_msi() should take msi_addr and map it to
+ * an IOVA in the domain and call msi_desc_set_iommu_msi_iova() with the
+ * mapping information.
+ *
+ * Return: 0 on success or negative error code if the mapping failed.
+ */
+int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
+{
+ struct device *dev = msi_desc_to_dev(desc);
+ struct iommu_group *group = dev->iommu_group;
+ int ret = 0;
+
+ if (!group)
+ return 0;
+
+ mutex_lock(&group->mutex);
+ /* An IDENTITY domain must pass through */
+ if (group->domain && group->domain->type != IOMMU_DOMAIN_IDENTITY) {
+ switch (group->domain->cookie_type) {
+ case IOMMU_COOKIE_DMA_MSI:
+ case IOMMU_COOKIE_DMA_IOVA:
+ ret = iommu_dma_sw_msi(group->domain, desc, msi_addr);
+ break;
+ case IOMMU_COOKIE_IOMMUFD:
+ ret = iommufd_sw_msi(group->domain, desc, msi_addr);
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ }
+ mutex_unlock(&group->mutex);
+ return ret;
+}
+#endif /* CONFIG_IRQ_MSI_IOMMU */
diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig
index 0a07f9449fd9..2beeb4f60ee5 100644
--- a/drivers/iommu/iommufd/Kconfig
+++ b/drivers/iommu/iommufd/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config IOMMUFD_DRIVER_CORE
- tristate
+ bool
default (IOMMUFD_DRIVER || IOMMUFD) if IOMMUFD!=n
config IOMMUFD
diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index cb784da6cddc..71d692c9a8f4 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
iommufd-y := \
device.o \
- fault.o \
+ eventq.o \
hw_pagetable.o \
io_pagetable.o \
ioas.o \
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index dfd0898fb6c1..2111bad72c72 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -3,6 +3,7 @@
*/
#include <linux/iommu.h>
#include <linux/iommufd.h>
+#include <linux/pci-ats.h>
#include <linux/slab.h>
#include <uapi/linux/iommufd.h>
@@ -17,12 +18,17 @@ MODULE_PARM_DESC(
"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
"the MSI interrupt window. Enabling this is a security weakness.");
+struct iommufd_attach {
+ struct iommufd_hw_pagetable *hwpt;
+ struct xarray device_array;
+};
+
static void iommufd_group_release(struct kref *kref)
{
struct iommufd_group *igroup =
container_of(kref, struct iommufd_group, ref);
- WARN_ON(igroup->hwpt || !list_empty(&igroup->device_list));
+ WARN_ON(!xa_empty(&igroup->pasid_attach));
xa_cmpxchg(&igroup->ictx->groups, iommu_group_id(igroup->group), igroup,
NULL, GFP_KERNEL);
@@ -89,7 +95,7 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
kref_init(&new_igroup->ref);
mutex_init(&new_igroup->lock);
- INIT_LIST_HEAD(&new_igroup->device_list);
+ xa_init(&new_igroup->pasid_attach);
new_igroup->sw_msi_start = PHYS_ADDR_MAX;
/* group reference moves into new_igroup */
new_igroup->group = group;
@@ -293,56 +299,83 @@ u32 iommufd_device_to_id(struct iommufd_device *idev)
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, "IOMMUFD");
+static unsigned int iommufd_group_device_num(struct iommufd_group *igroup,
+ ioasid_t pasid)
+{
+ struct iommufd_attach *attach;
+ struct iommufd_device *idev;
+ unsigned int count = 0;
+ unsigned long index;
+
+ lockdep_assert_held(&igroup->lock);
+
+ attach = xa_load(&igroup->pasid_attach, pasid);
+ if (attach)
+ xa_for_each(&attach->device_array, index, idev)
+ count++;
+ return count;
+}
+
+#ifdef CONFIG_IRQ_MSI_IOMMU
static int iommufd_group_setup_msi(struct iommufd_group *igroup,
struct iommufd_hwpt_paging *hwpt_paging)
{
- phys_addr_t sw_msi_start = igroup->sw_msi_start;
- int rc;
+ struct iommufd_ctx *ictx = igroup->ictx;
+ struct iommufd_sw_msi_map *cur;
+
+ if (igroup->sw_msi_start == PHYS_ADDR_MAX)
+ return 0;
/*
- * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to
- * call iommu_get_msi_cookie() on its behalf. This is necessary to setup
- * the MSI window so iommu_dma_prepare_msi() can install pages into our
- * domain after request_irq(). If it is not done interrupts will not
- * work on this domain.
- *
- * FIXME: This is conceptually broken for iommufd since we want to allow
- * userspace to change the domains, eg switch from an identity IOAS to a
- * DMA IOAS. There is currently no way to create a MSI window that
- * matches what the IRQ layer actually expects in a newly created
- * domain.
+ * Install all the MSI pages the device has been using into the domain
*/
- if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) {
- rc = iommu_get_msi_cookie(hwpt_paging->common.domain,
- sw_msi_start);
+ guard(mutex)(&ictx->sw_msi_lock);
+ list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) {
+ int rc;
+
+ if (cur->sw_msi_start != igroup->sw_msi_start ||
+ !test_bit(cur->id, igroup->required_sw_msi.bitmap))
+ continue;
+
+ rc = iommufd_sw_msi_install(ictx, hwpt_paging, cur);
if (rc)
return rc;
-
- /*
- * iommu_get_msi_cookie() can only be called once per domain,
- * it returns -EBUSY on later calls.
- */
- hwpt_paging->msi_cookie = true;
}
return 0;
}
+#else
+static inline int
+iommufd_group_setup_msi(struct iommufd_group *igroup,
+ struct iommufd_hwpt_paging *hwpt_paging)
+{
+ return 0;
+}
+#endif
+
+static bool
+iommufd_group_first_attach(struct iommufd_group *igroup, ioasid_t pasid)
+{
+ lockdep_assert_held(&igroup->lock);
+ return !xa_load(&igroup->pasid_attach, pasid);
+}
static int
iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
struct iommufd_hwpt_paging *hwpt_paging)
{
+ struct iommufd_group *igroup = idev->igroup;
int rc;
- lockdep_assert_held(&idev->igroup->lock);
+ lockdep_assert_held(&igroup->lock);
rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
idev->dev,
- &idev->igroup->sw_msi_start);
+ &igroup->sw_msi_start);
if (rc)
return rc;
- if (list_empty(&idev->igroup->device_list)) {
- rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging);
+ if (iommufd_group_first_attach(igroup, IOMMU_NO_PASID)) {
+ rc = iommufd_group_setup_msi(igroup, hwpt_paging);
if (rc) {
iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt,
idev->dev);
@@ -352,23 +385,217 @@ iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
return 0;
}
+/* The device attach/detach/replace helpers for attach_handle */
+
+static bool iommufd_device_is_attached(struct iommufd_device *idev,
+ ioasid_t pasid)
+{
+ struct iommufd_attach *attach;
+
+ attach = xa_load(&idev->igroup->pasid_attach, pasid);
+ return xa_load(&attach->device_array, idev->obj.id);
+}
+
+static int iommufd_hwpt_pasid_compat(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev,
+ ioasid_t pasid)
+{
+ struct iommufd_group *igroup = idev->igroup;
+
+ lockdep_assert_held(&igroup->lock);
+
+ if (pasid == IOMMU_NO_PASID) {
+ unsigned long start = IOMMU_NO_PASID;
+
+ if (!hwpt->pasid_compat &&
+ xa_find_after(&igroup->pasid_attach,
+ &start, UINT_MAX, XA_PRESENT))
+ return -EINVAL;
+ } else {
+ struct iommufd_attach *attach;
+
+ if (!hwpt->pasid_compat)
+ return -EINVAL;
+
+ attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
+ if (attach && attach->hwpt && !attach->hwpt->pasid_compat)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev,
+ ioasid_t pasid)
+{
+ struct iommufd_attach_handle *handle;
+ int rc;
+
+ rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
+ if (rc)
+ return rc;
+
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ if (hwpt->fault) {
+ rc = iommufd_fault_iopf_enable(idev);
+ if (rc)
+ goto out_free_handle;
+ }
+
+ handle->idev = idev;
+ if (pasid == IOMMU_NO_PASID)
+ rc = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
+ &handle->handle);
+ else
+ rc = iommu_attach_device_pasid(hwpt->domain, idev->dev, pasid,
+ &handle->handle);
+ if (rc)
+ goto out_disable_iopf;
+
+ return 0;
+
+out_disable_iopf:
+ if (hwpt->fault)
+ iommufd_fault_iopf_disable(idev);
+out_free_handle:
+ kfree(handle);
+ return rc;
+}
+
+static struct iommufd_attach_handle *
+iommufd_device_get_attach_handle(struct iommufd_device *idev, ioasid_t pasid)
+{
+ struct iommu_attach_handle *handle;
+
+ lockdep_assert_held(&idev->igroup->lock);
+
+ handle =
+ iommu_attach_handle_get(idev->igroup->group, pasid, 0);
+ if (IS_ERR(handle))
+ return NULL;
+ return to_iommufd_handle(handle);
+}
+
+static void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev,
+ ioasid_t pasid)
+{
+ struct iommufd_attach_handle *handle;
+
+ handle = iommufd_device_get_attach_handle(idev, pasid);
+ if (pasid == IOMMU_NO_PASID)
+ iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
+ else
+ iommu_detach_device_pasid(hwpt->domain, idev->dev, pasid);
+
+ if (hwpt->fault) {
+ iommufd_auto_response_faults(hwpt, handle);
+ iommufd_fault_iopf_disable(idev);
+ }
+ kfree(handle);
+}
+
+static int iommufd_hwpt_replace_device(struct iommufd_device *idev,
+ ioasid_t pasid,
+ struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_hw_pagetable *old)
+{
+ struct iommufd_attach_handle *handle, *old_handle;
+ int rc;
+
+ rc = iommufd_hwpt_pasid_compat(hwpt, idev, pasid);
+ if (rc)
+ return rc;
+
+ old_handle = iommufd_device_get_attach_handle(idev, pasid);
+
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ if (!handle)
+ return -ENOMEM;
+
+ if (hwpt->fault && !old->fault) {
+ rc = iommufd_fault_iopf_enable(idev);
+ if (rc)
+ goto out_free_handle;
+ }
+
+ handle->idev = idev;
+ if (pasid == IOMMU_NO_PASID)
+ rc = iommu_replace_group_handle(idev->igroup->group,
+ hwpt->domain, &handle->handle);
+ else
+ rc = iommu_replace_device_pasid(hwpt->domain, idev->dev,
+ pasid, &handle->handle);
+ if (rc)
+ goto out_disable_iopf;
+
+ if (old->fault) {
+ iommufd_auto_response_faults(hwpt, old_handle);
+ if (!hwpt->fault)
+ iommufd_fault_iopf_disable(idev);
+ }
+ kfree(old_handle);
+
+ return 0;
+
+out_disable_iopf:
+ if (hwpt->fault && !old->fault)
+ iommufd_fault_iopf_disable(idev);
+out_free_handle:
+ kfree(handle);
+ return rc;
+}
+
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
+ struct iommufd_device *idev, ioasid_t pasid)
{
struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
+ bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
+ struct iommufd_group *igroup = idev->igroup;
+ struct iommufd_hw_pagetable *old_hwpt;
+ struct iommufd_attach *attach;
int rc;
- mutex_lock(&idev->igroup->lock);
+ mutex_lock(&igroup->lock);
- if (idev->igroup->hwpt != NULL && idev->igroup->hwpt != hwpt) {
- rc = -EINVAL;
+ attach = xa_cmpxchg(&igroup->pasid_attach, pasid, NULL,
+ XA_ZERO_ENTRY, GFP_KERNEL);
+ if (xa_is_err(attach)) {
+ rc = xa_err(attach);
goto err_unlock;
}
- if (hwpt_paging) {
+ if (!attach) {
+ attach = kzalloc(sizeof(*attach), GFP_KERNEL);
+ if (!attach) {
+ rc = -ENOMEM;
+ goto err_release_pasid;
+ }
+ xa_init(&attach->device_array);
+ }
+
+ old_hwpt = attach->hwpt;
+
+ rc = xa_insert(&attach->device_array, idev->obj.id, XA_ZERO_ENTRY,
+ GFP_KERNEL);
+ if (rc) {
+ WARN_ON(rc == -EBUSY && !old_hwpt);
+ goto err_free_attach;
+ }
+
+ if (old_hwpt && old_hwpt != hwpt) {
+ rc = -EINVAL;
+ goto err_release_devid;
+ }
+
+ if (attach_resv) {
rc = iommufd_device_attach_reserved_iova(idev, hwpt_paging);
if (rc)
- goto err_unlock;
+ goto err_release_devid;
}
/*
@@ -378,51 +605,74 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
* reserved regions are only updated during individual device
* attachment.
*/
- if (list_empty(&idev->igroup->device_list)) {
- rc = iommufd_hwpt_attach_device(hwpt, idev);
+ if (iommufd_group_first_attach(igroup, pasid)) {
+ rc = iommufd_hwpt_attach_device(hwpt, idev, pasid);
if (rc)
goto err_unresv;
- idev->igroup->hwpt = hwpt;
+ attach->hwpt = hwpt;
+ WARN_ON(xa_is_err(xa_store(&igroup->pasid_attach, pasid, attach,
+ GFP_KERNEL)));
}
refcount_inc(&hwpt->obj.users);
- list_add_tail(&idev->group_item, &idev->igroup->device_list);
- mutex_unlock(&idev->igroup->lock);
+ WARN_ON(xa_is_err(xa_store(&attach->device_array, idev->obj.id,
+ idev, GFP_KERNEL)));
+ mutex_unlock(&igroup->lock);
return 0;
err_unresv:
- if (hwpt_paging)
+ if (attach_resv)
iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
+err_release_devid:
+ xa_release(&attach->device_array, idev->obj.id);
+err_free_attach:
+ if (iommufd_group_first_attach(igroup, pasid))
+ kfree(attach);
+err_release_pasid:
+ if (iommufd_group_first_attach(igroup, pasid))
+ xa_release(&igroup->pasid_attach, pasid);
err_unlock:
- mutex_unlock(&idev->igroup->lock);
+ mutex_unlock(&igroup->lock);
return rc;
}
struct iommufd_hw_pagetable *
-iommufd_hw_pagetable_detach(struct iommufd_device *idev)
+iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid)
{
- struct iommufd_hw_pagetable *hwpt = idev->igroup->hwpt;
- struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
+ struct iommufd_group *igroup = idev->igroup;
+ struct iommufd_hwpt_paging *hwpt_paging;
+ struct iommufd_hw_pagetable *hwpt;
+ struct iommufd_attach *attach;
- mutex_lock(&idev->igroup->lock);
- list_del(&idev->group_item);
- if (list_empty(&idev->igroup->device_list)) {
- iommufd_hwpt_detach_device(hwpt, idev);
- idev->igroup->hwpt = NULL;
+ mutex_lock(&igroup->lock);
+ attach = xa_load(&igroup->pasid_attach, pasid);
+ if (!attach) {
+ mutex_unlock(&igroup->lock);
+ return NULL;
}
- if (hwpt_paging)
+
+ hwpt = attach->hwpt;
+ hwpt_paging = find_hwpt_paging(hwpt);
+
+ xa_erase(&attach->device_array, idev->obj.id);
+ if (xa_empty(&attach->device_array)) {
+ iommufd_hwpt_detach_device(hwpt, idev, pasid);
+ xa_erase(&igroup->pasid_attach, pasid);
+ kfree(attach);
+ }
+ if (hwpt_paging && pasid == IOMMU_NO_PASID)
iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
- mutex_unlock(&idev->igroup->lock);
+ mutex_unlock(&igroup->lock);
/* Caller must destroy hwpt */
return hwpt;
}
static struct iommufd_hw_pagetable *
-iommufd_device_do_attach(struct iommufd_device *idev,
+iommufd_device_do_attach(struct iommufd_device *idev, ioasid_t pasid,
struct iommufd_hw_pagetable *hwpt)
{
int rc;
- rc = iommufd_hw_pagetable_attach(hwpt, idev);
+ rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid);
if (rc)
return ERR_PTR(rc);
return NULL;
@@ -432,11 +682,14 @@ static void
iommufd_group_remove_reserved_iova(struct iommufd_group *igroup,
struct iommufd_hwpt_paging *hwpt_paging)
{
+ struct iommufd_attach *attach;
struct iommufd_device *cur;
+ unsigned long index;
lockdep_assert_held(&igroup->lock);
- list_for_each_entry(cur, &igroup->device_list, group_item)
+ attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
+ xa_for_each(&attach->device_array, index, cur)
iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev);
}
@@ -445,14 +698,17 @@ iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup,
struct iommufd_hwpt_paging *hwpt_paging)
{
struct iommufd_hwpt_paging *old_hwpt_paging;
+ struct iommufd_attach *attach;
struct iommufd_device *cur;
+ unsigned long index;
int rc;
lockdep_assert_held(&igroup->lock);
- old_hwpt_paging = find_hwpt_paging(igroup->hwpt);
+ attach = xa_load(&igroup->pasid_attach, IOMMU_NO_PASID);
+ old_hwpt_paging = find_hwpt_paging(attach->hwpt);
if (!old_hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas) {
- list_for_each_entry(cur, &igroup->device_list, group_item) {
+ xa_for_each(&attach->device_array, index, cur) {
rc = iopt_table_enforce_dev_resv_regions(
&hwpt_paging->ioas->iopt, cur->dev, NULL);
if (rc)
@@ -471,69 +727,81 @@ err_unresv:
}
static struct iommufd_hw_pagetable *
-iommufd_device_do_replace(struct iommufd_device *idev,
+iommufd_device_do_replace(struct iommufd_device *idev, ioasid_t pasid,
struct iommufd_hw_pagetable *hwpt)
{
struct iommufd_hwpt_paging *hwpt_paging = find_hwpt_paging(hwpt);
+ bool attach_resv = hwpt_paging && pasid == IOMMU_NO_PASID;
struct iommufd_hwpt_paging *old_hwpt_paging;
struct iommufd_group *igroup = idev->igroup;
struct iommufd_hw_pagetable *old_hwpt;
+ struct iommufd_attach *attach;
unsigned int num_devices;
int rc;
- mutex_lock(&idev->igroup->lock);
+ mutex_lock(&igroup->lock);
- if (igroup->hwpt == NULL) {
+ attach = xa_load(&igroup->pasid_attach, pasid);
+ if (!attach) {
rc = -EINVAL;
goto err_unlock;
}
- if (hwpt == igroup->hwpt) {
- mutex_unlock(&idev->igroup->lock);
+ old_hwpt = attach->hwpt;
+
+ WARN_ON(!old_hwpt || xa_empty(&attach->device_array));
+
+ if (!iommufd_device_is_attached(idev, pasid)) {
+ rc = -EINVAL;
+ goto err_unlock;
+ }
+
+ if (hwpt == old_hwpt) {
+ mutex_unlock(&igroup->lock);
return NULL;
}
- old_hwpt = igroup->hwpt;
- if (hwpt_paging) {
+ if (attach_resv) {
rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt_paging);
if (rc)
goto err_unlock;
}
- rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt);
+ rc = iommufd_hwpt_replace_device(idev, pasid, hwpt, old_hwpt);
if (rc)
goto err_unresv;
old_hwpt_paging = find_hwpt_paging(old_hwpt);
- if (old_hwpt_paging &&
+ if (old_hwpt_paging && pasid == IOMMU_NO_PASID &&
(!hwpt_paging || hwpt_paging->ioas != old_hwpt_paging->ioas))
iommufd_group_remove_reserved_iova(igroup, old_hwpt_paging);
- igroup->hwpt = hwpt;
+ attach->hwpt = hwpt;
- num_devices = list_count_nodes(&igroup->device_list);
+ num_devices = iommufd_group_device_num(igroup, pasid);
/*
- * Move the refcounts held by the device_list to the new hwpt. Retain a
+ * Move the refcounts held by the device_array to the new hwpt. Retain a
* refcount for this thread as the caller will free it.
*/
refcount_add(num_devices, &hwpt->obj.users);
if (num_devices > 1)
WARN_ON(refcount_sub_and_test(num_devices - 1,
&old_hwpt->obj.users));
- mutex_unlock(&idev->igroup->lock);
+ mutex_unlock(&igroup->lock);
/* Caller must destroy old_hwpt */
return old_hwpt;
err_unresv:
- if (hwpt_paging)
+ if (attach_resv)
iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
err_unlock:
- mutex_unlock(&idev->igroup->lock);
+ mutex_unlock(&igroup->lock);
return ERR_PTR(rc);
}
typedef struct iommufd_hw_pagetable *(*attach_fn)(
- struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt);
+ struct iommufd_device *idev, ioasid_t pasid,
+ struct iommufd_hw_pagetable *hwpt);
/*
* When automatically managing the domains we search for a compatible domain in
@@ -541,7 +809,7 @@ typedef struct iommufd_hw_pagetable *(*attach_fn)(
* Automatic domain selection will never pick a manually created domain.
*/
static struct iommufd_hw_pagetable *
-iommufd_device_auto_get_domain(struct iommufd_device *idev,
+iommufd_device_auto_get_domain(struct iommufd_device *idev, ioasid_t pasid,
struct iommufd_ioas *ioas, u32 *pt_id,
attach_fn do_attach)
{
@@ -570,7 +838,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
hwpt = &hwpt_paging->common;
if (!iommufd_lock_obj(&hwpt->obj))
continue;
- destroy_hwpt = (*do_attach)(idev, hwpt);
+ destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
if (IS_ERR(destroy_hwpt)) {
iommufd_put_object(idev->ictx, &hwpt->obj);
/*
@@ -588,8 +856,8 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
goto out_unlock;
}
- hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0,
- immediate_attach, NULL);
+ hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, pasid,
+ 0, immediate_attach, NULL);
if (IS_ERR(hwpt_paging)) {
destroy_hwpt = ERR_CAST(hwpt_paging);
goto out_unlock;
@@ -597,7 +865,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev,
hwpt = &hwpt_paging->common;
if (!immediate_attach) {
- destroy_hwpt = (*do_attach)(idev, hwpt);
+ destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
if (IS_ERR(destroy_hwpt))
goto out_abort;
} else {
@@ -618,8 +886,9 @@ out_unlock:
return destroy_hwpt;
}
-static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
- attach_fn do_attach)
+static int iommufd_device_change_pt(struct iommufd_device *idev,
+ ioasid_t pasid,
+ u32 *pt_id, attach_fn do_attach)
{
struct iommufd_hw_pagetable *destroy_hwpt;
struct iommufd_object *pt_obj;
@@ -634,7 +903,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
struct iommufd_hw_pagetable *hwpt =
container_of(pt_obj, struct iommufd_hw_pagetable, obj);
- destroy_hwpt = (*do_attach)(idev, hwpt);
+ destroy_hwpt = (*do_attach)(idev, pasid, hwpt);
if (IS_ERR(destroy_hwpt))
goto out_put_pt_obj;
break;
@@ -643,8 +912,8 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id,
struct iommufd_ioas *ioas =
container_of(pt_obj, struct iommufd_ioas, obj);
- destroy_hwpt = iommufd_device_auto_get_domain(idev, ioas, pt_id,
- do_attach);
+ destroy_hwpt = iommufd_device_auto_get_domain(idev, pasid, ioas,
+ pt_id, do_attach);
if (IS_ERR(destroy_hwpt))
goto out_put_pt_obj;
break;
@@ -666,22 +935,26 @@ out_put_pt_obj:
}
/**
- * iommufd_device_attach - Connect a device to an iommu_domain
+ * iommufd_device_attach - Connect a device/pasid to an iommu_domain
* @idev: device to attach
+ * @pasid: pasid to attach
* @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
* Output the IOMMUFD_OBJ_HWPT_PAGING ID
*
- * This connects the device to an iommu_domain, either automatically or manually
- * selected. Once this completes the device could do DMA.
+ * This connects the device/pasid to an iommu_domain, either automatically
+ * or manually selected. Once this completes the device could do DMA with
+ * @pasid. @pasid is IOMMU_NO_PASID if this attach is for no pasid usage.
*
* The caller should return the resulting pt_id back to userspace.
* This function is undone by calling iommufd_device_detach().
*/
-int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
+int iommufd_device_attach(struct iommufd_device *idev, ioasid_t pasid,
+ u32 *pt_id)
{
int rc;
- rc = iommufd_device_change_pt(idev, pt_id, &iommufd_device_do_attach);
+ rc = iommufd_device_change_pt(idev, pasid, pt_id,
+ &iommufd_device_do_attach);
if (rc)
return rc;
@@ -695,8 +968,9 @@ int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD");
/**
- * iommufd_device_replace - Change the device's iommu_domain
+ * iommufd_device_replace - Change the device/pasid's iommu_domain
* @idev: device to change
+ * @pasid: pasid to change
* @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING
* Output the IOMMUFD_OBJ_HWPT_PAGING ID
*
@@ -707,27 +981,33 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, "IOMMUFD");
*
* If it fails then no change is made to the attachment. The iommu driver may
* implement this so there is no disruption in translation. This can only be
- * called if iommufd_device_attach() has already succeeded.
+ * called if iommufd_device_attach() has already succeeded. @pasid is
+ * IOMMU_NO_PASID for no pasid usage.
*/
-int iommufd_device_replace(struct iommufd_device *idev, u32 *pt_id)
+int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid,
+ u32 *pt_id)
{
- return iommufd_device_change_pt(idev, pt_id,
+ return iommufd_device_change_pt(idev, pasid, pt_id,
&iommufd_device_do_replace);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_replace, "IOMMUFD");
/**
- * iommufd_device_detach - Disconnect a device to an iommu_domain
+ * iommufd_device_detach - Disconnect a device/device to an iommu_domain
* @idev: device to detach
+ * @pasid: pasid to detach
*
* Undo iommufd_device_attach(). This disconnects the idev from the previously
* attached pt_id. The device returns back to a blocked DMA translation.
+ * @pasid is IOMMU_NO_PASID for no pasid usage.
*/
-void iommufd_device_detach(struct iommufd_device *idev)
+void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid)
{
struct iommufd_hw_pagetable *hwpt;
- hwpt = iommufd_hw_pagetable_detach(idev);
+ hwpt = iommufd_hw_pagetable_detach(idev, pasid);
+ if (!hwpt)
+ return;
iommufd_hw_pagetable_put(idev->ictx, hwpt);
refcount_dec(&idev->obj.users);
}
@@ -1127,7 +1407,7 @@ int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
struct io_pagetable *iopt;
struct iopt_area *area;
unsigned long last_iova;
- int rc;
+ int rc = -EINVAL;
if (!length)
return -EINVAL;
@@ -1183,7 +1463,8 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
void *data;
int rc;
- if (cmd->flags || cmd->__reserved)
+ if (cmd->flags || cmd->__reserved[0] || cmd->__reserved[1] ||
+ cmd->__reserved[2])
return -EOPNOTSUPP;
idev = iommufd_get_device(ucmd, cmd->dev_id);
@@ -1240,6 +1521,36 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING;
+ cmd->out_max_pasid_log2 = 0;
+ /*
+ * Currently, all iommu drivers enable PASID in the probe_device()
+ * op if iommu and device supports it. So the max_pasids stored in
+ * dev->iommu indicates both PASID support and enable status. A
+ * non-zero dev->iommu->max_pasids means PASID is supported and
+ * enabled. The iommufd only reports PASID capability to userspace
+ * if it's enabled.
+ */
+ if (idev->dev->iommu->max_pasids) {
+ cmd->out_max_pasid_log2 = ilog2(idev->dev->iommu->max_pasids);
+
+ if (dev_is_pci(idev->dev)) {
+ struct pci_dev *pdev = to_pci_dev(idev->dev);
+ int ctrl;
+
+ ctrl = pci_pasid_status(pdev);
+
+ WARN_ON_ONCE(ctrl < 0 ||
+ !(ctrl & PCI_PASID_CTRL_ENABLE));
+
+ if (ctrl & PCI_PASID_CTRL_EXEC)
+ cmd->out_capabilities |=
+ IOMMU_HW_CAP_PCI_PASID_EXEC;
+ if (ctrl & PCI_PASID_CTRL_PRIV)
+ cmd->out_capabilities |=
+ IOMMU_HW_CAP_PCI_PASID_PRIV;
+ }
+ }
+
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
out_free:
kfree(data);
diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c
index 2d98b04ff1cb..922cd1fe7ec2 100644
--- a/drivers/iommu/iommufd/driver.c
+++ b/drivers/iommu/iommufd/driver.c
@@ -49,5 +49,203 @@ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
}
EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, "IOMMUFD");
+/* Return -ENOENT if device is not associated to the vIOMMU */
+int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu,
+ struct device *dev, unsigned long *vdev_id)
+{
+ struct iommufd_vdevice *vdev;
+ unsigned long index;
+ int rc = -ENOENT;
+
+ if (WARN_ON_ONCE(!vdev_id))
+ return -EINVAL;
+
+ xa_lock(&viommu->vdevs);
+ xa_for_each(&viommu->vdevs, index, vdev) {
+ if (vdev->dev == dev) {
+ *vdev_id = vdev->id;
+ rc = 0;
+ break;
+ }
+ }
+ xa_unlock(&viommu->vdevs);
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_viommu_get_vdev_id, "IOMMUFD");
+
+/*
+ * Typically called in driver's threaded IRQ handler.
+ * The @type and @event_data must be defined in include/uapi/linux/iommufd.h
+ */
+int iommufd_viommu_report_event(struct iommufd_viommu *viommu,
+ enum iommu_veventq_type type, void *event_data,
+ size_t data_len)
+{
+ struct iommufd_veventq *veventq;
+ struct iommufd_vevent *vevent;
+ int rc = 0;
+
+ if (WARN_ON_ONCE(!data_len || !event_data))
+ return -EINVAL;
+
+ down_read(&viommu->veventqs_rwsem);
+
+ veventq = iommufd_viommu_find_veventq(viommu, type);
+ if (!veventq) {
+ rc = -EOPNOTSUPP;
+ goto out_unlock_veventqs;
+ }
+
+ spin_lock(&veventq->common.lock);
+ if (veventq->num_events == veventq->depth) {
+ vevent = &veventq->lost_events_header;
+ goto out_set_header;
+ }
+
+ vevent = kzalloc(struct_size(vevent, event_data, data_len), GFP_ATOMIC);
+ if (!vevent) {
+ rc = -ENOMEM;
+ vevent = &veventq->lost_events_header;
+ goto out_set_header;
+ }
+ memcpy(vevent->event_data, event_data, data_len);
+ vevent->data_len = data_len;
+ veventq->num_events++;
+
+out_set_header:
+ iommufd_vevent_handler(veventq, vevent);
+ spin_unlock(&veventq->common.lock);
+out_unlock_veventqs:
+ up_read(&viommu->veventqs_rwsem);
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_viommu_report_event, "IOMMUFD");
+
+#ifdef CONFIG_IRQ_MSI_IOMMU
+/*
+ * Get a iommufd_sw_msi_map for the msi physical address requested by the irq
+ * layer. The mapping to IOVA is global to the iommufd file descriptor, every
+ * domain that is attached to a device using the same MSI parameters will use
+ * the same IOVA.
+ */
+static struct iommufd_sw_msi_map *
+iommufd_sw_msi_get_map(struct iommufd_ctx *ictx, phys_addr_t msi_addr,
+ phys_addr_t sw_msi_start)
+{
+ struct iommufd_sw_msi_map *cur;
+ unsigned int max_pgoff = 0;
+
+ lockdep_assert_held(&ictx->sw_msi_lock);
+
+ list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) {
+ if (cur->sw_msi_start != sw_msi_start)
+ continue;
+ max_pgoff = max(max_pgoff, cur->pgoff + 1);
+ if (cur->msi_addr == msi_addr)
+ return cur;
+ }
+
+ if (ictx->sw_msi_id >=
+ BITS_PER_BYTE * sizeof_field(struct iommufd_sw_msi_maps, bitmap))
+ return ERR_PTR(-EOVERFLOW);
+
+ cur = kzalloc(sizeof(*cur), GFP_KERNEL);
+ if (!cur)
+ return ERR_PTR(-ENOMEM);
+
+ cur->sw_msi_start = sw_msi_start;
+ cur->msi_addr = msi_addr;
+ cur->pgoff = max_pgoff;
+ cur->id = ictx->sw_msi_id++;
+ list_add_tail(&cur->sw_msi_item, &ictx->sw_msi_list);
+ return cur;
+}
+
+int iommufd_sw_msi_install(struct iommufd_ctx *ictx,
+ struct iommufd_hwpt_paging *hwpt_paging,
+ struct iommufd_sw_msi_map *msi_map)
+{
+ unsigned long iova;
+
+ lockdep_assert_held(&ictx->sw_msi_lock);
+
+ iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE;
+ if (!test_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap)) {
+ int rc;
+
+ rc = iommu_map(hwpt_paging->common.domain, iova,
+ msi_map->msi_addr, PAGE_SIZE,
+ IOMMU_WRITE | IOMMU_READ | IOMMU_MMIO,
+ GFP_KERNEL_ACCOUNT);
+ if (rc)
+ return rc;
+ __set_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap);
+ }
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi_install, "IOMMUFD_INTERNAL");
+
+/*
+ * Called by the irq code if the platform translates the MSI address through the
+ * IOMMU. msi_addr is the physical address of the MSI page. iommufd will
+ * allocate a fd global iova for the physical page that is the same on all
+ * domains and devices.
+ */
+int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc,
+ phys_addr_t msi_addr)
+{
+ struct device *dev = msi_desc_to_dev(desc);
+ struct iommufd_hwpt_paging *hwpt_paging;
+ struct iommu_attach_handle *raw_handle;
+ struct iommufd_attach_handle *handle;
+ struct iommufd_sw_msi_map *msi_map;
+ struct iommufd_ctx *ictx;
+ unsigned long iova;
+ int rc;
+
+ /*
+ * It is safe to call iommu_attach_handle_get() here because the iommu
+ * core code invokes this under the group mutex which also prevents any
+ * change of the attach handle for the duration of this function.
+ */
+ iommu_group_mutex_assert(dev);
+
+ raw_handle =
+ iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0);
+ if (IS_ERR(raw_handle))
+ return 0;
+ hwpt_paging = find_hwpt_paging(domain->iommufd_hwpt);
+
+ handle = to_iommufd_handle(raw_handle);
+ /* No IOMMU_RESV_SW_MSI means no change to the msi_msg */
+ if (handle->idev->igroup->sw_msi_start == PHYS_ADDR_MAX)
+ return 0;
+
+ ictx = handle->idev->ictx;
+ guard(mutex)(&ictx->sw_msi_lock);
+ /*
+ * The input msi_addr is the exact byte offset of the MSI doorbell, we
+ * assume the caller has checked that it is contained with a MMIO region
+ * that is secure to map at PAGE_SIZE.
+ */
+ msi_map = iommufd_sw_msi_get_map(handle->idev->ictx,
+ msi_addr & PAGE_MASK,
+ handle->idev->igroup->sw_msi_start);
+ if (IS_ERR(msi_map))
+ return PTR_ERR(msi_map);
+
+ rc = iommufd_sw_msi_install(ictx, hwpt_paging, msi_map);
+ if (rc)
+ return rc;
+ __set_bit(msi_map->id, handle->idev->igroup->required_sw_msi.bitmap);
+
+ iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE;
+ msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT);
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi, "IOMMUFD");
+#endif
+
MODULE_DESCRIPTION("iommufd code shared with builtin modules");
+MODULE_IMPORT_NS("IOMMUFD_INTERNAL");
MODULE_LICENSE("GPL");
diff --git a/drivers/iommu/iommufd/eventq.c b/drivers/iommu/iommufd/eventq.c
new file mode 100644
index 000000000000..f39cf0797347
--- /dev/null
+++ b/drivers/iommu/iommufd/eventq.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2024 Intel Corporation
+ */
+#define pr_fmt(fmt) "iommufd: " fmt
+
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/iommufd.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/pci-ats.h>
+#include <linux/poll.h>
+#include <uapi/linux/iommufd.h>
+
+#include "../iommu-priv.h"
+#include "iommufd_private.h"
+
+/* IOMMUFD_OBJ_FAULT Functions */
+
+int iommufd_fault_iopf_enable(struct iommufd_device *idev)
+{
+ struct device *dev = idev->dev;
+ int ret;
+
+ /*
+ * Once we turn on PCI/PRI support for VF, the response failure code
+ * should not be forwarded to the hardware due to PRI being a shared
+ * resource between PF and VFs. There is no coordination for this
+ * shared capability. This waits for a vPRI reset to recover.
+ */
+ if (dev_is_pci(dev)) {
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (pdev->is_virtfn && pci_pri_supported(pdev))
+ return -EINVAL;
+ }
+
+ mutex_lock(&idev->iopf_lock);
+ /* Device iopf has already been on. */
+ if (++idev->iopf_enabled > 1) {
+ mutex_unlock(&idev->iopf_lock);
+ return 0;
+ }
+
+ ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
+ if (ret)
+ --idev->iopf_enabled;
+ mutex_unlock(&idev->iopf_lock);
+
+ return ret;
+}
+
+void iommufd_fault_iopf_disable(struct iommufd_device *idev)
+{
+ mutex_lock(&idev->iopf_lock);
+ if (!WARN_ON(idev->iopf_enabled == 0)) {
+ if (--idev->iopf_enabled == 0)
+ iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
+ }
+ mutex_unlock(&idev->iopf_lock);
+}
+
+void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_attach_handle *handle)
+{
+ struct iommufd_fault *fault = hwpt->fault;
+ struct iopf_group *group, *next;
+ struct list_head free_list;
+ unsigned long index;
+
+ if (!fault)
+ return;
+ INIT_LIST_HEAD(&free_list);
+
+ mutex_lock(&fault->mutex);
+ spin_lock(&fault->common.lock);
+ list_for_each_entry_safe(group, next, &fault->common.deliver, node) {
+ if (group->attach_handle != &handle->handle)
+ continue;
+ list_move(&group->node, &free_list);
+ }
+ spin_unlock(&fault->common.lock);
+
+ list_for_each_entry_safe(group, next, &free_list, node) {
+ list_del(&group->node);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+
+ xa_for_each(&fault->response, index, group) {
+ if (group->attach_handle != &handle->handle)
+ continue;
+ xa_erase(&fault->response, index);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+ mutex_unlock(&fault->mutex);
+}
+
+void iommufd_fault_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_eventq *eventq =
+ container_of(obj, struct iommufd_eventq, obj);
+ struct iommufd_fault *fault = eventq_to_fault(eventq);
+ struct iopf_group *group, *next;
+ unsigned long index;
+
+ /*
+ * The iommufd object's reference count is zero at this point.
+ * We can be confident that no other threads are currently
+ * accessing this pointer. Therefore, acquiring the mutex here
+ * is unnecessary.
+ */
+ list_for_each_entry_safe(group, next, &fault->common.deliver, node) {
+ list_del(&group->node);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+ xa_for_each(&fault->response, index, group) {
+ xa_erase(&fault->response, index);
+ iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+ iopf_free_group(group);
+ }
+ xa_destroy(&fault->response);
+ mutex_destroy(&fault->mutex);
+}
+
+static void iommufd_compose_fault_message(struct iommu_fault *fault,
+ struct iommu_hwpt_pgfault *hwpt_fault,
+ struct iommufd_device *idev,
+ u32 cookie)
+{
+ hwpt_fault->flags = fault->prm.flags;
+ hwpt_fault->dev_id = idev->obj.id;
+ hwpt_fault->pasid = fault->prm.pasid;
+ hwpt_fault->grpid = fault->prm.grpid;
+ hwpt_fault->perm = fault->prm.perm;
+ hwpt_fault->addr = fault->prm.addr;
+ hwpt_fault->length = 0;
+ hwpt_fault->cookie = cookie;
+}
+
+/* Fetch the first node out of the fault->deliver list */
+static struct iopf_group *
+iommufd_fault_deliver_fetch(struct iommufd_fault *fault)
+{
+ struct list_head *list = &fault->common.deliver;
+ struct iopf_group *group = NULL;
+
+ spin_lock(&fault->common.lock);
+ if (!list_empty(list)) {
+ group = list_first_entry(list, struct iopf_group, node);
+ list_del(&group->node);
+ }
+ spin_unlock(&fault->common.lock);
+ return group;
+}
+
+/* Restore a node back to the head of the fault->deliver list */
+static void iommufd_fault_deliver_restore(struct iommufd_fault *fault,
+ struct iopf_group *group)
+{
+ spin_lock(&fault->common.lock);
+ list_add(&group->node, &fault->common.deliver);
+ spin_unlock(&fault->common.lock);
+}
+
+static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
+ struct iommufd_eventq *eventq = filep->private_data;
+ struct iommufd_fault *fault = eventq_to_fault(eventq);
+ struct iommu_hwpt_pgfault data = {};
+ struct iommufd_device *idev;
+ struct iopf_group *group;
+ struct iopf_fault *iopf;
+ size_t done = 0;
+ int rc = 0;
+
+ if (*ppos || count % fault_size)
+ return -ESPIPE;
+
+ mutex_lock(&fault->mutex);
+ while ((group = iommufd_fault_deliver_fetch(fault))) {
+ if (done >= count ||
+ group->fault_count * fault_size > count - done) {
+ iommufd_fault_deliver_restore(fault, group);
+ break;
+ }
+
+ rc = xa_alloc(&fault->response, &group->cookie, group,
+ xa_limit_32b, GFP_KERNEL);
+ if (rc) {
+ iommufd_fault_deliver_restore(fault, group);
+ break;
+ }
+
+ idev = to_iommufd_handle(group->attach_handle)->idev;
+ list_for_each_entry(iopf, &group->faults, list) {
+ iommufd_compose_fault_message(&iopf->fault,
+ &data, idev,
+ group->cookie);
+ if (copy_to_user(buf + done, &data, fault_size)) {
+ xa_erase(&fault->response, group->cookie);
+ iommufd_fault_deliver_restore(fault, group);
+ rc = -EFAULT;
+ break;
+ }
+ done += fault_size;
+ }
+ }
+ mutex_unlock(&fault->mutex);
+
+ return done == 0 ? rc : done;
+}
+
+static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ size_t response_size = sizeof(struct iommu_hwpt_page_response);
+ struct iommufd_eventq *eventq = filep->private_data;
+ struct iommufd_fault *fault = eventq_to_fault(eventq);
+ struct iommu_hwpt_page_response response;
+ struct iopf_group *group;
+ size_t done = 0;
+ int rc = 0;
+
+ if (*ppos || count % response_size)
+ return -ESPIPE;
+
+ mutex_lock(&fault->mutex);
+ while (count > done) {
+ rc = copy_from_user(&response, buf + done, response_size);
+ if (rc)
+ break;
+
+ static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS ==
+ (int)IOMMU_PAGE_RESP_SUCCESS);
+ static_assert((int)IOMMUFD_PAGE_RESP_INVALID ==
+ (int)IOMMU_PAGE_RESP_INVALID);
+ if (response.code != IOMMUFD_PAGE_RESP_SUCCESS &&
+ response.code != IOMMUFD_PAGE_RESP_INVALID) {
+ rc = -EINVAL;
+ break;
+ }
+
+ group = xa_erase(&fault->response, response.cookie);
+ if (!group) {
+ rc = -EINVAL;
+ break;
+ }
+
+ iopf_group_response(group, response.code);
+ iopf_free_group(group);
+ done += response_size;
+ }
+ mutex_unlock(&fault->mutex);
+
+ return done == 0 ? rc : done;
+}
+
+/* IOMMUFD_OBJ_VEVENTQ Functions */
+
+void iommufd_veventq_abort(struct iommufd_object *obj)
+{
+ struct iommufd_eventq *eventq =
+ container_of(obj, struct iommufd_eventq, obj);
+ struct iommufd_veventq *veventq = eventq_to_veventq(eventq);
+ struct iommufd_viommu *viommu = veventq->viommu;
+ struct iommufd_vevent *cur, *next;
+
+ lockdep_assert_held_write(&viommu->veventqs_rwsem);
+
+ list_for_each_entry_safe(cur, next, &eventq->deliver, node) {
+ list_del(&cur->node);
+ if (cur != &veventq->lost_events_header)
+ kfree(cur);
+ }
+
+ refcount_dec(&viommu->obj.users);
+ list_del(&veventq->node);
+}
+
+void iommufd_veventq_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_veventq *veventq = eventq_to_veventq(
+ container_of(obj, struct iommufd_eventq, obj));
+
+ down_write(&veventq->viommu->veventqs_rwsem);
+ iommufd_veventq_abort(obj);
+ up_write(&veventq->viommu->veventqs_rwsem);
+}
+
+static struct iommufd_vevent *
+iommufd_veventq_deliver_fetch(struct iommufd_veventq *veventq)
+{
+ struct iommufd_eventq *eventq = &veventq->common;
+ struct list_head *list = &eventq->deliver;
+ struct iommufd_vevent *vevent = NULL;
+
+ spin_lock(&eventq->lock);
+ if (!list_empty(list)) {
+ struct iommufd_vevent *next;
+
+ next = list_first_entry(list, struct iommufd_vevent, node);
+ /* Make a copy of the lost_events_header for copy_to_user */
+ if (next == &veventq->lost_events_header) {
+ vevent = kzalloc(sizeof(*vevent), GFP_ATOMIC);
+ if (!vevent)
+ goto out_unlock;
+ }
+ list_del(&next->node);
+ if (vevent)
+ memcpy(vevent, next, sizeof(*vevent));
+ else
+ vevent = next;
+ }
+out_unlock:
+ spin_unlock(&eventq->lock);
+ return vevent;
+}
+
+static void iommufd_veventq_deliver_restore(struct iommufd_veventq *veventq,
+ struct iommufd_vevent *vevent)
+{
+ struct iommufd_eventq *eventq = &veventq->common;
+ struct list_head *list = &eventq->deliver;
+
+ spin_lock(&eventq->lock);
+ if (vevent_for_lost_events_header(vevent)) {
+ /* Remove the copy of the lost_events_header */
+ kfree(vevent);
+ vevent = NULL;
+ /* An empty list needs the lost_events_header back */
+ if (list_empty(list))
+ vevent = &veventq->lost_events_header;
+ }
+ if (vevent)
+ list_add(&vevent->node, list);
+ spin_unlock(&eventq->lock);
+}
+
+static ssize_t iommufd_veventq_fops_read(struct file *filep, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct iommufd_eventq *eventq = filep->private_data;
+ struct iommufd_veventq *veventq = eventq_to_veventq(eventq);
+ struct iommufd_vevent_header *hdr;
+ struct iommufd_vevent *cur;
+ size_t done = 0;
+ int rc = 0;
+
+ if (*ppos)
+ return -ESPIPE;
+
+ while ((cur = iommufd_veventq_deliver_fetch(veventq))) {
+ /* Validate the remaining bytes against the header size */
+ if (done >= count || sizeof(*hdr) > count - done) {
+ iommufd_veventq_deliver_restore(veventq, cur);
+ break;
+ }
+ hdr = &cur->header;
+
+ /* If being a normal vEVENT, validate against the full size */
+ if (!vevent_for_lost_events_header(cur) &&
+ sizeof(hdr) + cur->data_len > count - done) {
+ iommufd_veventq_deliver_restore(veventq, cur);
+ break;
+ }
+
+ if (copy_to_user(buf + done, hdr, sizeof(*hdr))) {
+ iommufd_veventq_deliver_restore(veventq, cur);
+ rc = -EFAULT;
+ break;
+ }
+ done += sizeof(*hdr);
+
+ if (cur->data_len &&
+ copy_to_user(buf + done, cur->event_data, cur->data_len)) {
+ iommufd_veventq_deliver_restore(veventq, cur);
+ rc = -EFAULT;
+ break;
+ }
+ spin_lock(&eventq->lock);
+ if (!vevent_for_lost_events_header(cur))
+ veventq->num_events--;
+ spin_unlock(&eventq->lock);
+ done += cur->data_len;
+ kfree(cur);
+ }
+
+ return done == 0 ? rc : done;
+}
+
+/* Common Event Queue Functions */
+
+static __poll_t iommufd_eventq_fops_poll(struct file *filep,
+ struct poll_table_struct *wait)
+{
+ struct iommufd_eventq *eventq = filep->private_data;
+ __poll_t pollflags = 0;
+
+ if (eventq->obj.type == IOMMUFD_OBJ_FAULT)
+ pollflags |= EPOLLOUT;
+
+ poll_wait(filep, &eventq->wait_queue, wait);
+ spin_lock(&eventq->lock);
+ if (!list_empty(&eventq->deliver))
+ pollflags |= EPOLLIN | EPOLLRDNORM;
+ spin_unlock(&eventq->lock);
+
+ return pollflags;
+}
+
+static int iommufd_eventq_fops_release(struct inode *inode, struct file *filep)
+{
+ struct iommufd_eventq *eventq = filep->private_data;
+
+ refcount_dec(&eventq->obj.users);
+ iommufd_ctx_put(eventq->ictx);
+ return 0;
+}
+
+#define INIT_EVENTQ_FOPS(read_op, write_op) \
+ ((const struct file_operations){ \
+ .owner = THIS_MODULE, \
+ .open = nonseekable_open, \
+ .read = read_op, \
+ .write = write_op, \
+ .poll = iommufd_eventq_fops_poll, \
+ .release = iommufd_eventq_fops_release, \
+ })
+
+static int iommufd_eventq_init(struct iommufd_eventq *eventq, char *name,
+ struct iommufd_ctx *ictx,
+ const struct file_operations *fops)
+{
+ struct file *filep;
+ int fdno;
+
+ spin_lock_init(&eventq->lock);
+ INIT_LIST_HEAD(&eventq->deliver);
+ init_waitqueue_head(&eventq->wait_queue);
+
+ filep = anon_inode_getfile(name, fops, eventq, O_RDWR);
+ if (IS_ERR(filep))
+ return PTR_ERR(filep);
+
+ eventq->ictx = ictx;
+ iommufd_ctx_get(eventq->ictx);
+ eventq->filep = filep;
+ refcount_inc(&eventq->obj.users);
+
+ fdno = get_unused_fd_flags(O_CLOEXEC);
+ if (fdno < 0)
+ fput(filep);
+ return fdno;
+}
+
+static const struct file_operations iommufd_fault_fops =
+ INIT_EVENTQ_FOPS(iommufd_fault_fops_read, iommufd_fault_fops_write);
+
+int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_fault_alloc *cmd = ucmd->cmd;
+ struct iommufd_fault *fault;
+ int fdno;
+ int rc;
+
+ if (cmd->flags)
+ return -EOPNOTSUPP;
+
+ fault = __iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT,
+ common.obj);
+ if (IS_ERR(fault))
+ return PTR_ERR(fault);
+
+ xa_init_flags(&fault->response, XA_FLAGS_ALLOC1);
+ mutex_init(&fault->mutex);
+
+ fdno = iommufd_eventq_init(&fault->common, "[iommufd-pgfault]",
+ ucmd->ictx, &iommufd_fault_fops);
+ if (fdno < 0) {
+ rc = fdno;
+ goto out_abort;
+ }
+
+ cmd->out_fault_id = fault->common.obj.id;
+ cmd->out_fault_fd = fdno;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ goto out_put_fdno;
+ iommufd_object_finalize(ucmd->ictx, &fault->common.obj);
+
+ fd_install(fdno, fault->common.filep);
+
+ return 0;
+out_put_fdno:
+ put_unused_fd(fdno);
+ fput(fault->common.filep);
+out_abort:
+ iommufd_object_abort_and_destroy(ucmd->ictx, &fault->common.obj);
+
+ return rc;
+}
+
+int iommufd_fault_iopf_handler(struct iopf_group *group)
+{
+ struct iommufd_hw_pagetable *hwpt;
+ struct iommufd_fault *fault;
+
+ hwpt = group->attach_handle->domain->iommufd_hwpt;
+ fault = hwpt->fault;
+
+ spin_lock(&fault->common.lock);
+ list_add_tail(&group->node, &fault->common.deliver);
+ spin_unlock(&fault->common.lock);
+
+ wake_up_interruptible(&fault->common.wait_queue);
+
+ return 0;
+}
+
+static const struct file_operations iommufd_veventq_fops =
+ INIT_EVENTQ_FOPS(iommufd_veventq_fops_read, NULL);
+
+int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_veventq_alloc *cmd = ucmd->cmd;
+ struct iommufd_veventq *veventq;
+ struct iommufd_viommu *viommu;
+ int fdno;
+ int rc;
+
+ if (cmd->flags || cmd->__reserved ||
+ cmd->type == IOMMU_VEVENTQ_TYPE_DEFAULT)
+ return -EOPNOTSUPP;
+ if (!cmd->veventq_depth)
+ return -EINVAL;
+
+ viommu = iommufd_get_viommu(ucmd, cmd->viommu_id);
+ if (IS_ERR(viommu))
+ return PTR_ERR(viommu);
+
+ down_write(&viommu->veventqs_rwsem);
+
+ if (iommufd_viommu_find_veventq(viommu, cmd->type)) {
+ rc = -EEXIST;
+ goto out_unlock_veventqs;
+ }
+
+ veventq = __iommufd_object_alloc(ucmd->ictx, veventq,
+ IOMMUFD_OBJ_VEVENTQ, common.obj);
+ if (IS_ERR(veventq)) {
+ rc = PTR_ERR(veventq);
+ goto out_unlock_veventqs;
+ }
+
+ veventq->type = cmd->type;
+ veventq->viommu = viommu;
+ refcount_inc(&viommu->obj.users);
+ veventq->depth = cmd->veventq_depth;
+ list_add_tail(&veventq->node, &viommu->veventqs);
+ veventq->lost_events_header.header.flags =
+ IOMMU_VEVENTQ_FLAG_LOST_EVENTS;
+
+ fdno = iommufd_eventq_init(&veventq->common, "[iommufd-viommu-event]",
+ ucmd->ictx, &iommufd_veventq_fops);
+ if (fdno < 0) {
+ rc = fdno;
+ goto out_abort;
+ }
+
+ cmd->out_veventq_id = veventq->common.obj.id;
+ cmd->out_veventq_fd = fdno;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ goto out_put_fdno;
+
+ iommufd_object_finalize(ucmd->ictx, &veventq->common.obj);
+ fd_install(fdno, veventq->common.filep);
+ goto out_unlock_veventqs;
+
+out_put_fdno:
+ put_unused_fd(fdno);
+ fput(veventq->common.filep);
+out_abort:
+ iommufd_object_abort_and_destroy(ucmd->ictx, &veventq->common.obj);
+out_unlock_veventqs:
+ up_write(&viommu->veventqs_rwsem);
+ iommufd_put_object(ucmd->ictx, &viommu->obj);
+ return rc;
+}
diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
deleted file mode 100644
index d9a937450e55..000000000000
--- a/drivers/iommu/iommufd/fault.c
+++ /dev/null
@@ -1,462 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright (C) 2024 Intel Corporation
- */
-#define pr_fmt(fmt) "iommufd: " fmt
-
-#include <linux/anon_inodes.h>
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/iommufd.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/pci.h>
-#include <linux/pci-ats.h>
-#include <linux/poll.h>
-#include <uapi/linux/iommufd.h>
-
-#include "../iommu-priv.h"
-#include "iommufd_private.h"
-
-static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
-{
- struct device *dev = idev->dev;
- int ret;
-
- /*
- * Once we turn on PCI/PRI support for VF, the response failure code
- * should not be forwarded to the hardware due to PRI being a shared
- * resource between PF and VFs. There is no coordination for this
- * shared capability. This waits for a vPRI reset to recover.
- */
- if (dev_is_pci(dev)) {
- struct pci_dev *pdev = to_pci_dev(dev);
-
- if (pdev->is_virtfn && pci_pri_supported(pdev))
- return -EINVAL;
- }
-
- mutex_lock(&idev->iopf_lock);
- /* Device iopf has already been on. */
- if (++idev->iopf_enabled > 1) {
- mutex_unlock(&idev->iopf_lock);
- return 0;
- }
-
- ret = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_IOPF);
- if (ret)
- --idev->iopf_enabled;
- mutex_unlock(&idev->iopf_lock);
-
- return ret;
-}
-
-static void iommufd_fault_iopf_disable(struct iommufd_device *idev)
-{
- mutex_lock(&idev->iopf_lock);
- if (!WARN_ON(idev->iopf_enabled == 0)) {
- if (--idev->iopf_enabled == 0)
- iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
- }
- mutex_unlock(&idev->iopf_lock);
-}
-
-static int __fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
-{
- struct iommufd_attach_handle *handle;
- int ret;
-
- handle = kzalloc(sizeof(*handle), GFP_KERNEL);
- if (!handle)
- return -ENOMEM;
-
- handle->idev = idev;
- ret = iommu_attach_group_handle(hwpt->domain, idev->igroup->group,
- &handle->handle);
- if (ret)
- kfree(handle);
-
- return ret;
-}
-
-int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
-{
- int ret;
-
- if (!hwpt->fault)
- return -EINVAL;
-
- ret = iommufd_fault_iopf_enable(idev);
- if (ret)
- return ret;
-
- ret = __fault_domain_attach_dev(hwpt, idev);
- if (ret)
- iommufd_fault_iopf_disable(idev);
-
- return ret;
-}
-
-static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_attach_handle *handle)
-{
- struct iommufd_fault *fault = hwpt->fault;
- struct iopf_group *group, *next;
- struct list_head free_list;
- unsigned long index;
-
- if (!fault)
- return;
- INIT_LIST_HEAD(&free_list);
-
- mutex_lock(&fault->mutex);
- spin_lock(&fault->lock);
- list_for_each_entry_safe(group, next, &fault->deliver, node) {
- if (group->attach_handle != &handle->handle)
- continue;
- list_move(&group->node, &free_list);
- }
- spin_unlock(&fault->lock);
-
- list_for_each_entry_safe(group, next, &free_list, node) {
- list_del(&group->node);
- iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
- iopf_free_group(group);
- }
-
- xa_for_each(&fault->response, index, group) {
- if (group->attach_handle != &handle->handle)
- continue;
- xa_erase(&fault->response, index);
- iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
- iopf_free_group(group);
- }
- mutex_unlock(&fault->mutex);
-}
-
-static struct iommufd_attach_handle *
-iommufd_device_get_attach_handle(struct iommufd_device *idev)
-{
- struct iommu_attach_handle *handle;
-
- handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0);
- if (IS_ERR(handle))
- return NULL;
-
- return to_iommufd_handle(handle);
-}
-
-void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
-{
- struct iommufd_attach_handle *handle;
-
- handle = iommufd_device_get_attach_handle(idev);
- iommu_detach_group_handle(hwpt->domain, idev->igroup->group);
- iommufd_auto_response_faults(hwpt, handle);
- iommufd_fault_iopf_disable(idev);
- kfree(handle);
-}
-
-static int __fault_domain_replace_dev(struct iommufd_device *idev,
- struct iommufd_hw_pagetable *hwpt,
- struct iommufd_hw_pagetable *old)
-{
- struct iommufd_attach_handle *handle, *curr = NULL;
- int ret;
-
- if (old->fault)
- curr = iommufd_device_get_attach_handle(idev);
-
- if (hwpt->fault) {
- handle = kzalloc(sizeof(*handle), GFP_KERNEL);
- if (!handle)
- return -ENOMEM;
-
- handle->idev = idev;
- ret = iommu_replace_group_handle(idev->igroup->group,
- hwpt->domain, &handle->handle);
- } else {
- ret = iommu_replace_group_handle(idev->igroup->group,
- hwpt->domain, NULL);
- }
-
- if (!ret && curr) {
- iommufd_auto_response_faults(old, curr);
- kfree(curr);
- }
-
- return ret;
-}
-
-int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
- struct iommufd_hw_pagetable *hwpt,
- struct iommufd_hw_pagetable *old)
-{
- bool iopf_off = !hwpt->fault && old->fault;
- bool iopf_on = hwpt->fault && !old->fault;
- int ret;
-
- if (iopf_on) {
- ret = iommufd_fault_iopf_enable(idev);
- if (ret)
- return ret;
- }
-
- ret = __fault_domain_replace_dev(idev, hwpt, old);
- if (ret) {
- if (iopf_on)
- iommufd_fault_iopf_disable(idev);
- return ret;
- }
-
- if (iopf_off)
- iommufd_fault_iopf_disable(idev);
-
- return 0;
-}
-
-void iommufd_fault_destroy(struct iommufd_object *obj)
-{
- struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj);
- struct iopf_group *group, *next;
- unsigned long index;
-
- /*
- * The iommufd object's reference count is zero at this point.
- * We can be confident that no other threads are currently
- * accessing this pointer. Therefore, acquiring the mutex here
- * is unnecessary.
- */
- list_for_each_entry_safe(group, next, &fault->deliver, node) {
- list_del(&group->node);
- iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
- iopf_free_group(group);
- }
- xa_for_each(&fault->response, index, group) {
- xa_erase(&fault->response, index);
- iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
- iopf_free_group(group);
- }
- xa_destroy(&fault->response);
- mutex_destroy(&fault->mutex);
-}
-
-static void iommufd_compose_fault_message(struct iommu_fault *fault,
- struct iommu_hwpt_pgfault *hwpt_fault,
- struct iommufd_device *idev,
- u32 cookie)
-{
- hwpt_fault->flags = fault->prm.flags;
- hwpt_fault->dev_id = idev->obj.id;
- hwpt_fault->pasid = fault->prm.pasid;
- hwpt_fault->grpid = fault->prm.grpid;
- hwpt_fault->perm = fault->prm.perm;
- hwpt_fault->addr = fault->prm.addr;
- hwpt_fault->length = 0;
- hwpt_fault->cookie = cookie;
-}
-
-static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
- size_t count, loff_t *ppos)
-{
- size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
- struct iommufd_fault *fault = filep->private_data;
- struct iommu_hwpt_pgfault data = {};
- struct iommufd_device *idev;
- struct iopf_group *group;
- struct iopf_fault *iopf;
- size_t done = 0;
- int rc = 0;
-
- if (*ppos || count % fault_size)
- return -ESPIPE;
-
- mutex_lock(&fault->mutex);
- while ((group = iommufd_fault_deliver_fetch(fault))) {
- if (done >= count ||
- group->fault_count * fault_size > count - done) {
- iommufd_fault_deliver_restore(fault, group);
- break;
- }
-
- rc = xa_alloc(&fault->response, &group->cookie, group,
- xa_limit_32b, GFP_KERNEL);
- if (rc) {
- iommufd_fault_deliver_restore(fault, group);
- break;
- }
-
- idev = to_iommufd_handle(group->attach_handle)->idev;
- list_for_each_entry(iopf, &group->faults, list) {
- iommufd_compose_fault_message(&iopf->fault,
- &data, idev,
- group->cookie);
- if (copy_to_user(buf + done, &data, fault_size)) {
- xa_erase(&fault->response, group->cookie);
- iommufd_fault_deliver_restore(fault, group);
- rc = -EFAULT;
- break;
- }
- done += fault_size;
- }
- }
- mutex_unlock(&fault->mutex);
-
- return done == 0 ? rc : done;
-}
-
-static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- size_t response_size = sizeof(struct iommu_hwpt_page_response);
- struct iommufd_fault *fault = filep->private_data;
- struct iommu_hwpt_page_response response;
- struct iopf_group *group;
- size_t done = 0;
- int rc = 0;
-
- if (*ppos || count % response_size)
- return -ESPIPE;
-
- mutex_lock(&fault->mutex);
- while (count > done) {
- rc = copy_from_user(&response, buf + done, response_size);
- if (rc)
- break;
-
- static_assert((int)IOMMUFD_PAGE_RESP_SUCCESS ==
- (int)IOMMU_PAGE_RESP_SUCCESS);
- static_assert((int)IOMMUFD_PAGE_RESP_INVALID ==
- (int)IOMMU_PAGE_RESP_INVALID);
- if (response.code != IOMMUFD_PAGE_RESP_SUCCESS &&
- response.code != IOMMUFD_PAGE_RESP_INVALID) {
- rc = -EINVAL;
- break;
- }
-
- group = xa_erase(&fault->response, response.cookie);
- if (!group) {
- rc = -EINVAL;
- break;
- }
-
- iopf_group_response(group, response.code);
- iopf_free_group(group);
- done += response_size;
- }
- mutex_unlock(&fault->mutex);
-
- return done == 0 ? rc : done;
-}
-
-static __poll_t iommufd_fault_fops_poll(struct file *filep,
- struct poll_table_struct *wait)
-{
- struct iommufd_fault *fault = filep->private_data;
- __poll_t pollflags = EPOLLOUT;
-
- poll_wait(filep, &fault->wait_queue, wait);
- spin_lock(&fault->lock);
- if (!list_empty(&fault->deliver))
- pollflags |= EPOLLIN | EPOLLRDNORM;
- spin_unlock(&fault->lock);
-
- return pollflags;
-}
-
-static int iommufd_fault_fops_release(struct inode *inode, struct file *filep)
-{
- struct iommufd_fault *fault = filep->private_data;
-
- refcount_dec(&fault->obj.users);
- iommufd_ctx_put(fault->ictx);
- return 0;
-}
-
-static const struct file_operations iommufd_fault_fops = {
- .owner = THIS_MODULE,
- .open = nonseekable_open,
- .read = iommufd_fault_fops_read,
- .write = iommufd_fault_fops_write,
- .poll = iommufd_fault_fops_poll,
- .release = iommufd_fault_fops_release,
-};
-
-int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
-{
- struct iommu_fault_alloc *cmd = ucmd->cmd;
- struct iommufd_fault *fault;
- struct file *filep;
- int fdno;
- int rc;
-
- if (cmd->flags)
- return -EOPNOTSUPP;
-
- fault = iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT);
- if (IS_ERR(fault))
- return PTR_ERR(fault);
-
- fault->ictx = ucmd->ictx;
- INIT_LIST_HEAD(&fault->deliver);
- xa_init_flags(&fault->response, XA_FLAGS_ALLOC1);
- mutex_init(&fault->mutex);
- spin_lock_init(&fault->lock);
- init_waitqueue_head(&fault->wait_queue);
-
- filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops,
- fault, O_RDWR);
- if (IS_ERR(filep)) {
- rc = PTR_ERR(filep);
- goto out_abort;
- }
-
- refcount_inc(&fault->obj.users);
- iommufd_ctx_get(fault->ictx);
- fault->filep = filep;
-
- fdno = get_unused_fd_flags(O_CLOEXEC);
- if (fdno < 0) {
- rc = fdno;
- goto out_fput;
- }
-
- cmd->out_fault_id = fault->obj.id;
- cmd->out_fault_fd = fdno;
-
- rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
- if (rc)
- goto out_put_fdno;
- iommufd_object_finalize(ucmd->ictx, &fault->obj);
-
- fd_install(fdno, fault->filep);
-
- return 0;
-out_put_fdno:
- put_unused_fd(fdno);
-out_fput:
- fput(filep);
-out_abort:
- iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj);
-
- return rc;
-}
-
-int iommufd_fault_iopf_handler(struct iopf_group *group)
-{
- struct iommufd_hw_pagetable *hwpt;
- struct iommufd_fault *fault;
-
- hwpt = group->attach_handle->domain->fault_data;
- fault = hwpt->fault;
-
- spin_lock(&fault->lock);
- list_add_tail(&group->node, &fault->deliver);
- spin_unlock(&fault->lock);
-
- wake_up_interruptible(&fault->wait_queue);
-
- return 0;
-}
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index ce03c3804651..487779470261 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -14,7 +14,7 @@ static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt)
iommu_domain_free(hwpt->domain);
if (hwpt->fault)
- refcount_dec(&hwpt->fault->obj.users);
+ refcount_dec(&hwpt->fault->common.obj.users);
}
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj)
@@ -90,6 +90,7 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging)
* @ictx: iommufd context
* @ioas: IOAS to associate the domain with
* @idev: Device to get an iommu_domain for
+ * @pasid: PASID to get an iommu_domain for
* @flags: Flags from userspace
* @immediate_attach: True if idev should be attached to the hwpt
* @user_data: The user provided driver specific data describing the domain to
@@ -105,13 +106,14 @@ iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging)
*/
struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
- struct iommufd_device *idev, u32 flags,
- bool immediate_attach,
+ struct iommufd_device *idev, ioasid_t pasid,
+ u32 flags, bool immediate_attach,
const struct iommu_user_data *user_data)
{
const u32 valid_flags = IOMMU_HWPT_ALLOC_NEST_PARENT |
IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
- IOMMU_HWPT_FAULT_ID_VALID;
+ IOMMU_HWPT_FAULT_ID_VALID |
+ IOMMU_HWPT_ALLOC_PASID;
const struct iommu_ops *ops = dev_iommu_ops(idev->dev);
struct iommufd_hwpt_paging *hwpt_paging;
struct iommufd_hw_pagetable *hwpt;
@@ -126,12 +128,16 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) &&
!device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING))
return ERR_PTR(-EOPNOTSUPP);
+ if ((flags & IOMMU_HWPT_FAULT_ID_VALID) &&
+ (flags & IOMMU_HWPT_ALLOC_NEST_PARENT))
+ return ERR_PTR(-EOPNOTSUPP);
hwpt_paging = __iommufd_object_alloc(
ictx, hwpt_paging, IOMMUFD_OBJ_HWPT_PAGING, common.obj);
if (IS_ERR(hwpt_paging))
return ERR_CAST(hwpt_paging);
hwpt = &hwpt_paging->common;
+ hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID;
INIT_LIST_HEAD(&hwpt_paging->hwpt_item);
/* Pairs with iommufd_hw_pagetable_destroy() */
@@ -140,8 +146,8 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
hwpt_paging->nest_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT;
if (ops->domain_alloc_paging_flags) {
- hwpt->domain = ops->domain_alloc_paging_flags(idev->dev, flags,
- user_data);
+ hwpt->domain = ops->domain_alloc_paging_flags(idev->dev,
+ flags & ~IOMMU_HWPT_FAULT_ID_VALID, user_data);
if (IS_ERR(hwpt->domain)) {
rc = PTR_ERR(hwpt->domain);
hwpt->domain = NULL;
@@ -156,6 +162,8 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
goto out_abort;
}
}
+ hwpt->domain->iommufd_hwpt = hwpt;
+ hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD;
/*
* Set the coherency mode before we do iopt_table_add_domain() as some
@@ -184,7 +192,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
* sequence. Once those drivers are fixed this should be removed.
*/
if (immediate_attach) {
- rc = iommufd_hw_pagetable_attach(hwpt, idev);
+ rc = iommufd_hw_pagetable_attach(hwpt, idev, pasid);
if (rc)
goto out_abort;
}
@@ -197,7 +205,7 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
out_detach:
if (immediate_attach)
- iommufd_hw_pagetable_detach(idev);
+ iommufd_hw_pagetable_detach(idev, pasid);
out_abort:
iommufd_object_abort_and_destroy(ictx, &hwpt->obj);
return ERR_PTR(rc);
@@ -226,7 +234,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
struct iommufd_hw_pagetable *hwpt;
int rc;
- if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) ||
+ if ((flags & ~(IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID)) ||
!user_data->len || !ops->domain_alloc_nested)
return ERR_PTR(-EOPNOTSUPP);
if (parent->auto_domain || !parent->nest_parent ||
@@ -238,6 +246,7 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
if (IS_ERR(hwpt_nested))
return ERR_CAST(hwpt_nested);
hwpt = &hwpt_nested->common;
+ hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID;
refcount_inc(&parent->common.obj.users);
hwpt_nested->parent = parent;
@@ -251,6 +260,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
goto out_abort;
}
hwpt->domain->owner = ops;
+ hwpt->domain->iommufd_hwpt = hwpt;
+ hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD;
if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) {
rc = -EINVAL;
@@ -280,6 +291,8 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags,
struct iommufd_hw_pagetable *hwpt;
int rc;
+ if (flags & ~(IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID))
+ return ERR_PTR(-EOPNOTSUPP);
if (!user_data->len)
return ERR_PTR(-EOPNOTSUPP);
if (!viommu->ops || !viommu->ops->alloc_domain_nested)
@@ -290,19 +303,24 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags,
if (IS_ERR(hwpt_nested))
return ERR_CAST(hwpt_nested);
hwpt = &hwpt_nested->common;
+ hwpt->pasid_compat = flags & IOMMU_HWPT_ALLOC_PASID;
hwpt_nested->viommu = viommu;
refcount_inc(&viommu->obj.users);
hwpt_nested->parent = viommu->hwpt;
hwpt->domain =
- viommu->ops->alloc_domain_nested(viommu, flags, user_data);
+ viommu->ops->alloc_domain_nested(viommu,
+ flags & ~IOMMU_HWPT_FAULT_ID_VALID,
+ user_data);
if (IS_ERR(hwpt->domain)) {
rc = PTR_ERR(hwpt->domain);
hwpt->domain = NULL;
goto out_abort;
}
+ hwpt->domain->iommufd_hwpt = hwpt;
hwpt->domain->owner = viommu->iommu_dev->ops;
+ hwpt->domain->cookie_type = IOMMU_COOKIE_IOMMUFD;
if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) {
rc = -EINVAL;
@@ -351,8 +369,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
ioas = container_of(pt_obj, struct iommufd_ioas, obj);
mutex_lock(&ioas->mutex);
hwpt_paging = iommufd_hwpt_paging_alloc(
- ucmd->ictx, ioas, idev, cmd->flags, false,
- user_data.len ? &user_data : NULL);
+ ucmd->ictx, ioas, idev, IOMMU_NO_PASID, cmd->flags,
+ false, user_data.len ? &user_data : NULL);
if (IS_ERR(hwpt_paging)) {
rc = PTR_ERR(hwpt_paging);
goto out_unlock;
@@ -402,9 +420,8 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
}
hwpt->fault = fault;
hwpt->domain->iopf_handler = iommufd_fault_iopf_handler;
- hwpt->domain->fault_data = hwpt;
- refcount_inc(&fault->obj.users);
- iommufd_put_object(ucmd->ictx, &fault->obj);
+ refcount_inc(&fault->common.obj.users);
+ iommufd_put_object(ucmd->ictx, &fault->common.obj);
}
cmd->out_hwpt_id = hwpt->obj.id;
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 0b1bafc7fd99..80e8c76d25f2 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -19,6 +19,25 @@ struct iommu_group;
struct iommu_option;
struct iommufd_device;
+struct iommufd_sw_msi_map {
+ struct list_head sw_msi_item;
+ phys_addr_t sw_msi_start;
+ phys_addr_t msi_addr;
+ unsigned int pgoff;
+ unsigned int id;
+};
+
+/* Bitmap of struct iommufd_sw_msi_map::id */
+struct iommufd_sw_msi_maps {
+ DECLARE_BITMAP(bitmap, 64);
+};
+
+#ifdef CONFIG_IRQ_MSI_IOMMU
+int iommufd_sw_msi_install(struct iommufd_ctx *ictx,
+ struct iommufd_hwpt_paging *hwpt_paging,
+ struct iommufd_sw_msi_map *msi_map);
+#endif
+
struct iommufd_ctx {
struct file *file;
struct xarray objects;
@@ -26,6 +45,10 @@ struct iommufd_ctx {
wait_queue_head_t destroy_wait;
struct rw_semaphore ioas_creation_lock;
+ struct mutex sw_msi_lock;
+ struct list_head sw_msi_list;
+ unsigned int sw_msi_id;
+
u8 account_mode;
/* Compatibility with VFIO no iommu */
u8 no_iommu_mode;
@@ -276,6 +299,7 @@ struct iommufd_hw_pagetable {
struct iommufd_object obj;
struct iommu_domain *domain;
struct iommufd_fault *fault;
+ bool pasid_compat : 1;
};
struct iommufd_hwpt_paging {
@@ -283,10 +307,10 @@ struct iommufd_hwpt_paging {
struct iommufd_ioas *ioas;
bool auto_domain : 1;
bool enforce_cache_coherency : 1;
- bool msi_cookie : 1;
bool nest_parent : 1;
/* Head at iommufd_ioas::hwpt_list */
struct list_head hwpt_item;
+ struct iommufd_sw_msi_maps present_sw_msi;
};
struct iommufd_hwpt_nested {
@@ -346,13 +370,13 @@ int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);
struct iommufd_hwpt_paging *
iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
- struct iommufd_device *idev, u32 flags,
- bool immediate_attach,
+ struct iommufd_device *idev, ioasid_t pasid,
+ u32 flags, bool immediate_attach,
const struct iommu_user_data *user_data);
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev);
+ struct iommufd_device *idev, ioasid_t pasid);
struct iommufd_hw_pagetable *
-iommufd_hw_pagetable_detach(struct iommufd_device *idev);
+iommufd_hw_pagetable_detach(struct iommufd_device *idev, ioasid_t pasid);
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj);
void iommufd_hwpt_paging_abort(struct iommufd_object *obj);
void iommufd_hwpt_nested_destroy(struct iommufd_object *obj);
@@ -376,13 +400,15 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx,
refcount_dec(&hwpt->obj.users);
}
+struct iommufd_attach;
+
struct iommufd_group {
struct kref ref;
struct mutex lock;
struct iommufd_ctx *ictx;
struct iommu_group *group;
- struct iommufd_hw_pagetable *hwpt;
- struct list_head device_list;
+ struct xarray pasid_attach;
+ struct iommufd_sw_msi_maps required_sw_msi;
phys_addr_t sw_msi_start;
};
@@ -433,49 +459,17 @@ void iopt_remove_access(struct io_pagetable *iopt,
u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj);
-/*
- * An iommufd_fault object represents an interface to deliver I/O page faults
- * to the user space. These objects are created/destroyed by the user space and
- * associated with hardware page table objects during page-table allocation.
- */
-struct iommufd_fault {
+struct iommufd_eventq {
struct iommufd_object obj;
struct iommufd_ctx *ictx;
struct file *filep;
spinlock_t lock; /* protects the deliver list */
struct list_head deliver;
- struct mutex mutex; /* serializes response flows */
- struct xarray response;
struct wait_queue_head wait_queue;
};
-/* Fetch the first node out of the fault->deliver list */
-static inline struct iopf_group *
-iommufd_fault_deliver_fetch(struct iommufd_fault *fault)
-{
- struct list_head *list = &fault->deliver;
- struct iopf_group *group = NULL;
-
- spin_lock(&fault->lock);
- if (!list_empty(list)) {
- group = list_first_entry(list, struct iopf_group, node);
- list_del(&group->node);
- }
- spin_unlock(&fault->lock);
- return group;
-}
-
-/* Restore a node back to the head of the fault->deliver list */
-static inline void iommufd_fault_deliver_restore(struct iommufd_fault *fault,
- struct iopf_group *group)
-{
- spin_lock(&fault->lock);
- list_add(&group->node, &fault->deliver);
- spin_unlock(&fault->lock);
-}
-
struct iommufd_attach_handle {
struct iommu_attach_handle handle;
struct iommufd_device *idev;
@@ -484,54 +478,106 @@ struct iommufd_attach_handle {
/* Convert an iommu attach handle to iommufd handle. */
#define to_iommufd_handle(hdl) container_of(hdl, struct iommufd_attach_handle, handle)
+/*
+ * An iommufd_fault object represents an interface to deliver I/O page faults
+ * to the user space. These objects are created/destroyed by the user space and
+ * associated with hardware page table objects during page-table allocation.
+ */
+struct iommufd_fault {
+ struct iommufd_eventq common;
+ struct mutex mutex; /* serializes response flows */
+ struct xarray response;
+};
+
+static inline struct iommufd_fault *
+eventq_to_fault(struct iommufd_eventq *eventq)
+{
+ return container_of(eventq, struct iommufd_fault, common);
+}
+
static inline struct iommufd_fault *
iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id)
{
return container_of(iommufd_get_object(ucmd->ictx, id,
IOMMUFD_OBJ_FAULT),
- struct iommufd_fault, obj);
+ struct iommufd_fault, common.obj);
}
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
void iommufd_fault_destroy(struct iommufd_object *obj);
int iommufd_fault_iopf_handler(struct iopf_group *group);
-int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev);
-void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev);
-int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
- struct iommufd_hw_pagetable *hwpt,
- struct iommufd_hw_pagetable *old);
+int iommufd_fault_iopf_enable(struct iommufd_device *idev);
+void iommufd_fault_iopf_disable(struct iommufd_device *idev);
+void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_attach_handle *handle);
+
+/* An iommufd_vevent represents a vIOMMU event in an iommufd_veventq */
+struct iommufd_vevent {
+ struct iommufd_vevent_header header;
+ struct list_head node; /* for iommufd_eventq::deliver */
+ ssize_t data_len;
+ u64 event_data[] __counted_by(data_len);
+};
+
+#define vevent_for_lost_events_header(vevent) \
+ (vevent->header.flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS)
-static inline int iommufd_hwpt_attach_device(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
-{
- if (hwpt->fault)
- return iommufd_fault_domain_attach_dev(hwpt, idev);
+/*
+ * An iommufd_veventq object represents an interface to deliver vIOMMU events to
+ * the user space. It is created/destroyed by the user space and associated with
+ * a vIOMMU object during the allocations.
+ */
+struct iommufd_veventq {
+ struct iommufd_eventq common;
+ struct iommufd_viommu *viommu;
+ struct list_head node; /* for iommufd_viommu::veventqs */
+ struct iommufd_vevent lost_events_header;
- return iommu_attach_group(hwpt->domain, idev->igroup->group);
-}
+ unsigned int type;
+ unsigned int depth;
+
+ /* Use common.lock for protection */
+ u32 num_events;
+ u32 sequence;
+};
-static inline void iommufd_hwpt_detach_device(struct iommufd_hw_pagetable *hwpt,
- struct iommufd_device *idev)
+static inline struct iommufd_veventq *
+eventq_to_veventq(struct iommufd_eventq *eventq)
{
- if (hwpt->fault) {
- iommufd_fault_domain_detach_dev(hwpt, idev);
- return;
- }
+ return container_of(eventq, struct iommufd_veventq, common);
+}
- iommu_detach_group(hwpt->domain, idev->igroup->group);
+static inline struct iommufd_veventq *
+iommufd_get_veventq(struct iommufd_ucmd *ucmd, u32 id)
+{
+ return container_of(iommufd_get_object(ucmd->ictx, id,
+ IOMMUFD_OBJ_VEVENTQ),
+ struct iommufd_veventq, common.obj);
}
-static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
- struct iommufd_hw_pagetable *hwpt,
- struct iommufd_hw_pagetable *old)
+int iommufd_veventq_alloc(struct iommufd_ucmd *ucmd);
+void iommufd_veventq_destroy(struct iommufd_object *obj);
+void iommufd_veventq_abort(struct iommufd_object *obj);
+
+static inline void iommufd_vevent_handler(struct iommufd_veventq *veventq,
+ struct iommufd_vevent *vevent)
{
- if (old->fault || hwpt->fault)
- return iommufd_fault_domain_replace_dev(idev, hwpt, old);
+ struct iommufd_eventq *eventq = &veventq->common;
+
+ lockdep_assert_held(&eventq->lock);
- return iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
+ /*
+ * Remove the lost_events_header and add the new node at the same time.
+ * Note the new node can be lost_events_header, for a sequence update.
+ */
+ if (list_is_last(&veventq->lost_events_header.node, &eventq->deliver))
+ list_del(&veventq->lost_events_header.node);
+ list_add_tail(&vevent->node, &eventq->deliver);
+ vevent->header.sequence = veventq->sequence;
+ veventq->sequence = (veventq->sequence + 1) & INT_MAX;
+
+ wake_up_interruptible(&eventq->wait_queue);
}
static inline struct iommufd_viommu *
@@ -542,6 +588,20 @@ iommufd_get_viommu(struct iommufd_ucmd *ucmd, u32 id)
struct iommufd_viommu, obj);
}
+static inline struct iommufd_veventq *
+iommufd_viommu_find_veventq(struct iommufd_viommu *viommu, u32 type)
+{
+ struct iommufd_veventq *veventq, *next;
+
+ lockdep_assert_held(&viommu->veventqs_rwsem);
+
+ list_for_each_entry_safe(veventq, next, &viommu->veventqs, node) {
+ if (veventq->type == type)
+ return veventq;
+ }
+ return NULL;
+}
+
int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
void iommufd_viommu_destroy(struct iommufd_object *obj);
int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd);
diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h
index a6b7a163f636..1cd7e8394129 100644
--- a/drivers/iommu/iommufd/iommufd_test.h
+++ b/drivers/iommu/iommufd/iommufd_test.h
@@ -24,6 +24,11 @@ enum {
IOMMU_TEST_OP_MD_CHECK_IOTLB,
IOMMU_TEST_OP_TRIGGER_IOPF,
IOMMU_TEST_OP_DEV_CHECK_CACHE,
+ IOMMU_TEST_OP_TRIGGER_VEVENT,
+ IOMMU_TEST_OP_PASID_ATTACH,
+ IOMMU_TEST_OP_PASID_REPLACE,
+ IOMMU_TEST_OP_PASID_DETACH,
+ IOMMU_TEST_OP_PASID_CHECK_HWPT,
};
enum {
@@ -48,6 +53,7 @@ enum {
enum {
MOCK_FLAGS_DEVICE_NO_DIRTY = 1 << 0,
MOCK_FLAGS_DEVICE_HUGE_IOVA = 1 << 1,
+ MOCK_FLAGS_DEVICE_PASID = 1 << 2,
};
enum {
@@ -60,6 +66,9 @@ enum {
MOCK_DEV_CACHE_NUM = 4,
};
+/* Reserved for special pasid replace test */
+#define IOMMU_TEST_PASID_RESERVED 1024
+
struct iommu_test_cmd {
__u32 size;
__u32 op;
@@ -145,11 +154,36 @@ struct iommu_test_cmd {
__u32 id;
__u32 cache;
} check_dev_cache;
+ struct {
+ __u32 dev_id;
+ } trigger_vevent;
+ struct {
+ __u32 pasid;
+ __u32 pt_id;
+ /* @id is stdev_id */
+ } pasid_attach;
+ struct {
+ __u32 pasid;
+ __u32 pt_id;
+ /* @id is stdev_id */
+ } pasid_replace;
+ struct {
+ __u32 pasid;
+ /* @id is stdev_id */
+ } pasid_detach;
+ struct {
+ __u32 pasid;
+ __u32 hwpt_id;
+ /* @id is stdev_id */
+ } pasid_check;
};
__u32 last;
};
#define IOMMU_TEST_CMD _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE + 32)
+/* Mock device/iommu PASID width */
+#define MOCK_PASID_WIDTH 20
+
/* Mock structs for IOMMU_DEVICE_GET_HW_INFO ioctl */
#define IOMMU_HW_INFO_TYPE_SELFTEST 0xfeedbeef
#define IOMMU_HW_INFO_SELFTEST_REGVAL 0xdeadbeef
@@ -212,4 +246,10 @@ struct iommu_viommu_invalidate_selftest {
__u32 cache_id;
};
+#define IOMMU_VEVENTQ_TYPE_SELFTEST 0xbeefbeef
+
+struct iommu_viommu_event_selftest {
+ __u32 virt_id;
+};
+
#endif
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index d898d05be690..3df468f64e7d 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -227,6 +227,8 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
xa_init(&ictx->groups);
ictx->file = filp;
init_waitqueue_head(&ictx->destroy_wait);
+ mutex_init(&ictx->sw_msi_lock);
+ INIT_LIST_HEAD(&ictx->sw_msi_list);
filp->private_data = ictx;
return 0;
}
@@ -234,6 +236,8 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
static int iommufd_fops_release(struct inode *inode, struct file *filp)
{
struct iommufd_ctx *ictx = filp->private_data;
+ struct iommufd_sw_msi_map *next;
+ struct iommufd_sw_msi_map *cur;
struct iommufd_object *obj;
/*
@@ -262,6 +266,11 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp)
break;
}
WARN_ON(!xa_empty(&ictx->groups));
+
+ mutex_destroy(&ictx->sw_msi_lock);
+ list_for_each_entry_safe(cur, next, &ictx->sw_msi_list, sw_msi_item)
+ kfree(cur);
+
kfree(ictx);
return 0;
}
@@ -307,9 +316,10 @@ union ucmd_buffer {
struct iommu_ioas_map map;
struct iommu_ioas_unmap unmap;
struct iommu_option option;
+ struct iommu_vdevice_alloc vdev;
+ struct iommu_veventq_alloc veventq;
struct iommu_vfio_ioas vfio_ioas;
struct iommu_viommu_alloc viommu;
- struct iommu_vdevice_alloc vdev;
#ifdef CONFIG_IOMMUFD_TEST
struct iommu_test_cmd test;
#endif
@@ -333,8 +343,8 @@ struct iommufd_ioctl_op {
}
static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id),
- IOCTL_OP(IOMMU_FAULT_QUEUE_ALLOC, iommufd_fault_alloc, struct iommu_fault_alloc,
- out_fault_fd),
+ IOCTL_OP(IOMMU_FAULT_QUEUE_ALLOC, iommufd_fault_alloc,
+ struct iommu_fault_alloc, out_fault_fd),
IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info,
__reserved),
IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
@@ -355,20 +365,20 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
src_iova),
IOCTL_OP(IOMMU_IOAS_IOVA_RANGES, iommufd_ioas_iova_ranges,
struct iommu_ioas_iova_ranges, out_iova_alignment),
- IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map,
- iova),
+ IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map, iova),
IOCTL_OP(IOMMU_IOAS_MAP_FILE, iommufd_ioas_map_file,
struct iommu_ioas_map_file, iova),
IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap,
length),
- IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option,
- val64),
+ IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64),
+ IOCTL_OP(IOMMU_VDEVICE_ALLOC, iommufd_vdevice_alloc_ioctl,
+ struct iommu_vdevice_alloc, virt_id),
+ IOCTL_OP(IOMMU_VEVENTQ_ALLOC, iommufd_veventq_alloc,
+ struct iommu_veventq_alloc, out_veventq_fd),
IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas,
__reserved),
IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl,
struct iommu_viommu_alloc, out_viommu_id),
- IOCTL_OP(IOMMU_VDEVICE_ALLOC, iommufd_vdevice_alloc_ioctl,
- struct iommu_vdevice_alloc, virt_id),
#ifdef CONFIG_IOMMUFD_TEST
IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last),
#endif
@@ -490,8 +500,8 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
[IOMMUFD_OBJ_DEVICE] = {
.destroy = iommufd_device_destroy,
},
- [IOMMUFD_OBJ_IOAS] = {
- .destroy = iommufd_ioas_destroy,
+ [IOMMUFD_OBJ_FAULT] = {
+ .destroy = iommufd_fault_destroy,
},
[IOMMUFD_OBJ_HWPT_PAGING] = {
.destroy = iommufd_hwpt_paging_destroy,
@@ -501,15 +511,19 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
.destroy = iommufd_hwpt_nested_destroy,
.abort = iommufd_hwpt_nested_abort,
},
- [IOMMUFD_OBJ_FAULT] = {
- .destroy = iommufd_fault_destroy,
- },
- [IOMMUFD_OBJ_VIOMMU] = {
- .destroy = iommufd_viommu_destroy,
+ [IOMMUFD_OBJ_IOAS] = {
+ .destroy = iommufd_ioas_destroy,
},
[IOMMUFD_OBJ_VDEVICE] = {
.destroy = iommufd_vdevice_destroy,
},
+ [IOMMUFD_OBJ_VEVENTQ] = {
+ .destroy = iommufd_veventq_destroy,
+ .abort = iommufd_veventq_abort,
+ },
+ [IOMMUFD_OBJ_VIOMMU] = {
+ .destroy = iommufd_viommu_destroy,
+ },
#ifdef CONFIG_IOMMUFD_TEST
[IOMMUFD_OBJ_SELFTEST] = {
.destroy = iommufd_selftest_destroy,
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index a0de6d6d4e68..18d9a216eb30 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -161,9 +161,13 @@ enum selftest_obj_type {
struct mock_dev {
struct device dev;
+ struct mock_viommu *viommu;
+ struct rw_semaphore viommu_rwsem;
unsigned long flags;
+ unsigned long vdev_id;
int id;
u32 cache[MOCK_DEV_CACHE_NUM];
+ atomic_t pasid_1024_fake_error;
};
static inline struct mock_dev *to_mock_dev(struct device *dev)
@@ -193,15 +197,71 @@ static int mock_domain_nop_attach(struct iommu_domain *domain,
struct device *dev)
{
struct mock_dev *mdev = to_mock_dev(dev);
+ struct mock_viommu *new_viommu = NULL;
+ unsigned long vdev_id = 0;
+ int rc;
if (domain->dirty_ops && (mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY))
return -EINVAL;
+ iommu_group_mutex_assert(dev);
+ if (domain->type == IOMMU_DOMAIN_NESTED) {
+ new_viommu = to_mock_nested(domain)->mock_viommu;
+ if (new_viommu) {
+ rc = iommufd_viommu_get_vdev_id(&new_viommu->core, dev,
+ &vdev_id);
+ if (rc)
+ return rc;
+ }
+ }
+ if (new_viommu != mdev->viommu) {
+ down_write(&mdev->viommu_rwsem);
+ mdev->viommu = new_viommu;
+ mdev->vdev_id = vdev_id;
+ up_write(&mdev->viommu_rwsem);
+ }
+
+ return 0;
+}
+
+static int mock_domain_set_dev_pasid_nop(struct iommu_domain *domain,
+ struct device *dev, ioasid_t pasid,
+ struct iommu_domain *old)
+{
+ struct mock_dev *mdev = to_mock_dev(dev);
+
+ /*
+ * Per the first attach with pasid 1024, set the
+ * mdev->pasid_1024_fake_error. Hence the second call of this op
+ * can fake an error to validate the error path of the core. This
+ * is helpful to test the case in which the iommu core needs to
+ * rollback to the old domain due to driver failure. e.g. replace.
+ * User should be careful about the third call of this op, it shall
+ * succeed since the mdev->pasid_1024_fake_error is cleared in the
+ * second call.
+ */
+ if (pasid == 1024) {
+ if (domain->type == IOMMU_DOMAIN_BLOCKED) {
+ atomic_set(&mdev->pasid_1024_fake_error, 0);
+ } else if (atomic_read(&mdev->pasid_1024_fake_error)) {
+ /*
+ * Clear the flag, and fake an error to fail the
+ * replacement.
+ */
+ atomic_set(&mdev->pasid_1024_fake_error, 0);
+ return -ENOMEM;
+ } else {
+ /* Set the flag to fake an error in next call */
+ atomic_set(&mdev->pasid_1024_fake_error, 1);
+ }
+ }
+
return 0;
}
static const struct iommu_domain_ops mock_blocking_ops = {
.attach_dev = mock_domain_nop_attach,
+ .set_dev_pasid = mock_domain_set_dev_pasid_nop
};
static struct iommu_domain mock_blocking_domain = {
@@ -311,25 +371,6 @@ static const struct iommu_dirty_ops dirty_ops = {
.read_and_clear_dirty = mock_domain_read_and_clear_dirty,
};
-static struct iommu_domain *mock_domain_alloc_paging(struct device *dev)
-{
- struct mock_dev *mdev = to_mock_dev(dev);
- struct mock_iommu_domain *mock;
-
- mock = kzalloc(sizeof(*mock), GFP_KERNEL);
- if (!mock)
- return NULL;
- mock->domain.geometry.aperture_start = MOCK_APERTURE_START;
- mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST;
- mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE;
- if (dev && mdev->flags & MOCK_FLAGS_DEVICE_HUGE_IOVA)
- mock->domain.pgsize_bitmap |= MOCK_HUGE_PAGE_SIZE;
- mock->domain.ops = mock_ops.default_domain_ops;
- mock->domain.type = IOMMU_DOMAIN_UNMANAGED;
- xa_init(&mock->pfns);
- return &mock->domain;
-}
-
static struct mock_iommu_domain_nested *
__mock_domain_alloc_nested(const struct iommu_user_data *user_data)
{
@@ -362,7 +403,7 @@ mock_domain_alloc_nested(struct device *dev, struct iommu_domain *parent,
struct mock_iommu_domain_nested *mock_nested;
struct mock_iommu_domain *mock_parent;
- if (flags)
+ if (flags & ~IOMMU_HWPT_ALLOC_PASID)
return ERR_PTR(-EOPNOTSUPP);
if (!parent || parent->ops != mock_ops.default_domain_ops)
return ERR_PTR(-EINVAL);
@@ -384,22 +425,32 @@ mock_domain_alloc_paging_flags(struct device *dev, u32 flags,
{
bool has_dirty_flag = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
- IOMMU_HWPT_ALLOC_NEST_PARENT;
- bool no_dirty_ops = to_mock_dev(dev)->flags &
- MOCK_FLAGS_DEVICE_NO_DIRTY;
- struct iommu_domain *domain;
+ IOMMU_HWPT_ALLOC_NEST_PARENT |
+ IOMMU_HWPT_ALLOC_PASID;
+ struct mock_dev *mdev = to_mock_dev(dev);
+ bool no_dirty_ops = mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY;
+ struct mock_iommu_domain *mock;
if (user_data)
return ERR_PTR(-EOPNOTSUPP);
if ((flags & ~PAGING_FLAGS) || (has_dirty_flag && no_dirty_ops))
return ERR_PTR(-EOPNOTSUPP);
- domain = mock_domain_alloc_paging(dev);
- if (!domain)
+ mock = kzalloc(sizeof(*mock), GFP_KERNEL);
+ if (!mock)
return ERR_PTR(-ENOMEM);
+ mock->domain.geometry.aperture_start = MOCK_APERTURE_START;
+ mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST;
+ mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE;
+ if (dev && mdev->flags & MOCK_FLAGS_DEVICE_HUGE_IOVA)
+ mock->domain.pgsize_bitmap |= MOCK_HUGE_PAGE_SIZE;
+ mock->domain.ops = mock_ops.default_domain_ops;
+ mock->domain.type = IOMMU_DOMAIN_UNMANAGED;
+ xa_init(&mock->pfns);
+
if (has_dirty_flag)
- domain->dirty_ops = &dirty_ops;
- return domain;
+ mock->domain.dirty_ops = &dirty_ops;
+ return &mock->domain;
}
static void mock_domain_free(struct iommu_domain *domain)
@@ -595,7 +646,7 @@ mock_viommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
struct mock_viommu *mock_viommu = to_mock_viommu(viommu);
struct mock_iommu_domain_nested *mock_nested;
- if (flags & ~IOMMU_HWPT_FAULT_ID_VALID)
+ if (flags & ~IOMMU_HWPT_ALLOC_PASID)
return ERR_PTR(-EOPNOTSUPP);
mock_nested = __mock_domain_alloc_nested(user_data);
@@ -713,7 +764,6 @@ static const struct iommu_ops mock_ops = {
.owner = THIS_MODULE,
.pgsize_bitmap = MOCK_IO_PAGE_SIZE,
.hw_info = mock_domain_hw_info,
- .domain_alloc_paging = mock_domain_alloc_paging,
.domain_alloc_paging_flags = mock_domain_alloc_paging_flags,
.domain_alloc_nested = mock_domain_alloc_nested,
.capable = mock_domain_capable,
@@ -731,6 +781,7 @@ static const struct iommu_ops mock_ops = {
.map_pages = mock_domain_map_pages,
.unmap_pages = mock_domain_unmap_pages,
.iova_to_phys = mock_domain_iova_to_phys,
+ .set_dev_pasid = mock_domain_set_dev_pasid_nop,
},
};
@@ -791,6 +842,7 @@ static struct iommu_domain_ops domain_nested_ops = {
.free = mock_domain_free_nested,
.attach_dev = mock_domain_nop_attach,
.cache_invalidate_user = mock_domain_cache_invalidate_user,
+ .set_dev_pasid = mock_domain_set_dev_pasid_nop,
};
static inline struct iommufd_hw_pagetable *
@@ -850,17 +902,24 @@ static void mock_dev_release(struct device *dev)
static struct mock_dev *mock_dev_create(unsigned long dev_flags)
{
+ struct property_entry prop[] = {
+ PROPERTY_ENTRY_U32("pasid-num-bits", 0),
+ {},
+ };
+ const u32 valid_flags = MOCK_FLAGS_DEVICE_NO_DIRTY |
+ MOCK_FLAGS_DEVICE_HUGE_IOVA |
+ MOCK_FLAGS_DEVICE_PASID;
struct mock_dev *mdev;
int rc, i;
- if (dev_flags &
- ~(MOCK_FLAGS_DEVICE_NO_DIRTY | MOCK_FLAGS_DEVICE_HUGE_IOVA))
+ if (dev_flags & ~valid_flags)
return ERR_PTR(-EINVAL);
mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
if (!mdev)
return ERR_PTR(-ENOMEM);
+ init_rwsem(&mdev->viommu_rwsem);
device_initialize(&mdev->dev);
mdev->flags = dev_flags;
mdev->dev.release = mock_dev_release;
@@ -877,6 +936,15 @@ static struct mock_dev *mock_dev_create(unsigned long dev_flags)
if (rc)
goto err_put;
+ if (dev_flags & MOCK_FLAGS_DEVICE_PASID)
+ prop[0] = PROPERTY_ENTRY_U32("pasid-num-bits", MOCK_PASID_WIDTH);
+
+ rc = device_create_managed_software_node(&mdev->dev, prop, NULL);
+ if (rc) {
+ dev_err(&mdev->dev, "add pasid-num-bits property failed, rc: %d", rc);
+ goto err_put;
+ }
+
rc = device_add(&mdev->dev);
if (rc)
goto err_put;
@@ -932,7 +1000,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd,
}
sobj->idev.idev = idev;
- rc = iommufd_device_attach(idev, &pt_id);
+ rc = iommufd_device_attach(idev, IOMMU_NO_PASID, &pt_id);
if (rc)
goto out_unbind;
@@ -947,7 +1015,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd,
return 0;
out_detach:
- iommufd_device_detach(idev);
+ iommufd_device_detach(idev, IOMMU_NO_PASID);
out_unbind:
iommufd_device_unbind(idev);
out_mdev:
@@ -957,39 +1025,49 @@ out_sobj:
return rc;
}
-/* Replace the mock domain with a manually allocated hw_pagetable */
-static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd,
- unsigned int device_id, u32 pt_id,
- struct iommu_test_cmd *cmd)
+static struct selftest_obj *
+iommufd_test_get_selftest_obj(struct iommufd_ctx *ictx, u32 id)
{
struct iommufd_object *dev_obj;
struct selftest_obj *sobj;
- int rc;
/*
* Prefer to use the OBJ_SELFTEST because the destroy_rwsem will ensure
* it doesn't race with detach, which is not allowed.
*/
- dev_obj =
- iommufd_get_object(ucmd->ictx, device_id, IOMMUFD_OBJ_SELFTEST);
+ dev_obj = iommufd_get_object(ictx, id, IOMMUFD_OBJ_SELFTEST);
if (IS_ERR(dev_obj))
- return PTR_ERR(dev_obj);
+ return ERR_CAST(dev_obj);
sobj = to_selftest_obj(dev_obj);
if (sobj->type != TYPE_IDEV) {
- rc = -EINVAL;
- goto out_dev_obj;
+ iommufd_put_object(ictx, dev_obj);
+ return ERR_PTR(-EINVAL);
}
+ return sobj;
+}
+
+/* Replace the mock domain with a manually allocated hw_pagetable */
+static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd,
+ unsigned int device_id, u32 pt_id,
+ struct iommu_test_cmd *cmd)
+{
+ struct selftest_obj *sobj;
+ int rc;
- rc = iommufd_device_replace(sobj->idev.idev, &pt_id);
+ sobj = iommufd_test_get_selftest_obj(ucmd->ictx, device_id);
+ if (IS_ERR(sobj))
+ return PTR_ERR(sobj);
+
+ rc = iommufd_device_replace(sobj->idev.idev, IOMMU_NO_PASID, &pt_id);
if (rc)
- goto out_dev_obj;
+ goto out_sobj;
cmd->mock_domain_replace.pt_id = pt_id;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
-out_dev_obj:
- iommufd_put_object(ucmd->ictx, dev_obj);
+out_sobj:
+ iommufd_put_object(ucmd->ictx, &sobj->obj);
return rc;
}
@@ -1608,13 +1686,166 @@ static int iommufd_test_trigger_iopf(struct iommufd_ucmd *ucmd,
return 0;
}
+static int iommufd_test_trigger_vevent(struct iommufd_ucmd *ucmd,
+ struct iommu_test_cmd *cmd)
+{
+ struct iommu_viommu_event_selftest test = {};
+ struct iommufd_device *idev;
+ struct mock_dev *mdev;
+ int rc = -ENOENT;
+
+ idev = iommufd_get_device(ucmd, cmd->trigger_vevent.dev_id);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+ mdev = to_mock_dev(idev->dev);
+
+ down_read(&mdev->viommu_rwsem);
+ if (!mdev->viommu || !mdev->vdev_id)
+ goto out_unlock;
+
+ test.virt_id = mdev->vdev_id;
+ rc = iommufd_viommu_report_event(&mdev->viommu->core,
+ IOMMU_VEVENTQ_TYPE_SELFTEST, &test,
+ sizeof(test));
+out_unlock:
+ up_read(&mdev->viommu_rwsem);
+ iommufd_put_object(ucmd->ictx, &idev->obj);
+
+ return rc;
+}
+
+static inline struct iommufd_hw_pagetable *
+iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id)
+{
+ struct iommufd_object *pt_obj;
+
+ pt_obj = iommufd_get_object(ucmd->ictx, id, IOMMUFD_OBJ_ANY);
+ if (IS_ERR(pt_obj))
+ return ERR_CAST(pt_obj);
+
+ if (pt_obj->type != IOMMUFD_OBJ_HWPT_NESTED &&
+ pt_obj->type != IOMMUFD_OBJ_HWPT_PAGING) {
+ iommufd_put_object(ucmd->ictx, pt_obj);
+ return ERR_PTR(-EINVAL);
+ }
+
+ return container_of(pt_obj, struct iommufd_hw_pagetable, obj);
+}
+
+static int iommufd_test_pasid_check_hwpt(struct iommufd_ucmd *ucmd,
+ struct iommu_test_cmd *cmd)
+{
+ u32 hwpt_id = cmd->pasid_check.hwpt_id;
+ struct iommu_domain *attached_domain;
+ struct iommu_attach_handle *handle;
+ struct iommufd_hw_pagetable *hwpt;
+ struct selftest_obj *sobj;
+ struct mock_dev *mdev;
+ int rc = 0;
+
+ sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id);
+ if (IS_ERR(sobj))
+ return PTR_ERR(sobj);
+
+ mdev = sobj->idev.mock_dev;
+
+ handle = iommu_attach_handle_get(mdev->dev.iommu_group,
+ cmd->pasid_check.pasid, 0);
+ if (IS_ERR(handle))
+ attached_domain = NULL;
+ else
+ attached_domain = handle->domain;
+
+ /* hwpt_id == 0 means to check if pasid is detached */
+ if (!hwpt_id) {
+ if (attached_domain)
+ rc = -EINVAL;
+ goto out_sobj;
+ }
+
+ hwpt = iommufd_get_hwpt(ucmd, hwpt_id);
+ if (IS_ERR(hwpt)) {
+ rc = PTR_ERR(hwpt);
+ goto out_sobj;
+ }
+
+ if (attached_domain != hwpt->domain)
+ rc = -EINVAL;
+
+ iommufd_put_object(ucmd->ictx, &hwpt->obj);
+out_sobj:
+ iommufd_put_object(ucmd->ictx, &sobj->obj);
+ return rc;
+}
+
+static int iommufd_test_pasid_attach(struct iommufd_ucmd *ucmd,
+ struct iommu_test_cmd *cmd)
+{
+ struct selftest_obj *sobj;
+ int rc;
+
+ sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id);
+ if (IS_ERR(sobj))
+ return PTR_ERR(sobj);
+
+ rc = iommufd_device_attach(sobj->idev.idev, cmd->pasid_attach.pasid,
+ &cmd->pasid_attach.pt_id);
+ if (rc)
+ goto out_sobj;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ iommufd_device_detach(sobj->idev.idev,
+ cmd->pasid_attach.pasid);
+
+out_sobj:
+ iommufd_put_object(ucmd->ictx, &sobj->obj);
+ return rc;
+}
+
+static int iommufd_test_pasid_replace(struct iommufd_ucmd *ucmd,
+ struct iommu_test_cmd *cmd)
+{
+ struct selftest_obj *sobj;
+ int rc;
+
+ sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id);
+ if (IS_ERR(sobj))
+ return PTR_ERR(sobj);
+
+ rc = iommufd_device_replace(sobj->idev.idev, cmd->pasid_attach.pasid,
+ &cmd->pasid_attach.pt_id);
+ if (rc)
+ goto out_sobj;
+
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+
+out_sobj:
+ iommufd_put_object(ucmd->ictx, &sobj->obj);
+ return rc;
+}
+
+static int iommufd_test_pasid_detach(struct iommufd_ucmd *ucmd,
+ struct iommu_test_cmd *cmd)
+{
+ struct selftest_obj *sobj;
+
+ sobj = iommufd_test_get_selftest_obj(ucmd->ictx, cmd->id);
+ if (IS_ERR(sobj))
+ return PTR_ERR(sobj);
+
+ iommufd_device_detach(sobj->idev.idev, cmd->pasid_detach.pasid);
+ iommufd_put_object(ucmd->ictx, &sobj->obj);
+ return 0;
+}
+
void iommufd_selftest_destroy(struct iommufd_object *obj)
{
struct selftest_obj *sobj = to_selftest_obj(obj);
switch (sobj->type) {
case TYPE_IDEV:
- iommufd_device_detach(sobj->idev.idev);
+ iommufd_device_detach(sobj->idev.idev, IOMMU_NO_PASID);
iommufd_device_unbind(sobj->idev.idev);
mock_dev_destroy(sobj->idev.mock_dev);
break;
@@ -1689,6 +1920,16 @@ int iommufd_test(struct iommufd_ucmd *ucmd)
cmd->dirty.flags);
case IOMMU_TEST_OP_TRIGGER_IOPF:
return iommufd_test_trigger_iopf(ucmd, cmd);
+ case IOMMU_TEST_OP_TRIGGER_VEVENT:
+ return iommufd_test_trigger_vevent(ucmd, cmd);
+ case IOMMU_TEST_OP_PASID_ATTACH:
+ return iommufd_test_pasid_attach(ucmd, cmd);
+ case IOMMU_TEST_OP_PASID_REPLACE:
+ return iommufd_test_pasid_replace(ucmd, cmd);
+ case IOMMU_TEST_OP_PASID_DETACH:
+ return iommufd_test_pasid_detach(ucmd, cmd);
+ case IOMMU_TEST_OP_PASID_CHECK_HWPT:
+ return iommufd_test_pasid_check_hwpt(ucmd, cmd);
default:
return -EOPNOTSUPP;
}
@@ -1735,6 +1976,7 @@ int __init iommufd_test_init(void)
init_completion(&mock_iommu.complete);
mock_iommu_iopf_queue = iopf_queue_alloc("mock-iopfq");
+ mock_iommu.iommu_dev.max_pasids = (1 << MOCK_PASID_WIDTH);
return 0;
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 69b88e8c7c26..01df2b985f02 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -59,6 +59,8 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
viommu->ictx = ucmd->ictx;
viommu->hwpt = hwpt_paging;
refcount_inc(&viommu->hwpt->common.obj.users);
+ INIT_LIST_HEAD(&viommu->veventqs);
+ init_rwsem(&viommu->veventqs_rwsem);
/*
* It is the most likely case that a physical IOMMU is unpluggable. A
* pluggable IOMMU instance (if exists) is responsible for refcounting
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 074daf1aac4e..90341b24a811 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1081,31 +1081,25 @@ static int ipmmu_probe(struct platform_device *pdev)
}
}
+ platform_set_drvdata(pdev, mmu);
/*
* Register the IPMMU to the IOMMU subsystem in the following cases:
* - R-Car Gen2 IPMMU (all devices registered)
* - R-Car Gen3 IPMMU (leaf devices only - skip root IPMMU-MM device)
*/
- if (!mmu->features->has_cache_leaf_nodes || !ipmmu_is_root(mmu)) {
- ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL,
- dev_name(&pdev->dev));
- if (ret)
- return ret;
-
- ret = iommu_device_register(&mmu->iommu, &ipmmu_ops, &pdev->dev);
- if (ret)
- return ret;
- }
+ if (mmu->features->has_cache_leaf_nodes && ipmmu_is_root(mmu))
+ return 0;
- /*
- * We can't create the ARM mapping here as it requires the bus to have
- * an IOMMU, which only happens when bus_set_iommu() is called in
- * ipmmu_init() after the probe function returns.
- */
+ ret = iommu_device_sysfs_add(&mmu->iommu, &pdev->dev, NULL, "%s",
+ dev_name(&pdev->dev));
+ if (ret)
+ return ret;
- platform_set_drvdata(pdev, mmu);
+ ret = iommu_device_register(&mmu->iommu, &ipmmu_ops, &pdev->dev);
+ if (ret)
+ iommu_device_sysfs_remove(&mmu->iommu);
- return 0;
+ return ret;
}
static void ipmmu_remove(struct platform_device *pdev)
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index ce40f0a419ea..2769e4544038 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -725,47 +725,32 @@ static int msm_iommu_probe(struct platform_device *pdev)
iommu->dev = &pdev->dev;
INIT_LIST_HEAD(&iommu->ctx_list);
- iommu->pclk = devm_clk_get(iommu->dev, "smmu_pclk");
+ iommu->pclk = devm_clk_get_prepared(iommu->dev, "smmu_pclk");
if (IS_ERR(iommu->pclk))
return dev_err_probe(iommu->dev, PTR_ERR(iommu->pclk),
"could not get smmu_pclk\n");
- ret = clk_prepare(iommu->pclk);
- if (ret)
- return dev_err_probe(iommu->dev, ret,
- "could not prepare smmu_pclk\n");
-
- iommu->clk = devm_clk_get(iommu->dev, "iommu_clk");
- if (IS_ERR(iommu->clk)) {
- clk_unprepare(iommu->pclk);
+ iommu->clk = devm_clk_get_prepared(iommu->dev, "iommu_clk");
+ if (IS_ERR(iommu->clk))
return dev_err_probe(iommu->dev, PTR_ERR(iommu->clk),
"could not get iommu_clk\n");
- }
-
- ret = clk_prepare(iommu->clk);
- if (ret) {
- clk_unprepare(iommu->pclk);
- return dev_err_probe(iommu->dev, ret, "could not prepare iommu_clk\n");
- }
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
iommu->base = devm_ioremap_resource(iommu->dev, r);
if (IS_ERR(iommu->base)) {
ret = dev_err_probe(iommu->dev, PTR_ERR(iommu->base), "could not get iommu base\n");
- goto fail;
+ return ret;
}
ioaddr = r->start;
iommu->irq = platform_get_irq(pdev, 0);
- if (iommu->irq < 0) {
- ret = -ENODEV;
- goto fail;
- }
+ if (iommu->irq < 0)
+ return -ENODEV;
ret = of_property_read_u32(iommu->dev->of_node, "qcom,ncb", &val);
if (ret) {
dev_err(iommu->dev, "could not get ncb\n");
- goto fail;
+ return ret;
}
iommu->ncb = val;
@@ -780,8 +765,7 @@ static int msm_iommu_probe(struct platform_device *pdev)
if (!par) {
pr_err("Invalid PAR value detected\n");
- ret = -ENODEV;
- goto fail;
+ return -ENODEV;
}
ret = devm_request_threaded_irq(iommu->dev, iommu->irq, NULL,
@@ -791,7 +775,7 @@ static int msm_iommu_probe(struct platform_device *pdev)
iommu);
if (ret) {
pr_err("Request IRQ %d failed with ret=%d\n", iommu->irq, ret);
- goto fail;
+ return ret;
}
list_add(&iommu->dev_node, &qcom_iommu_devices);
@@ -800,23 +784,19 @@ static int msm_iommu_probe(struct platform_device *pdev)
"msm-smmu.%pa", &ioaddr);
if (ret) {
pr_err("Could not add msm-smmu at %pa to sysfs\n", &ioaddr);
- goto fail;
+ return ret;
}
ret = iommu_device_register(&iommu->iommu, &msm_iommu_ops, &pdev->dev);
if (ret) {
pr_err("Could not register msm-smmu at %pa\n", &ioaddr);
- goto fail;
+ return ret;
}
pr_info("device mapped at %p, irq %d with %d ctx banks\n",
iommu->base, iommu->irq, iommu->ncb);
return ret;
-fail:
- clk_unprepare(iommu->clk);
- clk_unprepare(iommu->pclk);
- return ret;
}
static const struct of_device_id msm_iommu_dt_match[] = {
@@ -824,20 +804,11 @@ static const struct of_device_id msm_iommu_dt_match[] = {
{}
};
-static void msm_iommu_remove(struct platform_device *pdev)
-{
- struct msm_iommu_dev *iommu = platform_get_drvdata(pdev);
-
- clk_unprepare(iommu->clk);
- clk_unprepare(iommu->pclk);
-}
-
static struct platform_driver msm_iommu_driver = {
.driver = {
.name = "msm_iommu",
.of_match_table = msm_iommu_dt_match,
},
.probe = msm_iommu_probe,
- .remove = msm_iommu_remove,
};
builtin_platform_driver(msm_iommu_driver);
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index ab60901f8f92..df98d0c65f54 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -29,6 +29,7 @@
#include <linux/spinlock.h>
#include <linux/soc/mediatek/infracfg.h>
#include <linux/soc/mediatek/mtk_sip_svc.h>
+#include <linux/string_choices.h>
#include <asm/barrier.h>
#include <soc/mediatek/smi.h>
@@ -510,7 +511,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
bank->parent_dev,
"fault type=0x%x iova=0x%llx pa=0x%llx master=0x%x(larb=%d port=%d) layer=%d %s\n",
int_state, fault_iova, fault_pa, regval, fault_larb, fault_port,
- layer, write ? "write" : "read");
+ layer, str_write_read(write));
}
/* Interrupt clear */
@@ -602,7 +603,7 @@ static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev,
larb_mmu->bank[portid] = upper_32_bits(region->iova_base);
dev_dbg(dev, "%s iommu for larb(%s) port 0x%lx region %d rgn-bank %d.\n",
- enable ? "enable" : "disable", dev_name(larb_mmu->dev),
+ str_enable_disable(enable), dev_name(larb_mmu->dev),
portid_msk, regionid, upper_32_bits(region->iova_base));
if (enable)
@@ -630,8 +631,8 @@ static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev,
}
if (ret)
dev_err(dev, "%s iommu(%s) inframaster 0x%lx fail(%d).\n",
- enable ? "enable" : "disable",
- dev_name(data->dev), portid_msk, ret);
+ str_enable_disable(enable), dev_name(data->dev),
+ portid_msk, ret);
}
return ret;
}
@@ -1371,15 +1372,6 @@ static int mtk_iommu_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, data);
mutex_init(&data->mutex);
- ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
- "mtk-iommu.%pa", &ioaddr);
- if (ret)
- goto out_link_remove;
-
- ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
- if (ret)
- goto out_sysfs_remove;
-
if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) {
list_add_tail(&data->list, data->plat_data->hw_list);
data->hw_list = data->plat_data->hw_list;
@@ -1389,19 +1381,28 @@ static int mtk_iommu_probe(struct platform_device *pdev)
data->hw_list = &data->hw_list_head;
}
+ ret = iommu_device_sysfs_add(&data->iommu, dev, NULL,
+ "mtk-iommu.%pa", &ioaddr);
+ if (ret)
+ goto out_list_del;
+
+ ret = iommu_device_register(&data->iommu, &mtk_iommu_ops, dev);
+ if (ret)
+ goto out_sysfs_remove;
+
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) {
ret = component_master_add_with_match(dev, &mtk_iommu_com_ops, match);
if (ret)
- goto out_list_del;
+ goto out_device_unregister;
}
return ret;
-out_list_del:
- list_del(&data->list);
+out_device_unregister:
iommu_device_unregister(&data->iommu);
out_sysfs_remove:
iommu_device_sysfs_remove(&data->iommu);
-out_link_remove:
+out_list_del:
+ list_del(&data->list);
if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM))
device_link_remove(data->smicomm_dev, dev);
out_runtime_disable:
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index b6de1ca00cef..66824982e05f 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -25,12 +25,22 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/string_choices.h>
#include <asm/barrier.h>
-#include <asm/dma-iommu.h>
#include <dt-bindings/memory/mtk-memory-port.h>
#include <dt-bindings/memory/mt2701-larb-port.h>
#include <soc/mediatek/smi.h>
+#if defined(CONFIG_ARM)
+#include <asm/dma-iommu.h>
+#else
+#define arm_iommu_create_mapping(...) NULL
+#define arm_iommu_attach_device(...) -ENODEV
+struct dma_iommu_mapping {
+ struct iommu_domain *domain;
+};
+#endif
+
#define REG_MMU_PT_BASE_ADDR 0x000
#define F_ALL_INVLD 0x2
@@ -243,7 +253,7 @@ static void mtk_iommu_v1_config(struct mtk_iommu_v1_data *data,
larb_mmu = &data->larb_imu[larbid];
dev_dbg(dev, "%s iommu port: %d\n",
- enable ? "enable" : "disable", portid);
+ str_enable_disable(enable), portid);
if (enable)
larb_mmu->mmu |= MTK_SMI_MMU_EN(portid);
@@ -445,22 +455,13 @@ static int mtk_iommu_v1_create_mapping(struct device *dev,
static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev)
{
- struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct iommu_fwspec *fwspec = NULL;
struct of_phandle_args iommu_spec;
struct mtk_iommu_v1_data *data;
int err, idx = 0, larbid, larbidx;
struct device_link *link;
struct device *larbdev;
- /*
- * In the deferred case, free the existed fwspec.
- * Always initialize the fwspec internally.
- */
- if (fwspec) {
- iommu_fwspec_free(dev);
- fwspec = dev_iommu_fwspec_get(dev);
- }
-
while (!of_parse_phandle_with_args(dev->of_node, "iommus",
"#iommu-cells",
idx, &iommu_spec)) {
@@ -475,6 +476,9 @@ static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev)
idx++;
}
+ if (!fwspec)
+ return ERR_PTR(-ENODEV);
+
data = dev_iommu_priv_get(dev);
/* Link the consumer device with the smi-larb device(supplier) */
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index e7a6a1611d19..6b989a62def2 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -29,8 +29,6 @@ static int of_iommu_xlate(struct device *dev,
return -ENODEV;
ret = iommu_fwspec_init(dev, of_fwnode_handle(iommu_spec->np));
- if (ret == -EPROBE_DEFER)
- return driver_deferred_probe_check_state(dev);
if (ret)
return ret;
@@ -118,6 +116,7 @@ static void of_pci_check_device_ats(struct device *dev, struct device_node *np)
int of_iommu_configure(struct device *dev, struct device_node *master_np,
const u32 *id)
{
+ bool dev_iommu_present;
int err;
if (!master_np)
@@ -129,6 +128,7 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np,
mutex_unlock(&iommu_probe_device_lock);
return 0;
}
+ dev_iommu_present = dev->iommu;
/*
* We don't currently walk up the tree looking for a parent IOMMU.
@@ -149,11 +149,18 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np,
err = of_iommu_configure_device(master_np, dev, id);
}
- if (err)
+ if (err && dev_iommu_present)
iommu_fwspec_free(dev);
+ else if (err && dev->iommu)
+ dev_iommu_free(dev);
mutex_unlock(&iommu_probe_device_lock);
- if (!err && dev->bus)
+ /*
+ * If we're not on the iommu_probe_device() path (as indicated by the
+ * initial dev->iommu) then try to simulate it. This should no longer
+ * happen unless of_dma_configure() is being misused outside bus code.
+ */
+ if (!err && dev->bus && !dev_iommu_present)
err = iommu_probe_device(dev);
if (err && err != -EPROBE_DEFER)
diff --git a/drivers/iommu/riscv/iommu-pci.c b/drivers/iommu/riscv/iommu-pci.c
index c7a89143014c..d82d2b00904c 100644
--- a/drivers/iommu/riscv/iommu-pci.c
+++ b/drivers/iommu/riscv/iommu-pci.c
@@ -101,6 +101,13 @@ static void riscv_iommu_pci_remove(struct pci_dev *pdev)
riscv_iommu_remove(iommu);
}
+static void riscv_iommu_pci_shutdown(struct pci_dev *pdev)
+{
+ struct riscv_iommu_device *iommu = dev_get_drvdata(&pdev->dev);
+
+ riscv_iommu_disable(iommu);
+}
+
static const struct pci_device_id riscv_iommu_pci_tbl[] = {
{PCI_VDEVICE(REDHAT, PCI_DEVICE_ID_REDHAT_RISCV_IOMMU), 0},
{PCI_VDEVICE(RIVOS, PCI_DEVICE_ID_RIVOS_RISCV_IOMMU_GA), 0},
@@ -112,6 +119,7 @@ static struct pci_driver riscv_iommu_pci_driver = {
.id_table = riscv_iommu_pci_tbl,
.probe = riscv_iommu_pci_probe,
.remove = riscv_iommu_pci_remove,
+ .shutdown = riscv_iommu_pci_shutdown,
.driver = {
.suppress_bind_attrs = true,
},
diff --git a/drivers/iommu/riscv/iommu-platform.c b/drivers/iommu/riscv/iommu-platform.c
index 382ba2841849..725e919b97ef 100644
--- a/drivers/iommu/riscv/iommu-platform.c
+++ b/drivers/iommu/riscv/iommu-platform.c
@@ -11,18 +11,43 @@
*/
#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include "iommu-bits.h"
#include "iommu.h"
+static void riscv_iommu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+ struct device *dev = msi_desc_to_dev(desc);
+ struct riscv_iommu_device *iommu = dev_get_drvdata(dev);
+ u16 idx = desc->msi_index;
+ u64 addr;
+
+ addr = ((u64)msg->address_hi << 32) | msg->address_lo;
+
+ if (addr != (addr & RISCV_IOMMU_MSI_CFG_TBL_ADDR)) {
+ dev_err_once(dev,
+ "uh oh, the IOMMU can't send MSIs to 0x%llx, sending to 0x%llx instead\n",
+ addr, addr & RISCV_IOMMU_MSI_CFG_TBL_ADDR);
+ }
+
+ addr &= RISCV_IOMMU_MSI_CFG_TBL_ADDR;
+
+ riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_MSI_CFG_TBL_ADDR(idx), addr);
+ riscv_iommu_writel(iommu, RISCV_IOMMU_REG_MSI_CFG_TBL_DATA(idx), msg->data);
+ riscv_iommu_writel(iommu, RISCV_IOMMU_REG_MSI_CFG_TBL_CTRL(idx), 0);
+}
+
static int riscv_iommu_platform_probe(struct platform_device *pdev)
{
+ enum riscv_iommu_igs_settings igs;
struct device *dev = &pdev->dev;
struct riscv_iommu_device *iommu = NULL;
struct resource *res = NULL;
- int vec;
+ int vec, ret;
iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
if (!iommu)
@@ -40,16 +65,6 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
iommu->caps = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_CAPABILITIES);
iommu->fctl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FCTL);
- /* For now we only support WSI */
- switch (FIELD_GET(RISCV_IOMMU_CAPABILITIES_IGS, iommu->caps)) {
- case RISCV_IOMMU_CAPABILITIES_IGS_WSI:
- case RISCV_IOMMU_CAPABILITIES_IGS_BOTH:
- break;
- default:
- return dev_err_probe(dev, -ENODEV,
- "unable to use wire-signaled interrupts\n");
- }
-
iommu->irqs_count = platform_irq_count(pdev);
if (iommu->irqs_count <= 0)
return dev_err_probe(dev, -ENODEV,
@@ -57,13 +72,58 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
if (iommu->irqs_count > RISCV_IOMMU_INTR_COUNT)
iommu->irqs_count = RISCV_IOMMU_INTR_COUNT;
- for (vec = 0; vec < iommu->irqs_count; vec++)
- iommu->irqs[vec] = platform_get_irq(pdev, vec);
+ igs = FIELD_GET(RISCV_IOMMU_CAPABILITIES_IGS, iommu->caps);
+ switch (igs) {
+ case RISCV_IOMMU_CAPABILITIES_IGS_BOTH:
+ case RISCV_IOMMU_CAPABILITIES_IGS_MSI:
+ if (is_of_node(dev->fwnode))
+ of_msi_configure(dev, to_of_node(dev->fwnode));
+
+ if (!dev_get_msi_domain(dev)) {
+ dev_warn(dev, "failed to find an MSI domain\n");
+ goto msi_fail;
+ }
+
+ ret = platform_device_msi_init_and_alloc_irqs(dev, iommu->irqs_count,
+ riscv_iommu_write_msi_msg);
+ if (ret) {
+ dev_warn(dev, "failed to allocate MSIs\n");
+ goto msi_fail;
+ }
+
+ for (vec = 0; vec < iommu->irqs_count; vec++)
+ iommu->irqs[vec] = msi_get_virq(dev, vec);
+
+ /* Enable message-signaled interrupts, fctl.WSI */
+ if (iommu->fctl & RISCV_IOMMU_FCTL_WSI) {
+ iommu->fctl ^= RISCV_IOMMU_FCTL_WSI;
+ riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl);
+ }
+
+ dev_info(dev, "using MSIs\n");
+ break;
+
+msi_fail:
+ if (igs != RISCV_IOMMU_CAPABILITIES_IGS_BOTH) {
+ return dev_err_probe(dev, -ENODEV,
+ "unable to use wire-signaled interrupts\n");
+ }
- /* Enable wire-signaled interrupts, fctl.WSI */
- if (!(iommu->fctl & RISCV_IOMMU_FCTL_WSI)) {
- iommu->fctl |= RISCV_IOMMU_FCTL_WSI;
- riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl);
+ fallthrough;
+
+ case RISCV_IOMMU_CAPABILITIES_IGS_WSI:
+ for (vec = 0; vec < iommu->irqs_count; vec++)
+ iommu->irqs[vec] = platform_get_irq(pdev, vec);
+
+ /* Enable wire-signaled interrupts, fctl.WSI */
+ if (!(iommu->fctl & RISCV_IOMMU_FCTL_WSI)) {
+ iommu->fctl |= RISCV_IOMMU_FCTL_WSI;
+ riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL, iommu->fctl);
+ }
+ dev_info(dev, "using wire-signaled interrupts\n");
+ break;
+ default:
+ return dev_err_probe(dev, -ENODEV, "invalid IGS\n");
}
return riscv_iommu_init(iommu);
@@ -71,7 +131,18 @@ static int riscv_iommu_platform_probe(struct platform_device *pdev)
static void riscv_iommu_platform_remove(struct platform_device *pdev)
{
- riscv_iommu_remove(dev_get_drvdata(&pdev->dev));
+ struct riscv_iommu_device *iommu = dev_get_drvdata(&pdev->dev);
+ bool msi = !(iommu->fctl & RISCV_IOMMU_FCTL_WSI);
+
+ riscv_iommu_remove(iommu);
+
+ if (msi)
+ platform_device_msi_free_irqs_all(&pdev->dev);
+};
+
+static void riscv_iommu_platform_shutdown(struct platform_device *pdev)
+{
+ riscv_iommu_disable(dev_get_drvdata(&pdev->dev));
};
static const struct of_device_id riscv_iommu_of_match[] = {
@@ -82,6 +153,7 @@ static const struct of_device_id riscv_iommu_of_match[] = {
static struct platform_driver riscv_iommu_platform_driver = {
.probe = riscv_iommu_platform_probe,
.remove = riscv_iommu_platform_remove,
+ .shutdown = riscv_iommu_platform_shutdown,
.driver = {
.name = "riscv,iommu",
.of_match_table = riscv_iommu_of_match,
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index 38d381164385..8f049d4a0e2c 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -240,6 +240,12 @@ static int riscv_iommu_queue_enable(struct riscv_iommu_device *iommu,
return rc;
}
+ /* Empty queue before enabling it */
+ if (queue->qid == RISCV_IOMMU_INTR_CQ)
+ riscv_iommu_writel(queue->iommu, Q_TAIL(queue), 0);
+ else
+ riscv_iommu_writel(queue->iommu, Q_HEAD(queue), 0);
+
/*
* Enable queue with interrupts, clear any memory fault if any.
* Wait for the hardware to acknowledge request and activate queue
@@ -645,9 +651,11 @@ static struct riscv_iommu_dc *riscv_iommu_get_dc(struct riscv_iommu_device *iomm
* This is best effort IOMMU translation shutdown flow.
* Disable IOMMU without waiting for hardware response.
*/
-static void riscv_iommu_disable(struct riscv_iommu_device *iommu)
+void riscv_iommu_disable(struct riscv_iommu_device *iommu)
{
- riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP, 0);
+ riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP,
+ FIELD_PREP(RISCV_IOMMU_DDTP_IOMMU_MODE,
+ RISCV_IOMMU_DDTP_IOMMU_MODE_BARE));
riscv_iommu_writel(iommu, RISCV_IOMMU_REG_CQCSR, 0);
riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FQCSR, 0);
riscv_iommu_writel(iommu, RISCV_IOMMU_REG_PQCSR, 0);
diff --git a/drivers/iommu/riscv/iommu.h b/drivers/iommu/riscv/iommu.h
index b1c4664542b4..46df79dd5495 100644
--- a/drivers/iommu/riscv/iommu.h
+++ b/drivers/iommu/riscv/iommu.h
@@ -64,6 +64,7 @@ struct riscv_iommu_device {
int riscv_iommu_init(struct riscv_iommu_device *iommu);
void riscv_iommu_remove(struct riscv_iommu_device *iommu);
+void riscv_iommu_disable(struct riscv_iommu_device *iommu);
#define riscv_iommu_readl(iommu, addr) \
readl_relaxed((iommu)->reg + (addr))
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 4b369419b32c..97d4bfcd8241 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -25,6 +25,7 @@
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/string_choices.h>
#include "iommu-pages.h"
@@ -87,6 +88,7 @@ struct rk_iommu_domain {
dma_addr_t dt_dma;
spinlock_t iommus_lock; /* lock for iommus list */
spinlock_t dt_lock; /* lock for modifying page directory table */
+ struct device *dma_dev;
struct iommu_domain domain;
};
@@ -122,7 +124,6 @@ struct rk_iommudata {
struct rk_iommu *iommu;
};
-static struct device *dma_dev;
static const struct rk_iommu_ops *rk_ops;
static struct iommu_domain rk_identity_domain;
@@ -131,7 +132,7 @@ static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma,
{
size_t size = count * sizeof(u32); /* count of u32 entry */
- dma_sync_single_for_device(dma_dev, dma, size, DMA_TO_DEVICE);
+ dma_sync_single_for_device(dom->dma_dev, dma, size, DMA_TO_DEVICE);
}
static struct rk_iommu_domain *to_rk_domain(struct iommu_domain *dom)
@@ -611,7 +612,7 @@ static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
dev_err(iommu->dev, "Page fault at %pad of type %s\n",
&iova,
- (flags == IOMMU_FAULT_WRITE) ? "write" : "read");
+ str_write_read(flags == IOMMU_FAULT_WRITE));
log_iova(iommu, i, iova);
@@ -733,9 +734,9 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
if (!page_table)
return ERR_PTR(-ENOMEM);
- pt_dma = dma_map_single(dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE);
- if (dma_mapping_error(dma_dev, pt_dma)) {
- dev_err(dma_dev, "DMA mapping error while allocating page table\n");
+ pt_dma = dma_map_single(rk_domain->dma_dev, page_table, SPAGE_SIZE, DMA_TO_DEVICE);
+ if (dma_mapping_error(rk_domain->dma_dev, pt_dma)) {
+ dev_err(rk_domain->dma_dev, "DMA mapping error while allocating page table\n");
iommu_free_page(page_table);
return ERR_PTR(-ENOMEM);
}
@@ -1050,9 +1051,7 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev)
{
struct rk_iommu_domain *rk_domain;
-
- if (!dma_dev)
- return NULL;
+ struct rk_iommu *iommu;
rk_domain = kzalloc(sizeof(*rk_domain), GFP_KERNEL);
if (!rk_domain)
@@ -1067,10 +1066,12 @@ static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev)
if (!rk_domain->dt)
goto err_free_domain;
- rk_domain->dt_dma = dma_map_single(dma_dev, rk_domain->dt,
+ iommu = rk_iommu_from_dev(dev);
+ rk_domain->dma_dev = iommu->dev;
+ rk_domain->dt_dma = dma_map_single(rk_domain->dma_dev, rk_domain->dt,
SPAGE_SIZE, DMA_TO_DEVICE);
- if (dma_mapping_error(dma_dev, rk_domain->dt_dma)) {
- dev_err(dma_dev, "DMA map error for DT\n");
+ if (dma_mapping_error(rk_domain->dma_dev, rk_domain->dt_dma)) {
+ dev_err(rk_domain->dma_dev, "DMA map error for DT\n");
goto err_free_dt;
}
@@ -1104,13 +1105,13 @@ static void rk_iommu_domain_free(struct iommu_domain *domain)
if (rk_dte_is_pt_valid(dte)) {
phys_addr_t pt_phys = rk_ops->pt_address(dte);
u32 *page_table = phys_to_virt(pt_phys);
- dma_unmap_single(dma_dev, pt_phys,
+ dma_unmap_single(rk_domain->dma_dev, pt_phys,
SPAGE_SIZE, DMA_TO_DEVICE);
iommu_free_page(page_table);
}
}
- dma_unmap_single(dma_dev, rk_domain->dt_dma,
+ dma_unmap_single(rk_domain->dma_dev, rk_domain->dt_dma,
SPAGE_SIZE, DMA_TO_DEVICE);
iommu_free_page(rk_domain->dt);
@@ -1147,14 +1148,13 @@ static int rk_iommu_of_xlate(struct device *dev,
struct platform_device *iommu_dev;
struct rk_iommudata *data;
- data = devm_kzalloc(dma_dev, sizeof(*data), GFP_KERNEL);
+ iommu_dev = of_find_device_by_node(args->np);
+
+ data = devm_kzalloc(&iommu_dev->dev, sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
- iommu_dev = of_find_device_by_node(args->np);
-
data->iommu = platform_get_drvdata(iommu_dev);
- data->iommu->domain = &rk_identity_domain;
dev_iommu_priv_set(dev, data);
platform_device_put(iommu_dev);
@@ -1192,6 +1192,8 @@ static int rk_iommu_probe(struct platform_device *pdev)
if (!iommu)
return -ENOMEM;
+ iommu->domain = &rk_identity_domain;
+
platform_set_drvdata(pdev, iommu);
iommu->dev = dev;
iommu->num_mmu = 0;
@@ -1255,22 +1257,6 @@ static int rk_iommu_probe(struct platform_device *pdev)
if (err)
return err;
- err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
- if (err)
- goto err_unprepare_clocks;
-
- err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev);
- if (err)
- goto err_remove_sysfs;
-
- /*
- * Use the first registered IOMMU device for domain to use with DMA
- * API, since a domain might not physically correspond to a single
- * IOMMU device..
- */
- if (!dma_dev)
- dma_dev = &pdev->dev;
-
pm_runtime_enable(dev);
for (i = 0; i < iommu->num_irq; i++) {
@@ -1289,12 +1275,19 @@ static int rk_iommu_probe(struct platform_device *pdev)
dma_set_mask_and_coherent(dev, rk_ops->dma_bit_mask);
+ err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
+ if (err)
+ goto err_pm_disable;
+
+ err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev);
+ if (err)
+ goto err_remove_sysfs;
+
return 0;
-err_pm_disable:
- pm_runtime_disable(dev);
err_remove_sysfs:
iommu_device_sysfs_remove(&iommu->iommu);
-err_unprepare_clocks:
+err_pm_disable:
+ pm_runtime_disable(dev);
clk_bulk_unprepare(iommu->num_clocks, iommu->clocks);
return err;
}
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index fbdeded3d48b..e1c76e0f9c2b 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -16,7 +16,7 @@
#include "dma-iommu.h"
-static const struct iommu_ops s390_iommu_ops;
+static const struct iommu_ops s390_iommu_ops, s390_iommu_rtr_ops;
static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
@@ -381,6 +381,46 @@ static void zdev_s390_domain_update(struct zpci_dev *zdev,
spin_unlock_irqrestore(&zdev->dom_lock, flags);
}
+static int s390_iommu_domain_reg_ioat(struct zpci_dev *zdev,
+ struct iommu_domain *domain, u8 *status)
+{
+ struct s390_domain *s390_domain;
+ int rc = 0;
+ u64 iota;
+
+ switch (domain->type) {
+ case IOMMU_DOMAIN_IDENTITY:
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma,
+ zdev->end_dma, 0, status);
+ break;
+ case IOMMU_DOMAIN_BLOCKED:
+ /* Nothing to do in this case */
+ break;
+ default:
+ s390_domain = to_s390_domain(domain);
+ iota = virt_to_phys(s390_domain->dma_table) |
+ ZPCI_IOTA_RTTO_FLAG;
+ rc = zpci_register_ioat(zdev, 0, zdev->start_dma,
+ zdev->end_dma, iota, status);
+ }
+
+ return rc;
+}
+
+int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status)
+{
+ unsigned long flags;
+ int rc;
+
+ spin_lock_irqsave(&zdev->dom_lock, flags);
+
+ rc = s390_iommu_domain_reg_ioat(zdev, zdev->s390_domain, status);
+
+ spin_unlock_irqrestore(&zdev->dom_lock, flags);
+
+ return rc;
+}
+
static int blocking_domain_attach_device(struct iommu_domain *domain,
struct device *dev)
{
@@ -392,9 +432,11 @@ static int blocking_domain_attach_device(struct iommu_domain *domain,
return 0;
s390_domain = to_s390_domain(zdev->s390_domain);
- spin_lock_irqsave(&s390_domain->list_lock, flags);
- list_del_rcu(&zdev->iommu_list);
- spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+ if (zdev->dma_table) {
+ spin_lock_irqsave(&s390_domain->list_lock, flags);
+ list_del_rcu(&zdev->iommu_list);
+ spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+ }
zpci_unregister_ioat(zdev, 0);
zdev->dma_table = NULL;
@@ -422,8 +464,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
blocking_domain_attach_device(&blocking_domain, dev);
/* If we fail now DMA remains blocked via blocking domain */
- cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(s390_domain->dma_table), &status);
+ cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);
if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
return -EIO;
zdev->dma_table = s390_domain->dma_table;
@@ -723,7 +764,13 @@ int zpci_init_iommu(struct zpci_dev *zdev)
if (rc)
goto out_err;
- rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops, NULL);
+ if (zdev->rtr_avail) {
+ rc = iommu_device_register(&zdev->iommu_dev,
+ &s390_iommu_rtr_ops, NULL);
+ } else {
+ rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops,
+ NULL);
+ }
if (rc)
goto out_sysfs;
@@ -787,6 +834,39 @@ static int __init s390_iommu_init(void)
}
subsys_initcall(s390_iommu_init);
+static int s390_attach_dev_identity(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct zpci_dev *zdev = to_zpci_dev(dev);
+ u8 status;
+ int cc;
+
+ blocking_domain_attach_device(&blocking_domain, dev);
+
+ /* If we fail now DMA remains blocked via blocking domain */
+ cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);
+
+ /*
+ * If the device is undergoing error recovery the reset code
+ * will re-establish the new domain.
+ */
+ if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
+ return -EIO;
+
+ zdev_s390_domain_update(zdev, domain);
+
+ return 0;
+}
+
+static const struct iommu_domain_ops s390_identity_ops = {
+ .attach_dev = s390_attach_dev_identity,
+};
+
+static struct iommu_domain s390_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &s390_identity_ops,
+};
+
static struct iommu_domain blocking_domain = {
.type = IOMMU_DOMAIN_BLOCKED,
.ops = &(const struct iommu_domain_ops) {
@@ -794,23 +874,31 @@ static struct iommu_domain blocking_domain = {
}
};
-static const struct iommu_ops s390_iommu_ops = {
- .blocked_domain = &blocking_domain,
- .release_domain = &blocking_domain,
- .capable = s390_iommu_capable,
- .domain_alloc_paging = s390_domain_alloc_paging,
- .probe_device = s390_iommu_probe_device,
- .device_group = generic_device_group,
- .pgsize_bitmap = SZ_4K,
- .get_resv_regions = s390_iommu_get_resv_regions,
- .default_domain_ops = &(const struct iommu_domain_ops) {
- .attach_dev = s390_iommu_attach_device,
- .map_pages = s390_iommu_map_pages,
- .unmap_pages = s390_iommu_unmap_pages,
- .flush_iotlb_all = s390_iommu_flush_iotlb_all,
- .iotlb_sync = s390_iommu_iotlb_sync,
- .iotlb_sync_map = s390_iommu_iotlb_sync_map,
- .iova_to_phys = s390_iommu_iova_to_phys,
- .free = s390_domain_free,
+#define S390_IOMMU_COMMON_OPS() \
+ .blocked_domain = &blocking_domain, \
+ .release_domain = &blocking_domain, \
+ .capable = s390_iommu_capable, \
+ .domain_alloc_paging = s390_domain_alloc_paging, \
+ .probe_device = s390_iommu_probe_device, \
+ .device_group = generic_device_group, \
+ .pgsize_bitmap = SZ_4K, \
+ .get_resv_regions = s390_iommu_get_resv_regions, \
+ .default_domain_ops = &(const struct iommu_domain_ops) { \
+ .attach_dev = s390_iommu_attach_device, \
+ .map_pages = s390_iommu_map_pages, \
+ .unmap_pages = s390_iommu_unmap_pages, \
+ .flush_iotlb_all = s390_iommu_flush_iotlb_all, \
+ .iotlb_sync = s390_iommu_iotlb_sync, \
+ .iotlb_sync_map = s390_iommu_iotlb_sync_map, \
+ .iova_to_phys = s390_iommu_iova_to_phys, \
+ .free = s390_domain_free, \
}
+
+static const struct iommu_ops s390_iommu_ops = {
+ S390_IOMMU_COMMON_OPS()
+};
+
+static const struct iommu_ops s390_iommu_rtr_ops = {
+ .identity_domain = &s390_identity_domain,
+ S390_IOMMU_COMMON_OPS()
};
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 7f633bb5efef..69d353e1df84 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -846,7 +846,6 @@ static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev,
err = ops->of_xlate(dev, args);
if (err < 0) {
dev_err(dev, "failed to parse SW group ID: %d\n", err);
- iommu_fwspec_free(dev);
return err;
}