summaryrefslogtreecommitdiff
path: root/drivers/iommu/amd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/amd')
-rw-r--r--drivers/iommu/amd/Kconfig1
-rw-r--r--drivers/iommu/amd/Makefile2
-rw-r--r--drivers/iommu/amd/amd_iommu.h8
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h27
-rw-r--r--drivers/iommu/amd/debugfs.c378
-rw-r--r--drivers/iommu/amd/init.c149
-rw-r--r--drivers/iommu/amd/io_pgtable.c42
-rw-r--r--drivers/iommu/amd/io_pgtable_v2.c12
-rw-r--r--drivers/iommu/amd/iommu.c286
-rw-r--r--drivers/iommu/amd/ppr.c2
10 files changed, 678 insertions, 229 deletions
diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
index 994063e5586f..ecef69c11144 100644
--- a/drivers/iommu/amd/Kconfig
+++ b/drivers/iommu/amd/Kconfig
@@ -7,6 +7,7 @@ config AMD_IOMMU
select PCI_ATS
select PCI_PRI
select PCI_PASID
+ select IRQ_MSI_LIB
select MMU_NOTIFIER
select IOMMU_API
select IOMMU_IOVA
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index 9de33b2d42f5..59c04a67f398 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o
+obj-y += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 220c598b7e14..9b4b589a54b5 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -28,9 +28,9 @@ void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
gfp_t gfp, size_t size);
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
-void amd_iommu_debugfs_setup(struct amd_iommu *iommu);
+void amd_iommu_debugfs_setup(void);
#else
-static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {}
+static inline void amd_iommu_debugfs_setup(void) {}
#endif
/* Needed for interrupt remapping */
@@ -42,7 +42,9 @@ int amd_iommu_enable_faulting(unsigned int cpu);
extern int amd_iommu_guest_ir;
extern enum protection_domain_mode amd_iommu_pgtable;
extern int amd_iommu_gpt_level;
+extern u8 amd_iommu_hpt_level;
extern unsigned long amd_iommu_pgsize_bitmap;
+extern bool amd_iommu_hatdis;
/* Protection domain ops */
void amd_iommu_init_identity_domain(void);
@@ -147,6 +149,8 @@ static inline int get_pci_sbdf_id(struct pci_dev *pdev)
return PCI_SEG_DEVID_TO_SBDF(seg, devid);
}
+bool amd_iommu_ht_range_ignore(void);
+
/*
* This must be called after device probe completes. During probe
* use rlookup_amd_iommu() get the iommu.
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 5089b58e528a..5219d7ddfdaa 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -29,8 +29,6 @@
* some size calculation constants
*/
#define DEV_TABLE_ENTRY_SIZE 32
-#define ALIAS_TABLE_ENTRY_SIZE 2
-#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *))
/* Capability offsets used by the driver */
#define MMIO_CAP_HDR_OFFSET 0x00
@@ -96,6 +94,7 @@
#define FEATURE_GA BIT_ULL(7)
#define FEATURE_HE BIT_ULL(8)
#define FEATURE_PC BIT_ULL(9)
+#define FEATURE_HATS GENMASK_ULL(11, 10)
#define FEATURE_GATS GENMASK_ULL(13, 12)
#define FEATURE_GLX GENMASK_ULL(15, 14)
#define FEATURE_GAM_VAPIC BIT_ULL(21)
@@ -111,6 +110,7 @@
#define FEATURE_SNPAVICSUP GENMASK_ULL(7, 5)
#define FEATURE_SNPAVICSUP_GAM(x) \
(FIELD_GET(FEATURE_SNPAVICSUP, x) == 0x1)
+#define FEATURE_HT_RANGE_IGNORE BIT_ULL(11)
#define FEATURE_NUM_INT_REMAP_SUP GENMASK_ULL(9, 8)
#define FEATURE_NUM_INT_REMAP_SUP_2K(x) \
@@ -316,6 +316,7 @@
#define DTE_IRQ_REMAP_INTCTL (2ULL << 60)
#define DTE_IRQ_REMAP_ENABLE 1ULL
+#define DTE_INTTAB_ALIGNMENT 128
#define DTE_INTTABLEN_MASK (0xfULL << 1)
#define DTE_INTTABLEN_VALUE_512 9ULL
#define DTE_INTTABLEN_512 (DTE_INTTABLEN_VALUE_512 << 1)
@@ -460,6 +461,9 @@
/* IOMMU Feature Reporting Field (for IVHD type 10h */
#define IOMMU_FEAT_GASUP_SHIFT 6
+/* IOMMU HATDIS for IVHD type 11h and 40h */
+#define IOMMU_IVHD_ATTR_HATDIS_SHIFT 0
+
/* IOMMU Extended Feature Register (EFR) */
#define IOMMU_EFR_XTSUP_SHIFT 2
#define IOMMU_EFR_GASUP_SHIFT 7
@@ -558,7 +562,8 @@ struct amd_io_pgtable {
};
enum protection_domain_mode {
- PD_MODE_V1 = 1,
+ PD_MODE_NONE,
+ PD_MODE_V1,
PD_MODE_V2,
};
@@ -616,12 +621,6 @@ struct amd_iommu_pci_seg {
/* Size of the device table */
u32 dev_table_size;
- /* Size of the alias table */
- u32 alias_table_size;
-
- /* Size of the rlookup table */
- u32 rlookup_table_size;
-
/*
* device table virtual address
*
@@ -796,6 +795,8 @@ struct amd_iommu {
#ifdef CONFIG_AMD_IOMMU_DEBUGFS
/* DebugFS Info */
struct dentry *debugfs;
+ int dbg_mmio_offset;
+ int dbg_cap_offset;
#endif
/* IOPF support */
@@ -897,6 +898,13 @@ struct dev_table_entry {
};
/*
+ * Structure defining one entry in the command buffer
+ */
+struct iommu_cmd {
+ u32 data[4];
+};
+
+/*
* Structure to sture persistent DTE flags from IVHD
*/
struct ivhd_dte_flags {
@@ -1060,7 +1068,6 @@ struct irq_2_irte {
};
struct amd_ir_data {
- u32 cached_ga_tag;
struct amd_iommu *iommu;
struct irq_2_irte irq_2_irte;
struct msi_msg msi_entry;
diff --git a/drivers/iommu/amd/debugfs.c b/drivers/iommu/amd/debugfs.c
index 545372fcc72f..10fa217a7119 100644
--- a/drivers/iommu/amd/debugfs.c
+++ b/drivers/iommu/amd/debugfs.c
@@ -11,22 +11,382 @@
#include <linux/pci.h>
#include "amd_iommu.h"
+#include "../irq_remapping.h"
static struct dentry *amd_iommu_debugfs;
-static DEFINE_MUTEX(amd_iommu_debugfs_lock);
#define MAX_NAME_LEN 20
+#define OFS_IN_SZ 8
+#define DEVID_IN_SZ 16
-void amd_iommu_debugfs_setup(struct amd_iommu *iommu)
+static int sbdf = -1;
+
+static ssize_t iommu_mmio_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct seq_file *m = filp->private_data;
+ struct amd_iommu *iommu = m->private;
+ int ret;
+
+ iommu->dbg_mmio_offset = -1;
+
+ if (cnt > OFS_IN_SZ)
+ return -EINVAL;
+
+ ret = kstrtou32_from_user(ubuf, cnt, 0, &iommu->dbg_mmio_offset);
+ if (ret)
+ return ret;
+
+ if (iommu->dbg_mmio_offset > iommu->mmio_phys_end - 4) {
+ iommu->dbg_mmio_offset = -1;
+ return -EINVAL;
+ }
+
+ return cnt;
+}
+
+static int iommu_mmio_show(struct seq_file *m, void *unused)
+{
+ struct amd_iommu *iommu = m->private;
+ u64 value;
+
+ if (iommu->dbg_mmio_offset < 0) {
+ seq_puts(m, "Please provide mmio register's offset\n");
+ return 0;
+ }
+
+ value = readq(iommu->mmio_base + iommu->dbg_mmio_offset);
+ seq_printf(m, "Offset:0x%x Value:0x%016llx\n", iommu->dbg_mmio_offset, value);
+
+ return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(iommu_mmio);
+
+static ssize_t iommu_capability_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct seq_file *m = filp->private_data;
+ struct amd_iommu *iommu = m->private;
+ int ret;
+
+ iommu->dbg_cap_offset = -1;
+
+ if (cnt > OFS_IN_SZ)
+ return -EINVAL;
+
+ ret = kstrtou32_from_user(ubuf, cnt, 0, &iommu->dbg_cap_offset);
+ if (ret)
+ return ret;
+
+ /* Capability register at offset 0x14 is the last IOMMU capability register. */
+ if (iommu->dbg_cap_offset > 0x14) {
+ iommu->dbg_cap_offset = -1;
+ return -EINVAL;
+ }
+
+ return cnt;
+}
+
+static int iommu_capability_show(struct seq_file *m, void *unused)
+{
+ struct amd_iommu *iommu = m->private;
+ u32 value;
+ int err;
+
+ if (iommu->dbg_cap_offset < 0) {
+ seq_puts(m, "Please provide capability register's offset in the range [0x00 - 0x14]\n");
+ return 0;
+ }
+
+ err = pci_read_config_dword(iommu->dev, iommu->cap_ptr + iommu->dbg_cap_offset, &value);
+ if (err) {
+ seq_printf(m, "Not able to read capability register at 0x%x\n",
+ iommu->dbg_cap_offset);
+ return 0;
+ }
+
+ seq_printf(m, "Offset:0x%x Value:0x%08x\n", iommu->dbg_cap_offset, value);
+
+ return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(iommu_capability);
+
+static int iommu_cmdbuf_show(struct seq_file *m, void *unused)
+{
+ struct amd_iommu *iommu = m->private;
+ struct iommu_cmd *cmd;
+ unsigned long flag;
+ u32 head, tail;
+ int i;
+
+ raw_spin_lock_irqsave(&iommu->lock, flag);
+ head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
+ tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+ seq_printf(m, "CMD Buffer Head Offset:%d Tail Offset:%d\n",
+ (head >> 4) & 0x7fff, (tail >> 4) & 0x7fff);
+ for (i = 0; i < CMD_BUFFER_ENTRIES; i++) {
+ cmd = (struct iommu_cmd *)(iommu->cmd_buf + i * sizeof(*cmd));
+ seq_printf(m, "%3d: %08x %08x %08x %08x\n", i, cmd->data[0],
+ cmd->data[1], cmd->data[2], cmd->data[3]);
+ }
+ raw_spin_unlock_irqrestore(&iommu->lock, flag);
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(iommu_cmdbuf);
+
+static ssize_t devid_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct amd_iommu_pci_seg *pci_seg;
+ int seg, bus, slot, func;
+ struct amd_iommu *iommu;
+ char *srcid_ptr;
+ u16 devid;
+ int i;
+
+ sbdf = -1;
+
+ if (cnt >= DEVID_IN_SZ)
+ return -EINVAL;
+
+ srcid_ptr = memdup_user_nul(ubuf, cnt);
+ if (IS_ERR(srcid_ptr))
+ return PTR_ERR(srcid_ptr);
+
+ i = sscanf(srcid_ptr, "%x:%x:%x.%x", &seg, &bus, &slot, &func);
+ if (i != 4) {
+ i = sscanf(srcid_ptr, "%x:%x.%x", &bus, &slot, &func);
+ if (i != 3) {
+ kfree(srcid_ptr);
+ return -EINVAL;
+ }
+ seg = 0;
+ }
+
+ devid = PCI_DEVID(bus, PCI_DEVFN(slot, func));
+
+ /* Check if user device id input is a valid input */
+ for_each_pci_segment(pci_seg) {
+ if (pci_seg->id != seg)
+ continue;
+ if (devid > pci_seg->last_bdf) {
+ kfree(srcid_ptr);
+ return -EINVAL;
+ }
+ iommu = pci_seg->rlookup_table[devid];
+ if (!iommu) {
+ kfree(srcid_ptr);
+ return -ENODEV;
+ }
+ break;
+ }
+
+ if (pci_seg->id != seg) {
+ kfree(srcid_ptr);
+ return -EINVAL;
+ }
+
+ sbdf = PCI_SEG_DEVID_TO_SBDF(seg, devid);
+
+ kfree(srcid_ptr);
+
+ return cnt;
+}
+
+static int devid_show(struct seq_file *m, void *unused)
{
+ u16 devid;
+
+ if (sbdf >= 0) {
+ devid = PCI_SBDF_TO_DEVID(sbdf);
+ seq_printf(m, "%04x:%02x:%02x.%x\n", PCI_SBDF_TO_SEGID(sbdf),
+ PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid));
+ } else
+ seq_puts(m, "No or Invalid input provided\n");
+
+ return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(devid);
+
+static void dump_dte(struct seq_file *m, struct amd_iommu_pci_seg *pci_seg, u16 devid)
+{
+ struct dev_table_entry *dev_table;
+ struct amd_iommu *iommu;
+
+ iommu = pci_seg->rlookup_table[devid];
+ if (!iommu)
+ return;
+
+ dev_table = get_dev_table(iommu);
+ if (!dev_table) {
+ seq_puts(m, "Device table not found");
+ return;
+ }
+
+ seq_printf(m, "%-12s %16s %16s %16s %16s iommu\n", "DeviceId",
+ "QWORD[3]", "QWORD[2]", "QWORD[1]", "QWORD[0]");
+ seq_printf(m, "%04x:%02x:%02x.%x ", pci_seg->id, PCI_BUS_NUM(devid),
+ PCI_SLOT(devid), PCI_FUNC(devid));
+ for (int i = 3; i >= 0; --i)
+ seq_printf(m, "%016llx ", dev_table[devid].data[i]);
+ seq_printf(m, "iommu%d\n", iommu->index);
+}
+
+static int iommu_devtbl_show(struct seq_file *m, void *unused)
+{
+ struct amd_iommu_pci_seg *pci_seg;
+ u16 seg, devid;
+
+ if (sbdf < 0) {
+ seq_puts(m, "Enter a valid device ID to 'devid' file\n");
+ return 0;
+ }
+ seg = PCI_SBDF_TO_SEGID(sbdf);
+ devid = PCI_SBDF_TO_DEVID(sbdf);
+
+ for_each_pci_segment(pci_seg) {
+ if (pci_seg->id != seg)
+ continue;
+ dump_dte(m, pci_seg, devid);
+ break;
+ }
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(iommu_devtbl);
+
+static void dump_128_irte(struct seq_file *m, struct irq_remap_table *table, u16 int_tab_len)
+{
+ struct irte_ga *ptr, *irte;
+ int index;
+
+ for (index = 0; index < int_tab_len; index++) {
+ ptr = (struct irte_ga *)table->table;
+ irte = &ptr[index];
+
+ if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
+ !irte->lo.fields_vapic.valid)
+ continue;
+ else if (!irte->lo.fields_remap.valid)
+ continue;
+ seq_printf(m, "IRT[%04d] %016llx %016llx\n", index, irte->hi.val, irte->lo.val);
+ }
+}
+
+static void dump_32_irte(struct seq_file *m, struct irq_remap_table *table, u16 int_tab_len)
+{
+ union irte *ptr, *irte;
+ int index;
+
+ for (index = 0; index < int_tab_len; index++) {
+ ptr = (union irte *)table->table;
+ irte = &ptr[index];
+
+ if (!irte->fields.valid)
+ continue;
+ seq_printf(m, "IRT[%04d] %08x\n", index, irte->val);
+ }
+}
+
+static void dump_irte(struct seq_file *m, u16 devid, struct amd_iommu_pci_seg *pci_seg)
+{
+ struct dev_table_entry *dev_table;
+ struct irq_remap_table *table;
+ struct amd_iommu *iommu;
+ unsigned long flags;
+ u16 int_tab_len;
+
+ table = pci_seg->irq_lookup_table[devid];
+ if (!table) {
+ seq_printf(m, "IRQ lookup table not set for %04x:%02x:%02x:%x\n",
+ pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid));
+ return;
+ }
+
+ iommu = pci_seg->rlookup_table[devid];
+ if (!iommu)
+ return;
+
+ dev_table = get_dev_table(iommu);
+ if (!dev_table) {
+ seq_puts(m, "Device table not found");
+ return;
+ }
+
+ int_tab_len = dev_table[devid].data[2] & DTE_INTTABLEN_MASK;
+ if (int_tab_len != DTE_INTTABLEN_512 && int_tab_len != DTE_INTTABLEN_2K) {
+ seq_puts(m, "The device's DTE contains an invalid IRT length value.");
+ return;
+ }
+
+ seq_printf(m, "DeviceId %04x:%02x:%02x.%x\n", pci_seg->id, PCI_BUS_NUM(devid),
+ PCI_SLOT(devid), PCI_FUNC(devid));
+
+ raw_spin_lock_irqsave(&table->lock, flags);
+ if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
+ dump_128_irte(m, table, BIT(int_tab_len >> 1));
+ else
+ dump_32_irte(m, table, BIT(int_tab_len >> 1));
+ seq_puts(m, "\n");
+ raw_spin_unlock_irqrestore(&table->lock, flags);
+}
+
+static int iommu_irqtbl_show(struct seq_file *m, void *unused)
+{
+ struct amd_iommu_pci_seg *pci_seg;
+ u16 devid, seg;
+
+ if (!irq_remapping_enabled) {
+ seq_puts(m, "Interrupt remapping is disabled\n");
+ return 0;
+ }
+
+ if (sbdf < 0) {
+ seq_puts(m, "Enter a valid device ID to 'devid' file\n");
+ return 0;
+ }
+
+ seg = PCI_SBDF_TO_SEGID(sbdf);
+ devid = PCI_SBDF_TO_DEVID(sbdf);
+
+ for_each_pci_segment(pci_seg) {
+ if (pci_seg->id != seg)
+ continue;
+ dump_irte(m, devid, pci_seg);
+ break;
+ }
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(iommu_irqtbl);
+
+void amd_iommu_debugfs_setup(void)
+{
+ struct amd_iommu *iommu;
char name[MAX_NAME_LEN + 1];
- mutex_lock(&amd_iommu_debugfs_lock);
- if (!amd_iommu_debugfs)
- amd_iommu_debugfs = debugfs_create_dir("amd",
- iommu_debugfs_dir);
- mutex_unlock(&amd_iommu_debugfs_lock);
+ amd_iommu_debugfs = debugfs_create_dir("amd", iommu_debugfs_dir);
+
+ for_each_iommu(iommu) {
+ iommu->dbg_mmio_offset = -1;
+ iommu->dbg_cap_offset = -1;
+
+ snprintf(name, MAX_NAME_LEN, "iommu%02d", iommu->index);
+ iommu->debugfs = debugfs_create_dir(name, amd_iommu_debugfs);
+
+ debugfs_create_file("mmio", 0644, iommu->debugfs, iommu,
+ &iommu_mmio_fops);
+ debugfs_create_file("capability", 0644, iommu->debugfs, iommu,
+ &iommu_capability_fops);
+ debugfs_create_file("cmdbuf", 0444, iommu->debugfs, iommu,
+ &iommu_cmdbuf_fops);
+ }
- snprintf(name, MAX_NAME_LEN, "iommu%02d", iommu->index);
- iommu->debugfs = debugfs_create_dir(name, amd_iommu_debugfs);
+ debugfs_create_file("devid", 0644, amd_iommu_debugfs, NULL,
+ &devid_fops);
+ debugfs_create_file("devtbl", 0444, amd_iommu_debugfs, NULL,
+ &iommu_devtbl_fops);
+ debugfs_create_file("irqtbl", 0444, amd_iommu_debugfs, NULL,
+ &iommu_irqtbl_fops);
}
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 14aa0d77df26..7b5af6176de9 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -152,6 +152,8 @@ bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;
enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1;
+/* Host page table level */
+u8 amd_iommu_hpt_level;
/* Guest page table level */
int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL;
@@ -168,6 +170,9 @@ static int amd_iommu_target_ivhd_type;
u64 amd_iommu_efr;
u64 amd_iommu_efr2;
+/* Host (v1) page table is not supported*/
+bool amd_iommu_hatdis;
+
/* SNP is enabled on the system? */
bool amd_iommu_snp_en;
EXPORT_SYMBOL(amd_iommu_snp_en);
@@ -243,17 +248,14 @@ static void init_translation_status(struct amd_iommu *iommu)
iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
}
-static inline unsigned long tbl_size(int entry_size, int last_bdf)
+int amd_iommu_get_num_iommus(void)
{
- unsigned shift = PAGE_SHIFT +
- get_order((last_bdf + 1) * entry_size);
-
- return 1UL << shift;
+ return amd_iommus_present;
}
-int amd_iommu_get_num_iommus(void)
+bool amd_iommu_ht_range_ignore(void)
{
- return amd_iommus_present;
+ return check_feature2(FEATURE_HT_RANGE_IGNORE);
}
/*
@@ -634,8 +636,8 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_
/* Allocate per PCI segment device table */
static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
{
- pci_seg->dev_table = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32,
- get_order(pci_seg->dev_table_size));
+ pci_seg->dev_table = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32,
+ pci_seg->dev_table_size);
if (!pci_seg->dev_table)
return -ENOMEM;
@@ -644,16 +646,16 @@ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
{
- iommu_free_pages(pci_seg->dev_table,
- get_order(pci_seg->dev_table_size));
+ iommu_free_pages(pci_seg->dev_table);
pci_seg->dev_table = NULL;
}
/* Allocate per PCI segment IOMMU rlookup table. */
static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
{
- pci_seg->rlookup_table = iommu_alloc_pages(GFP_KERNEL,
- get_order(pci_seg->rlookup_table_size));
+ pci_seg->rlookup_table = kvcalloc(pci_seg->last_bdf + 1,
+ sizeof(*pci_seg->rlookup_table),
+ GFP_KERNEL);
if (pci_seg->rlookup_table == NULL)
return -ENOMEM;
@@ -662,17 +664,15 @@ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
{
- iommu_free_pages(pci_seg->rlookup_table,
- get_order(pci_seg->rlookup_table_size));
+ kvfree(pci_seg->rlookup_table);
pci_seg->rlookup_table = NULL;
}
static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
{
- pci_seg->irq_lookup_table = iommu_alloc_pages(GFP_KERNEL,
- get_order(pci_seg->rlookup_table_size));
- kmemleak_alloc(pci_seg->irq_lookup_table,
- pci_seg->rlookup_table_size, 1, GFP_KERNEL);
+ pci_seg->irq_lookup_table = kvcalloc(pci_seg->last_bdf + 1,
+ sizeof(*pci_seg->irq_lookup_table),
+ GFP_KERNEL);
if (pci_seg->irq_lookup_table == NULL)
return -ENOMEM;
@@ -681,9 +681,7 @@ static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_se
static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
{
- kmemleak_free(pci_seg->irq_lookup_table);
- iommu_free_pages(pci_seg->irq_lookup_table,
- get_order(pci_seg->rlookup_table_size));
+ kvfree(pci_seg->irq_lookup_table);
pci_seg->irq_lookup_table = NULL;
}
@@ -691,8 +689,9 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
{
int i;
- pci_seg->alias_table = iommu_alloc_pages(GFP_KERNEL,
- get_order(pci_seg->alias_table_size));
+ pci_seg->alias_table = kvmalloc_array(pci_seg->last_bdf + 1,
+ sizeof(*pci_seg->alias_table),
+ GFP_KERNEL);
if (!pci_seg->alias_table)
return -ENOMEM;
@@ -707,8 +706,7 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
{
- iommu_free_pages(pci_seg->alias_table,
- get_order(pci_seg->alias_table_size));
+ kvfree(pci_seg->alias_table);
pci_seg->alias_table = NULL;
}
@@ -719,8 +717,7 @@ static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
*/
static int __init alloc_command_buffer(struct amd_iommu *iommu)
{
- iommu->cmd_buf = iommu_alloc_pages(GFP_KERNEL,
- get_order(CMD_BUFFER_SIZE));
+ iommu->cmd_buf = iommu_alloc_pages_sz(GFP_KERNEL, CMD_BUFFER_SIZE);
return iommu->cmd_buf ? 0 : -ENOMEM;
}
@@ -817,20 +814,22 @@ static void iommu_disable_command_buffer(struct amd_iommu *iommu)
static void __init free_command_buffer(struct amd_iommu *iommu)
{
- iommu_free_pages(iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
+ iommu_free_pages(iommu->cmd_buf);
}
void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp,
size_t size)
{
- int order = get_order(size);
- void *buf = iommu_alloc_pages(gfp, order);
+ void *buf;
- if (buf &&
- check_feature(FEATURE_SNP) &&
- set_memory_4k((unsigned long)buf, (1 << order))) {
- iommu_free_pages(buf, order);
- buf = NULL;
+ size = PAGE_ALIGN(size);
+ buf = iommu_alloc_pages_sz(gfp, size);
+ if (!buf)
+ return NULL;
+ if (check_feature(FEATURE_SNP) &&
+ set_memory_4k((unsigned long)buf, size / PAGE_SIZE)) {
+ iommu_free_pages(buf);
+ return NULL;
}
return buf;
@@ -873,14 +872,14 @@ static void iommu_disable_event_buffer(struct amd_iommu *iommu)
static void __init free_event_buffer(struct amd_iommu *iommu)
{
- iommu_free_pages(iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
+ iommu_free_pages(iommu->evt_buf);
}
static void free_ga_log(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
- iommu_free_pages(iommu->ga_log, get_order(GA_LOG_SIZE));
- iommu_free_pages(iommu->ga_log_tail, get_order(8));
+ iommu_free_pages(iommu->ga_log);
+ iommu_free_pages(iommu->ga_log_tail);
#endif
}
@@ -925,11 +924,11 @@ static int iommu_init_ga_log(struct amd_iommu *iommu)
if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
return 0;
- iommu->ga_log = iommu_alloc_pages(GFP_KERNEL, get_order(GA_LOG_SIZE));
+ iommu->ga_log = iommu_alloc_pages_sz(GFP_KERNEL, GA_LOG_SIZE);
if (!iommu->ga_log)
goto err_out;
- iommu->ga_log_tail = iommu_alloc_pages(GFP_KERNEL, get_order(8));
+ iommu->ga_log_tail = iommu_alloc_pages_sz(GFP_KERNEL, 8);
if (!iommu->ga_log_tail)
goto err_out;
@@ -950,7 +949,7 @@ static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
static void __init free_cwwb_sem(struct amd_iommu *iommu)
{
if (iommu->cmd_sem)
- iommu_free_page((void *)iommu->cmd_sem);
+ iommu_free_pages((void *)iommu->cmd_sem);
}
static void iommu_enable_xt(struct amd_iommu *iommu)
@@ -1024,8 +1023,8 @@ static bool __copy_device_table(struct amd_iommu *iommu)
if (!old_devtb)
return false;
- pci_seg->old_dev_tbl_cpy = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32,
- get_order(pci_seg->dev_table_size));
+ pci_seg->old_dev_tbl_cpy = iommu_alloc_pages_sz(
+ GFP_KERNEL | GFP_DMA32, pci_seg->dev_table_size);
if (pci_seg->old_dev_tbl_cpy == NULL) {
pr_err("Failed to allocate memory for copying old device table!\n");
memunmap(old_devtb);
@@ -1599,9 +1598,9 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id,
pci_seg->last_bdf = last_bdf;
DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf);
- pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf);
- pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf);
- pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf);
+ pci_seg->dev_table_size =
+ max(roundup_pow_of_two((last_bdf + 1) * DEV_TABLE_ENTRY_SIZE),
+ SZ_4K);
pci_seg->id = id;
init_llist_head(&pci_seg->dev_data_list);
@@ -1798,6 +1797,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h,
if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
+ if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) {
+ pr_warn_once("Host Address Translation is not supported.\n");
+ amd_iommu_hatdis = true;
+ }
+
early_iommu_features_init(iommu, h);
break;
@@ -2030,9 +2034,6 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
if (!iommu->dev)
return -ENODEV;
- /* Prevent binding other PCI device drivers to IOMMU devices */
- iommu->dev->match_driver = false;
-
/* ACPI _PRT won't have an IRQ for IOMMU */
iommu->dev->irq_managed = 1;
@@ -2121,7 +2122,15 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
return ret;
}
- iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
+ ret = iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
+ if (ret || amd_iommu_pgtable == PD_MODE_NONE) {
+ /*
+ * Remove sysfs if DMA translation is not supported by the
+ * IOMMU. Do not return an error to enable IRQ remapping
+ * in state_next(), DTE[V, TV] must eventually be set to 0.
+ */
+ iommu_device_sysfs_remove(&iommu->iommu);
+ }
return pci_enable_device(iommu->dev);
}
@@ -2582,7 +2591,7 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg)
u32 devid;
struct dev_table_entry *dev_table = pci_seg->dev_table;
- if (dev_table == NULL)
+ if (!dev_table || amd_iommu_pgtable == PD_MODE_NONE)
return;
for (devid = 0; devid <= pci_seg->last_bdf; ++devid) {
@@ -2789,8 +2798,7 @@ static void early_enable_iommus(void)
for_each_pci_segment(pci_seg) {
if (pci_seg->old_dev_tbl_cpy != NULL) {
- iommu_free_pages(pci_seg->old_dev_tbl_cpy,
- get_order(pci_seg->dev_table_size));
+ iommu_free_pages(pci_seg->old_dev_tbl_cpy);
pci_seg->old_dev_tbl_cpy = NULL;
}
}
@@ -2803,8 +2811,7 @@ static void early_enable_iommus(void)
pr_info("Copied DEV table from previous kernel.\n");
for_each_pci_segment(pci_seg) {
- iommu_free_pages(pci_seg->dev_table,
- get_order(pci_seg->dev_table_size));
+ iommu_free_pages(pci_seg->dev_table);
pci_seg->dev_table = pci_seg->old_dev_tbl_cpy;
}
@@ -3044,6 +3051,7 @@ static int __init early_amd_iommu_init(void)
struct acpi_table_header *ivrs_base;
int ret;
acpi_status status;
+ u8 efr_hats;
if (!amd_iommu_detected)
return -ENODEV;
@@ -3088,6 +3096,19 @@ static int __init early_amd_iommu_init(void)
FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL)
amd_iommu_gpt_level = PAGE_MODE_5_LEVEL;
+ efr_hats = FIELD_GET(FEATURE_HATS, amd_iommu_efr);
+ if (efr_hats != 0x3) {
+ /*
+ * efr[HATS] bits specify the maximum host translation level
+ * supported, with LEVEL 4 being initial max level.
+ */
+ amd_iommu_hpt_level = efr_hats + PAGE_MODE_4_LEVEL;
+ } else {
+ pr_warn_once(FW_BUG "Disable host address translation due to invalid translation level (%#x).\n",
+ efr_hats);
+ amd_iommu_hatdis = true;
+ }
+
if (amd_iommu_pgtable == PD_MODE_V2) {
if (!amd_iommu_v2_pgtbl_supported()) {
pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
@@ -3095,6 +3116,17 @@ static int __init early_amd_iommu_init(void)
}
}
+ if (amd_iommu_hatdis) {
+ /*
+ * Host (v1) page table is not available. Attempt to use
+ * Guest (v2) page table.
+ */
+ if (amd_iommu_v2_pgtbl_supported())
+ amd_iommu_pgtable = PD_MODE_V2;
+ else
+ amd_iommu_pgtable = PD_MODE_NONE;
+ }
+
/* Disable any previously enabled IOMMUs */
if (!is_kdump_kernel() || amd_iommu_disabled)
disable_iommus();
@@ -3387,7 +3419,6 @@ int amd_iommu_enable_faulting(unsigned int cpu)
*/
static int __init amd_iommu_init(void)
{
- struct amd_iommu *iommu;
int ret;
ret = iommu_go_to_state(IOMMU_INITIALIZED);
@@ -3401,8 +3432,8 @@ static int __init amd_iommu_init(void)
}
#endif
- for_each_iommu(iommu)
- amd_iommu_debugfs_setup(iommu);
+ if (!ret)
+ amd_iommu_debugfs_setup();
return ret;
}
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 26cf562dde11..a91e71f981ef 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -47,14 +47,7 @@ static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
return fpte;
}
-static void free_pt_page(u64 *pt, struct list_head *freelist)
-{
- struct page *p = virt_to_page(pt);
-
- list_add_tail(&p->lru, freelist);
-}
-
-static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl)
+static void free_pt_lvl(u64 *pt, struct iommu_pages_list *freelist, int lvl)
{
u64 *p;
int i;
@@ -77,20 +70,20 @@ static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl)
if (lvl > 2)
free_pt_lvl(p, freelist, lvl - 1);
else
- free_pt_page(p, freelist);
+ iommu_pages_list_add(freelist, p);
}
- free_pt_page(pt, freelist);
+ iommu_pages_list_add(freelist, pt);
}
-static void free_sub_pt(u64 *root, int mode, struct list_head *freelist)
+static void free_sub_pt(u64 *root, int mode, struct iommu_pages_list *freelist)
{
switch (mode) {
case PAGE_MODE_NONE:
case PAGE_MODE_7_LEVEL:
break;
case PAGE_MODE_1_LEVEL:
- free_pt_page(root, freelist);
+ iommu_pages_list_add(freelist, root);
break;
case PAGE_MODE_2_LEVEL:
case PAGE_MODE_3_LEVEL:
@@ -121,7 +114,7 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable,
bool ret = true;
u64 *pte;
- pte = iommu_alloc_page_node(cfg->amd.nid, gfp);
+ pte = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp, SZ_4K);
if (!pte)
return false;
@@ -132,7 +125,7 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable,
goto out;
ret = false;
- if (WARN_ON_ONCE(pgtable->mode == PAGE_MODE_6_LEVEL))
+ if (WARN_ON_ONCE(pgtable->mode == amd_iommu_hpt_level))
goto out;
*pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root));
@@ -146,7 +139,7 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable,
out:
spin_unlock_irqrestore(&domain->lock, flags);
- iommu_free_page(pte);
+ iommu_free_pages(pte);
return ret;
}
@@ -213,7 +206,8 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
if (!IOMMU_PTE_PRESENT(__pte) ||
pte_level == PAGE_MODE_NONE) {
- page = iommu_alloc_page_node(cfg->amd.nid, gfp);
+ page = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp,
+ SZ_4K);
if (!page)
return NULL;
@@ -222,7 +216,7 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable,
/* pte could have been changed somewhere. */
if (!try_cmpxchg64(pte, &__pte, __npte))
- iommu_free_page(page);
+ iommu_free_pages(page);
else if (IOMMU_PTE_PRESENT(__pte))
*updated = true;
@@ -299,7 +293,8 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
return pte;
}
-static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
+static void free_clear_pte(u64 *pte, u64 pteval,
+ struct iommu_pages_list *freelist)
{
u64 *pt;
int mode;
@@ -328,7 +323,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
int prot, gfp_t gfp, size_t *mapped)
{
struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
- LIST_HEAD(freelist);
+ struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
bool updated = false;
u64 __pte, *pte;
int ret, i, count;
@@ -353,7 +348,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
for (i = 0; i < count; ++i)
free_clear_pte(&pte[i], pte[i], &freelist);
- if (!list_empty(&freelist))
+ if (!iommu_pages_list_empty(&freelist))
updated = true;
if (count > 1) {
@@ -524,14 +519,14 @@ static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
static void v1_free_pgtable(struct io_pgtable *iop)
{
struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl);
- LIST_HEAD(freelist);
+ struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
if (pgtable->mode == PAGE_MODE_NONE)
return;
/* Page-table is not visible to IOMMU anymore, so free it */
BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
- pgtable->mode > PAGE_MODE_6_LEVEL);
+ pgtable->mode > amd_iommu_hpt_level);
free_sub_pt(pgtable->root, pgtable->mode, &freelist);
iommu_put_pages_list(&freelist);
@@ -541,7 +536,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
{
struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
- pgtable->root = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
+ pgtable->root =
+ iommu_alloc_pages_node_sz(cfg->amd.nid, GFP_KERNEL, SZ_4K);
if (!pgtable->root)
return NULL;
pgtable->mode = PAGE_MODE_3_LEVEL;
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
index a56a27396305..b47941353ccb 100644
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -121,10 +121,10 @@ static void free_pgtable(u64 *pt, int level)
if (level > 2)
free_pgtable(p, level - 1);
else
- iommu_free_page(p);
+ iommu_free_pages(p);
}
- iommu_free_page(pt);
+ iommu_free_pages(pt);
}
/* Allocate page table */
@@ -152,14 +152,14 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova,
}
if (!IOMMU_PTE_PRESENT(__pte)) {
- page = iommu_alloc_page_node(nid, gfp);
+ page = iommu_alloc_pages_node_sz(nid, gfp, SZ_4K);
if (!page)
return NULL;
__npte = set_pgtable_attr(page);
/* pte could have been changed somewhere. */
if (!try_cmpxchg64(pte, &__pte, __npte))
- iommu_free_page(page);
+ iommu_free_pages(page);
else if (IOMMU_PTE_PRESENT(__pte))
*updated = true;
@@ -181,7 +181,7 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova,
if (pg_size == IOMMU_PAGE_SIZE_1G)
free_pgtable(__pte, end_level - 1);
else if (pg_size == IOMMU_PAGE_SIZE_2M)
- iommu_free_page(__pte);
+ iommu_free_pages(__pte);
}
return pte;
@@ -346,7 +346,7 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
int ias = IOMMU_IN_ADDR_BIT_SIZE;
- pgtable->pgd = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL);
+ pgtable->pgd = iommu_alloc_pages_node_sz(cfg->amd.nid, GFP_KERNEL, SZ_4K);
if (!pgtable->pgd)
return NULL;
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index f34209b08b4c..eb348c63a8d0 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -25,6 +25,7 @@
#include <linux/notifier.h>
#include <linux/export.h>
#include <linux/irq.h>
+#include <linux/irqchip/irq-msi-lib.h>
#include <linux/msi.h>
#include <linux/irqdomain.h>
#include <linux/percpu.h>
@@ -63,13 +64,6 @@ static const struct iommu_dirty_ops amd_dirty_ops;
int amd_iommu_max_glx_val = -1;
/*
- * general struct to manage commands send to an IOMMU
- */
-struct iommu_cmd {
- u32 data[4];
-};
-
-/*
* AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
* to know which ones are already in use.
*/
@@ -241,7 +235,9 @@ static inline int get_acpihid_device_id(struct device *dev,
struct acpihid_map_entry **entry)
{
struct acpi_device *adev = ACPI_COMPANION(dev);
- struct acpihid_map_entry *p;
+ struct acpihid_map_entry *p, *p1 = NULL;
+ int hid_count = 0;
+ bool fw_bug;
if (!adev)
return -ENODEV;
@@ -249,12 +245,33 @@ static inline int get_acpihid_device_id(struct device *dev,
list_for_each_entry(p, &acpihid_map, list) {
if (acpi_dev_hid_uid_match(adev, p->hid,
p->uid[0] ? p->uid : NULL)) {
- if (entry)
- *entry = p;
- return p->devid;
+ p1 = p;
+ fw_bug = false;
+ hid_count = 1;
+ break;
+ }
+
+ /*
+ * Count HID matches w/o UID, raise FW_BUG but allow exactly one match
+ */
+ if (acpi_dev_hid_match(adev, p->hid)) {
+ p1 = p;
+ hid_count++;
+ fw_bug = true;
}
}
- return -EINVAL;
+
+ if (!p1)
+ return -EINVAL;
+ if (fw_bug)
+ dev_err_once(dev, FW_BUG "No ACPI device matched UID, but %d device%s matched HID.\n",
+ hid_count, hid_count > 1 ? "s" : "");
+ if (hid_count > 1)
+ return -EINVAL;
+ if (entry)
+ *entry = p1;
+
+ return p1->devid;
}
static inline int get_device_sbdf_id(struct device *dev)
@@ -611,8 +628,8 @@ static inline void pdev_disable_cap_pasid(struct pci_dev *pdev)
static void pdev_enable_caps(struct pci_dev *pdev)
{
- pdev_enable_cap_ats(pdev);
pdev_enable_cap_pasid(pdev);
+ pdev_enable_cap_ats(pdev);
pdev_enable_cap_pri(pdev);
}
@@ -982,6 +999,14 @@ int amd_iommu_register_ga_log_notifier(int (*notifier)(u32))
{
iommu_ga_log_notifier = notifier;
+ /*
+ * Ensure all in-flight IRQ handlers run to completion before returning
+ * to the caller, e.g. to ensure module code isn't unloaded while it's
+ * being executed in the IRQ handler.
+ */
+ if (!notifier)
+ synchronize_rcu();
+
return 0;
}
EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier);
@@ -1812,7 +1837,7 @@ static void free_gcr3_tbl_level1(u64 *tbl)
ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK);
- iommu_free_page(ptr);
+ iommu_free_pages(ptr);
}
}
@@ -1845,7 +1870,7 @@ static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info)
/* Free per device domain ID */
pdom_id_free(gcr3_info->domid);
- iommu_free_page(gcr3_info->gcr3_tbl);
+ iommu_free_pages(gcr3_info->gcr3_tbl);
gcr3_info->gcr3_tbl = NULL;
}
@@ -1884,7 +1909,7 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info,
return -ENOSPC;
gcr3_info->domid = domid;
- gcr3_info->gcr3_tbl = iommu_alloc_page_node(nid, GFP_ATOMIC);
+ gcr3_info->gcr3_tbl = iommu_alloc_pages_node_sz(nid, GFP_ATOMIC, SZ_4K);
if (gcr3_info->gcr3_tbl == NULL) {
pdom_id_free(domid);
return -ENOMEM;
@@ -2393,6 +2418,13 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
pci_max_pasids(to_pci_dev(dev)));
}
+ if (amd_iommu_pgtable == PD_MODE_NONE) {
+ pr_warn_once("%s: DMA translation not supported by iommu.\n",
+ __func__);
+ iommu_dev = ERR_PTR(-ENODEV);
+ goto out_err;
+ }
+
out_err:
iommu_completion_wait(iommu);
@@ -2480,6 +2512,9 @@ static int pdom_setup_pgtable(struct protection_domain *domain,
case PD_MODE_V2:
fmt = AMD_IOMMU_V2;
break;
+ case PD_MODE_NONE:
+ WARN_ON_ONCE(1);
+ return -EPERM;
}
domain->iop.pgtbl.cfg.amd.nid = dev_to_node(dev);
@@ -2493,14 +2528,30 @@ static int pdom_setup_pgtable(struct protection_domain *domain,
static inline u64 dma_max_address(enum protection_domain_mode pgtable)
{
if (pgtable == PD_MODE_V1)
- return ~0ULL;
+ return PM_LEVEL_SIZE(amd_iommu_hpt_level);
- /* V2 with 4/5 level page table */
- return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1);
+ /*
+ * V2 with 4/5 level page table. Note that "2.2.6.5 AMD64 4-Kbyte Page
+ * Translation" shows that the V2 table sign extends the top of the
+ * address space creating a reserved region in the middle of the
+ * translation, just like the CPU does. Further Vasant says the docs are
+ * incomplete and this only applies to non-zero PASIDs. If the AMDv2
+ * page table is assigned to the 0 PASID then there is no sign extension
+ * check.
+ *
+ * Since the IOMMU must have a fixed geometry, and the core code does
+ * not understand sign extended addressing, we have to chop off the high
+ * bit to get consistent behavior with attachments of the domain to any
+ * PASID.
+ */
+ return ((1ULL << (PM_LEVEL_SHIFT(amd_iommu_gpt_level) - 1)) - 1);
}
static bool amd_iommu_hd_support(struct amd_iommu *iommu)
{
+ if (amd_iommu_hatdis)
+ return false;
+
return iommu && (iommu->features & FEATURE_HDSUP);
}
@@ -2908,6 +2959,9 @@ static void amd_iommu_get_resv_regions(struct device *dev,
return;
list_add_tail(&region->list, head);
+ if (amd_iommu_ht_range_ignore())
+ return;
+
region = iommu_alloc_resv_region(HT_RANGE_START,
HT_RANGE_END - HT_RANGE_START + 1,
0, IOMMU_RESV_RESERVED, GFP_KERNEL);
@@ -2984,38 +3038,6 @@ static const struct iommu_dirty_ops amd_dirty_ops = {
.read_and_clear_dirty = amd_iommu_read_and_clear_dirty,
};
-static int amd_iommu_dev_enable_feature(struct device *dev,
- enum iommu_dev_features feat)
-{
- int ret = 0;
-
- switch (feat) {
- case IOMMU_DEV_FEAT_IOPF:
- case IOMMU_DEV_FEAT_SVA:
- break;
- default:
- ret = -EINVAL;
- break;
- }
- return ret;
-}
-
-static int amd_iommu_dev_disable_feature(struct device *dev,
- enum iommu_dev_features feat)
-{
- int ret = 0;
-
- switch (feat) {
- case IOMMU_DEV_FEAT_IOPF:
- case IOMMU_DEV_FEAT_SVA:
- break;
- default:
- ret = -EINVAL;
- break;
- }
- return ret;
-}
-
const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.blocked_domain = &blocked_domain,
@@ -3029,8 +3051,6 @@ const struct iommu_ops amd_iommu_ops = {
.get_resv_regions = amd_iommu_get_resv_regions,
.is_attach_deferred = amd_iommu_is_attach_deferred,
.def_domain_type = amd_iommu_def_domain_type,
- .dev_enable_feat = amd_iommu_dev_enable_feature,
- .dev_disable_feat = amd_iommu_dev_disable_feature,
.page_response = amd_iommu_page_response,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = amd_iommu_attach_device,
@@ -3129,7 +3149,7 @@ static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid)
return table;
}
-static struct irq_remap_table *__alloc_irq_table(int nid, int order)
+static struct irq_remap_table *__alloc_irq_table(int nid, size_t size)
{
struct irq_remap_table *table;
@@ -3137,7 +3157,8 @@ static struct irq_remap_table *__alloc_irq_table(int nid, int order)
if (!table)
return NULL;
- table->table = iommu_alloc_pages_node(nid, GFP_KERNEL, order);
+ table->table = iommu_alloc_pages_node_sz(
+ nid, GFP_KERNEL, max(DTE_INTTAB_ALIGNMENT, size));
if (!table->table) {
kfree(table);
return NULL;
@@ -3191,7 +3212,6 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu,
struct irq_remap_table *new_table = NULL;
struct amd_iommu_pci_seg *pci_seg;
unsigned long flags;
- int order = get_order(get_irq_table_size(max_irqs));
int nid = iommu && iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE;
u16 alias;
@@ -3211,7 +3231,7 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu,
spin_unlock_irqrestore(&iommu_table_lock, flags);
/* Nothing there yet, allocate new irq remapping table */
- new_table = __alloc_irq_table(nid, order);
+ new_table = __alloc_irq_table(nid, get_irq_table_size(max_irqs));
if (!new_table)
return NULL;
@@ -3246,7 +3266,7 @@ out_unlock:
spin_unlock_irqrestore(&iommu_table_lock, flags);
if (new_table) {
- iommu_free_pages(new_table->table, order);
+ iommu_free_pages(new_table->table);
kfree(new_table);
}
return table;
@@ -3804,13 +3824,70 @@ static const struct irq_domain_ops amd_ir_domain_ops = {
.deactivate = irq_remapping_deactivate,
};
-int amd_iommu_activate_guest_mode(void *data)
+static void __amd_iommu_update_ga(struct irte_ga *entry, int cpu,
+ bool ga_log_intr)
+{
+ if (cpu >= 0) {
+ entry->lo.fields_vapic.destination =
+ APICID_TO_IRTE_DEST_LO(cpu);
+ entry->hi.fields.destination =
+ APICID_TO_IRTE_DEST_HI(cpu);
+ entry->lo.fields_vapic.is_run = true;
+ entry->lo.fields_vapic.ga_log_intr = false;
+ } else {
+ entry->lo.fields_vapic.is_run = false;
+ entry->lo.fields_vapic.ga_log_intr = ga_log_intr;
+ }
+}
+
+/*
+ * Update the pCPU information for an IRTE that is configured to post IRQs to
+ * a vCPU, without issuing an IOMMU invalidation for the IRTE.
+ *
+ * If the vCPU is associated with a pCPU (@cpu >= 0), configure the Destination
+ * with the pCPU's APIC ID, set IsRun, and clear GALogIntr. If the vCPU isn't
+ * associated with a pCPU (@cpu < 0), clear IsRun and set/clear GALogIntr based
+ * on input from the caller (e.g. KVM only requests GALogIntr when the vCPU is
+ * blocking and requires a notification wake event). I.e. treat vCPUs that are
+ * associated with a pCPU as running. This API is intended to be used when a
+ * vCPU is scheduled in/out (or stops running for any reason), to do a fast
+ * update of IsRun, GALogIntr, and (conditionally) Destination.
+ *
+ * Per the IOMMU spec, the Destination, IsRun, and GATag fields are not cached
+ * and thus don't require an invalidation to ensure the IOMMU consumes fresh
+ * information.
+ */
+int amd_iommu_update_ga(void *data, int cpu, bool ga_log_intr)
+{
+ struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
+ struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+
+ if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)))
+ return -EINVAL;
+
+ if (!entry || !entry->lo.fields_vapic.guest_mode)
+ return 0;
+
+ if (!ir_data->iommu)
+ return -ENODEV;
+
+ __amd_iommu_update_ga(entry, cpu, ga_log_intr);
+
+ return __modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
+ ir_data->irq_2_irte.index, entry);
+}
+EXPORT_SYMBOL(amd_iommu_update_ga);
+
+int amd_iommu_activate_guest_mode(void *data, int cpu, bool ga_log_intr)
{
struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
u64 valid;
- if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || !entry)
+ if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)))
+ return -EINVAL;
+
+ if (!entry)
return 0;
valid = entry->lo.fields_vapic.valid;
@@ -3820,11 +3897,12 @@ int amd_iommu_activate_guest_mode(void *data)
entry->lo.fields_vapic.valid = valid;
entry->lo.fields_vapic.guest_mode = 1;
- entry->lo.fields_vapic.ga_log_intr = 1;
entry->hi.fields.ga_root_ptr = ir_data->ga_root_ptr;
entry->hi.fields.vector = ir_data->ga_vector;
entry->lo.fields_vapic.ga_tag = ir_data->ga_tag;
+ __amd_iommu_update_ga(entry, cpu, ga_log_intr);
+
return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
ir_data->irq_2_irte.index, entry);
}
@@ -3837,8 +3915,10 @@ int amd_iommu_deactivate_guest_mode(void *data)
struct irq_cfg *cfg = ir_data->cfg;
u64 valid;
- if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
- !entry || !entry->lo.fields_vapic.guest_mode)
+ if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)))
+ return -EINVAL;
+
+ if (!entry || !entry->lo.fields_vapic.guest_mode)
return 0;
valid = entry->lo.fields_remap.valid;
@@ -3860,11 +3940,10 @@ int amd_iommu_deactivate_guest_mode(void *data)
}
EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);
-static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
+static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *info)
{
int ret;
- struct amd_iommu_pi_data *pi_data = vcpu_info;
- struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data;
+ struct amd_iommu_pi_data *pi_data = info;
struct amd_ir_data *ir_data = data->chip_data;
struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
struct iommu_dev_data *dev_data;
@@ -3885,25 +3964,20 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
return -EINVAL;
ir_data->cfg = irqd_cfg(data);
- pi_data->ir_data = ir_data;
- pi_data->prev_ga_tag = ir_data->cached_ga_tag;
- if (pi_data->is_guest_mode) {
- ir_data->ga_root_ptr = (pi_data->base >> 12);
- ir_data->ga_vector = vcpu_pi_info->vector;
+ if (pi_data) {
+ pi_data->ir_data = ir_data;
+
+ ir_data->ga_root_ptr = (pi_data->vapic_addr >> 12);
+ ir_data->ga_vector = pi_data->vector;
ir_data->ga_tag = pi_data->ga_tag;
- ret = amd_iommu_activate_guest_mode(ir_data);
- if (!ret)
- ir_data->cached_ga_tag = pi_data->ga_tag;
+ if (pi_data->is_guest_mode)
+ ret = amd_iommu_activate_guest_mode(ir_data, pi_data->cpu,
+ pi_data->ga_log_intr);
+ else
+ ret = amd_iommu_deactivate_guest_mode(ir_data);
} else {
ret = amd_iommu_deactivate_guest_mode(ir_data);
-
- /*
- * This communicates the ga_tag back to the caller
- * so that it can do all the necessary clean up.
- */
- if (!ret)
- ir_data->cached_ga_tag = 0;
}
return ret;
@@ -3970,54 +4044,30 @@ static struct irq_chip amd_ir_chip = {
static const struct msi_parent_ops amdvi_msi_parent_ops = {
.supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI,
+ .bus_select_token = DOMAIN_BUS_AMDVI,
+ .bus_select_mask = MATCH_PCI_MSI,
.prefix = "IR-",
.init_dev_msi_info = msi_parent_init_dev_msi_info,
};
int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
{
- struct fwnode_handle *fn;
+ struct irq_domain_info info = {
+ .fwnode = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index),
+ .ops = &amd_ir_domain_ops,
+ .domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI,
+ .host_data = iommu,
+ .parent = arch_get_ir_parent_domain(),
+ };
- fn = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index);
- if (!fn)
+ if (!info.fwnode)
return -ENOMEM;
- iommu->ir_domain = irq_domain_create_hierarchy(arch_get_ir_parent_domain(), 0, 0,
- fn, &amd_ir_domain_ops, iommu);
+
+ iommu->ir_domain = msi_create_parent_irq_domain(&info, &amdvi_msi_parent_ops);
if (!iommu->ir_domain) {
- irq_domain_free_fwnode(fn);
+ irq_domain_free_fwnode(info.fwnode);
return -ENOMEM;
}
-
- irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI);
- iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT |
- IRQ_DOMAIN_FLAG_ISOLATED_MSI;
- iommu->ir_domain->msi_parent_ops = &amdvi_msi_parent_ops;
-
return 0;
}
-
-int amd_iommu_update_ga(int cpu, bool is_run, void *data)
-{
- struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
- struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
-
- if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
- !entry || !entry->lo.fields_vapic.guest_mode)
- return 0;
-
- if (!ir_data->iommu)
- return -ENODEV;
-
- if (cpu >= 0) {
- entry->lo.fields_vapic.destination =
- APICID_TO_IRTE_DEST_LO(cpu);
- entry->hi.fields.destination =
- APICID_TO_IRTE_DEST_HI(cpu);
- }
- entry->lo.fields_vapic.is_run = is_run;
-
- return __modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid,
- ir_data->irq_2_irte.index, entry);
-}
-EXPORT_SYMBOL(amd_iommu_update_ga);
#endif
diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c
index 7c67d69f0b8c..e6767c057d01 100644
--- a/drivers/iommu/amd/ppr.c
+++ b/drivers/iommu/amd/ppr.c
@@ -48,7 +48,7 @@ void amd_iommu_enable_ppr_log(struct amd_iommu *iommu)
void __init amd_iommu_free_ppr_log(struct amd_iommu *iommu)
{
- iommu_free_pages(iommu->ppr_log, get_order(PPR_LOG_SIZE));
+ iommu_free_pages(iommu->ppr_log);
}
/*