diff options
Diffstat (limited to 'drivers/iommu/amd')
-rw-r--r-- | drivers/iommu/amd/Kconfig | 1 | ||||
-rw-r--r-- | drivers/iommu/amd/Makefile | 2 | ||||
-rw-r--r-- | drivers/iommu/amd/amd_iommu.h | 8 | ||||
-rw-r--r-- | drivers/iommu/amd/amd_iommu_types.h | 27 | ||||
-rw-r--r-- | drivers/iommu/amd/debugfs.c | 378 | ||||
-rw-r--r-- | drivers/iommu/amd/init.c | 149 | ||||
-rw-r--r-- | drivers/iommu/amd/io_pgtable.c | 42 | ||||
-rw-r--r-- | drivers/iommu/amd/io_pgtable_v2.c | 12 | ||||
-rw-r--r-- | drivers/iommu/amd/iommu.c | 286 | ||||
-rw-r--r-- | drivers/iommu/amd/ppr.c | 2 |
10 files changed, 678 insertions, 229 deletions
diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 994063e5586f..ecef69c11144 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -7,6 +7,7 @@ config AMD_IOMMU select PCI_ATS select PCI_PRI select PCI_PASID + select IRQ_MSI_LIB select MMU_NOTIFIER select IOMMU_API select IOMMU_IOVA diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index 9de33b2d42f5..59c04a67f398 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o +obj-y += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 220c598b7e14..9b4b589a54b5 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -28,9 +28,9 @@ void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp, size_t size); #ifdef CONFIG_AMD_IOMMU_DEBUGFS -void amd_iommu_debugfs_setup(struct amd_iommu *iommu); +void amd_iommu_debugfs_setup(void); #else -static inline void amd_iommu_debugfs_setup(struct amd_iommu *iommu) {} +static inline void amd_iommu_debugfs_setup(void) {} #endif /* Needed for interrupt remapping */ @@ -42,7 +42,9 @@ int amd_iommu_enable_faulting(unsigned int cpu); extern int amd_iommu_guest_ir; extern enum protection_domain_mode amd_iommu_pgtable; extern int amd_iommu_gpt_level; +extern u8 amd_iommu_hpt_level; extern unsigned long amd_iommu_pgsize_bitmap; +extern bool amd_iommu_hatdis; /* Protection domain ops */ void amd_iommu_init_identity_domain(void); @@ -147,6 +149,8 @@ static inline int get_pci_sbdf_id(struct pci_dev *pdev) return PCI_SEG_DEVID_TO_SBDF(seg, devid); } +bool amd_iommu_ht_range_ignore(void); + /* * This must be called after device probe completes. During probe * use rlookup_amd_iommu() get the iommu. diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 5089b58e528a..5219d7ddfdaa 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -29,8 +29,6 @@ * some size calculation constants */ #define DEV_TABLE_ENTRY_SIZE 32 -#define ALIAS_TABLE_ENTRY_SIZE 2 -#define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) /* Capability offsets used by the driver */ #define MMIO_CAP_HDR_OFFSET 0x00 @@ -96,6 +94,7 @@ #define FEATURE_GA BIT_ULL(7) #define FEATURE_HE BIT_ULL(8) #define FEATURE_PC BIT_ULL(9) +#define FEATURE_HATS GENMASK_ULL(11, 10) #define FEATURE_GATS GENMASK_ULL(13, 12) #define FEATURE_GLX GENMASK_ULL(15, 14) #define FEATURE_GAM_VAPIC BIT_ULL(21) @@ -111,6 +110,7 @@ #define FEATURE_SNPAVICSUP GENMASK_ULL(7, 5) #define FEATURE_SNPAVICSUP_GAM(x) \ (FIELD_GET(FEATURE_SNPAVICSUP, x) == 0x1) +#define FEATURE_HT_RANGE_IGNORE BIT_ULL(11) #define FEATURE_NUM_INT_REMAP_SUP GENMASK_ULL(9, 8) #define FEATURE_NUM_INT_REMAP_SUP_2K(x) \ @@ -316,6 +316,7 @@ #define DTE_IRQ_REMAP_INTCTL (2ULL << 60) #define DTE_IRQ_REMAP_ENABLE 1ULL +#define DTE_INTTAB_ALIGNMENT 128 #define DTE_INTTABLEN_MASK (0xfULL << 1) #define DTE_INTTABLEN_VALUE_512 9ULL #define DTE_INTTABLEN_512 (DTE_INTTABLEN_VALUE_512 << 1) @@ -460,6 +461,9 @@ /* IOMMU Feature Reporting Field (for IVHD type 10h */ #define IOMMU_FEAT_GASUP_SHIFT 6 +/* IOMMU HATDIS for IVHD type 11h and 40h */ +#define IOMMU_IVHD_ATTR_HATDIS_SHIFT 0 + /* IOMMU Extended Feature Register (EFR) */ #define IOMMU_EFR_XTSUP_SHIFT 2 #define IOMMU_EFR_GASUP_SHIFT 7 @@ -558,7 +562,8 @@ struct amd_io_pgtable { }; enum protection_domain_mode { - PD_MODE_V1 = 1, + PD_MODE_NONE, + PD_MODE_V1, PD_MODE_V2, }; @@ -616,12 +621,6 @@ struct amd_iommu_pci_seg { /* Size of the device table */ u32 dev_table_size; - /* Size of the alias table */ - u32 alias_table_size; - - /* Size of the rlookup table */ - u32 rlookup_table_size; - /* * device table virtual address * @@ -796,6 +795,8 @@ struct amd_iommu { #ifdef CONFIG_AMD_IOMMU_DEBUGFS /* DebugFS Info */ struct dentry *debugfs; + int dbg_mmio_offset; + int dbg_cap_offset; #endif /* IOPF support */ @@ -897,6 +898,13 @@ struct dev_table_entry { }; /* + * Structure defining one entry in the command buffer + */ +struct iommu_cmd { + u32 data[4]; +}; + +/* * Structure to sture persistent DTE flags from IVHD */ struct ivhd_dte_flags { @@ -1060,7 +1068,6 @@ struct irq_2_irte { }; struct amd_ir_data { - u32 cached_ga_tag; struct amd_iommu *iommu; struct irq_2_irte irq_2_irte; struct msi_msg msi_entry; diff --git a/drivers/iommu/amd/debugfs.c b/drivers/iommu/amd/debugfs.c index 545372fcc72f..10fa217a7119 100644 --- a/drivers/iommu/amd/debugfs.c +++ b/drivers/iommu/amd/debugfs.c @@ -11,22 +11,382 @@ #include <linux/pci.h> #include "amd_iommu.h" +#include "../irq_remapping.h" static struct dentry *amd_iommu_debugfs; -static DEFINE_MUTEX(amd_iommu_debugfs_lock); #define MAX_NAME_LEN 20 +#define OFS_IN_SZ 8 +#define DEVID_IN_SZ 16 -void amd_iommu_debugfs_setup(struct amd_iommu *iommu) +static int sbdf = -1; + +static ssize_t iommu_mmio_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct seq_file *m = filp->private_data; + struct amd_iommu *iommu = m->private; + int ret; + + iommu->dbg_mmio_offset = -1; + + if (cnt > OFS_IN_SZ) + return -EINVAL; + + ret = kstrtou32_from_user(ubuf, cnt, 0, &iommu->dbg_mmio_offset); + if (ret) + return ret; + + if (iommu->dbg_mmio_offset > iommu->mmio_phys_end - 4) { + iommu->dbg_mmio_offset = -1; + return -EINVAL; + } + + return cnt; +} + +static int iommu_mmio_show(struct seq_file *m, void *unused) +{ + struct amd_iommu *iommu = m->private; + u64 value; + + if (iommu->dbg_mmio_offset < 0) { + seq_puts(m, "Please provide mmio register's offset\n"); + return 0; + } + + value = readq(iommu->mmio_base + iommu->dbg_mmio_offset); + seq_printf(m, "Offset:0x%x Value:0x%016llx\n", iommu->dbg_mmio_offset, value); + + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(iommu_mmio); + +static ssize_t iommu_capability_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct seq_file *m = filp->private_data; + struct amd_iommu *iommu = m->private; + int ret; + + iommu->dbg_cap_offset = -1; + + if (cnt > OFS_IN_SZ) + return -EINVAL; + + ret = kstrtou32_from_user(ubuf, cnt, 0, &iommu->dbg_cap_offset); + if (ret) + return ret; + + /* Capability register at offset 0x14 is the last IOMMU capability register. */ + if (iommu->dbg_cap_offset > 0x14) { + iommu->dbg_cap_offset = -1; + return -EINVAL; + } + + return cnt; +} + +static int iommu_capability_show(struct seq_file *m, void *unused) +{ + struct amd_iommu *iommu = m->private; + u32 value; + int err; + + if (iommu->dbg_cap_offset < 0) { + seq_puts(m, "Please provide capability register's offset in the range [0x00 - 0x14]\n"); + return 0; + } + + err = pci_read_config_dword(iommu->dev, iommu->cap_ptr + iommu->dbg_cap_offset, &value); + if (err) { + seq_printf(m, "Not able to read capability register at 0x%x\n", + iommu->dbg_cap_offset); + return 0; + } + + seq_printf(m, "Offset:0x%x Value:0x%08x\n", iommu->dbg_cap_offset, value); + + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(iommu_capability); + +static int iommu_cmdbuf_show(struct seq_file *m, void *unused) +{ + struct amd_iommu *iommu = m->private; + struct iommu_cmd *cmd; + unsigned long flag; + u32 head, tail; + int i; + + raw_spin_lock_irqsave(&iommu->lock, flag); + head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + seq_printf(m, "CMD Buffer Head Offset:%d Tail Offset:%d\n", + (head >> 4) & 0x7fff, (tail >> 4) & 0x7fff); + for (i = 0; i < CMD_BUFFER_ENTRIES; i++) { + cmd = (struct iommu_cmd *)(iommu->cmd_buf + i * sizeof(*cmd)); + seq_printf(m, "%3d: %08x %08x %08x %08x\n", i, cmd->data[0], + cmd->data[1], cmd->data[2], cmd->data[3]); + } + raw_spin_unlock_irqrestore(&iommu->lock, flag); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(iommu_cmdbuf); + +static ssize_t devid_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct amd_iommu_pci_seg *pci_seg; + int seg, bus, slot, func; + struct amd_iommu *iommu; + char *srcid_ptr; + u16 devid; + int i; + + sbdf = -1; + + if (cnt >= DEVID_IN_SZ) + return -EINVAL; + + srcid_ptr = memdup_user_nul(ubuf, cnt); + if (IS_ERR(srcid_ptr)) + return PTR_ERR(srcid_ptr); + + i = sscanf(srcid_ptr, "%x:%x:%x.%x", &seg, &bus, &slot, &func); + if (i != 4) { + i = sscanf(srcid_ptr, "%x:%x.%x", &bus, &slot, &func); + if (i != 3) { + kfree(srcid_ptr); + return -EINVAL; + } + seg = 0; + } + + devid = PCI_DEVID(bus, PCI_DEVFN(slot, func)); + + /* Check if user device id input is a valid input */ + for_each_pci_segment(pci_seg) { + if (pci_seg->id != seg) + continue; + if (devid > pci_seg->last_bdf) { + kfree(srcid_ptr); + return -EINVAL; + } + iommu = pci_seg->rlookup_table[devid]; + if (!iommu) { + kfree(srcid_ptr); + return -ENODEV; + } + break; + } + + if (pci_seg->id != seg) { + kfree(srcid_ptr); + return -EINVAL; + } + + sbdf = PCI_SEG_DEVID_TO_SBDF(seg, devid); + + kfree(srcid_ptr); + + return cnt; +} + +static int devid_show(struct seq_file *m, void *unused) { + u16 devid; + + if (sbdf >= 0) { + devid = PCI_SBDF_TO_DEVID(sbdf); + seq_printf(m, "%04x:%02x:%02x.%x\n", PCI_SBDF_TO_SEGID(sbdf), + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid)); + } else + seq_puts(m, "No or Invalid input provided\n"); + + return 0; +} +DEFINE_SHOW_STORE_ATTRIBUTE(devid); + +static void dump_dte(struct seq_file *m, struct amd_iommu_pci_seg *pci_seg, u16 devid) +{ + struct dev_table_entry *dev_table; + struct amd_iommu *iommu; + + iommu = pci_seg->rlookup_table[devid]; + if (!iommu) + return; + + dev_table = get_dev_table(iommu); + if (!dev_table) { + seq_puts(m, "Device table not found"); + return; + } + + seq_printf(m, "%-12s %16s %16s %16s %16s iommu\n", "DeviceId", + "QWORD[3]", "QWORD[2]", "QWORD[1]", "QWORD[0]"); + seq_printf(m, "%04x:%02x:%02x.%x ", pci_seg->id, PCI_BUS_NUM(devid), + PCI_SLOT(devid), PCI_FUNC(devid)); + for (int i = 3; i >= 0; --i) + seq_printf(m, "%016llx ", dev_table[devid].data[i]); + seq_printf(m, "iommu%d\n", iommu->index); +} + +static int iommu_devtbl_show(struct seq_file *m, void *unused) +{ + struct amd_iommu_pci_seg *pci_seg; + u16 seg, devid; + + if (sbdf < 0) { + seq_puts(m, "Enter a valid device ID to 'devid' file\n"); + return 0; + } + seg = PCI_SBDF_TO_SEGID(sbdf); + devid = PCI_SBDF_TO_DEVID(sbdf); + + for_each_pci_segment(pci_seg) { + if (pci_seg->id != seg) + continue; + dump_dte(m, pci_seg, devid); + break; + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(iommu_devtbl); + +static void dump_128_irte(struct seq_file *m, struct irq_remap_table *table, u16 int_tab_len) +{ + struct irte_ga *ptr, *irte; + int index; + + for (index = 0; index < int_tab_len; index++) { + ptr = (struct irte_ga *)table->table; + irte = &ptr[index]; + + if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && + !irte->lo.fields_vapic.valid) + continue; + else if (!irte->lo.fields_remap.valid) + continue; + seq_printf(m, "IRT[%04d] %016llx %016llx\n", index, irte->hi.val, irte->lo.val); + } +} + +static void dump_32_irte(struct seq_file *m, struct irq_remap_table *table, u16 int_tab_len) +{ + union irte *ptr, *irte; + int index; + + for (index = 0; index < int_tab_len; index++) { + ptr = (union irte *)table->table; + irte = &ptr[index]; + + if (!irte->fields.valid) + continue; + seq_printf(m, "IRT[%04d] %08x\n", index, irte->val); + } +} + +static void dump_irte(struct seq_file *m, u16 devid, struct amd_iommu_pci_seg *pci_seg) +{ + struct dev_table_entry *dev_table; + struct irq_remap_table *table; + struct amd_iommu *iommu; + unsigned long flags; + u16 int_tab_len; + + table = pci_seg->irq_lookup_table[devid]; + if (!table) { + seq_printf(m, "IRQ lookup table not set for %04x:%02x:%02x:%x\n", + pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid)); + return; + } + + iommu = pci_seg->rlookup_table[devid]; + if (!iommu) + return; + + dev_table = get_dev_table(iommu); + if (!dev_table) { + seq_puts(m, "Device table not found"); + return; + } + + int_tab_len = dev_table[devid].data[2] & DTE_INTTABLEN_MASK; + if (int_tab_len != DTE_INTTABLEN_512 && int_tab_len != DTE_INTTABLEN_2K) { + seq_puts(m, "The device's DTE contains an invalid IRT length value."); + return; + } + + seq_printf(m, "DeviceId %04x:%02x:%02x.%x\n", pci_seg->id, PCI_BUS_NUM(devid), + PCI_SLOT(devid), PCI_FUNC(devid)); + + raw_spin_lock_irqsave(&table->lock, flags); + if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) + dump_128_irte(m, table, BIT(int_tab_len >> 1)); + else + dump_32_irte(m, table, BIT(int_tab_len >> 1)); + seq_puts(m, "\n"); + raw_spin_unlock_irqrestore(&table->lock, flags); +} + +static int iommu_irqtbl_show(struct seq_file *m, void *unused) +{ + struct amd_iommu_pci_seg *pci_seg; + u16 devid, seg; + + if (!irq_remapping_enabled) { + seq_puts(m, "Interrupt remapping is disabled\n"); + return 0; + } + + if (sbdf < 0) { + seq_puts(m, "Enter a valid device ID to 'devid' file\n"); + return 0; + } + + seg = PCI_SBDF_TO_SEGID(sbdf); + devid = PCI_SBDF_TO_DEVID(sbdf); + + for_each_pci_segment(pci_seg) { + if (pci_seg->id != seg) + continue; + dump_irte(m, devid, pci_seg); + break; + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(iommu_irqtbl); + +void amd_iommu_debugfs_setup(void) +{ + struct amd_iommu *iommu; char name[MAX_NAME_LEN + 1]; - mutex_lock(&amd_iommu_debugfs_lock); - if (!amd_iommu_debugfs) - amd_iommu_debugfs = debugfs_create_dir("amd", - iommu_debugfs_dir); - mutex_unlock(&amd_iommu_debugfs_lock); + amd_iommu_debugfs = debugfs_create_dir("amd", iommu_debugfs_dir); + + for_each_iommu(iommu) { + iommu->dbg_mmio_offset = -1; + iommu->dbg_cap_offset = -1; + + snprintf(name, MAX_NAME_LEN, "iommu%02d", iommu->index); + iommu->debugfs = debugfs_create_dir(name, amd_iommu_debugfs); + + debugfs_create_file("mmio", 0644, iommu->debugfs, iommu, + &iommu_mmio_fops); + debugfs_create_file("capability", 0644, iommu->debugfs, iommu, + &iommu_capability_fops); + debugfs_create_file("cmdbuf", 0444, iommu->debugfs, iommu, + &iommu_cmdbuf_fops); + } - snprintf(name, MAX_NAME_LEN, "iommu%02d", iommu->index); - iommu->debugfs = debugfs_create_dir(name, amd_iommu_debugfs); + debugfs_create_file("devid", 0644, amd_iommu_debugfs, NULL, + &devid_fops); + debugfs_create_file("devtbl", 0444, amd_iommu_debugfs, NULL, + &iommu_devtbl_fops); + debugfs_create_file("irqtbl", 0444, amd_iommu_debugfs, NULL, + &iommu_irqtbl_fops); } diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 14aa0d77df26..7b5af6176de9 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -152,6 +152,8 @@ bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; enum protection_domain_mode amd_iommu_pgtable = PD_MODE_V1; +/* Host page table level */ +u8 amd_iommu_hpt_level; /* Guest page table level */ int amd_iommu_gpt_level = PAGE_MODE_4_LEVEL; @@ -168,6 +170,9 @@ static int amd_iommu_target_ivhd_type; u64 amd_iommu_efr; u64 amd_iommu_efr2; +/* Host (v1) page table is not supported*/ +bool amd_iommu_hatdis; + /* SNP is enabled on the system? */ bool amd_iommu_snp_en; EXPORT_SYMBOL(amd_iommu_snp_en); @@ -243,17 +248,14 @@ static void init_translation_status(struct amd_iommu *iommu) iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED; } -static inline unsigned long tbl_size(int entry_size, int last_bdf) +int amd_iommu_get_num_iommus(void) { - unsigned shift = PAGE_SHIFT + - get_order((last_bdf + 1) * entry_size); - - return 1UL << shift; + return amd_iommus_present; } -int amd_iommu_get_num_iommus(void) +bool amd_iommu_ht_range_ignore(void) { - return amd_iommus_present; + return check_feature2(FEATURE_HT_RANGE_IGNORE); } /* @@ -634,8 +636,8 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_ /* Allocate per PCI segment device table */ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->dev_table = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, - get_order(pci_seg->dev_table_size)); + pci_seg->dev_table = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32, + pci_seg->dev_table_size); if (!pci_seg->dev_table) return -ENOMEM; @@ -644,16 +646,16 @@ static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg) static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg) { - iommu_free_pages(pci_seg->dev_table, - get_order(pci_seg->dev_table_size)); + iommu_free_pages(pci_seg->dev_table); pci_seg->dev_table = NULL; } /* Allocate per PCI segment IOMMU rlookup table. */ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->rlookup_table = iommu_alloc_pages(GFP_KERNEL, - get_order(pci_seg->rlookup_table_size)); + pci_seg->rlookup_table = kvcalloc(pci_seg->last_bdf + 1, + sizeof(*pci_seg->rlookup_table), + GFP_KERNEL); if (pci_seg->rlookup_table == NULL) return -ENOMEM; @@ -662,17 +664,15 @@ static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg) static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg) { - iommu_free_pages(pci_seg->rlookup_table, - get_order(pci_seg->rlookup_table_size)); + kvfree(pci_seg->rlookup_table); pci_seg->rlookup_table = NULL; } static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) { - pci_seg->irq_lookup_table = iommu_alloc_pages(GFP_KERNEL, - get_order(pci_seg->rlookup_table_size)); - kmemleak_alloc(pci_seg->irq_lookup_table, - pci_seg->rlookup_table_size, 1, GFP_KERNEL); + pci_seg->irq_lookup_table = kvcalloc(pci_seg->last_bdf + 1, + sizeof(*pci_seg->irq_lookup_table), + GFP_KERNEL); if (pci_seg->irq_lookup_table == NULL) return -ENOMEM; @@ -681,9 +681,7 @@ static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_se static inline void free_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg) { - kmemleak_free(pci_seg->irq_lookup_table); - iommu_free_pages(pci_seg->irq_lookup_table, - get_order(pci_seg->rlookup_table_size)); + kvfree(pci_seg->irq_lookup_table); pci_seg->irq_lookup_table = NULL; } @@ -691,8 +689,9 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) { int i; - pci_seg->alias_table = iommu_alloc_pages(GFP_KERNEL, - get_order(pci_seg->alias_table_size)); + pci_seg->alias_table = kvmalloc_array(pci_seg->last_bdf + 1, + sizeof(*pci_seg->alias_table), + GFP_KERNEL); if (!pci_seg->alias_table) return -ENOMEM; @@ -707,8 +706,7 @@ static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg) static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) { - iommu_free_pages(pci_seg->alias_table, - get_order(pci_seg->alias_table_size)); + kvfree(pci_seg->alias_table); pci_seg->alias_table = NULL; } @@ -719,8 +717,7 @@ static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg) */ static int __init alloc_command_buffer(struct amd_iommu *iommu) { - iommu->cmd_buf = iommu_alloc_pages(GFP_KERNEL, - get_order(CMD_BUFFER_SIZE)); + iommu->cmd_buf = iommu_alloc_pages_sz(GFP_KERNEL, CMD_BUFFER_SIZE); return iommu->cmd_buf ? 0 : -ENOMEM; } @@ -817,20 +814,22 @@ static void iommu_disable_command_buffer(struct amd_iommu *iommu) static void __init free_command_buffer(struct amd_iommu *iommu) { - iommu_free_pages(iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); + iommu_free_pages(iommu->cmd_buf); } void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp, size_t size) { - int order = get_order(size); - void *buf = iommu_alloc_pages(gfp, order); + void *buf; - if (buf && - check_feature(FEATURE_SNP) && - set_memory_4k((unsigned long)buf, (1 << order))) { - iommu_free_pages(buf, order); - buf = NULL; + size = PAGE_ALIGN(size); + buf = iommu_alloc_pages_sz(gfp, size); + if (!buf) + return NULL; + if (check_feature(FEATURE_SNP) && + set_memory_4k((unsigned long)buf, size / PAGE_SIZE)) { + iommu_free_pages(buf); + return NULL; } return buf; @@ -873,14 +872,14 @@ static void iommu_disable_event_buffer(struct amd_iommu *iommu) static void __init free_event_buffer(struct amd_iommu *iommu) { - iommu_free_pages(iommu->evt_buf, get_order(EVT_BUFFER_SIZE)); + iommu_free_pages(iommu->evt_buf); } static void free_ga_log(struct amd_iommu *iommu) { #ifdef CONFIG_IRQ_REMAP - iommu_free_pages(iommu->ga_log, get_order(GA_LOG_SIZE)); - iommu_free_pages(iommu->ga_log_tail, get_order(8)); + iommu_free_pages(iommu->ga_log); + iommu_free_pages(iommu->ga_log_tail); #endif } @@ -925,11 +924,11 @@ static int iommu_init_ga_log(struct amd_iommu *iommu) if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) return 0; - iommu->ga_log = iommu_alloc_pages(GFP_KERNEL, get_order(GA_LOG_SIZE)); + iommu->ga_log = iommu_alloc_pages_sz(GFP_KERNEL, GA_LOG_SIZE); if (!iommu->ga_log) goto err_out; - iommu->ga_log_tail = iommu_alloc_pages(GFP_KERNEL, get_order(8)); + iommu->ga_log_tail = iommu_alloc_pages_sz(GFP_KERNEL, 8); if (!iommu->ga_log_tail) goto err_out; @@ -950,7 +949,7 @@ static int __init alloc_cwwb_sem(struct amd_iommu *iommu) static void __init free_cwwb_sem(struct amd_iommu *iommu) { if (iommu->cmd_sem) - iommu_free_page((void *)iommu->cmd_sem); + iommu_free_pages((void *)iommu->cmd_sem); } static void iommu_enable_xt(struct amd_iommu *iommu) @@ -1024,8 +1023,8 @@ static bool __copy_device_table(struct amd_iommu *iommu) if (!old_devtb) return false; - pci_seg->old_dev_tbl_cpy = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, - get_order(pci_seg->dev_table_size)); + pci_seg->old_dev_tbl_cpy = iommu_alloc_pages_sz( + GFP_KERNEL | GFP_DMA32, pci_seg->dev_table_size); if (pci_seg->old_dev_tbl_cpy == NULL) { pr_err("Failed to allocate memory for copying old device table!\n"); memunmap(old_devtb); @@ -1599,9 +1598,9 @@ static struct amd_iommu_pci_seg *__init alloc_pci_segment(u16 id, pci_seg->last_bdf = last_bdf; DUMP_printk("PCI segment : 0x%0x, last bdf : 0x%04x\n", id, last_bdf); - pci_seg->dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE, last_bdf); - pci_seg->alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE, last_bdf); - pci_seg->rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE, last_bdf); + pci_seg->dev_table_size = + max(roundup_pow_of_two((last_bdf + 1) * DEV_TABLE_ENTRY_SIZE), + SZ_4K); pci_seg->id = id; init_llist_head(&pci_seg->dev_data_list); @@ -1798,6 +1797,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h, if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; + if (h->efr_attr & BIT(IOMMU_IVHD_ATTR_HATDIS_SHIFT)) { + pr_warn_once("Host Address Translation is not supported.\n"); + amd_iommu_hatdis = true; + } + early_iommu_features_init(iommu, h); break; @@ -2030,9 +2034,6 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (!iommu->dev) return -ENODEV; - /* Prevent binding other PCI device drivers to IOMMU devices */ - iommu->dev->match_driver = false; - /* ACPI _PRT won't have an IRQ for IOMMU */ iommu->dev->irq_managed = 1; @@ -2121,7 +2122,15 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) return ret; } - iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); + ret = iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); + if (ret || amd_iommu_pgtable == PD_MODE_NONE) { + /* + * Remove sysfs if DMA translation is not supported by the + * IOMMU. Do not return an error to enable IRQ remapping + * in state_next(), DTE[V, TV] must eventually be set to 0. + */ + iommu_device_sysfs_remove(&iommu->iommu); + } return pci_enable_device(iommu->dev); } @@ -2582,7 +2591,7 @@ static void init_device_table_dma(struct amd_iommu_pci_seg *pci_seg) u32 devid; struct dev_table_entry *dev_table = pci_seg->dev_table; - if (dev_table == NULL) + if (!dev_table || amd_iommu_pgtable == PD_MODE_NONE) return; for (devid = 0; devid <= pci_seg->last_bdf; ++devid) { @@ -2789,8 +2798,7 @@ static void early_enable_iommus(void) for_each_pci_segment(pci_seg) { if (pci_seg->old_dev_tbl_cpy != NULL) { - iommu_free_pages(pci_seg->old_dev_tbl_cpy, - get_order(pci_seg->dev_table_size)); + iommu_free_pages(pci_seg->old_dev_tbl_cpy); pci_seg->old_dev_tbl_cpy = NULL; } } @@ -2803,8 +2811,7 @@ static void early_enable_iommus(void) pr_info("Copied DEV table from previous kernel.\n"); for_each_pci_segment(pci_seg) { - iommu_free_pages(pci_seg->dev_table, - get_order(pci_seg->dev_table_size)); + iommu_free_pages(pci_seg->dev_table); pci_seg->dev_table = pci_seg->old_dev_tbl_cpy; } @@ -3044,6 +3051,7 @@ static int __init early_amd_iommu_init(void) struct acpi_table_header *ivrs_base; int ret; acpi_status status; + u8 efr_hats; if (!amd_iommu_detected) return -ENODEV; @@ -3088,6 +3096,19 @@ static int __init early_amd_iommu_init(void) FIELD_GET(FEATURE_GATS, amd_iommu_efr) == GUEST_PGTABLE_5_LEVEL) amd_iommu_gpt_level = PAGE_MODE_5_LEVEL; + efr_hats = FIELD_GET(FEATURE_HATS, amd_iommu_efr); + if (efr_hats != 0x3) { + /* + * efr[HATS] bits specify the maximum host translation level + * supported, with LEVEL 4 being initial max level. + */ + amd_iommu_hpt_level = efr_hats + PAGE_MODE_4_LEVEL; + } else { + pr_warn_once(FW_BUG "Disable host address translation due to invalid translation level (%#x).\n", + efr_hats); + amd_iommu_hatdis = true; + } + if (amd_iommu_pgtable == PD_MODE_V2) { if (!amd_iommu_v2_pgtbl_supported()) { pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n"); @@ -3095,6 +3116,17 @@ static int __init early_amd_iommu_init(void) } } + if (amd_iommu_hatdis) { + /* + * Host (v1) page table is not available. Attempt to use + * Guest (v2) page table. + */ + if (amd_iommu_v2_pgtbl_supported()) + amd_iommu_pgtable = PD_MODE_V2; + else + amd_iommu_pgtable = PD_MODE_NONE; + } + /* Disable any previously enabled IOMMUs */ if (!is_kdump_kernel() || amd_iommu_disabled) disable_iommus(); @@ -3387,7 +3419,6 @@ int amd_iommu_enable_faulting(unsigned int cpu) */ static int __init amd_iommu_init(void) { - struct amd_iommu *iommu; int ret; ret = iommu_go_to_state(IOMMU_INITIALIZED); @@ -3401,8 +3432,8 @@ static int __init amd_iommu_init(void) } #endif - for_each_iommu(iommu) - amd_iommu_debugfs_setup(iommu); + if (!ret) + amd_iommu_debugfs_setup(); return ret; } diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 26cf562dde11..a91e71f981ef 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -47,14 +47,7 @@ static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, return fpte; } -static void free_pt_page(u64 *pt, struct list_head *freelist) -{ - struct page *p = virt_to_page(pt); - - list_add_tail(&p->lru, freelist); -} - -static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl) +static void free_pt_lvl(u64 *pt, struct iommu_pages_list *freelist, int lvl) { u64 *p; int i; @@ -77,20 +70,20 @@ static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl) if (lvl > 2) free_pt_lvl(p, freelist, lvl - 1); else - free_pt_page(p, freelist); + iommu_pages_list_add(freelist, p); } - free_pt_page(pt, freelist); + iommu_pages_list_add(freelist, pt); } -static void free_sub_pt(u64 *root, int mode, struct list_head *freelist) +static void free_sub_pt(u64 *root, int mode, struct iommu_pages_list *freelist) { switch (mode) { case PAGE_MODE_NONE: case PAGE_MODE_7_LEVEL: break; case PAGE_MODE_1_LEVEL: - free_pt_page(root, freelist); + iommu_pages_list_add(freelist, root); break; case PAGE_MODE_2_LEVEL: case PAGE_MODE_3_LEVEL: @@ -121,7 +114,7 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable, bool ret = true; u64 *pte; - pte = iommu_alloc_page_node(cfg->amd.nid, gfp); + pte = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp, SZ_4K); if (!pte) return false; @@ -132,7 +125,7 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable, goto out; ret = false; - if (WARN_ON_ONCE(pgtable->mode == PAGE_MODE_6_LEVEL)) + if (WARN_ON_ONCE(pgtable->mode == amd_iommu_hpt_level)) goto out; *pte = PM_LEVEL_PDE(pgtable->mode, iommu_virt_to_phys(pgtable->root)); @@ -146,7 +139,7 @@ static bool increase_address_space(struct amd_io_pgtable *pgtable, out: spin_unlock_irqrestore(&domain->lock, flags); - iommu_free_page(pte); + iommu_free_pages(pte); return ret; } @@ -213,7 +206,8 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable, if (!IOMMU_PTE_PRESENT(__pte) || pte_level == PAGE_MODE_NONE) { - page = iommu_alloc_page_node(cfg->amd.nid, gfp); + page = iommu_alloc_pages_node_sz(cfg->amd.nid, gfp, + SZ_4K); if (!page) return NULL; @@ -222,7 +216,7 @@ static u64 *alloc_pte(struct amd_io_pgtable *pgtable, /* pte could have been changed somewhere. */ if (!try_cmpxchg64(pte, &__pte, __npte)) - iommu_free_page(page); + iommu_free_pages(page); else if (IOMMU_PTE_PRESENT(__pte)) *updated = true; @@ -299,7 +293,8 @@ static u64 *fetch_pte(struct amd_io_pgtable *pgtable, return pte; } -static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist) +static void free_clear_pte(u64 *pte, u64 pteval, + struct iommu_pages_list *freelist) { u64 *pt; int mode; @@ -328,7 +323,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova, int prot, gfp_t gfp, size_t *mapped) { struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); - LIST_HEAD(freelist); + struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist); bool updated = false; u64 __pte, *pte; int ret, i, count; @@ -353,7 +348,7 @@ static int iommu_v1_map_pages(struct io_pgtable_ops *ops, unsigned long iova, for (i = 0; i < count; ++i) free_clear_pte(&pte[i], pte[i], &freelist); - if (!list_empty(&freelist)) + if (!iommu_pages_list_empty(&freelist)) updated = true; if (count > 1) { @@ -524,14 +519,14 @@ static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops, static void v1_free_pgtable(struct io_pgtable *iop) { struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, pgtbl); - LIST_HEAD(freelist); + struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist); if (pgtable->mode == PAGE_MODE_NONE) return; /* Page-table is not visible to IOMMU anymore, so free it */ BUG_ON(pgtable->mode < PAGE_MODE_NONE || - pgtable->mode > PAGE_MODE_6_LEVEL); + pgtable->mode > amd_iommu_hpt_level); free_sub_pt(pgtable->root, pgtable->mode, &freelist); iommu_put_pages_list(&freelist); @@ -541,7 +536,8 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo { struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); - pgtable->root = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL); + pgtable->root = + iommu_alloc_pages_node_sz(cfg->amd.nid, GFP_KERNEL, SZ_4K); if (!pgtable->root) return NULL; pgtable->mode = PAGE_MODE_3_LEVEL; diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index a56a27396305..b47941353ccb 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -121,10 +121,10 @@ static void free_pgtable(u64 *pt, int level) if (level > 2) free_pgtable(p, level - 1); else - iommu_free_page(p); + iommu_free_pages(p); } - iommu_free_page(pt); + iommu_free_pages(pt); } /* Allocate page table */ @@ -152,14 +152,14 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova, } if (!IOMMU_PTE_PRESENT(__pte)) { - page = iommu_alloc_page_node(nid, gfp); + page = iommu_alloc_pages_node_sz(nid, gfp, SZ_4K); if (!page) return NULL; __npte = set_pgtable_attr(page); /* pte could have been changed somewhere. */ if (!try_cmpxchg64(pte, &__pte, __npte)) - iommu_free_page(page); + iommu_free_pages(page); else if (IOMMU_PTE_PRESENT(__pte)) *updated = true; @@ -181,7 +181,7 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova, if (pg_size == IOMMU_PAGE_SIZE_1G) free_pgtable(__pte, end_level - 1); else if (pg_size == IOMMU_PAGE_SIZE_2M) - iommu_free_page(__pte); + iommu_free_pages(__pte); } return pte; @@ -346,7 +346,7 @@ static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); int ias = IOMMU_IN_ADDR_BIT_SIZE; - pgtable->pgd = iommu_alloc_page_node(cfg->amd.nid, GFP_KERNEL); + pgtable->pgd = iommu_alloc_pages_node_sz(cfg->amd.nid, GFP_KERNEL, SZ_4K); if (!pgtable->pgd) return NULL; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index f34209b08b4c..eb348c63a8d0 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -25,6 +25,7 @@ #include <linux/notifier.h> #include <linux/export.h> #include <linux/irq.h> +#include <linux/irqchip/irq-msi-lib.h> #include <linux/msi.h> #include <linux/irqdomain.h> #include <linux/percpu.h> @@ -63,13 +64,6 @@ static const struct iommu_dirty_ops amd_dirty_ops; int amd_iommu_max_glx_val = -1; /* - * general struct to manage commands send to an IOMMU - */ -struct iommu_cmd { - u32 data[4]; -}; - -/* * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap * to know which ones are already in use. */ @@ -241,7 +235,9 @@ static inline int get_acpihid_device_id(struct device *dev, struct acpihid_map_entry **entry) { struct acpi_device *adev = ACPI_COMPANION(dev); - struct acpihid_map_entry *p; + struct acpihid_map_entry *p, *p1 = NULL; + int hid_count = 0; + bool fw_bug; if (!adev) return -ENODEV; @@ -249,12 +245,33 @@ static inline int get_acpihid_device_id(struct device *dev, list_for_each_entry(p, &acpihid_map, list) { if (acpi_dev_hid_uid_match(adev, p->hid, p->uid[0] ? p->uid : NULL)) { - if (entry) - *entry = p; - return p->devid; + p1 = p; + fw_bug = false; + hid_count = 1; + break; + } + + /* + * Count HID matches w/o UID, raise FW_BUG but allow exactly one match + */ + if (acpi_dev_hid_match(adev, p->hid)) { + p1 = p; + hid_count++; + fw_bug = true; } } - return -EINVAL; + + if (!p1) + return -EINVAL; + if (fw_bug) + dev_err_once(dev, FW_BUG "No ACPI device matched UID, but %d device%s matched HID.\n", + hid_count, hid_count > 1 ? "s" : ""); + if (hid_count > 1) + return -EINVAL; + if (entry) + *entry = p1; + + return p1->devid; } static inline int get_device_sbdf_id(struct device *dev) @@ -611,8 +628,8 @@ static inline void pdev_disable_cap_pasid(struct pci_dev *pdev) static void pdev_enable_caps(struct pci_dev *pdev) { - pdev_enable_cap_ats(pdev); pdev_enable_cap_pasid(pdev); + pdev_enable_cap_ats(pdev); pdev_enable_cap_pri(pdev); } @@ -982,6 +999,14 @@ int amd_iommu_register_ga_log_notifier(int (*notifier)(u32)) { iommu_ga_log_notifier = notifier; + /* + * Ensure all in-flight IRQ handlers run to completion before returning + * to the caller, e.g. to ensure module code isn't unloaded while it's + * being executed in the IRQ handler. + */ + if (!notifier) + synchronize_rcu(); + return 0; } EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier); @@ -1812,7 +1837,7 @@ static void free_gcr3_tbl_level1(u64 *tbl) ptr = iommu_phys_to_virt(tbl[i] & PAGE_MASK); - iommu_free_page(ptr); + iommu_free_pages(ptr); } } @@ -1845,7 +1870,7 @@ static void free_gcr3_table(struct gcr3_tbl_info *gcr3_info) /* Free per device domain ID */ pdom_id_free(gcr3_info->domid); - iommu_free_page(gcr3_info->gcr3_tbl); + iommu_free_pages(gcr3_info->gcr3_tbl); gcr3_info->gcr3_tbl = NULL; } @@ -1884,7 +1909,7 @@ static int setup_gcr3_table(struct gcr3_tbl_info *gcr3_info, return -ENOSPC; gcr3_info->domid = domid; - gcr3_info->gcr3_tbl = iommu_alloc_page_node(nid, GFP_ATOMIC); + gcr3_info->gcr3_tbl = iommu_alloc_pages_node_sz(nid, GFP_ATOMIC, SZ_4K); if (gcr3_info->gcr3_tbl == NULL) { pdom_id_free(domid); return -ENOMEM; @@ -2393,6 +2418,13 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) pci_max_pasids(to_pci_dev(dev))); } + if (amd_iommu_pgtable == PD_MODE_NONE) { + pr_warn_once("%s: DMA translation not supported by iommu.\n", + __func__); + iommu_dev = ERR_PTR(-ENODEV); + goto out_err; + } + out_err: iommu_completion_wait(iommu); @@ -2480,6 +2512,9 @@ static int pdom_setup_pgtable(struct protection_domain *domain, case PD_MODE_V2: fmt = AMD_IOMMU_V2; break; + case PD_MODE_NONE: + WARN_ON_ONCE(1); + return -EPERM; } domain->iop.pgtbl.cfg.amd.nid = dev_to_node(dev); @@ -2493,14 +2528,30 @@ static int pdom_setup_pgtable(struct protection_domain *domain, static inline u64 dma_max_address(enum protection_domain_mode pgtable) { if (pgtable == PD_MODE_V1) - return ~0ULL; + return PM_LEVEL_SIZE(amd_iommu_hpt_level); - /* V2 with 4/5 level page table */ - return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1); + /* + * V2 with 4/5 level page table. Note that "2.2.6.5 AMD64 4-Kbyte Page + * Translation" shows that the V2 table sign extends the top of the + * address space creating a reserved region in the middle of the + * translation, just like the CPU does. Further Vasant says the docs are + * incomplete and this only applies to non-zero PASIDs. If the AMDv2 + * page table is assigned to the 0 PASID then there is no sign extension + * check. + * + * Since the IOMMU must have a fixed geometry, and the core code does + * not understand sign extended addressing, we have to chop off the high + * bit to get consistent behavior with attachments of the domain to any + * PASID. + */ + return ((1ULL << (PM_LEVEL_SHIFT(amd_iommu_gpt_level) - 1)) - 1); } static bool amd_iommu_hd_support(struct amd_iommu *iommu) { + if (amd_iommu_hatdis) + return false; + return iommu && (iommu->features & FEATURE_HDSUP); } @@ -2908,6 +2959,9 @@ static void amd_iommu_get_resv_regions(struct device *dev, return; list_add_tail(®ion->list, head); + if (amd_iommu_ht_range_ignore()) + return; + region = iommu_alloc_resv_region(HT_RANGE_START, HT_RANGE_END - HT_RANGE_START + 1, 0, IOMMU_RESV_RESERVED, GFP_KERNEL); @@ -2984,38 +3038,6 @@ static const struct iommu_dirty_ops amd_dirty_ops = { .read_and_clear_dirty = amd_iommu_read_and_clear_dirty, }; -static int amd_iommu_dev_enable_feature(struct device *dev, - enum iommu_dev_features feat) -{ - int ret = 0; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - case IOMMU_DEV_FEAT_SVA: - break; - default: - ret = -EINVAL; - break; - } - return ret; -} - -static int amd_iommu_dev_disable_feature(struct device *dev, - enum iommu_dev_features feat) -{ - int ret = 0; - - switch (feat) { - case IOMMU_DEV_FEAT_IOPF: - case IOMMU_DEV_FEAT_SVA: - break; - default: - ret = -EINVAL; - break; - } - return ret; -} - const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .blocked_domain = &blocked_domain, @@ -3029,8 +3051,6 @@ const struct iommu_ops amd_iommu_ops = { .get_resv_regions = amd_iommu_get_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, .def_domain_type = amd_iommu_def_domain_type, - .dev_enable_feat = amd_iommu_dev_enable_feature, - .dev_disable_feat = amd_iommu_dev_disable_feature, .page_response = amd_iommu_page_response, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = amd_iommu_attach_device, @@ -3129,7 +3149,7 @@ static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid) return table; } -static struct irq_remap_table *__alloc_irq_table(int nid, int order) +static struct irq_remap_table *__alloc_irq_table(int nid, size_t size) { struct irq_remap_table *table; @@ -3137,7 +3157,8 @@ static struct irq_remap_table *__alloc_irq_table(int nid, int order) if (!table) return NULL; - table->table = iommu_alloc_pages_node(nid, GFP_KERNEL, order); + table->table = iommu_alloc_pages_node_sz( + nid, GFP_KERNEL, max(DTE_INTTAB_ALIGNMENT, size)); if (!table->table) { kfree(table); return NULL; @@ -3191,7 +3212,6 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu, struct irq_remap_table *new_table = NULL; struct amd_iommu_pci_seg *pci_seg; unsigned long flags; - int order = get_order(get_irq_table_size(max_irqs)); int nid = iommu && iommu->dev ? dev_to_node(&iommu->dev->dev) : NUMA_NO_NODE; u16 alias; @@ -3211,7 +3231,7 @@ static struct irq_remap_table *alloc_irq_table(struct amd_iommu *iommu, spin_unlock_irqrestore(&iommu_table_lock, flags); /* Nothing there yet, allocate new irq remapping table */ - new_table = __alloc_irq_table(nid, order); + new_table = __alloc_irq_table(nid, get_irq_table_size(max_irqs)); if (!new_table) return NULL; @@ -3246,7 +3266,7 @@ out_unlock: spin_unlock_irqrestore(&iommu_table_lock, flags); if (new_table) { - iommu_free_pages(new_table->table, order); + iommu_free_pages(new_table->table); kfree(new_table); } return table; @@ -3804,13 +3824,70 @@ static const struct irq_domain_ops amd_ir_domain_ops = { .deactivate = irq_remapping_deactivate, }; -int amd_iommu_activate_guest_mode(void *data) +static void __amd_iommu_update_ga(struct irte_ga *entry, int cpu, + bool ga_log_intr) +{ + if (cpu >= 0) { + entry->lo.fields_vapic.destination = + APICID_TO_IRTE_DEST_LO(cpu); + entry->hi.fields.destination = + APICID_TO_IRTE_DEST_HI(cpu); + entry->lo.fields_vapic.is_run = true; + entry->lo.fields_vapic.ga_log_intr = false; + } else { + entry->lo.fields_vapic.is_run = false; + entry->lo.fields_vapic.ga_log_intr = ga_log_intr; + } +} + +/* + * Update the pCPU information for an IRTE that is configured to post IRQs to + * a vCPU, without issuing an IOMMU invalidation for the IRTE. + * + * If the vCPU is associated with a pCPU (@cpu >= 0), configure the Destination + * with the pCPU's APIC ID, set IsRun, and clear GALogIntr. If the vCPU isn't + * associated with a pCPU (@cpu < 0), clear IsRun and set/clear GALogIntr based + * on input from the caller (e.g. KVM only requests GALogIntr when the vCPU is + * blocking and requires a notification wake event). I.e. treat vCPUs that are + * associated with a pCPU as running. This API is intended to be used when a + * vCPU is scheduled in/out (or stops running for any reason), to do a fast + * update of IsRun, GALogIntr, and (conditionally) Destination. + * + * Per the IOMMU spec, the Destination, IsRun, and GATag fields are not cached + * and thus don't require an invalidation to ensure the IOMMU consumes fresh + * information. + */ +int amd_iommu_update_ga(void *data, int cpu, bool ga_log_intr) +{ + struct amd_ir_data *ir_data = (struct amd_ir_data *)data; + struct irte_ga *entry = (struct irte_ga *) ir_data->entry; + + if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))) + return -EINVAL; + + if (!entry || !entry->lo.fields_vapic.guest_mode) + return 0; + + if (!ir_data->iommu) + return -ENODEV; + + __amd_iommu_update_ga(entry, cpu, ga_log_intr); + + return __modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, + ir_data->irq_2_irte.index, entry); +} +EXPORT_SYMBOL(amd_iommu_update_ga); + +int amd_iommu_activate_guest_mode(void *data, int cpu, bool ga_log_intr) { struct amd_ir_data *ir_data = (struct amd_ir_data *)data; struct irte_ga *entry = (struct irte_ga *) ir_data->entry; u64 valid; - if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || !entry) + if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))) + return -EINVAL; + + if (!entry) return 0; valid = entry->lo.fields_vapic.valid; @@ -3820,11 +3897,12 @@ int amd_iommu_activate_guest_mode(void *data) entry->lo.fields_vapic.valid = valid; entry->lo.fields_vapic.guest_mode = 1; - entry->lo.fields_vapic.ga_log_intr = 1; entry->hi.fields.ga_root_ptr = ir_data->ga_root_ptr; entry->hi.fields.vector = ir_data->ga_vector; entry->lo.fields_vapic.ga_tag = ir_data->ga_tag; + __amd_iommu_update_ga(entry, cpu, ga_log_intr); + return modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, ir_data->irq_2_irte.index, entry); } @@ -3837,8 +3915,10 @@ int amd_iommu_deactivate_guest_mode(void *data) struct irq_cfg *cfg = ir_data->cfg; u64 valid; - if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || - !entry || !entry->lo.fields_vapic.guest_mode) + if (WARN_ON_ONCE(!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))) + return -EINVAL; + + if (!entry || !entry->lo.fields_vapic.guest_mode) return 0; valid = entry->lo.fields_remap.valid; @@ -3860,11 +3940,10 @@ int amd_iommu_deactivate_guest_mode(void *data) } EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode); -static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) +static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *info) { int ret; - struct amd_iommu_pi_data *pi_data = vcpu_info; - struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data; + struct amd_iommu_pi_data *pi_data = info; struct amd_ir_data *ir_data = data->chip_data; struct irq_2_irte *irte_info = &ir_data->irq_2_irte; struct iommu_dev_data *dev_data; @@ -3885,25 +3964,20 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) return -EINVAL; ir_data->cfg = irqd_cfg(data); - pi_data->ir_data = ir_data; - pi_data->prev_ga_tag = ir_data->cached_ga_tag; - if (pi_data->is_guest_mode) { - ir_data->ga_root_ptr = (pi_data->base >> 12); - ir_data->ga_vector = vcpu_pi_info->vector; + if (pi_data) { + pi_data->ir_data = ir_data; + + ir_data->ga_root_ptr = (pi_data->vapic_addr >> 12); + ir_data->ga_vector = pi_data->vector; ir_data->ga_tag = pi_data->ga_tag; - ret = amd_iommu_activate_guest_mode(ir_data); - if (!ret) - ir_data->cached_ga_tag = pi_data->ga_tag; + if (pi_data->is_guest_mode) + ret = amd_iommu_activate_guest_mode(ir_data, pi_data->cpu, + pi_data->ga_log_intr); + else + ret = amd_iommu_deactivate_guest_mode(ir_data); } else { ret = amd_iommu_deactivate_guest_mode(ir_data); - - /* - * This communicates the ga_tag back to the caller - * so that it can do all the necessary clean up. - */ - if (!ret) - ir_data->cached_ga_tag = 0; } return ret; @@ -3970,54 +4044,30 @@ static struct irq_chip amd_ir_chip = { static const struct msi_parent_ops amdvi_msi_parent_ops = { .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI, + .bus_select_token = DOMAIN_BUS_AMDVI, + .bus_select_mask = MATCH_PCI_MSI, .prefix = "IR-", .init_dev_msi_info = msi_parent_init_dev_msi_info, }; int amd_iommu_create_irq_domain(struct amd_iommu *iommu) { - struct fwnode_handle *fn; + struct irq_domain_info info = { + .fwnode = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index), + .ops = &amd_ir_domain_ops, + .domain_flags = IRQ_DOMAIN_FLAG_ISOLATED_MSI, + .host_data = iommu, + .parent = arch_get_ir_parent_domain(), + }; - fn = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index); - if (!fn) + if (!info.fwnode) return -ENOMEM; - iommu->ir_domain = irq_domain_create_hierarchy(arch_get_ir_parent_domain(), 0, 0, - fn, &amd_ir_domain_ops, iommu); + + iommu->ir_domain = msi_create_parent_irq_domain(&info, &amdvi_msi_parent_ops); if (!iommu->ir_domain) { - irq_domain_free_fwnode(fn); + irq_domain_free_fwnode(info.fwnode); return -ENOMEM; } - - irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI); - iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT | - IRQ_DOMAIN_FLAG_ISOLATED_MSI; - iommu->ir_domain->msi_parent_ops = &amdvi_msi_parent_ops; - return 0; } - -int amd_iommu_update_ga(int cpu, bool is_run, void *data) -{ - struct amd_ir_data *ir_data = (struct amd_ir_data *)data; - struct irte_ga *entry = (struct irte_ga *) ir_data->entry; - - if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || - !entry || !entry->lo.fields_vapic.guest_mode) - return 0; - - if (!ir_data->iommu) - return -ENODEV; - - if (cpu >= 0) { - entry->lo.fields_vapic.destination = - APICID_TO_IRTE_DEST_LO(cpu); - entry->hi.fields.destination = - APICID_TO_IRTE_DEST_HI(cpu); - } - entry->lo.fields_vapic.is_run = is_run; - - return __modify_irte_ga(ir_data->iommu, ir_data->irq_2_irte.devid, - ir_data->irq_2_irte.index, entry); -} -EXPORT_SYMBOL(amd_iommu_update_ga); #endif diff --git a/drivers/iommu/amd/ppr.c b/drivers/iommu/amd/ppr.c index 7c67d69f0b8c..e6767c057d01 100644 --- a/drivers/iommu/amd/ppr.c +++ b/drivers/iommu/amd/ppr.c @@ -48,7 +48,7 @@ void amd_iommu_enable_ppr_log(struct amd_iommu *iommu) void __init amd_iommu_free_ppr_log(struct amd_iommu *iommu) { - iommu_free_pages(iommu->ppr_log, get_order(PPR_LOG_SIZE)); + iommu_free_pages(iommu->ppr_log); } /* |